Commit d3f5a354 authored by Thomas Matthews's avatar Thomas Matthews

Merge branch 'development'

parents 26934e11 9e31d877
Pipeline #7982 passed with stage
in 8 minutes and 56 seconds
......@@ -12,3 +12,6 @@ pynsist_pkgs
wxPython*
.virtualenv
.cache
.idea/
.python-version
.pytest_cache/
This diff is collapsed.
......@@ -48,7 +48,10 @@ def read_config_option(key, expected_type=None, default_value=None):
try:
if not expected_type:
value = conf_parser.get("Settings", key)
logging.info("Got configuration for key {}: {}".format(key, value))
if key is "password":
logging.info("Got configuration for key {}: ****".format(key))
else:
logging.info("Got configuration for key {}: {}".format(key, value))
return conf_parser.get("Settings", key)
elif expected_type is bool:
return conf_parser.getboolean("Settings", key)
......
import os
import logging
import time
import threading
from wx.lib.pubsub import pub
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler, FileCreatedEvent
from API.pubsub import send_message
from API.directoryscanner import find_runs_in_directory
from GUI.SettingsDialog import SettingsDialog
# Module-level flag: True while the background monitoring loop should keep
# running. Cleared by stop_monitoring()/RunMonitor.join(), set again by
# start_monitoring().
toMonitor = True
# Seconds to wait between successive scans of the monitored directory.
TIMEBETWEENMONITOR = 120
class DirectoryMonitorTopics(object):
    """Topics for monitoring directories for new runs."""
    # a new run (CompletedJobInfo.xml) was observed; UI should prepare for an upload
    new_run_observed = "new_run_observed"
    # run discovery/parsing finished; UI can start uploading
    finished_discovering_run = "finished_discovering_run"
    # request to stop the directory monitor
    shut_down_directory_monitor = "shut_down_directory_monitor"
    # request to restart the directory monitor
    start_up_directory_monitor = "start_up_directory_monitor"
    # the upload triggered by the monitor has completed
    finished_uploading_run = "finished_uploading_run"
class RunMonitor(threading.Thread):
    """A convenience thread wrapper for monitoring a directory for runs."""

    def __init__(self, directory, cond, name="RunMonitorThread"):
        """Set up the monitor thread.

        Arguments:
        directory -- the directory to watch for new runs
        cond -- threading.Condition shared with the upload machinery
        name -- optional thread name
        """
        self._directory = directory
        self._condition = cond
        super(RunMonitor, self).__init__(name=name)

    def run(self):
        """Start the directory monitor; it re-checks the default directory
        every 2 minutes until monitoring is switched off."""
        monitor_directory(self._directory, self._condition)
class CompletedJobInfoEventHandler(FileSystemEventHandler):
    """A subclass of watchdog.events.FileSystemEventHandler that runs a
    directory scan on the monitored directory, filtering explicitly on a
    file creation event for a file named `CompletedJobInfo.xml`."""

    def on_created(self, event):
        """Overrides `on_created` in `FileSystemEventHandler` to filter on
        file creation events for `CompletedJobInfo.xml`."""
        is_completed_job = (isinstance(event, FileCreatedEvent)
                            and event.src_path.endswith('CompletedJobInfo.xml'))
        if not is_completed_job:
            logging.debug("Ignoring file event [{}] with path [{}]".format(str(event), event.src_path))
            return

        logging.info("Observed new run in {}, telling the UI to start uploading it.".format(event.src_path))
        run_directory = os.path.dirname(event.src_path)
        # tell the UI to clean itself up before observing new runs
        send_message(DirectoryMonitorTopics.new_run_observed)
        # Unlike the UI (which runs discovery in a separate thread), run the
        # scan here and block on it, so we can tell the UI to start uploading
        # once the run has been completely discovered.
        find_runs_in_directory(run_directory)
        # now tell the UI to start
        send_message(DirectoryMonitorTopics.finished_discovering_run)
def monitor_directory(directory):
"""Starts monitoring the specified directory in a background thread. File events
will be passed to the `CompletedJobInfoEventHandler`.
Arguments:
directory: the directory to monitor.
def join(self, timeout=None):
    """Kill the thread.

    Clears the module-level `toMonitor` flag so the monitoring loop exits,
    then joins the underlying thread.

    Arguments:
    timeout -- optional timeout in seconds, forwarded to Thread.join
    """
    global toMonitor
    logging.info("going to kill monitoring")
    # signal monitor_directory()/search_for_upload() to stop looping
    toMonitor = False
    threading.Thread.join(self, timeout)
def on_created(directory, cond):
    """When a CompletedJobInfo.xml file is found without a .miseqUploaderInfo
    file, an automatic upload is triggered.

    Arguments:
    directory -- path to the CompletedJobInfo.xml file that triggered the upload
    cond -- threading.Condition used to block until the upload has finished

    Side effects: sends pubsub messages that the UI listens for, and blocks on
    `cond` until the uploader notifies that the upload is done.
    """
    # NOTE: the unused `observer = Observer()` left over from the old
    # watchdog-based implementation has been removed; it was instantiated and
    # never scheduled, started, or stopped.
    logging.info("Observed new run in {}, telling the UI to start uploading it.".format(directory))
    directory = os.path.dirname(directory)

    # tell the UI to clean itself up before observing new runs
    send_message(DirectoryMonitorTopics.new_run_observed)

    if toMonitor:
        find_runs_in_directory(directory)

    # check if monitoring is still "on" after returning from find_runs_in_directory()
    if toMonitor:
        send_message(DirectoryMonitorTopics.finished_discovering_run)
        # using locks to prevent the monitor from running while an upload is happening.
        cond.acquire()
        cond.wait()
        cond.release()
        send_message(DirectoryMonitorTopics.finished_uploading_run)
def monitor_directory(directory, cond):
    """Scan the default directory for new runs every 2 minutes unless
    monitoring is no longer required.

    Arguments:
    directory -- the directory to monitor for runs
    cond -- threading.Condition handed down to search_for_upload()/on_created()

    Note: stale lines from the old watchdog-based implementation (an
    `observer.schedule(...)` call on an undefined `observer`, a nested
    `stop_monitoring` definition shadowing the module-level one, and the
    associated `pub.subscribe` calls) were merge residue and have been removed;
    they raised NameError as written.
    """
    global toMonitor
    logging.info("Getting ready to monitor directory {}".format(directory))

    # give the rest of the application time to start up before the first scan
    time.sleep(10)
    while toMonitor:
        search_for_upload(directory, cond)
        # sleep in 10-second increments so a shut-down request (toMonitor set
        # to False) is noticed quickly instead of after the full interval
        i = 0
        while toMonitor and i < TIMEBETWEENMONITOR:
            time.sleep(10)
            i = i + 10
def search_for_upload(directory, cond):
    """Loop through subdirectories of the default directory looking for
    CompletedJobInfo.xml without .miseqUploaderInfo files.

    Arguments:
    directory -- the directory whose immediate subdirectories are scanned
    cond -- threading.Condition forwarded to on_created() so the scan can
            block until the triggered upload finishes
    """
    global toMonitor

    if not os.access(directory, os.W_OK):
        logging.warning("Could not access directory while monitoring for samples, directory is not writeable {}".format(directory))
        return

    # Walk once and keep both the root and its immediate subdirectories
    # (the original called next(os.walk(directory)) twice).
    root, dirs, _files = next(os.walk(directory))
    for name in dirs:
        check_for_comp_job = os.path.join(root, name, "CompletedJobInfo.xml")
        check_for_miseq = os.path.join(root, name, ".miseqUploaderInfo")
        if os.path.isfile(check_for_comp_job) and not os.path.isfile(check_for_miseq):
            if toMonitor:
                on_created(check_for_comp_job, cond)
            # After upload, start back at the start of directories
            return
        # Check each step of loop if monitoring is still required
        if not toMonitor:
            return
def stop_monitoring():
    """Stop directory monitoring by setting toMonitor to False.

    The monitoring loops in monitor_directory()/search_for_upload() poll this
    flag and exit once it is cleared.

    Note: the trailing `observer.start()` was merge residue from the old
    watchdog-based implementation -- `observer` is undefined at module level,
    so the call raised NameError on every invocation; it has been removed.
    """
    global toMonitor
    if toMonitor:
        logging.info("Halting monitoring on directory.")
        toMonitor = False
def start_monitoring():
    """Restart directory monitoring by raising the toMonitor flag."""
    global toMonitor
    if toMonitor:
        # monitoring already active; nothing to do
        return
    logging.info("Restarting monitor on directory")
    toMonitor = True
import json
import logging
from os import path
import os
from Exceptions.SampleSheetError import SampleSheetError
from Exceptions.SequenceFileError import SequenceFileError
from Exceptions.SampleError import SampleError
from Validation.offlineValidation import validate_sample_sheet, validate_sample_list
from Parsers.miseqParser import parse_metadata, complete_parse_samples
from Model.SequencingRun import SequencingRun
from API.fileutils import find_file_by_name
from API.pubsub import send_message
class DirectoryScannerTopics(object):
"""Topics issued by `find_runs_in_directory`"""
finished_run_scan = "finished_run_scan"
......@@ -17,6 +17,7 @@ class DirectoryScannerTopics(object):
garbled_sample_sheet = "garbled_sample_sheet"
missing_files = "missing_files"
def find_run_directory_list(run_dir):
    """Find and return all directories (including this one) in the specified directory.

    Arguments:
    run_dir -- the directory to find directories in

    Returns: a list of directory names, including the current directory
    """
    # Check that we can write to the given directory; return empty and log a
    # warning if we cannot.
    if not os.access(run_dir, os.W_OK):
        logging.warning("The following directory is not writeable, "
                        "can not upload any samples from this directory {}".format(run_dir))
        return []

    dir_list = next(os.walk(run_dir))[1]  # the immediate subdirectories
    dir_list.append(run_dir)  # add the current directory to the list too
    return dir_list


def dir_has_samples_not_uploaded(sample_dir):
    """Check whether a directory contains samples that still need uploading.

    A directory has samples when it contains a SampleSheet.csv file; upload
    state is tracked in a .miseqUploaderInfo JSON file next to it.

    Arguments:
    sample_dir -- the directory to check for samples

    Returns: Boolean,
    True: directory has samples not uploaded, or partially uploaded samples
    False: directory has no samples, its samples are already uploaded,
           or the directory cannot be written to (permissions issue)
    """
    # Check that we can write to the directory; return False and log a warning
    # if we cannot.
    if not os.access(sample_dir, os.W_OK):
        logging.warning("The following directory is not writeable, "
                        "can not upload any samples from this directory {}".format(sample_dir))
        return False

    file_list = next(os.walk(sample_dir))[2]  # the files in the directory
    if 'SampleSheet.csv' in file_list:
        if '.miseqUploaderInfo' in file_list:
            # Must check status of upload to determine if upload is completed
            uploader_info_file = os.path.join(sample_dir, '.miseqUploaderInfo')
            with open(uploader_info_file, "rb") as reader:
                info_file = json.load(reader)
                # True: has samples whose upload never completed
                return info_file["Upload Status"] != "Complete"
        else:
            # SampleSheet.csv with no .miseqUploaderInfo file: not uploaded yet
            return True

    return False  # no SampleSheet.csv: does not have samples


def find_runs_in_directory(directory):
    """Find and validate all runs in the specified directory.

    Usage:
    Can be used on the directory containing the SampleSheet.csv file (a single run)
    Can be used on the directory containing directories with SampleSheet.csv files
    in them (a group of runs). Runs whose upload already completed are skipped.

    Arguments:
    directory -- the directory to find sequencing runs in

    Returns: a list of populated SequencingRun objects found in the directory,
    ready to be uploaded.
    """
    logging.info("looking for sample sheet in {}".format(directory))

    sample_sheets = []
    directory_list = find_run_directory_list(directory)
    for d in directory_list:
        current_directory = os.path.join(directory, d)
        if dir_has_samples_not_uploaded(current_directory):
            sample_sheets.append(os.path.join(current_directory, 'SampleSheet.csv'))
    logging.info("found sample sheets (filtered): {}".format(", ".join(sample_sheets)))

    # Only appending sheets that parse and validate cleanly; errors are
    # collected and reported to the UI via pubsub rather than aborting the scan.
    sequencing_runs = []
    for sheet in sample_sheets:
        try:
            sequencing_runs.append(process_sample_sheet(sheet))
        except SampleSheetError as e:
            logging.exception("Failed to parse sample sheet.")
            send_message(DirectoryScannerTopics.garbled_sample_sheet, sample_sheet=sheet, error=e)
        except SampleError as e:
            logging.exception("Failed to parse sample.")
            send_message(DirectoryScannerTopics.garbled_sample_sheet, sample_sheet=sheet, error=e)
        except SequenceFileError as e:
            logging.exception("Failed to find files for sample sheet.")
            send_message(DirectoryScannerTopics.missing_files, sample_sheet=sheet, error=e)

    send_message(DirectoryScannerTopics.finished_run_scan)

    return sequencing_runs
def process_sample_sheet(sample_sheet):
    """Create a SequencingRun object for the specified sample sheet.

    Parsing and validation errors are raised to the caller
    (find_runs_in_directory), which reports them to the UI; the old in-function
    try/except (Python-2 `except X, e` syntax) and the duplicated statements
    left by the merge diff have been removed.

    Arguments:
    sample_sheet -- path to the SampleSheet.csv file to process

    Returns: an individual SequencingRun object for the sample sheet,
    ready to be uploaded.

    Raises:
    SampleSheetError -- when the sheet metadata cannot be parsed or is invalid
    SampleError -- when the sample section cannot be parsed or is invalid
    SequenceFileError -- when sequence files for the samples cannot be found
    """
    logging.info("going to parse metadata")
    run_metadata = parse_metadata(sample_sheet)

    logging.info("going to parse samples")
    samples = complete_parse_samples(sample_sheet)

    logging.info("going to build sequencing run")
    sequencing_run = SequencingRun(run_metadata, samples, sample_sheet)

    logging.info("going to validate sequencing run")
    validate_run(sequencing_run)

    send_message(DirectoryScannerTopics.run_discovered, run=sequencing_run)

    return sequencing_run
def validate_run(sequencing_run):
    """Do the validation on a run, its samples, and files.

    Arguments:
    sequencing_run -- the SequencingRun object to validate

    Raises:
    SampleSheetError -- when offline validation of the sample sheet fails
    SampleError -- when validation of the sample list fails

    Note: the diff showed both the old single-line raise and its new wrapped
    form; only one raise per branch is kept here. The `sample_sheet` name used
    in the error messages is bound explicitly from the run -- the original
    binding was in a diff-collapsed region, so confirm against the full file.
    """
    sample_sheet = sequencing_run.sample_sheet

    validation = validate_sample_sheet(sequencing_run.sample_sheet)
    if not validation.is_valid():
        send_message(sequencing_run.offline_validation_topic, run=sequencing_run, errors=validation.get_errors())
        raise SampleSheetError('Sample sheet {} is invalid. Reason:\n {}'.format(sample_sheet, validation.get_errors()),
                               validation.error_list())

    validation = validate_sample_list(sequencing_run.sample_list)
    if not validation.is_valid():
        raise SampleError('Sample sheet {} is invalid. Reason:\n {}'.format(sample_sheet, validation.get_errors()),
                          validation.error_list())
import os
import re
def find_file_by_name(directory, name_pattern, depth=-1):
    """Find files whose names match a regular expression in a directory tree.

    Traverses the directory (limited to `depth` levels, or unlimited when
    depth is -1) collecting every file whose name matches `name_pattern`.
    (The old docstring mentioned fnmatch; the match is actually re.search.)

    Arguments:
    directory -- top level directory to start searching from
    name_pattern -- regular expression tested against each file name
    depth -- optional, the max depth to descend into the directory. a depth of
             -1 implies no limit.

    Returns: list containing the full paths of files that match the pattern
    """
    if not os.path.isdir(directory):
        # nothing to search; preserves the original's empty-list behavior
        return []

    # compile once instead of re-running re.search on the raw pattern per file
    pattern = re.compile(name_pattern)

    # pick the traversal (the original bound lambdas to names, PEP 8 E731)
    if depth == -1:
        tree = os.walk(directory)
    else:
        tree = walklevel(directory, depth)

    return [os.path.join(root, filename)
            for root, dirs, files in tree
            for filename in files
            if pattern.search(filename)]
def walklevel(directory, depth):
    """Descend into a directory, but only to the specified depth.

    This method is gracelessly borrowed from:
    http://stackoverflow.com/questions/229186/os-walk-without-digging-into-directories-below

    Arguments:
    directory -- the directory in which to start the walk
    depth -- the depth to descend into the directory.

    Returns: a generator yielding (root, dirs, files) tuples for directories
    under the top level directory.
    """
    directory = directory.rstrip(os.path.sep)
    assert os.path.isdir(directory)
    base_depth = directory.count(os.path.sep)
    for root, dirs, files in os.walk(directory):
        yield root, dirs, files
        # prune the walk in place once root is `depth` levels below the start
        if root.count(os.path.sep) >= base_depth + depth:
            dirs[:] = []
......@@ -23,7 +23,7 @@ class RunUploaderTopics(object):
class RunUploader(threading.Thread):
"""A convenience thread wrapper for uploading runs to the server."""
def __init__(self, api, runs, post_processing_task=None, name='RunUploaderThread'):
def __init__(self, api, runs, cond, post_processing_task=None, name='RunUploaderThread'):
"""Initialize a `RunUploader`.
Args:
......@@ -36,12 +36,14 @@ class RunUploader(threading.Thread):
self._api = api
self._runs = runs
self._post_processing_task = post_processing_task
self._condition = cond
threading.Thread.__init__(self, name=name)
def run(self):
"""Initiate upload. The upload happens serially, one run at a time."""
for run in self._runs:
upload_run_to_server(api=self._api, sequencing_run=run)
upload_run_to_server(api=self._api, sequencing_run=run, condition=self._condition)
send_message(RunUploaderTopics.finished_uploading_samples)
# once the run uploads are complete, we can launch the post-processing
# command
if self._post_processing_task:
......@@ -72,7 +74,7 @@ class RunUploader(threading.Thread):
self._api._kill_connections()
threading.Thread.join(self, timeout)
def upload_run_to_server(api, sequencing_run):
def upload_run_to_server(api, sequencing_run, condition):
"""Upload a single run to the server.
Arguments:
......@@ -128,7 +130,12 @@ def upload_run_to_server(api, sequencing_run):
# only send samples that aren't already on the server
samples_to_create = filter(lambda sample: not sample_exists(api, sample), sequencing_run.sample_list)
logging.info("Sending samples to server: [{}].".format(", ".join([str(x) for x in samples_to_create])))
api.send_samples(samples_to_create)
try:
api.send_samples(samples_to_create)
except Exception as e:
logging.exception("Encountered error while uploading files to server, updating status of run to error state.")
api.set_seq_run_error(run_id)
raise
for sample in sequencing_run.samples_to_upload:
pub.subscribe(_handle_upload_sample_complete, sample.upload_completed_topic)
......@@ -144,6 +151,10 @@ def upload_run_to_server(api, sequencing_run):
upload_id = run_id)
send_message("finished_uploading_samples", sheet_dir = sequencing_run.sample_sheet_dir)
send_message(sequencing_run.upload_completed_topic)
# acquiring the lock so it can be released, so that directory monitoring can resume if it was running
condition.acquire()
condition.notify()
condition.release()
api.set_seq_run_complete(run_id)
_create_miseq_uploader_info_file(sequencing_run.sample_sheet_dir, run_id, "Complete")
except Exception as e:
......
2.0.0 to 2.1.4
==============
* Internal Changes to how threads are started and killed
* Added a timeout + retry to calls to IRIDA, so dropped connections will be retried.
* Updated the `requests` library for security patch
* Changed the api layer to use a singleton so multiple instances of the api are not spun off.
* Small bug fixes for GUI
* Fixed UI hanging when recursive directories are searched for runs.
1.7.0 to 2.0.0
==============
* The UI is completely rewritten to better show what's happened with samples as they're uploaded, and to facilitate quality control implementation later.
......
class ProjectError(Exception):
    """Exception type for errors relating to a project; carries no extra state."""
    pass
class ProjectError(Exception):
    """Exception type for errors relating to a project; carries no extra state."""
class SampleError(Exception):
    """Raised when something goes wrong with a sample.

    For example: IRIDA responds with an error during sample creation, or the
    parsing component can't parse the sample section of the sample sheet.
    """

    def __init__(self, message, errors):
        """Store the summary message and the detailed error list.

        Args:
            message: the summary message that's causing the error.
            errors: a more detailed list of errors.
        """
        self._message = message
        self._errors = errors

    def __str__(self):
        return self.message

    @property
    def errors(self):
        """The more detailed list of errors."""
        return self._errors

    @property
    def message(self):
        """The summary message describing the error."""
        return self._message
class SampleError(Exception):
    """An exception to be raised when issues with samples arise.

    Examples include when IRIDA responds with an error during sample creation,
    or when the parsing component can't parse the sample section of the sample
    sheet.
    """

    def __init__(self, message, errors):
        """Initialize a SampleError.

        Args:
            message: the summary message that's causing the error.
            errors: a more detailed list of errors.
        """
        self._message = message
        self._errors = errors

    @property
    def message(self):
        """The summary message that caused the error."""
        return self._message

    @property
    def errors(self):
        """The more detailed list of errors."""
        return self._errors

    def __str__(self):
        # the summary message is the human-readable form of the error
        return self.message
class SampleSheetError(Exception):
    """Raised when errors are encountered with a sample sheet.

    For example: a sample sheet can't be parsed because it's garbled, or IRIDA
    rejects the creation of a run because fields are missing or invalid from
    the sample sheet.
    """

    def __init__(self, message, errors):
        """Store the summary message and the detailed error list.

        Args:
            message: a summary message that's causing the error.
            errors: a more detailed list of errors.
        """
        self._message = message
        self._errors = errors

    def __str__(self):
        return self.message

    @property
    def errors(self):
        """The more detailed list of errors."""
        return self._errors

    @property
    def message(self):
        """The summary message describing the error."""
        return self._message
class SampleSheetError(Exception):
    """An exception raised when errors are encountered with a sample sheet.

    Examples include when a sample sheet can't be parsed because it's garbled, or
    if IRIDA rejects the creation of a run because fields are missing or invalid
    from the sample sheet.
    """

    def __init__(self, message, errors):
        """Initialize a SampleSheetError.

        Args:
            message: a summary message that's causing the error.
            errors: a more detailed list of errors.
        """
        self._message = message
        self._errors = errors

    @property
    def message(self):
        """The summary message that caused the error."""
        return self._message

    @property
    def errors(self):
        """The more detailed list of errors."""
        return self._errors

    def __str__(self):
        # the summary message is the human-readable form of the error
        return self.message
class SequenceFileError(Exception):
    """Raised when errors are encountered with a sequence file.

    For example: files cannot be found for samples that are in the sample
    sheet, or the server rejects a file during upload.
    """

    def __init__(self, message, errors):
        """Store the summary message and the detailed error list.

        Args:
            message: a summary message of the error.
            errors: a more detailed list of errors.
        """
        self._message = message
        self._errors = errors

    def __str__(self):
        return self.message

    @property
    def errors(self):
        """The more detailed list of errors."""
        return self._errors

    @property
    def message(self):
        """The summary message describing the error."""
        return self._message
class SequenceFileError(Exception):
    """An exception that's raised when errors are encountered with a sequence file.

    Examples include when files cannot be found for samples that are in the sample
    sheet, or when the server rejects a file during upload.
    """

    def __init__(self, message, errors):
        """Initialize a SequenceFileError.

        Args:
            message: a summary message of the error.
            errors: a more detailed list of errors.
        """
        self._message = message
        self._errors = errors

    @property
    def message(self):
        """The summary message that caused the error."""
        return self._message

    @property
    def errors(self):
        """The more detailed list of errors."""
        return self._errors

    def __str__(self):
        # the summary message is the human-readable form of the error
        return self.message
......@@ -9,6 +9,7 @@ from wx.lib.wordwrap import wordwrap
from Exceptions import SampleError, SampleSheetError, SequenceFileError
from API.directoryscanner import DirectoryScannerTopics
from API.directorymonitor import DirectoryMonitorTopics
from API.pubsub import send_message
from GUI.SettingsDialog import SettingsDialog
......@@ -71,6 +72,7 @@ class InvalidSampleSheetsPanel(wx.Panel):
sample_sheet: the sample sheet that failed to be parsed.
error: the error that was raised during validation.
"""
send_message(DirectoryMonitorTopics.shut_down_directory_monitor)
sheet_name = basename(dirname(sample_sheet)) + separator + "SampleSheet.csv"
logging.info("Handling sample sheet error for {}".format(sheet_name))
self.Freeze()
......@@ -79,9 +81,9 @@ class InvalidSampleSheetsPanel(wx.Panel):
sheet_errors_type = None
if isinstance(error, SampleError):