Commit bebaf48e authored by Peter Kruczkiewicz's avatar Peter Kruczkiewicz

Set Sample specific values for Galaxy pipeline parameters using "templating"...

Set Sample-specific values for Galaxy pipeline parameters using "templating" with `{{ <Sample.fieldName||MetadataEntry.keyName> }}`.
When both exist, the value from the sample metadata is used in preference to the value of the Sample's private field.
parent 8568e4cd
Pipeline #7591 passed with stage
in 80 minutes and 1 second
......@@ -6,11 +6,12 @@ import static com.google.common.base.Preconditions.checkNotNull;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.reflect.FieldUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -36,6 +37,7 @@ import ca.corefacility.bioinformatics.irida.exceptions.galaxy.GalaxyDatasetExcep
import ca.corefacility.bioinformatics.irida.model.enums.AnalysisType;
import ca.corefacility.bioinformatics.irida.model.project.ReferenceFile;
import ca.corefacility.bioinformatics.irida.model.sample.Sample;
import ca.corefacility.bioinformatics.irida.model.sample.metadata.MetadataEntry;
import ca.corefacility.bioinformatics.irida.model.sequenceFile.SequenceFilePair;
import ca.corefacility.bioinformatics.irida.model.sequenceFile.SequencingObject;
import ca.corefacility.bioinformatics.irida.model.sequenceFile.SingleEndSequenceFile;
......@@ -55,7 +57,6 @@ import ca.corefacility.bioinformatics.irida.pipeline.upload.galaxy.GalaxyLibrari
import ca.corefacility.bioinformatics.irida.pipeline.upload.galaxy.GalaxyWorkflowService;
import ca.corefacility.bioinformatics.irida.service.SequencingObjectService;
import ca.corefacility.bioinformatics.irida.service.analysis.workspace.AnalysisWorkspaceService;
import ca.corefacility.bioinformatics.irida.service.remote.SampleRemoteService;
import ca.corefacility.bioinformatics.irida.service.workflow.IridaWorkflowsService;
/**
......@@ -222,8 +223,13 @@ public class AnalysisWorkspaceServiceGalaxy implements AnalysisWorkspaceService
String workflowId = analysisSubmission.getRemoteWorkflowId();
WorkflowDetails workflowDetails = galaxyWorkflowService.getWorkflowDetails(workflowId);
WorkflowInputsGalaxy workflowInputsGalaxy = analysisParameterServiceGalaxy
.prepareAnalysisParameters(analysisSubmission.getInputParameters(), iridaWorkflow);
Set<Sample> sampleSet = new HashSet<>(pairedFiles.keySet());
sampleSet.addAll(singleFiles.keySet());
final Map<String, String> inputParameters = setSampleSpecificToolParameterValues(analysisSubmission,
workflowInput, sampleSet);
WorkflowInputsGalaxy workflowInputsGalaxy = analysisParameterServiceGalaxy.prepareAnalysisParameters(
inputParameters, iridaWorkflow);
WorkflowInputs inputs = workflowInputsGalaxy.getInputsObject();
inputs.setDestination(new WorkflowInputs.ExistingHistory(workflowHistory.getId()));
inputs.setWorkflowId(workflowDetails.getId());
......@@ -259,6 +265,111 @@ public class AnalysisWorkspaceServiceGalaxy implements AnalysisWorkspaceService
return new PreparedWorkflowGalaxy(analysisId, workflowLibrary.getId(), new WorkflowInputsGalaxy(inputs));
}
/**
 * Resolve {@link Sample} specific {@link IridaWorkflow} tool parameter values using information found in a
 * {@link Sample} field or {@link MetadataEntry}.
 * <p>
 * A tool parameter value of the form "{{ whatever }}" is treated as a template. This method looks for a
 * {@link Sample} field named "whatever" and for a {@link Sample} {@link MetadataEntry} whose field label
 * case-insensitively matches "whatever". The metadata value is preferred over the field value. If neither is
 * found the parameter value is set to the empty string.
 * <p>
 * Templates are only resolved when the workflow requires a single sample and exactly one {@link Sample} was
 * supplied; in every other case each templated value is replaced with the empty string.
 * <p>
 * The parameter map held by the {@link AnalysisSubmission} is never modified; the resolved values are written
 * to a copy, which is returned.
 *
 * @param analysisSubmission {@link AnalysisSubmission} that is being submitted to Galaxy
 * @param workflowInput      {@link IridaWorkflowInput} for the {@link AnalysisSubmission}
 * @param sampleSet          {@link Sample}s whose files are inputs to the submission
 * @return a new map of tool parameter keys to values with every template resolved
 */
private Map<String, String> setSampleSpecificToolParameterValues(AnalysisSubmission analysisSubmission,
        IridaWorkflowInput workflowInput, Set<Sample> sampleSet) {
    // Work on a copy: writing into the submission's own map would alter the submission as a side effect and
    // fails outright if that map is unmodifiable. LinkedHashMap keeps the source iteration order.
    final Map<String, String> inputParameters = new LinkedHashMap<>(analysisSubmission.getInputParameters());
    // Regex pattern to match any tool parameter values surrounded by double curly braces (i.e. "{{ Whatever }}")
    final Pattern pattern = Pattern.compile("^\\{\\{\\s*(.+)\\s*\\}\\}$");

    Sample sample = null;
    if (workflowInput.requiresSingleSample() && sampleSet.size() == 1) {
        sample = sampleSet.iterator().next();
    } else {
        logger.trace("Cannot handle setting sample specific tool parameter values for multiple Samples.");
    }

    for (Map.Entry<String, String> entry : inputParameters.entrySet()) {
        final String paramValue = entry.getValue();
        if (paramValue == null) {
            // Nothing to match against; leave the entry untouched rather than fail in the matcher.
            continue;
        }
        final Matcher matcher = pattern.matcher(paramValue);
        if (!matcher.matches()) {
            continue;
        }
        // The greedy group may carry trailing whitespace that sits before the closing braces.
        final String group = matcher.group(1).trim();
        final String paramKey = entry.getKey();
        logger.trace("Parameter '{}' value '{}' regex match is '{}'", paramKey, paramValue, group);

        if (sample == null) {
            logger.trace("Sample is null for submission id={} so cannot extract tool parameter value for key='{}'."
                    + " Setting tool parameter value to \"\" (empty string)", analysisSubmission.getId(), paramKey);
            entry.setValue("");
            continue;
        }

        final String sampleFieldValue = getSampleFieldValue(sample, group);
        final String sampleMetadataValue = getSampleMetadataValue(sample, group);
        logger.trace("For sample id={} found field value='{}' and metadata value='{}' for tool parameter='{}'.",
                sample.getId(), sampleFieldValue, sampleMetadataValue, paramKey);

        // Prefer to use the matching metadata value over the sample field value
        if (sampleMetadataValue != null) {
            entry.setValue(sampleMetadataValue);
        } else if (sampleFieldValue != null) {
            entry.setValue(sampleFieldValue);
        } else {
            entry.setValue("");
        }
    }
    return inputParameters;
}
/**
 * Read the value of the {@link Sample} field whose name is `paramSearchValue`.
 * <p>
 * The name is handed to {@link FieldUtils#readField(Object, String, boolean)} unchanged with forced access, so
 * private fields are readable.
 * <p>
 * NOTE(review): `paramSearchValue` originates from a tool parameter value, so any field of {@link Sample} can be
 * read this way - confirm that is acceptable or restrict the readable field names.
 *
 * @param sample           {@link Sample} whose field is read
 * @param paramSearchValue name of the {@link Sample} field to read
 * @return the `toString()` of the field value, or `null` if the field does not exist, cannot be accessed, or
 *         currently holds `null`
 */
private String getSampleFieldValue(Sample sample, String paramSearchValue) {
    try {
        final Object sampleInfoValue = FieldUtils.readField(sample, paramSearchValue, true);
        logger.trace("Found sample id={} field info: {}={}", sample.getId(), paramSearchValue, sampleInfoValue);
        // A field that exists but is unset is reported like a missing field instead of failing on toString().
        return sampleInfoValue == null ? null : sampleInfoValue.toString();
    } catch (IllegalAccessException | IllegalArgumentException e) {
        // Best effort lookup: an unknown or inaccessible field simply means "no value".
        logger.trace("Could not access '{}' in Sample id={}: {}", paramSearchValue, sample.getId(), e.toString());
        return null;
    }
}
/**
 * Search the {@link Sample} metadata for an entry whose field label matches `paramSearchValue`, ignoring case.
 * <p>
 * The comparison uses {@link String#equalsIgnoreCase(String)} so that it does not depend on the default locale,
 * and metadata fields without a label are skipped. If several labels match, the first one encountered while
 * iterating the metadata map is used.
 *
 * @param sample           {@link Sample} to search for a matching {@link MetadataEntry}
 * @param paramSearchValue case-insensitive metadata field label to search for
 * @return value of the matching {@link MetadataEntry}, or `null` if no metadata field label matches
 */
private String getSampleMetadataValue(Sample sample, String paramSearchValue) {
    final MetadataEntry metadataEntry = sample.getMetadata()
            .entrySet()
            .stream()
            // equalsIgnoreCase(null) is false, so an unlabelled field can never match
            .filter(x -> paramSearchValue.equalsIgnoreCase(x.getKey().getLabel()))
            .map(Map.Entry::getValue)
            .filter(Objects::nonNull)
            .findFirst()
            .orElse(null);

    if (metadataEntry == null) {
        logger.trace("Could not find '{}' in Sample Metadata for sample_id={}", paramSearchValue, sample.getId());
        return null;
    }

    final String metadataEntryValue = metadataEntry.getValue();
    logger.trace("Found metadata entry for {}={} for sample id={}", paramSearchValue, metadataEntryValue,
            sample.getId());
    return metadataEntryValue;
}
/**
* Determines if the two data structures of samples/sequence files share a
* common sample.
......
......@@ -60,6 +60,15 @@ public class TestDataFactory {
public static final String PROJECT_ORGANISM = "E. coli";
public static final Long PROJECT_ID = 1L;
private static final Long PROJECT_MODIFIED_DATE = 1403723706L;
// @formatter:off
/**
 * Analysis output keys mapped to the file names used for them in test data. Iterated by
 * {@code constructAnalysis()} to build the output files and by {@code getIridaWorkflow(UUID)} to build the
 * workflow outputs, so both are derived from the same key/file pairs.
 */
private static final Map<String, String> OUTPUT_KEY_TO_FILE = ImmutableMap.of(
"tree", "snp_tree.tree",
"matrix", "test_file_1.fastq",
"table", "test_file_2.fastq",
"contigs-with-repeats", "test_file.fasta",
"refseq-masher-matches", "refseq-masher-matches.tsv"
);
// @formatter:on
/**
* Construct a simple {@link ca.corefacility.bioinformatics.irida.model.sample.Sample}.
......@@ -118,15 +127,13 @@ public class TestDataFactory {
}
public static Analysis constructAnalysis() {
Map<String, AnalysisOutputFile> analysisOutputFiles = new ImmutableMap.Builder<String, AnalysisOutputFile>()
.put("tree", constructAnalysisOutputFile("snp_tree.tree", null))
.put("matrix", constructAnalysisOutputFile("test_file_1.fastq", null))
.put("table", constructAnalysisOutputFile("test_file_2.fastq", null))
.put("contigs-with-repeats", constructAnalysisOutputFile("test_file.fasta", null))
.put("refseq-masher-matches", constructAnalysisOutputFile("refseq-masher-matches.tsv", 9000L))
.build();
Analysis analysis = new Analysis(FAKE_EXECUTION_MANAGER_ID, analysisOutputFiles, AnalysisType.PHYLOGENOMICS);
return analysis;
final ImmutableMap.Builder<String, AnalysisOutputFile> stringAnalysisOutputFileBuilder = new ImmutableMap.Builder<>();
Long aofId = 0L;
for (Map.Entry<String, String> entry : OUTPUT_KEY_TO_FILE.entrySet()) {
stringAnalysisOutputFileBuilder.put(entry.getKey(), constructAnalysisOutputFile(entry.getValue(), ++aofId));
}
Map<String, AnalysisOutputFile> analysisOutputFiles = stringAnalysisOutputFileBuilder.build();
return new Analysis(FAKE_EXECUTION_MANAGER_ID, analysisOutputFiles, AnalysisType.PHYLOGENOMICS);
}
public static User constructUser() {
......@@ -170,7 +177,11 @@ public class TestDataFactory {
public static IridaWorkflow getIridaWorkflow(UUID id) {
IridaWorkflowInput input = new IridaWorkflowInput();
List<IridaWorkflowOutput> outputs = ImmutableList.of(new IridaWorkflowOutput());
final ImmutableList.Builder<IridaWorkflowOutput> builder = ImmutableList.builder();
for (Map.Entry<String, String> entry : OUTPUT_KEY_TO_FILE.entrySet()) {
builder.add(new IridaWorkflowOutput(entry.getKey(), entry.getValue()));
}
List<IridaWorkflowOutput> outputs = builder.build();
List<IridaWorkflowToolRepository> tools = ImmutableList.of();
List<IridaWorkflowParameter> parameters = ImmutableList.of();
IridaWorkflowDescription description = new IridaWorkflowDescription(id, "My Workflow", "V1",
......
......@@ -246,11 +246,17 @@ public class DatabaseSetupGalaxyITService {
List<SequenceFilePair> sequenceFilePairs = setupSampleSequenceFileInDatabase(sampleId, sequenceFilePaths1,
sequenceFilePaths2);
ReferenceFile referenceFile = referenceFileRepository.save(new ReferenceFile(referenceFilePath));
final AnalysisSubmission.Builder builder = AnalysisSubmission.builder(iridaWorkflowId)
.name("paired analysis")
.inputFiles(Sets.newHashSet(sequenceFilePairs));
if (referenceFilePath != null) {
ReferenceFile referenceFile = referenceFileRepository.save(new ReferenceFile(referenceFilePath));
builder.referenceFile(referenceFile);
}
AnalysisSubmission submission = builder.inputParameters(parameters).build();
AnalysisSubmission submission = AnalysisSubmission.builder(iridaWorkflowId).name("paired analysis")
.inputFiles(Sets.newHashSet(sequenceFilePairs)).referenceFile(referenceFile)
.inputParameters(parameters).build();
submission.setAnalysisState(state);
analysisSubmissionService.create(submission);
......
......@@ -167,6 +167,7 @@ public class AnalysisWorkspaceServiceGalaxyIT {
private static final UUID validWorkflowIdPairedWithParameters = UUID.fromString("23434bf8-e551-4efd-9957-e61c6f649f8b");
private static final UUID validWorkflowIdSinglePaired = UUID.fromString("d92e9918-1e3d-4dea-b2b9-089f1256ac1b");
private static final UUID phylogenomicsWorkflowId = UUID.fromString("1f9ea289-5053-4e4a-bc76-1f0c60b179f8");
private static final UUID workflowIdForSampleFieldAndMetadataParamValuesTest = UUID.fromString("deadbeef-feed-cafe-babe-fedabaddcaca");
private static final String OUTPUT1_KEY = "output1";
private static final String OUTPUT2_KEY = "output2";
......@@ -520,6 +521,63 @@ public class AnalysisWorkspaceServiceGalaxyIT {
assertEquals("coverageMaxValue should have been changed", "20", coverageMaxValue);
}
/**
 * Checks that templated tool parameter values ("{{ name }}") are filled in from the sample's fields and
 * metadata when an analysis is prepared, that a metadata value wins over a sample field of the same name, and
 * that a template with no matching field or metadata yields no tool parameter.
 *
 * @throws ExecutionManagerException if the execution manager reports an error
 * @throws IOException               if the workflow file cannot be read
 * @throws IridaWorkflowException    if the workflow cannot be loaded
 */
@Test
@WithMockUser(username = "aaron", roles = "ADMIN")
public void testSetSampleFieldAndMetadataParameters() throws ExecutionManagerException,
        IOException, IridaWorkflowException {
    // Create the Galaxy history the analysis will run in
    History newHistory = new History();
    newHistory.setName("testSetSampleFieldAndMetadataParameters");
    HistoriesClient historiesClient = localGalaxy.getGalaxyInstanceAdmin().getHistoriesClient();
    WorkflowsClient workflowsClient = localGalaxy.getGalaxyInstanceAdmin().getWorkflowsClient();
    History galaxyHistory = historiesClient.create(newHistory);

    // Import the workflow whose parameters use sample field/metadata templates
    IridaWorkflow templatedWorkflow = iridaWorkflowsService
            .getIridaWorkflow(workflowIdForSampleFieldAndMetadataParamValuesTest);
    Path workflowFile = templatedWorkflow.getWorkflowStructure().getWorkflowFile();
    byte[] workflowBytes = Files.readAllBytes(workflowFile);
    String workflowJson = new String(workflowBytes, StandardCharsets.UTF_8);
    Workflow importedWorkflow = workflowsClient.importWorkflow(workflowJson);

    Map<String, String> templatedParameters = ImmutableMap.of(
            "fieldSampleName", "{{ sampleName }}",
            "metadataGenus", "{{ genus }}",
            "metadataNotThere", "{{ notThere }}",
            "metadataDescription", "{{ description }}");

    // No reference file is supplied for this submission
    AnalysisSubmission submission = analysisExecutionGalaxyITService.setupPairSubmissionInDatabase(1L,
            pairSequenceFiles1A, pairSequenceFiles2A, null, templatedParameters,
            workflowIdForSampleFieldAndMetadataParamValuesTest);
    submission.setRemoteAnalysisId(galaxyHistory.getId());
    submission.setRemoteWorkflowId(importedWorkflow.getId());

    PreparedWorkflowGalaxy prepared = analysisWorkspaceService.prepareAnalysisFiles(submission);
    assertEquals("the response history id should match the input history id", galaxyHistory.getId(),
            prepared.getRemoteAnalysisId());
    WorkflowInputsGalaxy preparedInputs = prepared.getWorkflowInputs();
    assertNotNull("the returned workflow inputs should not be null", preparedInputs);
    assertNotNull("the returned library id should not be null", prepared.getRemoteDataId());

    // verify correct files have been uploaded
    List<HistoryContents> uploadedContents = historiesClient.showHistoryContents(galaxyHistory.getId());
    assertEquals("the created history has an invalid number of elements", 3, uploadedContents.size());

    WorkflowInputs galaxyInputs = prepared.getWorkflowInputs().getInputsObject();
    assertNotNull("created workflowInputs is null", galaxyInputs);
    Map<String, Object> toolParams = galaxyInputs.getParameters().get("tool");
    assertNotNull("toolParameters must not be null", toolParams);

    // Value taken from the Sample field
    String sampleNameParam = (String) toolParams.get("sample");
    assertEquals("Param value for 'sample' should be 'sample1'", "sample1", sampleNameParam);

    // Values taken from the sample metadata
    final String descriptionParam = (String) toolParams.get("description");
    assertEquals("Param value for 'genus' should be 'Salmonella'", "Salmonella",
            toolParams.get("genus").toString());
    assertEquals(
            "Param value 'description' should be 'This sample kills people!' instead of 'description1' which is the value from the Sample field.",
            "This sample kills people!", descriptionParam);

    // Template with neither a matching field nor matching metadata
    assertFalse("Should not have a value for 'notThere' tool parameter", toolParams.containsKey("notThere"));
}
/**
* Tests out successfully preparing paired workflow input files for
* execution, no parameters set.
......
<?xml version="1.0" encoding="UTF-8"?>
<iridaWorkflow>
<id>deadbeef-feed-cafe-babe-fedabaddcaca</id>
<name>TestSampleFieldAndMetadataParamValues</name>
<version>0.1</version>
<analysisType>assembly-annotation</analysisType>
<inputs>
<sequenceReadsPaired>sequence_reads_paired</sequenceReadsPaired>
<requiresSingleSample>true</requiresSingleSample>
</inputs>
<parameters>
<parameter name="metadataDescription" defaultValue="{{ description }}">
<toolParameter toolId="tool" parameterName="description"/>
</parameter>
<parameter name="fieldSampleName" defaultValue="{{ sampleName }}">
<toolParameter toolId="tool" parameterName="sample"/>
</parameter>
<parameter name="metadataGenus" defaultValue="{{ genus }}">
<toolParameter toolId="tool" parameterName="genus"/>
</parameter>
<parameter name="metadataNotThere" defaultValue="{{ notThere }}">
<toolParameter toolId="tool" parameterName="notThere"/>
</parameter>
</parameters>
<outputs>
<output name="contigs" fileName="contigs.fasta"/>
</outputs>
<toolRepositories>
<repository>
<name>test</name>
<owner>test</owner>
<url>http://localhost/</url>
<revision>1</revision>
</repository>
</toolRepositories>
</iridaWorkflow>
{
"a_galaxy_workflow": "true",
"annotation": "",
"format-version": "0.1",
"name": "TestSampleFieldAndMetadataParamValues",
"steps": {
"0": {
"annotation": "",
"id": 0,
"input_connections": {},
"inputs": [
{
"description": "",
"name": "sequence_reads_paired"
}
],
"name": "Input dataset collection",
"outputs": [],
"position": {
"left": 200,
"top": 200
},
"tool_errors": null,
"tool_id": null,
"tool_state": "{\"collection_type\": \"list:paired\", \"name\": \"sequence_reads_paired\"}",
"tool_version": null,
"type": "data_collection_input",
"user_outputs": []
},
"1": {
"annotation": "",
"id": 1,
"input_connections": {
"sequence_reads": {
"id": 0,
"output_name": "output"
}
},
"inputs": [],
"name": "tool",
"outputs": [
{
"name": "contigs",
"type": "fasta"
}
],
"position": {
"left": 462,
"top": 200
},
"post_job_actions": {
"RenameDatasetActioncontigs": {
"action_arguments": {
"newname": "contigs.fasta"
},
"action_type": "RenameDatasetAction",
"output_name": "contigs"
}
},
"tool_errors": null,
"tool_id": "tool",
"tool_state": "{\"__page__\": 0, \"sequence_reads\": \"null\", \"sample\": \"\\\"\\\"\", \"genus\": \"\\\"\\\"\", \"notThere\": \"\\\"\\\"\", \"description\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"conditional\": \"{\\\"coverageMid\\\": \\\"15\\\", \\\"conditional_select\\\": \\\"all\\\", \\\"__current_case__\\\": 0}\", \"fasta\": \"null\", \"coverageMax\": \"\\\"20\\\"\"}",
"tool_version": "0.1.0",
"type": "tool",
"user_outputs": []
}
},
"uuid": "deadbeef-feed-cafe-babe-fedabaddcaca"
}
......@@ -95,5 +95,15 @@
phoneNumber="0000" username="admin" system_role="ROLE_ADMIN"
credentialsNonExpired="true" enabled="true" />
<!--Sample 1 metadata for testing that Galaxy workflow tool parameters can be set by metadata values-->
<!--see AnalysisWorkspaceServiceGalaxyIT#testSetSampleFieldAndMetadataParameters-->
<metadata_field id="1" label="genus" type="text" />
<metadata_field id="2" label="description" type="text" />
<metadata_entry id="1" value="Salmonella" type="text" />
<metadata_entry id="2" value="This sample kills people!" type="text" />
<sample_metadata_entry sample_id="1" metadata_id="1" metadata_KEY="1"/>
<sample_metadata_entry sample_id="1" metadata_id="2" metadata_KEY="2"/>
</dataset>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment