Commit d334b0ed authored by Philip Mabon

Merge branch 'galtooldev' into 'development'

Galtooldev

Adding wrappers for:
filter-stats
find-repeats
snp-matrix
verify_map
Updated the snvphyl package to add another perl module.

See merge request !1
parents 1a6fb91d 828180a1
......@@ -8,7 +8,7 @@ This guide describes the installation procedure for the SNVPhyl whole genome phy
| **msa_datatypes** | 70227007b991 | 0 (2014-04-22) | [Galaxy Main Shed][] |
| **bcftools_view** | 6572c40a8505 | 8 (2012-10-08) | [Galaxy Main Shed][] |
| **samtools_mpileup** | 973fea5b4bdf | 3 (2014-03-27) | [Galaxy Main Shed][] |
| **sam_to_bam** | 8176b2575aa1 | 4 (2014-03-27) | [Galaxy Main Shed][] |
| **sam_to_bam** | c73bf16b45df | 5 (2015-03-05) | [Galaxy Main Shed][] |
| **core_pipeline** | 0737c0310cab | 0 (2014-10-07) | [IRIDA Main Shed][] |
| **freebayes** | 386bc6e45b68 | 0 (2014-10-07) | [IRIDA Main Shed][] |
| **phyml** | b5867c5c7674 | 0 (2014-10-07) | [IRIDA Main Shed][] |
......@@ -21,17 +21,6 @@ To install these tools, please proceed through the following steps.
Some of these tools require additional dependencies to be installed on the Galaxy server. For a cluster environment please make sure these are available on all cluster nodes by installing to a shared directory.
1. [SAMTools][]: Please download and install [SAMTools 0.1.18][] and add to your `PATH` in the `$GALAXY_ENV` file.
2. **Perl Modules**: The following lists the dependency Perl modules required by this pipeline. These can be installed using the [cpanm][] command.
```bash
cpanm Clone Parallel::ForkManager
```
In addition, [BioPerl][] version 1.6.901 must be installed. Please run the following command to install.
```bash
cpanm http://search.cpan.org/CPAN/authors/id/C/CJ/CJFIELDS/BioPerl-1.6.901.tar.gz
```
## Step 2: Install Galaxy Tools
......
<tool id="filter_unique_basepairs" name="Filter Unique Basepairs" version="1.0.0">
<description>Determines what position in a pseudoalignment file for given number of strains have unique basepairs compared to other strains in the file</description>
<requirements>
<requirement type="package" version="1.1">core-phylogenomics</requirement>
<requirement type="package" version="5.18.1">perl</requirement>
</requirements>
<command interpreter="perl">filter_unique_basepairs.pl --tsv $tsv_file --tree $tree_file --clade $clade_output --output $output
#if $valid_flag
--valid false
#end if
#for $f in $vcf_collection.keys# --vcf "$f=$vcf_collection[$f]" #end for#
</command>
</command>
<inputs>
......
<tool id="filtervcf" name="Filter vcf" version ="0.0.1">
<description>filter out indels and complex SNPS</description>
<requirements>
<requirement type="package" version="1.1">core-phylogenomics</requirement>
</requirements>
<requirements>
<requirement type="package" version="1.1">core-phylogenomics</requirement>
<requirement type="package" version="5.18.1">perl</requirement>
</requirements>
<command interpreter="perl">
/\$VCF_LIB/filterVcf.pl --noindels $vcf
-o $vcfout
......
......@@ -3,6 +3,7 @@
<requirements>
<requirement type="package" version="1.1">core-phylogenomics</requirement>
<requirement type="package" version="3.23">mummer</requirement>
<requirement type="package" version="5.18.1">perl</requirement>
</requirements>
<command interpreter="perl">
......
......@@ -2,6 +2,7 @@
<description>Create SNP matrix from Phylip file</description>
<requirements>
<requirement type="package" version="1.1">core-phylogenomics</requirement>
<requirement type="package" version="5.18.1">perl</requirement>
</requirements>
<command interpreter="perl">
......
<?xml version="1.0"?>
<tool_dependency>
<package name="mummer" version="3.23">
<repository name="package_mummer_3_23" owner="iuc" />
</package>
<package name="perl" version="5.18.1">
<repository name="package_perl_5_18" owner="iuc" prior_installation_required="True" />
</package>
<package name="core-phylogenomics" version="1.1">
<install version="1.0">
<actions>
<action type="shell_command">git clone --recursive https://github.com/apetkau/core-phylogenomics.git</action>
<action type="change_directory">lib/vcf2pseudoalignment</action>
<action type="setup_perl_environment">
<repository name="package_perl_5_18" owner="iuc" >
<package name="perl" version="5.18.1" />
</repository>
<package>Parallel::ForkManager</package>
<package>https://cpan.metacpan.org/authors/id/C/CJ/CJFIELDS/BioPerl-1.6.901.tar.gz</package>
<package>https://cpan.metacpan.org/authors/id/A/AJ/AJPAGE/Bio-Pipeline-Comparison-1.123050.tar.gz</package>
</action>
<action type="change_directory">..</action>
<action type="shell_command">git clone --recursive https://github.com/apetkau/core-phylogenomics.git</action>
<action type="change_directory">core-phylogenomics</action>
<action type="shell_command">git checkout ec125272170beb2baa7821d948079102a37ac173</action>
<action type="change_directory">lib/vcf2pseudoalignment</action>
<action type="shell_command">git checkout 4bf2cddaa8d79d1e05523da6afb535651335668c</action>
<action type="change_directory">../..</action>
<action type="move_directory_files">
<source_directory>.</source_directory>
<destination_directory>$INSTALL_DIR</destination_directory>
</action>
<action type="change_directory">../..</action>
<action type="move_directory_files">
<source_directory>.</source_directory>
<destination_directory>$INSTALL_DIR/core-phylogenomics</destination_directory>
</action>
<action type="set_environment">
<environment_variable name="VCF2PSEUDO" action="set_to">$INSTALL_DIR/lib/vcf2pseudoalignment</environment_variable>
<environment_variable name="PERL5LIB" action="prepend_to">$INSTALL_DIR/lib/vcf2pseudoalignment</environment_variable>
<environment_variable name="VCF_LIB" action="set_to">$INSTALL_DIR/lib</environment_variable>
<environment_variable name="SCRIPTS" action="set_to">$INSTALL_DIR/scripts</environment_variable>
<environment_variable name="VCF2PSEUDO" action="set_to">$INSTALL_DIR/core-phylogenomics/lib/vcf2pseudoalignment</environment_variable>
<environment_variable name="PERL5LIB" action="prepend_to">$INSTALL_DIR/lib/perl5</environment_variable>
<environment_variable name="VCF_LIB" action="set_to">$INSTALL_DIR/core-phylogenomics/lib</environment_variable>
<environment_variable name="SCRIPTS" action="set_to">$INSTALL_DIR/core-phylogenomics/scripts</environment_variable>
</action>
<action type="shell_command">svn checkout http://svn.code.sf.net/p/vcftools/code/trunk/ vcftools</action>
<action type="move_file">
<source>vcftools/perl/Vcf.pm</source>
<destination>$INSTALL_DIR/lib/vcf2pseudoalignment</destination>
</action>
</actions>
</install>
<readme>
</readme>
</package>
<package name="mummer" version="3.23">
<repository changeset_revision="8be3c8826f17" name="package_mummer_3_23" owner="aaron" toolshed="https://irida.corefacility.ca/galaxy-shed/" />
</package>
</tool_dependency>
</tool_dependency>
\ No newline at end of file
<tool id="vcf2core" name="VCF 2 % Core" version ="0.0.3">
<description>Determine genomics core from Mpileup vcf files</description>
<requirements>
<requirement type="package" version="1.1">core-phylogenomics</requirement>
</requirements>
<requirements>
<requirement type="package" version="1.1">core-phylogenomics</requirement>
<requirement type="package" version="5.18.1">perl</requirement>
</requirements>
<command interpreter="perl">
/\$VCF2PSEUDO/vcf2core.pl --fasta "$fasta"
......
<tool id="vcf2pseudoalignment" name="VCF 2 pseudoalignment" version ="0.0.10">
<description>create a pseudo alignment from multiple VCFs files</description>
<requirements>
<requirement type="package" version="1.1">core-phylogenomics</requirement>
<!--requirement type="package" version="1.0">smalt</requirement>
<requirement type="package" version="1.0">samtools</requirement>
<requirement type="package" version="1.0">freebayes</requirement>
<requirement type="package" version="1.0">bcftools_view</requirement>
<requirement type="package" version="1.0">msa_datatypes</requirement-->
</requirements>
<requirements>
<requirement type="package" version="1.1">core-phylogenomics</requirement>
<requirement type="package" version="5.18.1">perl</requirement>
</requirements>
<command interpreter="bash">
vcf2pseudoalignment.sh $positions $phylip $fasta
......@@ -34,10 +30,10 @@
<param name="freebayes_collection" type="data_collection" label="FreeBayes filtered VCF" help="" optional="false" format="vcf" collection_type="list" />
<param name="mpileup_collection" type="data_collection" label="Mpileup VCF" help="" optional="false" format="tabular" collection_type="list" />
<param name="numcpus" type="select" label="Number of CPUS" multiple="false" optional="false">
<option value="4">4</option>
<option value="8">8</option>
<option value="16">16</option>
<option value="32">32</option>
<option value="4">4</option>
<option value="8">8</option>
<option value="16">16</option>
<option value="32">32</option>
</param>
</inputs>
......
......@@ -11,6 +11,7 @@
<package name="perl" version="5.18.1" />
</repository>
<package>Parallel::ForkManager</package>
<package>https://cpan.metacpan.org/authors/id/S/SA/SANKO/Readonly-2.00.tar.gz</package>
<package>https://cpan.metacpan.org/authors/id/C/CJ/CJFIELDS/BioPerl-1.6.901.tar.gz</package>
<package>https://cpan.metacpan.org/authors/id/A/AJ/AJPAGE/Bio-Pipeline-Comparison-1.123050.tar.gz</package>
<package>https://cpan.metacpan.org/authors/id/R/RE/REHSACK/List-MoreUtils-0.410.tar.gz</package>
......
<tool id="filterstat" name="Filter Stats" version="0.0.1">
    <!--
        Galaxy wrapper for filter-stats.pl. Reads a pseudo-alignment positions
        table and writes a tabular summary of the N and '-' counts.
    -->
    <description>Summarize the number of N's and -'s in a pseudo-alignment positions file</description>
    <requirements>
        <requirement type="package" version="1.1">snvphyl</requirement>
        <requirement type="package" version="5.18.1">perl</requirement>
    </requirements>
    <!--
        The script is located through the VCF2PSEUDO environment variable; the
        dollar sign is escaped so that Cheetah leaves it for the shell.
        NOTE(review): the leading "/" presumably keeps Galaxy from prefixing the
        tool directory to the script path when interpreter="perl" is used;
        confirm against the target Galaxy release.
        The "-a" flag is passed only when the "invalids" summary option is ticked.
        Dataset paths are quoted so unusual characters cannot split arguments.
    -->
    <command interpreter="perl">
        /\$VCF2PSEUDO/filter-stats.pl -i "$tabfile" -o "$out"
        #if str($summary) == "invalids"
            -a
        #end if
    </command>
    <inputs>
        <param name="tabfile" type="data" label="Pseudo-alignment positions tab delimited file" format="tabular"/>
        <param name="summary" type="select" display="checkboxes" label="Summary Options" multiple="true">
            <option value="invalids">Include all entries marked as 'filtered-invalid'</option>
        </param>
    </inputs>
    <outputs>
        <data format="tabular" name="out" />
    </outputs>
    <!-- Any non-zero exit status from the script fails the job. -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Unknown error has occurred"/>
    </stdio>
    <tests>
        <test>
            <param name="tabfile" value="filter-stats-in.tsv" />
            <output name="out" file="filter-stats-out.tabular" />
        </test>
    </tests>
    <help>
What it does
============

This script prints a stat summary of the number of N's and -'s found in the pseudo-alignment positions tab delimited file.

Usage
=====

**Parameters**

- input - The pseudo-alignment positions tab delimited file

**Options**

- all (-a) - When this option is set, the summary will include all the entries marked as 'filtered-invalid'
    </help>
</tool>
#Chromosome Position Status Reference query B
chr 5 valid A T A
chr 10 filtered-coverage A G -
Chromosome Genomes
chr ALL B query
Total number of N's and -'s 1 1 0
Total percent of N's and -'s 50.00 50.00 0.00
<?xml version="1.0"?>
<!-- Dependency manifest: the "snvphyl" 1.1 package comes from the package_snvphyl repository owned by "phil". -->
<tool_dependency>
    <package version="1.1" name="snvphyl">
        <repository owner="phil" name="package_snvphyl"/>
    </package>
</tool_dependency>
<tool id="findrepeat" name="Find Repeats" version="0.0.2-dev">
    <!--
        Galaxy wrapper for find-repeats.pl. Searches a FASTA reference for
        repeat regions and captures the script's standard output as a tabular
        dataset.
    -->
    <description>Identify repeat elements using Mummer</description>
    <requirements>
        <requirement type="package" version="1.1">snvphyl</requirement>
        <requirement type="package" version="3.23">mummer</requirement>
        <requirement type="package" version="5.18.1">perl</requirement>
    </requirements>
    <!--
        The script is located through the VCF2PSEUDO environment variable; the
        dollar sign is escaped so that Cheetah leaves it for the shell.
        NOTE(review): the leading "/" presumably keeps Galaxy from prefixing the
        tool directory to the script path when interpreter="perl" is used;
        confirm against the target Galaxy release.
        Dataset paths are quoted so unusual characters cannot split arguments
        or break the redirect.
    -->
    <command interpreter="perl">
        /\$VCF2PSEUDO/find-repeats.pl -l $length -p $pid "$fasta" > "$out"
    </command>
    <inputs>
        <param name="fasta" type="data" label="Fasta file" format="fasta"/>
        <param name="length" label="Minimum length of repeat region" type="integer" value="150"/>
        <param name="pid" label="Minimum PID of repeat region" type="integer" value="90"/>
    </inputs>
    <outputs>
        <data format="tabular" name="out" />
    </outputs>
    <!-- Any non-zero exit status from the script fails the job. -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Unknown error has occurred"/>
    </stdio>
    <tests>
        <test>
            <param name="length" value="150"/>
            <param name="pid" value="90"/>
            <param name="fasta" value="find-repeats-input-1.fasta"/>
            <output name="out" file="find-repeats-output-1.tabular"/>
        </test>
    </tests>
    <help>
What it does
============

Searches a fasta reference file for repeats.

Usage
=====

**Parameters**

- Fasta file: A fasta reference file to search for repeats.

**Options**

- Minimum length of repeat region (150).
- Minimum PID of repeat region (90).
    </help>
</tool>
>A
CCCGCTCGCCACGCTTTGGCCATAGTGCTGCCTTCTACGATGTGTAAACCGTGCAACTTAATGCCATCGGTGCCTACCTT
CAGTACTTGCTGTAACGTGGTGAGGTTTTCAGTGCGCTCTTCACCGGGTAACCCAACAATCAAGTGAGTACACACTTTGA
TACCTAACGCTCTAGCTTTGGCAGTGATCTCTGCGTAGCAGGCAAAATCGTGCCCGCGGTTAATGCGTTTTAAAGTCTGG
>B
CCCGCTCGCCACGCTTTGGCCATAGTGCTGCCTTCTACGATGTGTAAACCGTGCAACTTAATGCCATCGGTGCCTACCTT
CAGTACTTGCTGTAACGTGGTGAGGTTTTCAGTGCGCTCTTCACCGGGTAACCCAACAATCAAGTGAGTACACACTTTGA
TACCTAACGCTCTAGCTTTGGCAGTGATCTCTGCGTAGCAGGCAAAATCGTGCCCGCGGTTAATGCGTTTTAAAGTCTGG
<?xml version="1.0"?>
<!--
    Dependency manifest with two packages:
    "snvphyl" 1.1 from package_snvphyl (owner "phil") and
    "mummer" 3.23 from package_mummer_3_23 (owner "iuc").
-->
<tool_dependency>
    <package version="1.1" name="snvphyl">
        <repository owner="phil" name="package_snvphyl"/>
    </package>
    <package version="3.23" name="mummer">
        <repository owner="iuc" name="package_mummer_3_23"/>
    </package>
</tool_dependency>
<tool id="snpmatrix" name="SNP Matrix" version="0.0.1">
    <!--
        Galaxy wrapper for snp_matrix.pl. Takes a Phylip alignment and writes
        the resulting SNP matrix to the "csv" output dataset.
    -->
    <description>Create SNP matrix from Phylip file</description>
    <requirements>
        <requirement type="package" version="1.1">snvphyl</requirement>
        <requirement type="package" version="5.18.1">perl</requirement>
    </requirements>
    <!--
        The script is located through the VCF2PSEUDO environment variable; the
        dollar sign is escaped so that Cheetah leaves it for the shell.
        NOTE(review): the leading "/" presumably keeps Galaxy from prefixing the
        tool directory to the script path when interpreter="perl" is used;
        confirm against the target Galaxy release.
        Dataset paths are quoted so unusual characters cannot split arguments.
    -->
    <command interpreter="perl">
        /\$VCF2PSEUDO/snp_matrix.pl "$phylip"
        -o "$csv"
    </command>
    <inputs>
        <param name="phylip" type="data" label="Phylip file" format="phylip"/>
    </inputs>
    <outputs>
        <data format="csv" name="csv"/>
    </outputs>
    <!--
        Previously empty, which left script failures undetected. Any non-zero
        exit status now fails the job, matching the other wrappers in this
        repository.
    -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Unknown error has occurred"/>
    </stdio>
    <tests>
        <test>
            <param name="phylip" value="pseudoalign-13.phy"/>
            <output name="csv" file="pseudoalign-13.phy.out"/>
        </test>
    </tests>
    <help>
What it does
============

Create SNP matrix from Phylip file format

Usage
=====

**Parameters**

- Phylip file: Phylogenetic file (.ph, .phy)
    </help>
</tool>
5 3
'a' AAY
'b' AAA
'c' AAA
'd' TAA
'e' NGG
strain a e b c d
a 0 1 0 0 0
e 1 0 1 1 1
b 0 1 0 0 0
c 0 1 0 0 0
d 0 1 0 0 0
<?xml version="1.0"?>
<!-- Dependency manifest: the "snvphyl" 1.1 package comes from the package_snvphyl repository owned by "phil". -->
<tool_dependency>
    <package version="1.1" name="snvphyl">
        <repository owner="phil" name="package_snvphyl"/>
    </package>
</tool_dependency>
==========Reference Mapping Quality===========
NUMBER OF BP's IN REFERENCE GENOME: 48502
MINIMUM DEPTH: 10
MINIMUM MAPPING: 80
<?xml version="1.0"?>
<!--
    Dependency manifest with two packages:
    "snvphyl" 1.1 from package_snvphyl (owner "phil") and
    "samtools" 0.1.19 from package_samtools_0_1_19 (owner "ericenns").
-->
<tool_dependency>
    <package version="1.1" name="snvphyl">
        <repository owner="phil" name="package_snvphyl"/>
    </package>
    <package version="0.1.19" name="samtools">
        <repository owner="ericenns" name="package_samtools_0_1_19"/>
    </package>
</tool_dependency>
<tool id="verify_map" name="Verify Mapping Quality" version="0.0.1">
    <!--
        Galaxy wrapper for verify_mapping_quality.pl. Passes every BAM in the
        input collection to the script and collects mapping_percentage.log
        from the job working directory as the output.
    -->
    <description>Checks the mapping quality of all BAM files generated in the core SNP pipeline.</description>
    <requirements>
        <requirement type="package" version="1.1">snvphyl</requirement>
        <requirement type="package" version="5.18.1">perl</requirement>
        <requirement type="package" version="0.1.19">samtools</requirement>
    </requirements>
    <!--
        The script is located through the VCF2PSEUDO environment variable; the
        dollar sign is escaped so that Cheetah leaves it for the shell.
        NOTE(review): the leading "/" presumably keeps Galaxy from prefixing the
        tool directory to the script path when interpreter="perl" is used;
        confirm against the target Galaxy release.
        Each collection element is passed as one "name=path" bam argument.
        The two thresholds are optional and only passed when a value is set.
        The last conditional previously ended with a malformed "#end ifs".
    -->
    <command interpreter="perl">
        /\$VCF2PSEUDO/verify_mapping_quality.pl
        #for $f in $bams.keys# --bam "$f=$bams[$f]" #end for#
        #if $mindepth
            --min-depth $mindepth
        #end if
        #if $minmap
            --min-map $minmap
        #end if
    </command>
    <inputs>
        <param name="bams" type="data_collection" label="Collection of BAM files" format="bam" />
        <param name="mindepth" type="integer" label="The minimum depth of coverage required in each BAM file." optional="true" />
        <param name="minmap" type="integer" label="The minimum percent coverage required in each BAM file." optional="true" />
    </inputs>
    <outputs>
        <data format="txt" name="output_log" from_work_dir="mapping_percentage.log" label="Mapping Percentage Log"/>
    </outputs>
    <!--
        The original rule had empty range, level and description attributes.
        Any non-zero exit status now fails the job, matching the other
        wrappers in this repository.
    -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Unknown error has occurred"/>
    </stdio>
    <tests>
        <test>
            <param name="bams">
                <collection type="list">
                    <element name="s1" value="sample1.bam"/>
                    <element name="s2" value="sample2.bam"/>
                    <element name="s3" value="sample3.bam"/>
                    <element name="s4" value="sample4.bam"/>
                </collection>
            </param>
            <output name="output_log" file="test_percent_log.txt" ftype="txt"/>
        </test>
    </tests>
    <help>
What it does
============

This script checks the mapping quality of all BAM files generated in the core SNP pipeline.

Usage
=====

**Parameters**

- input - The collection of BAM files in the dataset.

**Options**

- min-depth - The minimum depth of coverage required in each BAM file.
- min-map - The minimum percent coverage required in each BAM file.
    </help>
</tool>
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment