Commit aa045e02 authored by Takadonet

deleting version one of the pipeline

parent 22e68b27
<tool id="filter_unique_basepairs" name="Filter Unique Basepairs" version="1.0.0">
  <!--
    Galaxy wrapper for filter_unique_basepairs.pl.
    Inputs : a pseudoalignment-positions table, a newick tree and a list
             collection of VCF files.
    Outputs: a tabular report and a newick tree (labelled Clades.tre).
  -->
  <description>Determines what position in a pseudoalignment file for given number of strains have unique basepairs compared to other strains in the file</description>
  <requirements>
    <requirement type="package" version="1.1">core-phylogenomics</requirement>
    <requirement type="package" version="5.18.1">perl</requirement>
  </requirements>
  <!--
    Every dataset path is double-quoted so that a path containing whitespace
    reaches the script as a single argument.
    Ticking "Include non-valid entries" appends the "valid false" option.
    Each collection element is passed as one vcf option of the form
    "element name=dataset path".
  -->
  <command interpreter="perl">
    filter_unique_basepairs.pl
      --tsv "$tsv_file"
      --tree "$tree_file"
      --clade "$clade_output"
      --output "$output"
    #if $valid_flag
      --valid false
    #end if
    #for $f in $vcf_collection.keys# --vcf "$f=$vcf_collection[$f]" #end for#
  </command>
  <inputs>
    <param name="tsv_file" type="data" format="tabular" label="Pseudoalignment-positions file" />
    <param name="tree_file" type="data" format="newick" label="Tree file" />
    <param name="valid_flag" type="boolean" label="Include non-valid entries" />
    <param name="vcf_collection" type="data_collection" label="Filtered VCF" help="" optional="false" collection_type="list" />
    <!-- Earlier repeat-based input, kept disabled for reference; superseded by vcf_collection. -->
    <!--repeat name="vcf_files" title="VCF Files">
      <param name="vcf" format="vcf" type="data" label="VCF" optional="false"/>
    </repeat-->
  </inputs>
  <outputs>
    <data format="tabular" name="output" label="Output.tsv"/>
    <data format="newick" name="clade_output" label="Clades.tre"/>
  </outputs>
</tool>
<tool id="filtervcf" name="Filter vcf" version="0.0.1">
  <!--
    Galaxy wrapper for filterVcf.pl: reads one VCF dataset and writes a VCF
    with indels filtered out.
  -->
  <description>filter out indels and complex SNPS</description>
  <requirements>
    <requirement type="package" version="1.1">core-phylogenomics</requirement>
    <requirement type="package" version="5.18.1">perl</requirement>
  </requirements>
  <!--
    The backslash stops Cheetah from treating VCF_LIB as a template variable,
    so the shell expands it at run time.
    NOTE(review): the leading slash presumably stops Galaxy from prefixing the
    tool directory to the script path; confirm before changing it.
    Dataset paths are double-quoted so whitespace in a path cannot split them.
  -->
  <command interpreter="perl">
    /\$VCF_LIB/filterVcf.pl --noindels "$vcf"
      -o "$vcfout"
  </command>
  <inputs>
    <param name="vcf" type="data" label="VCF file" format="vcf"/>
  </inputs>
  <outputs>
    <data format="vcf" name="vcfout" />
  </outputs>
  <stdio>
  </stdio>
  <tests>
    <test>
      <param name="vcf" value="filterVcf.input.1.vcf"/>
      <output name="vcfout" file="filterVcf.output.1.vcf"/>
    </test>
  </tests>
  <!-- Help is reStructuredText: titles and lists need surrounding blank lines, and the text stays at column 0. -->
  <help>
What it does
============

Filter out indels and complex variants from VCF file

Usage
=====

**Parameters**

- VCF file: A VCF file from Freebayes
  </help>
</tool>
<tool id="findrepeat" name="Find Repeats" version="0.0.2-dev">
  <!--
    Galaxy wrapper for find-repeats.pl: takes a fasta dataset plus a minimum
    length and minimum PID, and captures the script's standard output as a
    tabular dataset.
  -->
  <description>Identify repeat elements using Mummer</description>
  <requirements>
    <requirement type="package" version="1.1">core-phylogenomics</requirement>
    <requirement type="package" version="3.23">mummer</requirement>
    <requirement type="package" version="5.18.1">perl</requirement>
  </requirements>
  <!--
    The backslash stops Cheetah from treating VCF2PSEUDO as a template
    variable, so the shell expands it at run time.
    NOTE(review): the leading slash presumably stops Galaxy from prefixing the
    tool directory to the script path; confirm before changing it.
    Dataset paths are double-quoted so whitespace in a path cannot split them.
  -->
  <command interpreter="perl">
    /\$VCF2PSEUDO/find-repeats.pl -l $length -p $pid "$fasta" > "$out"
  </command>
  <inputs>
    <param name="fasta" type="data" label="Fasta file" format="fasta"/>
    <param name="length" label="Minimum length of repeat region" type="integer" value="150"/>
    <param name="pid" label="Minimum PID of repeat region" type="integer" value="90"/>
  </inputs>
  <outputs>
    <data format="tabular" name="out" />
  </outputs>
  <!-- Any exit status of 1 or above marks the job as failed. -->
  <stdio>
    <exit_code range="1:" level="fatal" description="Unknown error has occurred"/>
  </stdio>
  <tests>
    <test>
      <param name="length" value="150"/>
      <param name="pid" value="90"/>
      <param name="fasta" value="find-repeats-input-1.fasta"/>
      <output name="out" file="find-repeats-output-1.tabular"/>
    </test>
  </tests>
  <!-- Help is reStructuredText: titles and lists need surrounding blank lines, and the text stays at column 0. -->
  <help>
What it does
============

Searches a fasta reference file for repeats.

Usage
=====

**Parameters**

- Fasta file: A fasta reference file to search for repeats.

**Options**

- Minimum length of repeat region (150).
- Minimum PID of repeat region (90).
  </help>
</tool>
<tool id="snpmatrix" name="SNP Matrix" version="0.0.1">
  <!--
    Galaxy wrapper for snp_matrix.pl: reads one Phylip dataset and writes the
    result to a csv dataset.
  -->
  <description>Create SNP matrix from Phylip file</description>
  <requirements>
    <requirement type="package" version="1.1">core-phylogenomics</requirement>
    <requirement type="package" version="5.18.1">perl</requirement>
  </requirements>
  <!--
    The backslash stops Cheetah from treating SCRIPTS as a template variable,
    so the shell expands it at run time.
    NOTE(review): the leading slash presumably stops Galaxy from prefixing the
    tool directory to the script path; confirm before changing it.
    Dataset paths are double-quoted so whitespace in a path cannot split them.
  -->
  <command interpreter="perl">
    /\$SCRIPTS/snp_matrix.pl "$phylip"
      -o "$csv"
  </command>
  <inputs>
    <param name="phylip" type="data" label="Phylip file" format="phylip"/>
  </inputs>
  <outputs>
    <data format="csv" name="csv"/>
  </outputs>
  <stdio>
  </stdio>
  <tests>
    <test>
      <param name="phylip" value="pseudoalign-3.phy"/>
      <output name="csv" file="pseudoalign-3.phy.out"/>
    </test>
  </tests>
  <!-- Help is reStructuredText: titles and lists need surrounding blank lines, and the text stays at column 0. -->
  <help>
What it does
============

Create SNP matrix from Phylip file format

Usage
=====

**Parameters**

- Phylip file: Phylogenetic file (.ph, .phy)
  </help>
</tool>
##fileformat=VCFv4.1
##fileDate=20140423
##source=freeBayes version 0.9.8
##reference=/home/aaron/microbialinformatics2014/core-snp-tutorial/output-10-subsample/reference/2010EL-1749.2010EL-1786-c1_2000_2400kb.fasta
##phasing=none
##commandline="/opt/freebayes/freebayes --bam /home/aaron/microbialinformatics2014/core-snp-tutorial/output-10-subsample/bam/2010EL-1749.bam --vcf /home/aaron/microbialinformatics2014/core-snp-tutorial/output-10-subsample/vcf/2010EL-1749.vcf --fasta-reference /home/aaron/microbialinformatics2014/core-snp-tutorial/output-10-subsample/reference/2010EL-1749.2010EL-1786-c1_2000_2400kb.fasta --min-coverage 2 --pvar 0 --ploidy 1 --left-align-indels --min-mapping-quality 30 --min-base-quality 30 --min-alternate-fraction 0.75"
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">
##INFO=<ID=RO,Number=1,Type=Integer,Description="Reference allele observations">
##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations">
##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">
##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome">
##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without.">
##INFO=<ID=XRM,Number=1,Type=Float,Description="Reference allele read mismatch rate: The rate of SNPs + MNPs + INDELs in reads supporting the reference allele.">
##INFO=<ID=XRS,Number=1,Type=Float,Description="Reference allele read SNP rate: The rate of per-base mismatches (SNPs + MNPs) in reads supporting the reference allele.">
##INFO=<ID=XRI,Number=1,Type=Float,Description="Reference allele read INDEL rate: The rate of INDELs (gaps) in reads supporting the reference allele.">
##INFO=<ID=XAM,Number=A,Type=Float,Description="Alternate allele read mismatch rate: The rate of SNPs + MNPs + INDELs in reads supporting the alternate allele, excluding the called variant.">
##INFO=<ID=XAS,Number=A,Type=Float,Description="Alternate allele read SNP rate: The rate of per-base mismatches (SNPs + MNPs) in reads supporting the alternate allele, excluding the called variant.">
##INFO=<ID=XAI,Number=A,Type=Float,Description="Alternate allele read INDEL rate: The rate of INDELs (gaps) in reads supporting the alternate allele, excluding the called variant.">
##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best.">
##INFO=<ID=BVAR,Number=0,Type=Flag,Description="The best genotype combination in the posterior is variant (non homozygous).">
##INFO=<ID=CpG,Number=0,Type=Flag,Description="CpG site (either CpG, TpG or CpA)">
##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex.">
##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR.">
##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position.">
##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles.">
##INFO=<ID=HWE,Number=1,Type=Float,Description="Phred-scaled discrete HWE prior probability of the genotyping across all samples.">
##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length">
##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles">
##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles">
##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments">
##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype">
##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
##FORMAT=<ID=GLE,Number=1,Type=String,Description="Genotype Likelihood Explicit, same as GL, but with tags to indicate the specific genotype. For instance, 0^-75.22|1^-223.42|0/0^-323.03|1/0^-99.29|1/1^-802.53 represents both haploid and diploid genotype likilehoods in a biallelic context">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count">
##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations">
##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count">
##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT unknown
gi|360034408|ref|NC_016445.1|_2000000_2400000 128 . GA AT 197.867 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=3;CIGAR=1X;DP=3;DPRA=0;EPP=3.73412;EPPR=0;HWE=-0;LEN=1;MEANALT=1;MQM=54;MQMR=0;NS=1;NUMALT=1;ODDS=45.5605;PAIRED=0;PAIREDR=0;RO=0;RPP=3.73412;RPPR=0;RUN=1;SAP=3.73412;SRP=0;TYPE=snp;XAI=0;XAM=0;XAS=0;XRI=0;XRM=0;XRS=0;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:3:0:0:3:212:-19.7867,0
gi|360034408|ref|NC_016445.1|_2000000_2400000 256 . A C 250.1 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=6;CIGAR=1X;DP=8;DPRA=0;EPP=8.80089;EPPR=3.0103;HWE=-0;LEN=1;MEANALT=1;MQM=54;MQMR=54;NS=1;NUMALT=1;ODDS=57.5877;PAIRED=0;PAIREDR=0;RO=2;RPP=8.80089;RPPR=7.35324;RUN=1;SAP=4.45795;SRP=3.0103;TYPE=snp;XAI=0;XAM=0;XAS=0;XRI=0;XRM=0;XRS=0;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:8:2:142:6:420:-38.5,-13.49
gi|360034408|ref|NC_016445.1|_2000000_2400000 512 . AT C 250.1 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=6;CIGAR=1X;DP=8;DPRA=0;EPP=8.80089;EPPR=3.0103;HWE=-0;LEN=1;MEANALT=1;MQM=54;MQMR=54;NS=1;NUMALT=1;ODDS=57.5877;PAIRED=0;PAIREDR=0;RO=2;RPP=8.80089;RPPR=7.35324;RUN=1;SAP=4.45795;SRP=3.0103;TYPE=snp;XAI=0;XAM=0;XAS=0;XRI=0;XRM=0;XRS=0;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:8:2:142:6:420:-38.5,-13.49
##fileformat=VCFv4.1
##fileDate=20140423
##source=freeBayes version 0.9.8
##reference=/home/aaron/microbialinformatics2014/core-snp-tutorial/output-10-subsample/reference/2010EL-1749.2010EL-1786-c1_2000_2400kb.fasta
##phasing=none
##commandline="/opt/freebayes/freebayes --bam /home/aaron/microbialinformatics2014/core-snp-tutorial/output-10-subsample/bam/2010EL-1749.bam --vcf /home/aaron/microbialinformatics2014/core-snp-tutorial/output-10-subsample/vcf/2010EL-1749.vcf --fasta-reference /home/aaron/microbialinformatics2014/core-snp-tutorial/output-10-subsample/reference/2010EL-1749.2010EL-1786-c1_2000_2400kb.fasta --min-coverage 2 --pvar 0 --ploidy 1 --left-align-indels --min-mapping-quality 30 --min-base-quality 30 --min-alternate-fraction 0.75"
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">
##INFO=<ID=RO,Number=1,Type=Integer,Description="Reference allele observations">
##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations">
##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">
##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome">
##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without.">
##INFO=<ID=XRM,Number=1,Type=Float,Description="Reference allele read mismatch rate: The rate of SNPs + MNPs + INDELs in reads supporting the reference allele.">
##INFO=<ID=XRS,Number=1,Type=Float,Description="Reference allele read SNP rate: The rate of per-base mismatches (SNPs + MNPs) in reads supporting the reference allele.">
##INFO=<ID=XRI,Number=1,Type=Float,Description="Reference allele read INDEL rate: The rate of INDELs (gaps) in reads supporting the reference allele.">
##INFO=<ID=XAM,Number=A,Type=Float,Description="Alternate allele read mismatch rate: The rate of SNPs + MNPs + INDELs in reads supporting the alternate allele, excluding the called variant.">
##INFO=<ID=XAS,Number=A,Type=Float,Description="Alternate allele read SNP rate: The rate of per-base mismatches (SNPs + MNPs) in reads supporting the alternate allele, excluding the called variant.">
##INFO=<ID=XAI,Number=A,Type=Float,Description="Alternate allele read INDEL rate: The rate of INDELs (gaps) in reads supporting the alternate allele, excluding the called variant.">
##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best.">
##INFO=<ID=BVAR,Number=0,Type=Flag,Description="The best genotype combination in the posterior is variant (non homozygous).">
##INFO=<ID=CpG,Number=0,Type=Flag,Description="CpG site (either CpG, TpG or CpA)">
##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex.">
##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR.">
##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position.">
##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles.">
##INFO=<ID=HWE,Number=1,Type=Float,Description="Phred-scaled discrete HWE prior probability of the genotyping across all samples.">
##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length">
##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles">
##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles">
##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments">
##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype">
##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
##FORMAT=<ID=GLE,Number=1,Type=String,Description="Genotype Likelihood Explicit, same as GL, but with tags to indicate the specific genotype. For instance, 0^-75.22|1^-223.42|0/0^-323.03|1/0^-99.29|1/1^-802.53 represents both haploid and diploid genotype likilehoods in a biallelic context">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count">
##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations">
##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count">
##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT unknown
gi|360034408|ref|NC_016445.1|_2000000_2400000 128 . G A 197.867 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=3;CIGAR=1X;DP=3;DPRA=0;EPP=3.73412;EPPR=0;HWE=-0;LEN=1;MEANALT=1;MQM=54;MQMR=0;NS=1;NUMALT=1;ODDS=45.5605;PAIRED=0;PAIREDR=0;RO=0;RPP=3.73412;RPPR=0;RUN=1;SAP=3.73412;SRP=0;TYPE=snp;XAI=0;XAM=0;XAS=0;XRI=0;XRM=0;XRS=0;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:3:0:0:3:212:-19.7867,0
gi|360034408|ref|NC_016445.1|_2000000_2400000 129 . A T 197.867 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=3;CIGAR=1X;DP=3;DPRA=0;EPP=3.73412;EPPR=0;HWE=-0;LEN=1;MEANALT=1;MQM=54;MQMR=0;NS=1;NUMALT=1;ODDS=45.5605;PAIRED=0;PAIREDR=0;RO=0;RPP=3.73412;RPPR=0;RUN=1;SAP=3.73412;SRP=0;TYPE=snp;XAI=0;XAM=0;XAS=0;XRI=0;XRM=0;XRS=0;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:3:0:0:3:212:-19.7867,0
gi|360034408|ref|NC_016445.1|_2000000_2400000 256 . A C 250.1 . AB=0;ABP=0;AC=1;AF=1;AN=1;AO=6;CIGAR=1X;DP=8;DPRA=0;EPP=8.80089;EPPR=3.0103;HWE=-0;LEN=1;MEANALT=1;MQM=54;MQMR=54;NS=1;NUMALT=1;ODDS=57.5877;PAIRED=0;PAIREDR=0;RO=2;RPP=8.80089;RPPR=7.35324;RUN=1;SAP=4.45795;SRP=3.0103;TYPE=snp;XAI=0;XAM=0;XAS=0;XRI=0;XRM=0;XRS=0;BVAR GT:GQ:DP:RO:QR:AO:QA:GL 1:50000:8:2:142:6:420:-38.5,-13.49
>A
CCCGCTCGCCACGCTTTGGCCATAGTGCTGCCTTCTACGATGTGTAAACCGTGCAACTTAATGCCATCGGTGCCTACCTT
CAGTACTTGCTGTAACGTGGTGAGGTTTTCAGTGCGCTCTTCACCGGGTAACCCAACAATCAAGTGAGTACACACTTTGA
TACCTAACGCTCTAGCTTTGGCAGTGATCTCTGCGTAGCAGGCAAAATCGTGCCCGCGGTTAATGCGTTTTAAAGTCTGG
>B
CCCGCTCGCCACGCTTTGGCCATAGTGCTGCCTTCTACGATGTGTAAACCGTGCAACTTAATGCCATCGGTGCCTACCTT
CAGTACTTGCTGTAACGTGGTGAGGTTTTCAGTGCGCTCTTCACCGGGTAACCCAACAATCAAGTGAGTACACACTTTGA
TACCTAACGCTCTAGCTTTGGCAGTGATCTCTGCGTAGCAGGCAAAATCGTGCCCGCGGTTAATGCGTTTTAAAGTCTGG
#Chromosome Position Status Reference v1 v2
ref1 2 valid T A A
ref2 3 valid A T T
#Reference,total length,total invalid pos, total core,Percentage in core
ref1,4,N/A,4,100.00
ref2,4,N/A,4,100.00
all,8,N/A,8,100
>v1
ATT
>v2
ANA
>vcf2pseudoalignment.input.1.reference.fasta
TGA
#Chromosome Position Status Reference v1 v2
ref1 2 valid T A A
ref1 5 filtered-invalid C A N
ref1 7 filtered-coverage G T -
ref2 3 valid A T A
<?xml version="1.0"?>
<tool_dependency>
  <!-- Dependencies resolved from existing Tool Shed repositories owned by iuc. -->
  <package name="mummer" version="3.23">
    <repository name="package_mummer_3_23" owner="iuc"/>
  </package>
  <package name="perl" version="5.18.1">
    <repository name="package_perl_5_18" owner="iuc" prior_installation_required="True"/>
  </package>

  <!-- core-phylogenomics is installed from git by the recipe below. -->
  <package name="core-phylogenomics" version="1.1">
    <install version="1.0">
      <actions>
        <!-- Install the listed Perl modules against the perl 5.18.1 package. -->
        <action type="setup_perl_environment">
          <repository name="package_perl_5_18" owner="iuc">
            <package name="perl" version="5.18.1"/>
          </repository>
          <package>Parallel::ForkManager</package>
          <package>https://cpan.metacpan.org/authors/id/C/CJ/CJFIELDS/BioPerl-1.6.901.tar.gz</package>
          <package>https://cpan.metacpan.org/authors/id/A/AJ/AJPAGE/Bio-Pipeline-Comparison-1.123050.tar.gz</package>
        </action>

        <!-- Clone the pipeline, then pin it and lib/vcf2pseudoalignment to fixed commits. -->
        <action type="change_directory">..</action>
        <action type="shell_command">git clone --recursive https://github.com/apetkau/core-phylogenomics.git</action>
        <action type="change_directory">core-phylogenomics</action>
        <action type="shell_command">git checkout ec125272170beb2baa7821d948079102a37ac173</action>
        <action type="change_directory">lib/vcf2pseudoalignment</action>
        <action type="shell_command">git checkout 4bf2cddaa8d79d1e05523da6afb535651335668c</action>
        <action type="change_directory">../..</action>

        <!-- Copy the checked-out tree into the install directory. -->
        <action type="move_directory_files">
          <source_directory>.</source_directory>
          <destination_directory>$INSTALL_DIR/core-phylogenomics</destination_directory>
        </action>

        <!-- Variables the tool wrappers use to locate the installed scripts. -->
        <action type="set_environment">
          <environment_variable name="VCF2PSEUDO" action="set_to">$INSTALL_DIR/core-phylogenomics/lib/vcf2pseudoalignment</environment_variable>
          <environment_variable name="PERL5LIB" action="prepend_to">$INSTALL_DIR/lib/perl5</environment_variable>
          <environment_variable name="VCF_LIB" action="set_to">$INSTALL_DIR/core-phylogenomics/lib</environment_variable>
          <environment_variable name="SCRIPTS" action="set_to">$INSTALL_DIR/core-phylogenomics/scripts</environment_variable>
        </action>
      </actions>
    </install>
    <readme>
    </readme>
  </package>
</tool_dependency>
\ No newline at end of file
<tool id="vcf2core" name="VCF 2 % Core" version="0.0.3">
  <!--
    Galaxy wrapper for vcf2core.pl: takes a fasta dataset, a coverage cutoff,
    a positions table and a list collection of mpileup VCF files, and captures
    the script's standard output as a tabular dataset.
  -->
  <description>Determine genomics core from Mpileup vcf files</description>
  <requirements>
    <requirement type="package" version="1.1">core-phylogenomics</requirement>
    <requirement type="package" version="5.18.1">perl</requirement>
  </requirements>
  <!--
    The backslash stops Cheetah from treating VCF2PSEUDO as a template
    variable, so the shell expands it at run time.
    NOTE(review): the leading slash presumably stops Galaxy from prefixing the
    tool directory to the script path; confirm before changing it.
    Each collection element is passed as one mpileup option of the form
    "element name=dataset path". The redirect target is double-quoted like the
    other dataset paths.
  -->
  <command interpreter="perl">
    /\$VCF2PSEUDO/vcf2core.pl --fasta "$fasta"
      --coverage-cutoff "$coverage"
      --positions "$positions"
    #for $f in $mpileup_collection.keys# --mpileup "$f=$mpileup_collection[$f]" #end for#
      > "$out"
  </command>
  <inputs>
    <param name="fasta" type="data" label="Fasta file" format="fasta"/>
    <param name="coverage" type="integer" value="15" label="minimum coverage"/>
    <param name="positions" type="data" label="Pseudo Positions file" format="tabular"/>
    <param name="mpileup_collection" type="data_collection" label="Mpileup VCF" help="" optional="false" format="tabular" collection_type="list" />
  </inputs>
  <outputs>
    <data format="tabular" name="out" />
  </outputs>
  <!-- Any exit status of 1 or above marks the job as failed. -->
  <stdio>
    <exit_code range="1:" level="fatal" description="Unknown error has occurred"/>
  </stdio>
  <!-- The test below is disabled in the original file and is kept as-is. -->
  <!--<tests>
    <test>
      <param name="fasta" value="vcf2core/reference.fasta"/>
      <param name="coverage" value="4"/>
      <param name="positions" value="vcf2core/expected.positions.tsv"/>
      <param name="mpileup_collection">
        <collection type="list">
          <element name="v1" value="vcf2core/mpileup/v1.vcf.gz"/>
          <element name="v2" value="vcf2core/mpileup/v2.vcf.gz"/>
        </collection>
      </param>
      <output name="out" file="vcf2core/expected_core.csv"/>
    </test>
  </tests>-->
  <!-- Help is reStructuredText: titles and lists need surrounding blank lines, and the text stays at column 0. -->
  <help>
What it does
============

Determine genomics core from Mpileup vcf files

Usage
=====

**Parameters**

- Fasta reference file
- Coverage cutoff (integer): minimum coverage for base pair to be considered
- Masking positions file (tab separated file): positions to mask in the analysis
- VCF file from mpileup
  </help>
</tool>
#!/bin/bash
# Run vcf2pseudoalignment.pl and move its three result files to the
# destinations given on the command line.
#
# Usage:
#   vcf2pseudoalignment.sh POSITIONS PHYLIP FASTA [vcf2pseudoalignment.pl options...]
#
#   POSITIONS  destination for pseudoalign-positions.tsv
#   PHYLIP     destination for pseudoalign.phy
#   FASTA      destination for pseudoalign.fasta
#   remaining arguments are forwarded unchanged to vcf2pseudoalignment.pl
#
# Requires the VCF2PSEUDO environment variable to point at the directory
# containing vcf2pseudoalignment.pl.
#
# Exit status: 0 on success; the script's own status if vcf2pseudoalignment.pl
# fails; 1 on a usage error or if a result file cannot be moved.

if [ "$#" -lt 3 ]; then
    echo "Usage: $0 POSITIONS PHYLIP FASTA [vcf2pseudoalignment.pl options...]" >&2
    exit 1
fi

positions=$1
shift
phylip=$1
shift
fasta=$1
shift

#get working directory so we can find the output files
CUR_DIR=$(pwd)
# -p: do not complain when the directory is already there
mkdir -p "$CUR_DIR/pseudoalign"

#run vcf2pseudoalignment
# "$@" keeps every forwarded argument intact, even if it contains whitespace.
# A failure is propagated so the caller can see it instead of a blanket success.
"$VCF2PSEUDO/vcf2pseudoalignment.pl" "$@" -o "$CUR_DIR/pseudoalign" || exit $?

#mv output files to where galaxy can use them
mv "$CUR_DIR/pseudoalign-positions.tsv" "$positions" || exit 1
mv "$CUR_DIR/pseudoalign.fasta" "$fasta" || exit 1
mv "$CUR_DIR/pseudoalign.phy" "$phylip" || exit 1

exit 0
<tool id="vcf2pseudoalignment" name="VCF 2 pseudoalignment" version="0.0.10">
  <!--
    Galaxy wrapper that calls vcf2pseudoalignment.sh with three output paths
    (positions, phylip, fasta) followed by the options for the underlying
    script.
  -->
  <description>create a pseudo alignment from multiple VCFs files</description>
  <requirements>
    <requirement type="package" version="1.1">core-phylogenomics</requirement>
    <requirement type="package" version="5.18.1">perl</requirement>
  </requirements>
  <!--
    The three output paths come first, in the order positions, phylip, fasta.
    The invalid-position option is only emitted when a dataset was chosen.
    $ambiguous is deliberately left unquoted: it renders to nothing when the
    box is unticked, and quoting it would pass an empty argument.
    Each collection element is passed as "element name=dataset path".
  -->
  <command interpreter="bash">
    vcf2pseudoalignment.sh "$positions" "$phylip" "$fasta"
      -r "$reference"
    #if str($invalid) != 'None':
      --invalid-pos "$invalid"
    #end if
      -c "$coverage"
      -f fasta
      -f phylip
      -v
      --numcpus $numcpus
      $ambiguous
    #for $f in $freebayes_collection.keys# --vcfsplit "$f=$freebayes_collection[$f]" #end for#
    #for $f in $mpileup_collection.keys# --mpileup "$f=$mpileup_collection[$f]" #end for#
  </command>
  <inputs>
    <!-- Free-text sequence name, so no dataset format applies here. -->
    <param name="reference" type="text" label="Reference Name" value="reference"/>
    <param name="invalid" type="data" label="Invalid position file" format="tabular" optional="true"/>
    <param name="coverage" type="integer" value="15" label="minimum coverage"/>
    <param name="ambiguous" truevalue="--keep-ambiguous" falsevalue="" type="boolean" checked="false" label="Keep ambiguous base pair"/>
    <param name="freebayes_collection" type="data_collection" label="FreeBayes filtered VCF" help="" optional="false" format="vcf" collection_type="list" />
    <param name="mpileup_collection" type="data_collection" label="Mpileup VCF" help="" optional="false" format="tabular" collection_type="list" />
    <param name="numcpus" type="select" label="Number of CPUS" multiple="false" optional="false">
      <option value="4">4</option>
      <option value="8">8</option>
      <option value="16">16</option>
      <option value="32">32</option>
    </param>
  </inputs>
  <outputs>
    <data format="tabular" name="positions" />
    <data format="fasta" name="fasta" />
    <data format="phylip" name="phylip" />
  </outputs>
  <!-- Any exit status of 1 or above marks the job as failed. -->
  <stdio>
    <exit_code range="1:" level="fatal" description="Unknown error has occurred"/>
  </stdio>
  <tests>
    <test>
      <param name="reference" value="vcf2pseudoalignment.input.1.reference.fasta"/>
      <param name="coverage" value="4"/>
      <param name="invalid" value="vcf2pseudoalignment.input.1.invalid_positions.tsv"/>
      <param name="ambiguous" value="true"/>
      <param name="freebayes_collection">
        <collection type="list">
          <element name="v1" value="vcfsplit/vcf2pseudoalignment.input.1.v1.vcf.gz"/>
          <element name="v2" value="vcfsplit/vcf2pseudoalignment.input.1.v2.vcf.gz"/>
        </collection>
      </param>
      <param name="mpileup_collection">
        <collection type="list">
          <element name="v1" value="mpileup/vcf2pseudoalignment.input.1.v1.vcf.gz"/>
          <element name="v2" value="mpileup/vcf2pseudoalignment.input.1.v2.vcf.gz"/>
        </collection>
      </param>
      <param name="numcpus" value="4"/>
      <!-- The output name must match a declared output; the tabular one is called "positions". -->
      <output name="positions" file="vcf2pseudoalignment.output.1.positions.tsv"/>
      <output name="fasta">
        <assert_contents>
          <has_text text=">vcf2pseudoalignment.input.1.reference.fasta"/>
          <has_text text="TGA"/>
          <has_text text=">v1"/>
          <has_text text="ATT"/>
          <has_text text=">v2"/>
          <has_text text="ANA"/>
        </assert_contents>
      </output>
      <output name="phylip">
        <assert_contents>
          <has_line_matching expression="vcf2pseudoalignment.input.1\S+\s+TGA"/>
          <has_line_matching expression="v1\s+ATT"/>
          <has_line_matching expression="v2\s+ANA"/>
        </assert_contents>
      </output>
    </test>
  </tests>
  <!-- Help is reStructuredText: titles and lists need surrounding blank lines, and the text stays at column 0. -->
  <help>
What it does
============

Create a pseudo alignment from multiple VCFs files

Usage
=====

**Parameters**

- Reference Name: Sequence name from reference file
- Minimum coverage: minimum coverage for base pair to be considered
- Keep ambiguous file: yes/no
- FreeBayes filtered VCF: VCF file from FreeBayes
- Mpileup VCF: VCF file from Mpileup
- Number of CPUs

**Options**

- Masking positions file: positions to mask in the analysis
  </help>
</tool>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment