Commit a3175750 authored by JCabral's avatar JCabral

adding nml seqtk sample tool

parent f7ad074e
#!/usr/bin/env perl
use strict;
use Bio::SeqIO;
my ($fastaref, $type, $coverage, $outfile, $outfile_rev, $length, $subsample_size, @fastqs, $rv);
$fastaref = $ARGV[0];
$type = $ARGV[0];
if ($type eq "single"){
$fastqs[0] = $ARGV[0];
$fastqs[0] = $ARGV[0];
$fastqs[1] = $ARGV[0];
$coverage = $ARGV[0];
$outfile = $ARGV[0];
if ($type ne "single"){
$outfile_rev = $ARGV[0];
if (!($coverage)){
$coverage = 50;
my $seq_in = Bio::SeqIO->new(
-format => 'fasta',
-file => $fastaref,
while ( my $seq = $seq_in->next_seq()) {
$length += $seq->length();
my $all_fastq = join (' ' , map { "\"$_\"" } @fastqs);
#one liner from : with some modification by Philip Mabon
my $total=`cat $all_fastq | perl -ne '\$s=<>;<>;<>;chomp(\$s);print length(\$s)."\n";' | sort | uniq -c | perl -e 'while(my \$line=<>){chomp \$line; \$line =~s/^\\s+//;(\$l,\$n)=split/\\s+/,\$line; \$t+= \$l*\$n;} print "\$t\n"'`;
print "Total number of Basepairs: $total\n";
print "Length of Reference: $length\n";
my $rawcoverage = $total/$length;
printf "Raw Coverage: %.3f\n",$rawcoverage;
my $out = $outfile;
my $i = 0;
if($rawcoverage > $coverage){
#need to downsample
#calculate $subsample_size
$subsample_size = $coverage/$rawcoverage;
printf("subsample: %.3f",$subsample_size);
my $out = $outfile;
my $i = 0;
foreach my $fastq (@fastqs){
if($i > 0){
$out = $outfile_rev;
#seed always set to 42 for reproducibility
my $seqCommand = "seqtk sample -s42 $fastq $subsample_size > $out";
$rv = system($seqCommand);
$i ++;
} else {
#no sampling needed, just copy the fastq's to the output
print "Subsampling not required\n";
foreach my $fastq (@fastqs){
if($i > 0){
$out = $outfile_rev;
my $copyCommand = "scp $fastq $out";
$rv = system($copyCommand);
$i ++;
exit $rv >> 8 if $rv >>8;
#return $rv >> 8;
<tool id="seqtk_nml_sample" name="seqTK Sample NML" version="0.0.1">
<description>Runs seqTK sample if raw coverage is above user defined threshold </description>
<requirement type="package" version="1.0-r75-dirty">seqtk</requirement>
<requirement type="package" version="5.18.1">perl</requirement>
<requirement type="package" version="1.6.922">bioperl</requirement>
<command interpreter="perl"> $fastar
#if $single_or_paired.type == "single"
#elif $single_or_paired.type == "paired"
$single_or_paired.forward_pe $single_or_paired.reverse_pe
$single_or_paired.fastq_collection.forward $single_or_paired.fastq_collection.reverse
#end if
$coverage $output
#if $single_or_paired.type != "single"
#end if
<exit_code range="1:" level="fatal" description="Unknown error has occured"/>
<conditional name="single_or_paired">
<param name="type" type="select" label="Read type">
<option value="single">Single-end</option>
<option value="paired">Paired-end</option>
<option value="collection">Collection Paired-end</option>
<when value="single">
<param name="input_se" type="data" format="fastqsanger" label="Single end read file(s)"/>
<when value="paired">
<param name="forward_pe" type="data" format="fastqsanger" label="Forward paired-end read file"/>
<param name="reverse_pe" type="data" format="fastqsanger" label="Reverse paired-end read file"/>
<when value="collection">
<param name="fastq_collection" type="data_collection" label="Paired-end reads collection" optional="false" format="txt" collection_type="paired" />
<param name="fastar" type="data" label="Fasta Reference File" format="fasta" />
<param name="coverage" type="integer" label="Desired Coverage" value="50" />
<data format="fastqsanger" name="output" label="SubSampled Fastq" />
<data format="fastqsanger" name="output_rev" label="SubSampled Fastq Reverse">
What it does
Calculates raw coverage. If the raw coverage is greater than desired coverage, runs seqTK sample to generate downsampled reads.
- Fastq reads (single end, paired end, or paired end collection)
- Fasta reference file
- Desired coverage (50)
<?xml version="1.0"?>
<package name="perl" version="5.18.1">
<repository name="package_perl_5_18" owner="iuc" />
<package name="seqtk" version="1.0-r75-dirty">
<repository name="package_seqtk_1_0_r75" owner="iuc" />
<package name="bioperl" version="1.6.922">
<repository name="package_bioperl_1_6" owner="iuc" />
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment