diff --git a/CHANGELOG.md b/CHANGELOG.md index 82f08e4..789a654 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# 2023/04/18 # + +* updated snpSort.sh, concat2merge.pl : speed up vcf sort and merge +* updated init.sh ; use HP_DP as minimum depth instead of 100 +* update README.md + # 2023/03/03 # * updated downsampleSam.sh, downsampleSam.pl, filterSam.pl : alignments are sorted by the reads name before downsampling diff --git a/README.md b/README.md index 311ccd0..8bcc2c3 100644 --- a/README.md +++ b/README.md @@ -394,3 +394,22 @@ bwa_mem.sh fastq/sample bams/sample ls bams/ + +### How to update the SNV filtering criteria ### + +* By default, the SNV with DP<100 or labeled as "strict_strand|strand_bias|base_qual|map_qual|weak_evidence|slippage|position|HP" are filtered out + +* To change the criteria, update the following 2 variables in the "init.sh" file + + export HP_FRULE="egrep -v 'strict_strand|strand_bias|base_qual|map_qual|weak_evidence|slippage|position|HP'" + export HP_DP=100 + +* Samples with multiple problems (failing haplocheck, multiple NUMTs, multiple heteroplasmies belonging to a different haplogroup, average coverage < HP_DP ...) are saved in "*.suspicious.{id,tab}" files + +* By default, suspicious samples are not removed; The users should check these files and maually remove the samples + + + + + + diff --git a/VERSION.md b/VERSION.md index 7956071..378aed5 100644 --- a/VERSION.md +++ b/VERSION.md @@ -1 +1 @@ -20230303 +20230418 diff --git a/scripts/concat2merge.pl b/scripts/concat2merge.pl index 522d782..9a49331 100755 --- a/scripts/concat2merge.pl +++ b/scripts/concat2merge.pl @@ -44,7 +44,10 @@ close(IN); ####################################################### - my ($key,$sample,@keys,%keys,%GT_DP_AF); + + my ($pkey,$sample,@keys,%keys,%GT_DP_AF,$header); + my ($AC,$AN)=(0,0); + while(<>) { if(/^##/) @@ -60,86 +63,66 @@ } else { + #0 1 2 3 4 + #chrM 42 . T TC + + if(!$header) { print join "\t",("#CHROM","POS","ID","REF","ALT","QUAL","FILTER","INFO","FORMAT",@samples); print "\n"; $header=1} + my @F=split; - $F[2]="."; - $F[5]="."; - $F[6]="."; + @F[2,5,6]=(".",".","."); + + if($F[-1]=~/^(.+):(.+):1$/) { $F[-1]="1:$2:1" } + elsif($F[-1]=~/^(.+):(.+):(.+)$/) { $F[-1]="0/1:$2:$3" } + - my $sample;; - if($F[7]=~/SM=(.+?);(.+)/) - { - $sample=$1; - $F[7]=$2; - } - elsif($F[7]=~/SM=(.+)/) - { - $sample=$1; - $F[7]="."; - } + my $sample; + if($F[7]=~/SM=(.+?);(.+)/) { ($sample,$F[7])=($1,"$2;");} + elsif($F[7]=~/SM=(.+)/) { ($sample,$F[7])=($1,"");} - defined($samples{$sample}) or die "ERROR3: $_"; + defined($samples{$sample}) or die "ERROR: sample not defined in $_"; my $key=join "\t",@F[0..7]; + + if($pkey and $key ne $pkey) + { + my @P=($pkey."AC=$AC;AN=$AN","GT:DP:AF"); - #april 28 2022 - $F[-1]="1:$2:1" if($F[-1]=~/^(.+):(.+):(.+)$/ and $3==1); + foreach my $sample (@samples) + { + if($GT_DP_AF{$sample}) { push @P,$GT_DP_AF{$sample} ; } + else { push @P,".:.:." } + } - $GT_DP_AF{$key}{$sample}=$F[-1]; + print join "\t",@P; print "\n"; - if(!$keys{$key}) - { - $keys{$key}=1; - push @keys,$key; + %GT_DP_AF=(); + ($AC,$AN)=(0,0); + } + + $pkey=$key; + + $GT_DP_AF{$sample}=$F[-1]; + $AC++; + $AN++; + $AN++ if($GT_DP_AF{$sample}=~/\|/ or $GT_DP_AF{$sample}=~/\//) + } } - print join "\t",("#CHROM","POS","ID","REF","ALT","QUAL","FILTER","INFO","FORMAT",@samples); print "\n"; - foreach my $key (@keys) - { - my @F=(); - my @SF; - push @F,($key,"GT:DP:AF"); - - my $AC=0; - my $AN=0; - foreach my $i (1..@samples) - { - my $sample=$samples[$i-1]; + #end + if($pkey) + { + my @P=($pkey."AC=$AC;AN=$AN","GT:DP:AF"); - if($GT_DP_AF{$key}{$sample}) - { - push @F,$GT_DP_AF{$key}{$sample} ; - push @SF,$i; - - my $GT_DP_AF=$GT_DP_AF{$key}{$sample}; - $GT_DP_AF=~/(.+?):/; - my $GT=$1; - my @GT=split /[|\/]/,$GT; - - #orig - #if($GT[0]==1 or @GT>1 and $GT[1]==1) { $AC++; $AN+=2; } - #else { $AN+=2; } - - #new; April 28 2022 - $AC++; - $AN++; - if($F[-1]=~/\|/ or $F[-1]=~/\//) - { - $AN++; - } - else - { - $AN+=0; - } - } - else { push @F,".:.:." } - } + foreach my $sample (@samples) + { + if($GT_DP_AF{$sample}) { push @P,$GT_DP_AF{$sample} ; } + else { push @P,".:.:." } + } - if($F[0]=~/(.+)\t\.$/) { $F[0]="$1\tAC=$AC;AN=$AN" } - elsif($F[0]=~/(.+)\t(\S+)$/) { $F[0]="$1\tAC=$AC;AN=$AN;$2" } - print join "\t",@F; - print "\n"; + print join "\t",@P; print "\n"; } + exit 0; } diff --git a/scripts/filter.sh b/scripts/filter.sh index 86954bf..4073af2 100755 --- a/scripts/filter.sh +++ b/scripts/filter.sh @@ -117,7 +117,7 @@ fi #if [ ! -s $O.count ] ; then samtools idxstats $O.bam -@ $HP_P | idxstats2count.pl -sample $S -chrM $HP_MT > $O.count ; fi if [ ! -s $O.cvg ] ; then cat $O.bam | bedtools bamtobed -cigar | grep "^$HP_MT" | bedtools genomecov -i - -g $HP_RDIR/$HP_MT.fa.fai -d | tee $O.cvg | cut -f3 | st.pl | perl -ane 'if($.==1) { print "Run\t$_" } else { print "$ENV{S}\t$_" }' > $O.cvg.stat ; fi -if [ ! -s $OR.cvg ] ; then cat $OR.bam | bedtools bamtobed -cigar | grep "^$HP_MT" | bedtools genomecov -i - -g $HP_RDIR/$HP_MTR.fa.fai -d | tee $OR.cvg | cut -f3 | st.pl | perl -ane 'if($.==1) { print "Run\t$_" } else { print "$ENV{S}\t$_" }' > $OR.cvg.stat ; fi +#if [ ! -s $OR.cvg ] ; then cat $OR.bam | bedtools bamtobed -cigar | grep "^$HP_MT" | bedtools genomecov -i - -g $HP_RDIR/$HP_MTR.fa.fai -d | tee $OR.cvg | cut -f3 | st.pl | perl -ane 'if($.==1) { print "Run\t$_" } else { print "$ENV{S}\t$_" }' > $OR.cvg.stat ; fi if [ ! -f $O.sa.bed ] ; then samtools view -h $O.bam -@ $HP_P | sam2bedSA.pl | uniq.pl -i 3 | sort -k2,2n -k3,3n > $O.sa.bed ; fi ######################################################################################################################################### @@ -230,7 +230,7 @@ if [ ! -s $OS.bam ] ; then rm -f $OS.*sam $OS.score $O.fq $ON.score fi -rm -f $O.bam* +rm -f $O.bam* $OR.bam* rm -f $OS.*idx $OS.*tsv $OS.*stats # exit if the number of iterations is set to 1 @@ -246,7 +246,7 @@ fi #if [ ! -s $OS.count ] ; then samtools idxstats $OS.bam -@ $HP_P | idxstats2count.pl -sample $S -chrM $S > $OS.count ; fi if [ ! -s $OS.cvg ] ; then cat $OS.bam | bedtools bamtobed -cigar | grep "^$S" | bedtools genomecov -i - -g $OS.fa.fai -d | tee $OS.cvg | cut -f3 | st.pl | perl -ane 'if($.==1) { print "Run\t$_" } else { print "$ENV{S}\t$_" }' > $OS.cvg.stat ; fi -if [ ! -s $OSR.cvg ] ; then cat $OSR.bam | bedtools bamtobed -cigar | grep "^$S" | bedtools genomecov -i - -g $OSR.fa.fai -d | tee $OSR.cvg | cut -f3 | st.pl | perl -ane 'if($.==1) { print "Run\t$_" } else { print "$ENV{S}\t$_" }' > $OSR.cvg.stat ; fi +#if [ ! -s $OSR.cvg ] ; then cat $OSR.bam | bedtools bamtobed -cigar | grep "^$S" | bedtools genomecov -i - -g $OSR.fa.fai -d | tee $OSR.cvg | cut -f3 | st.pl | perl -ane 'if($.==1) { print "Run\t$_" } else { print "$ENV{S}\t$_" }' > $OSR.cvg.stat ; fi if [ ! -f $OS.sa.bed ] ; then samtools view -h $OS.bam | sam2bedSA.pl | uniq.pl -i 3 | sort -k2,2n -k3,3n > $OS.sa.bed ; fi ######################################################################################################################################### diff --git a/scripts/getSummary.sh b/scripts/getSummary.sh index 27dfe0d..3c827c2 100755 --- a/scripts/getSummary.sh +++ b/scripts/getSummary.sh @@ -26,13 +26,13 @@ V=$HP_V #count,cvg awk '{print $3}' $HP_IN | sed "s|$|.cvg.stat|" | xargs cat | uniq.pl -i 0 > $ODIR/cvg.tab -awk '{print $3}' $HP_IN | sed "s|$|.$S.00.vcf|" | xargs cat | uniq.pl | bedtools sort -header | eval $HP_FRULE > $ODIR/$S.00.concat.vcf -#Sept 1 ; ARIC CDG only -#reAnnotateVcf.sh $ODIR/$S.00.concat.vcf $ODIR/$S.00.concat.vcf2 ; mv $ODIR/$S.00.concat.vcf2 $ODIR/$S.00.concat.vcf +#March 7th 2023 +awk '{print $3}' $HP_IN | sed "s|$|.$S.00.vcf|" | xargs cat | grep -v "^##sample=" | uniq.pl | bedtools sort -header | eval $HP_FRULE > $ODIR/$S.00.concat.vcf snpSort.sh $ODIR/$S.00.concat cat $ODIR/$S.00.concat.vcf | grep -v "^#" | sed 's|:|\t|g' | count.pl -i -1 -round 100| sort -n > $ODIR/$S.00.AF.histo + if [ $V ] ; then awk '{print $3}' $HP_IN | sed "s|$|.$V.00.vcf|" | xargs cat | uniq.pl | bedtools sort -header | eval $HP_FRULE > $ODIR/$V.00.concat.vcf ; fi #haplogroups @@ -69,12 +69,7 @@ if [ $S == "mutserve" ] ; then exit 0 ; fi SS=$S.$S awk '{print $3}' $HP_IN | sed "s|$|.$S.cvg.stat|" | xargs cat | uniq.pl -i 0 > $ODIR/$S.cvg.tab -awk '{print $3}' $HP_IN | sed "s|$|.$SS.00.vcf|" | xargs cat | uniq.pl | bedtools sort -header > $ODIR/$SS.00.concat.vcf - -#Sept 1 2022 ; ARIC CDG only; March 1 2023 edited/commented the following 3 lines (were meant for an older project) -#awk '{print $3}' $HP_IN | sed "s|$|.mutect2.max.vcf|" | xargs cat > $ODIR/$S.max.vcf -#intersectVcf.pl $ODIR/$S.00.concat.vcf $ODIR/$S.max.vcf -sm | cat $ODIR/$SS.00.concat.vcf - | bedtools sort -header > $ODIR/$SS.00.concat.vcf2; mv $ODIR/$SS.00.concat.vcf2 $ODIR/$SS.00.concat.vcf -#reAnnotateVcf.sh $ODIR/$SS.00.concat.vcf $ODIR/$SS.00.concat.vcf2 ; mv $ODIR/$SS.00.concat.vcf2 $ODIR/$SS.00.concat.vcf +awk '{print $3}' $HP_IN | sed "s|$|.$SS.00.vcf|" | xargs cat | grep -v "^##sample=" | grep -v "^##bcftools_annotateCommand" | uniq.pl | bedtools sort -header | eval $HP_FRULE > $ODIR/$SS.00.concat.vcf snpSort.sh $ODIR/$SS.00.concat cat $ODIR/$SS.00.concat.vcf | grep -v "^#" | sed 's|:|\t|g' | count.pl -i -1 -round 100| sort -n > $ODIR/$SS.00.AF.histo diff --git a/scripts/init.sh b/scripts/init.sh index 5768b93..192b903 100755 --- a/scripts/init.sh +++ b/scripts/init.sh @@ -33,8 +33,8 @@ export PATH=$HP_SDIR:$HP_BDIR:$PATH #hs38DH(default) export HP_RNAME=hs38DH export HP_RMT=chrM -export HP_RNUMT="chr1:629084-634672 chr17:22521208-22521639" # 150bp reads -#export HP_RNUMT="chr1:629080-634925 chr2:148881723-148881858 chr5:80651184-80651597 chr11:10508892-10509738 chr13:109424123-109424381 chr17:22521208-22521639" # 100bp reads +export HP_RNUMT="chr1:629084-634672 chr17:22521208-22521639" # 150bp reads chr2:32916199-32916630 +#export HP_RNUMT="chr1:629080-634925 chr1:76971223-76971280 chr2:148881723-148881858 chr5:80651184-80651597 chr11:10508892-10509738 chr13:109424123-109424381 chr17:22521208-22521639" # 100bp reads export HP_RCOUNT=3366 # 195(hs38DH-no_alt); 194 (hs38DH-no_alt_EBV) export HP_RURL=ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/GRCh38_reference_genome/GRCh38_full_analysis_set_plus_decoy_hla.fa @@ -52,6 +52,9 @@ export HP_RURL=ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/GRCh #export HP_RCOUNT=24 #export HP_RURL=https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chm13.draft_v1.1.fasta.gz +#CHM13.v2; to compute NUMT regions +#export HP_RURL=https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.fna.gz + #Mouse #export HP_RNAME=mm39 #export HP_RMT=chrM @@ -105,7 +108,7 @@ export HP_T3=10 export HP_DP=100 export HP_V= # SV caller: gridssexport HP_DP=100 # minimum coverage: Ex 100 -export HP_FRULE="perl -ane 'print unless(/strict_strand|strand_bias|base_qual|map_qual|weak_evidence|slippage|position|Homopolymer/ and /:0\.[01234]\d+$/);' | bcftools filter -e 'DP<100'" # filter rule (or just "tee") +export HP_FRULE="perl -ane 'print unless(/strict_strand|strand_bias|base_qual|map_qual|weak_evidence|slippage|position|Homopolymer/ and /:0\.[01234]\d+$/);' | bcftools filter -e 'DP<$HP_DP'" # filter rule (or just "tee") export HP_P=1 # number of processors export HP_MM="3G" # maximum memory diff --git a/scripts/sam2bedDA.pl b/scripts/sam2bedDA.pl deleted file mode 100755 index 496fc70..0000000 --- a/scripts/sam2bedDA.pl +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use warnings; -use Getopt::Long; - -############################################################################### -# -# Main program -# -############################################################################### - -MAIN: -{ - # define variables - my %options; - my %len; - - # validate input parameters - my $result = GetOptions( - "min_ins=i" => \$options{min_ins}, - "max_ins=i" => \$options{max_ins}, - ); - die "ERROR: $! " if (!$result); - - #################################### - - while(<>) - { - my @F=split; - next unless(@F); - - #0 1 2 3 4 5 6 7 8 9 - #L7C2CCXX160303:1:1110:11667:61784 163 chrM 1 26 41S110M = 59 209 TAA... * AS:i:105 XS:i:94 SA:Z:chrM,16529,+,41M110S,26,0; XA:Z:chr17,+22521368,8S134M9S,8; MC:Z:151M MQ:i:60 MD:Z:72A37 NM:i:1 RG:Z:HL7C2.1 - - if(/^\@SQ\s+SN:(\S+)\s+LN:(\S+)/) - { - $len{$1}=$2; - next; - } - elsif(/^\@/ or @F<11) - { - next; - } - - next if($F[1]!~/^\d+$/); - next if($F[1] & 0x4); - next if($options{mated} and !($F[1] & 0x2) ); - next if($F[2] eq "*"); - next if($F[1] and $F[1] & 0x100) ; #secondary alignment - next if($F[8]!~/\d+$/); - next if($options{min_ins} and abs($F[8])<$options{min_ins}); - next if($options{max_ins} and abs($F[8])>$options{max_ins}); - - ################################################# - - #SA:Z:chrM,6577,-,101S50M,60,0; - #if(/\tSA:Z:($F[2],\S+)/) - if(/\tMC:Z:(\S+)/ ) - { - my $sacigar=$1; - - ################################################# - my ($ref,$begin,$end,$CIGAR,$cigar,$qry,$strand,$score); - - $qry=$F[0]; - #$qry=$qry."/1" if($F[1] & 0x40); - #$qry=$qry."/2" if($F[1] & 0x80); - $strand=($F[1] & 0x10 )?"-":"+"; - $ref=$F[2]; - - $CIGAR=$cigar=$F[5]; - $begin=$end=$F[3]-1 ; - while($cigar and $cigar=~/(\d+)(\w)(.*)/) - { - $end+=$1 if($2 eq "M" or $2 eq "D" or $2 eq "N") ; - $cigar=$3; - } - $score=$end-$begin; - - ################################################# - - my ($saref,$saqry,$sabegin,$saend,$saCIGAR,$sastrand,$sascore); - - $saqry=$F[0]; - #$saqry=$saqry."/1" if($F[1] & 0x80); - #$saqry=$saqry."/2" if($F[1] & 0x40); - $sastrand=($F[1] & 0x20 )?"-":"+"; - if($F[6] eq "=") { $saref=$ref;}else {$saref=$F[6]} - $sabegin=$saend=$F[7]-1; - $saCIGAR=$sacigar; - - while($sacigar and $sacigar=~/(\d+)(\w)(.*)/) - { - $saend+=$1 if($2 eq "M" or $2 eq "D" or $2 eq "N") ; - $sacigar=$3; - } - $sascore=$saend-$sabegin; - - if($begin<$sabegin) - { - print join "\t",($ref,$begin,$end,$qry,$score,$strand,$CIGAR);print "\t"; - print join "\t",($saref,$sabegin,$saend,$saqry,$sascore,$sastrand,$saCIGAR);print "\n"; - } - else - { - print join "\t",($saref,$sabegin,$saend,$qry,$sascore,$sastrand,$saCIGAR);print "\t"; - print join "\t",($ref,$begin,$end,$saqry,$score,$strand,$CIGAR);print "\n"; - } - } - } - exit 0; -} - diff --git a/scripts/snpCount.sh b/scripts/snpCount.sh index 7695d3c..8bd62fc 100755 --- a/scripts/snpCount.sh +++ b/scripts/snpCount.sh @@ -51,9 +51,18 @@ fi if [ -s $HP_ODIR/$S.00.concat.vcf ] ; then cat $HP_ODIR/$S.00.concat.vcf | filterVcf.pl -p 0.$T -suspicious $HP_ODIR/$HP_M.$T.suspicious.ids > $HP_ODIR/$S.$T.concat.vcf ; fi test -s $HP_ODIR/$S.$T.concat.vcf -cat $HP_ODIR/$S.$T.concat.vcf | concat2cat.pl > $HP_ODIR/$S.$T.vcf -cat $HP_ODIR/$S.$T.concat.vcf | concat2merge.pl -in $HP_IN -suspicious $HP_ODIR/$HP_M.$T.suspicious.ids | bedtools sort -header | tee $HP_ODIR/$S.$T.merge.vcf | vcf2sitesOnly.pl > $HP_ODIR/$S.$T.merge.sitesOnly.vcf -cat $HP_ODIR/$S.$T.concat.vcf | snpCount.pl -in $HP_IN -suspicious $HP_ODIR/$HP_M.$T.suspicious.ids | tee $HP_ODIR/$S.$T.tab | getSummaryN.pl > $HP_ODIR/$S.$T.summary -cat $HP_ODIR/$S.$T.concat.vcf | concat2pos.pl -in $HP_IN -suspicious $HP_ODIR/$HP_M.$T.suspicious.ids | sort -k2,2n -k4,4 -k5,5 > $HP_ODIR/$S.$T.pos +cat $HP_ODIR/$S.$T.concat.vcf | concat2cat.pl > $HP_ODIR/$S.$T.vcf + +# Jan 25 2023 +#cat $HP_ODIR/$S.$T.concat.vcf | concat2merge.pl -in $HP_IN -suspicious $HP_ODIR/$HP_M.$T.suspicious.ids | bedtools sort -header | tee $HP_ODIR/$S.$T.merge.vcf | vcf2sitesOnly.pl > $HP_ODIR/$S.$T.merge.sitesOnly.vcf +#cat $HP_ODIR/$S.$T.concat.vcf | snpCount.pl -in $HP_IN -suspicious $HP_ODIR/$HP_M.$T.suspicious.ids | tee $HP_ODIR/$S.$T.tab | getSummaryN.pl > $HP_ODIR/$S.$T.summary +#cat $HP_ODIR/$S.$T.concat.vcf | concat2pos.pl -in $HP_IN -suspicious $HP_ODIR/$HP_M.$T.suspicious.ids | sort -k2,2n -k4,4 -k5,5 > $HP_ODIR/$S.$T.pos + + +# March 7th 2023 +#cat $HP_ODIR/$S.$T.concat.vcf | concat2merge.pl -in $HP_IN | bedtools sort -header | tee $HP_ODIR/$S.$T.merge.vcf | vcf2sitesOnly.pl > $HP_ODIR/$S.$T.merge.sitesOnly.vcf +cat $HP_ODIR/$S.$T.concat.vcf | concat2merge.pl -in $HP_IN | tee $HP_ODIR/$S.$T.merge.vcf | vcf2sitesOnly.pl > $HP_ODIR/$S.$T.merge.sitesOnly.vcf +cat $HP_ODIR/$S.$T.concat.vcf | snpCount.pl -in $HP_IN | tee $HP_ODIR/$S.$T.tab | getSummaryN.pl > $HP_ODIR/$S.$T.summary +cat $HP_ODIR/$S.$T.concat.vcf | concat2pos.pl -in $HP_IN | sort -k2,2n -k4,4 -k5,5 > $HP_ODIR/$S.$T.pos diff --git a/scripts/snpSort.sh b/scripts/snpSort.sh index 10c0c1d..c620320 100755 --- a/scripts/snpSort.sh +++ b/scripts/snpSort.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash -set -e - +set -ex ############################################################################################################## # Program that sorts SNVs @@ -10,7 +9,8 @@ set -e if [ "$#" -lt 1 ]; then exit 0 ; fi test -s $1.vcf -cat $1.vcf | grep "^#" | grep -v "^##bcftools_annotate" > $1.srt.vcf -cat $1.vcf | grep -v "^#" | sort -k1,1 -k2,2n -k4,4 -k5,5 >> $1.srt.vcf -mv $1.srt.vcf $1.vcf +cat $1.vcf | grep "^#" > $1.srt.vcf +#cat $1.vcf | grep -v "^#" | sort -k1,1 -k2,2n -k4,4 -k5,5 >> $1.srt.vcf +cat $1.vcf | grep -v "^#" | LC_ALL=C sort -k1,1 -k2,2n -k4,4 -k5,5 --parallel=$HP_P -S $HP_MM >> $1.srt.vcf +mv $1.srt.vcf $1.vcf