Skip to content

Commit

Permalink
Update FluViewer_v_0_0_1.py
Browse files Browse the repository at this point in the history
  • Loading branch information
KevinKuchinski committed Mar 16, 2022
1 parent 516c54e commit e58b142
Showing 1 changed file with 113 additions and 48 deletions.
161 changes: 113 additions & 48 deletions FluViewer_v_0_0_1.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import argparse as ap
import sys as sys
import os as os
import subprocess as sp
import shutil as sh
Expand All @@ -8,66 +8,131 @@

def main():
version = '0.0.1'
args = parse_args(sys.argv, version)
print(f'\nFluViewer v{version}')
print('https://github.com/KevinKuchinski/FluViewer\n')
args = parse_arguments()
if args.mode not in ['assemble', 'align']:
print(f'\nERROR: {args.mode} is not a valid FluViewer mode. Must choose assemble or align.\n')
if args['-m'] not in ['assemble', 'align']:
print(f'\nERROR: "{args["-m"]}" is not a valid FluViewer mode. Must choose "assemble" or "align".\n')
exit(1)
print(f'Analysis mode: {args.mode}')
print(f'Ref seq DB: {args.db}')
print(f'Analysis mode: {args["-m"]}')
print(f'Ref seq DB: {args["-d"]}')
print()
print(f'Analyzing library {args.output}...')
check_input_exists([args.fwd_reads, args.rev_reads, args.db])
check_input_empty([args.fwd_reads, args.rev_reads, args.db])
print(f'Fwd reads: {args.fwd_reads}')
print(f'Rev reads: {args.rev_reads}')
print(f'Analyzing library {args["-o"]}...')
check_input_exists([args['-f'], args['-r'], args['-d']])
check_input_empty([args['-f'], args['-r'], args['-d']])
print(f'Fwd reads: {args["-f"]}')
print(f'Rev reads: {args["-r"]}')
print()
make_out_dir(args.output)
make_out_dir(args['-o'])
print('\nGENERATING CONSENSUS SEQS...')
contigs = assemble_contigs(args.output, args.fwd_reads, args.rev_reads)
blast_out = align_contigs_to_ref_seqs(args.output, contigs, args.db)
blast_results = filter_alignments(args.output, blast_out, args.min_cov, args.min_id)
if args.mode == 'assemble':
ref_seqs = write_best_contigs_fasta(args.output, blast_results, contigs)
elif args.mode == 'align':
ref_seqs = write_best_ref_seqs_fasta(args.output, blast_results, args.db)
bam_out = map_reads(args.output, ref_seqs, args.fwd_reads, args.rev_reads)
vcf_out = call_variants(args.output, args.min_depth, args.min_qual, ref_seqs, bam_out)
consensus_seqs = make_consensus_seqs(args.output, bam_out, args.min_depth, args.min_cov, ref_seqs, vcf_out)
contigs = assemble_contigs(args['-o'], args['-f'], args['-r'])
blast_out = align_contigs_to_ref_seqs(args['-o'], contigs, args['-d'])
blast_results = filter_alignments(args['-o'], blast_out, args['-c'], args['-i'])
if args['-m'] == 'assemble':
ref_seqs = write_best_contigs_fasta(args['-o'], blast_results, contigs)
elif args['-m'] == 'align':
ref_seqs = write_best_ref_seqs_fasta(args['-o'], blast_results, args['-d'])
bam_out = map_reads(args['-o'], ref_seqs, args['-f'], args['-r'])
vcf_out = call_variants(args['-o'], args['-D'], args['-q'], ref_seqs, bam_out)
consensus_seqs = make_consensus_seqs(args['-o'], bam_out, args['-D'], args['-c'], ref_seqs, vcf_out)
print('\nWRITING REPORT...')
sequenced_bases = count_sequenced_bases_in_consensus_seqs(consensus_seqs)
consensus_seq_lengths = get_consensus_seq_lengths(consensus_seqs)
reads_mapped_to_consensus_seqs = count_reads_mapped_to_consensus_seqs(args.output, bam_out)
write_reports(args.output, sequenced_bases, consensus_seq_lengths, reads_mapped_to_consensus_seqs)
reads_mapped_to_consensus_seqs = count_reads_mapped_to_consensus_seqs(args['-o'], bam_out)
write_reports(args['-o'], sequenced_bases, consensus_seq_lengths, reads_mapped_to_consensus_seqs)
clean_headers(consensus_seqs)
if args.garbage == True:
garbage_collection(args.output)
if args['-g'] == 'yes':
garbage_collection(args['-o'])
print('\nDONE: Analysis finished succesfully.\n')
exit(0)


def parse_arguments():
'''Parses command line arguments.'''
parser = ap.ArgumentParser()
parser.add_argument('-f', '--fwd_reads', type=str, required=True, help='Fwd reads')
parser.add_argument('-r', '--rev_reads', type=str, required=True, help='Rev reads')
parser.add_argument('-d', '--db', type=str, required=True, help='Ref seqs DB')
parser.add_argument('-o', '--output', type=str, required=True, help='Output dir')
parser.add_argument('-D', '--min_depth', type=int, required=False, default=20,
help='Min read depth for base calling (int, default=20)')
parser.add_argument('-q', '--min_qual', type=int, required=False, default=30,
help='Min PHRED qual for base calling and read mapping (int, default=30)')
parser.add_argument('-c', '--min_cov', type=float, required=False, default=25,
help='Min ref seq coverage for contigs (percentage, default=25)')
parser.add_argument('-i', '--min_id', type=float, required=False, default=95,
help='Min nucleotide identity for contigs (percentage, default=95)')
parser.add_argument('-m', '--mode', type=str, required=False, default='assemble',
help='FluViewer mode; determines if contigs or ref seqs from DB are used for read mapping.')
parser.add_argument('-g', '--garbage', action='store_false',
help='Garbage collection; if set, garbage collection is disactivated and intermediate files are kept.')
args = parser.parse_args()
return args
def parse_args(args, version):
arg_values = {}
for arg_1, arg_2 in zip(args[1:-1], args[2:]):
if arg_1[0] == '-':
if arg_2[0] != '-':
arg_values[arg_1] = arg_2
else:
arg_values[arg_1] = ''
if args[-1][0] == '-':
arg_values[args[-1]] = ''
# Set defaults, mins, and maxs
required_args = {'-f', '-r', '-d', '-m', '-o'}
arg_value_types = {'-f': str, '-r': str, '-d': str, '-m': str, '-o': str, '-D': int, '-q': int, '-c': float, '-i': float, '-g': str}
min_arg_values = {'-D': 1, '-q': 0, '-c': 0, '-i': 0}
max_arg_values = {'-c': 100, '-i': 100}
default_arg_values = {'-D': 20, '-q': 30, '-c': 25, '-i': 90, '-g': 'yes'}
# Check if all required arguments were provided
missing_args = set()
for required_arg in required_args:
if required_arg not in arg_values.keys() or arg_values[required_arg] == '':
missing_args = missing_args | {required_arg}
if missing_args != set():
print(f'\nERROR: Values must be provided for the argument following arguments: {", ".join(sorted(missing_args))}')
print_usage(version)
exit(1)
# Check if unrecognized arguments were provided
recognized_args = required_args | set(arg_value_types.keys())
unrecognized_args = set()
for provided_arg in arg_values.keys():
if provided_arg not in recognized_args:
unrecognized_args = unrecognized_args | {provided_arg}
if unrecognized_args != set():
print(f'\nERROR: The following arguments are not recognized: {", ".join(sorted(unrecognized_args))}')
print_usage(version)
exit(1)
# Check if arguments were provided without values
empty_args = set()
for arg, value in arg_values.items():
if value == '':
empty_args = empty_args | {arg}
if empty_args != set():
print(f'\nERROR: The following arguments were provided without values: {", ".join(sorted(empty_args))}')
print_usage(version)
exit(1)
# Check if provided values are of the correct type
for arg, value in arg_values.items():
try:
arg_values[arg] = arg_value_types[arg](value)
except ValueError:
print(f'\nERROR: Value for argument {arg} must be of type {str(arg_value_types[arg].__name__)}')
print_usage(version)
exit(1)
# Check if provided values are within the correct range
for arg, value in arg_values.items():
if arg in min_arg_values.keys() and value <= min_arg_values[arg]:
print(f'\nERROR: Value for argument {arg} must be greater than {min_arg_values[arg]}')
print_usage(version)
exit(1)
if arg in max_arg_values.keys() and value >= max_arg_values[arg]:
print(f'\nERROR: Value for argument {arg} must be less than {max_arg_values[arg]}')
print_usage(version)
exit(1)
# Assign default values to unspecified arguments
for arg, value in default_arg_values.items():
if arg not in arg_values.keys():
arg_values[arg] = value
# Return keyword args and their values
return arg_values


def print_usage(version):
print(f'\nFluViewer v{version}')
print('https://github.com/KevinKuchinski/FluViewer\n')
print('Usage: FluViewer -f <path_to_fwd_reads> -r <path_to_rev_reads> -d <path_to_db_file> -o <output_name> -m <mode> [-D <min_depth> -q <min_qual> -c <min_cov> -i <min_id> -g <garbage_collection>]\n')
print('Required arguments:')
print(' -f : path to FASTQ file containing forward reads')
print(' -r : path to FASTQ file containing reverse reads')
print(' -d : path to FASTA file containing FluViewer database')
print(' -o : output name')
print(' -m : run mode ("align" or "assemble")\n')
print('Optional arguments:')
print(' -D : minimum read depth for base calling (default = 20)')
print(' -q : Minimum PHRED score for base quality and mapping quality (default = 30)')
print(' -c : Minimum coverage of database reference sequence by contig (percentage, default = 25)')
print(' -i : Minimum nucleotide sequence identity between database reference sequence and contig (percentage, default = 90)')
print(' -g : Collect garbage and remove intermediate files ("yes" or "no", default = "yes")\n')


def run(terminal_command, error_msg, stdout_file, stderr_file):
Expand Down Expand Up @@ -206,6 +271,7 @@ def filter_alignments(output, blast_out, min_cov, min_id):
if len(blast_results) == 0:
print('DONE: No valid contigs found.')
exit(0)
blast_results.to_csv('blah.tsv')
return blast_results


Expand Down Expand Up @@ -481,4 +547,3 @@ def garbage_collection(output):

if __name__ == '__main__':
main()

0 comments on commit e58b142

Please sign in to comment.