From 4dc499ea7651c746cf903f6ecc995ed0d2d63d2d Mon Sep 17 00:00:00 2001 From: Gerry Tonkin-Hill Date: Tue, 24 Sep 2024 04:08:18 +0200 Subject: [PATCH] added option to specify 'none' as the aligner. This allows for alignment to be run separately as requested in #306 --- panaroo/__main__.py | 9 +++++---- panaroo/generate_alignments.py | 2 ++ panaroo/generate_output.py | 19 +++++++++++++++++++ panaroo/post_run_alignment_gen.py | 13 +++++++------ 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/panaroo/__main__.py b/panaroo/__main__.py index 0e51864..4f3965d 100755 --- a/panaroo/__main__.py +++ b/panaroo/__main__.py @@ -243,7 +243,7 @@ def get_options(args): help= "Specify an aligner. Options:'prank', 'clustal', and default: 'mafft'", type=str, - choices=['prank', 'clustal', 'mafft'], + choices=['prank', 'clustal', 'mafft', 'none'], default="mafft") core.add_argument( "--codons", @@ -550,9 +550,10 @@ def main(): if args.verbose: print("generating pan genome MSAs...") generate_pan_genome_alignment(G, temp_dir, args.output_dir, args.n_cpu, args.alr, args.codons, isolate_names) - core_nodes = get_core_gene_nodes(G, args.core, len(args.input_files)) - core_names = [G.nodes[x]["name"] for x in core_nodes] - concatenate_core_genome_alignments(core_names, args.output_dir, args.hc_threshold) + if args.alr!='none': + core_nodes = get_core_gene_nodes(G, args.core, len(args.input_files)) + core_names = [G.nodes[x]["name"] for x in core_nodes] + concatenate_core_genome_alignments(core_names, args.output_dir, args.hc_threshold) elif args.aln == "core": if args.verbose: print("generating core genome MSAs...") generate_core_genome_alignment(G, temp_dir, args.output_dir, diff --git a/panaroo/generate_alignments.py b/panaroo/generate_alignments.py index e7d2a6a..0340617 100755 --- a/panaroo/generate_alignments.py +++ b/panaroo/generate_alignments.py @@ -40,6 +40,8 @@ def check_aligner_install(aligner): command = "prank -help" elif aligner == "mafft": command = "mafft --help" 
+ elif aligner == "none": + return True else: sys.stderr.write("Incorrect aligner specification\n") sys.exit() diff --git a/panaroo/generate_output.py b/panaroo/generate_output.py index 8de12c2..18acaa2 100755 --- a/panaroo/generate_output.py +++ b/panaroo/generate_output.py @@ -315,6 +315,11 @@ def generate_pan_genome_alignment(G, temp_dir, output_dir, threads, aligner, aligner, threads) else: + if aligner=='none': + temp_dir = output_dir + "unaligned_gene_sequences/" + if not os.path.exists(temp_dir): + os.mkdir(temp_dir) + #Multithread writing gene sequences to disk (temp directory) so aligners can find them unaligned_sequence_files = Parallel(n_jobs=threads)( delayed(output_sequence)(G.nodes[x], isolates, temp_dir, output_dir) @@ -323,6 +328,10 @@ def generate_pan_genome_alignment(G, temp_dir, output_dir, threads, aligner, #remove single sequence files unaligned_sequence_files = filter(None, unaligned_sequence_files) + if aligner=='none': + print("No aligner specified. Returning unaligned gene fasta files.") + return + #Get Biopython command calls for each output gene sequences commands = [ get_alignment_commands(fastafile, output_dir, aligner, threads) @@ -531,10 +540,20 @@ def generate_core_genome_alignment( output_dir, temp_dir, aligner, threads) else: + if aligner=='none': + temp_dir = output_dir + "unaligned_gene_sequences/" + if not os.path.exists(temp_dir): + os.mkdir(temp_dir) + #Output core node sequences unaligned_sequence_files = Parallel(n_jobs=threads)( delayed(output_sequence)(G.nodes[x], isolates, temp_dir, output_dir) for x in tqdm(core_genes)) + + if aligner=='none': + print("No aligner specified. 
Returning unaligned gene fasta files.") + return + #remove single sequence files unaligned_sequence_files = filter(None, unaligned_sequence_files) diff --git a/panaroo/post_run_alignment_gen.py b/panaroo/post_run_alignment_gen.py index c8450cc..331af62 100755 --- a/panaroo/post_run_alignment_gen.py +++ b/panaroo/post_run_alignment_gen.py @@ -41,7 +41,7 @@ def get_options(): help= "Specify an aligner. Options:'prank', 'clustal', and default: 'mafft'", type=str, - choices={'prank', 'clustal', 'mafft'}, + choices={'prank', 'clustal', 'mafft', 'none'}, default="mafft") core.add_argument( "--codons", @@ -109,11 +109,12 @@ def main(): if args.verbose: print("generating pan genome MSAs...") generate_pan_genome_alignment(G, temp_dir, args.output_dir, args.n_cpu, args.alr, args.codons, isolate_names) - - core_nodes = get_core_gene_nodes(G, args.core, len(isolate_names)) - core_names = [G.nodes[x]["name"] for x in core_nodes] - concatenate_core_genome_alignments(core_names, args.output_dir, - args.hc_threshold) + + if args.alr!='none': + core_nodes = get_core_gene_nodes(G, args.core, len(isolate_names)) + core_names = [G.nodes[x]["name"] for x in core_nodes] + concatenate_core_genome_alignments(core_names, args.output_dir, + args.hc_threshold) elif args.aln == "core": if args.verbose: print("generating core genome MSAs...") generate_core_genome_alignment(G, temp_dir, args.output_dir,