From 1a6c59a11fdb2e7361239763d38241b1fe51db23 Mon Sep 17 00:00:00 2001 From: Gerry Tonkin-Hill Date: Wed, 17 Jan 2024 02:09:22 +0100 Subject: [PATCH] added extra warnings if sequences reported in GFF are missing from fasta. Panaroo will throw an error if no valid sequences are found in a GFF --- panaroo/prokka.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/panaroo/prokka.py b/panaroo/prokka.py index b62eedc..243f2fb 100755 --- a/panaroo/prokka.py +++ b/panaroo/prokka.py @@ -159,6 +159,7 @@ def get_gene_sequences(gff_file_name, file_number, filter_seqs, table): continue scaffold_id = None + gene_sequence = None for sequence_index in range(len(sequences)): scaffold_id = sequences[sequence_index].id if scaffold_id == entry.seqid: @@ -207,6 +208,15 @@ def get_gene_sequences(gff_file_name, file_number, filter_seqs, table): scaffold_genes[scaffold_id] = scaffold_genes.get( scaffold_id, []) scaffold_genes[scaffold_id].append(gene_record) + if gene_sequence is None: + print('Sequence ID not found in Fasta!', entry.seqid) + if filter_seqs: continue + else: raise ValueError("Invalid gene sequence!") + + if len(scaffold_genes) == 0: + print("No valid sequences found in GFF!", gff_file_name) + raise ValueError("Invalid GFF!") + for scaffold in scaffold_genes: scaffold_genes[scaffold] = sorted(scaffold_genes[scaffold], key=lambda x: x[0])