Skip to content

Commit

Permalink
added extra warnings if sequences reported in GFF are missing from fast…
Browse files Browse the repository at this point in the history
…a. Panaroo will throw an error if no valid sequences are found in a GFF
  • Loading branch information
gtonkinhill committed Jan 17, 2024
1 parent a962e84 commit 1a6c59a
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions panaroo/prokka.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ def get_gene_sequences(gff_file_name, file_number, filter_seqs, table):
continue

scaffold_id = None
gene_sequence = None
for sequence_index in range(len(sequences)):
scaffold_id = sequences[sequence_index].id
if scaffold_id == entry.seqid:
Expand Down Expand Up @@ -207,6 +208,15 @@ def get_gene_sequences(gff_file_name, file_number, filter_seqs, table):
scaffold_genes[scaffold_id] = scaffold_genes.get(
scaffold_id, [])
scaffold_genes[scaffold_id].append(gene_record)
if gene_sequence is None:
print('Sequence ID not found in Fasta!', entry.seqid)
if filter_seqs: continue
else: raise ValueError("Invalid gene sequence!")

if len(scaffold_genes) == 0:
print("No valid sequences found in GFF!", gff_file_name)
raise ValueError("Invalid GFF!")

for scaffold in scaffold_genes:
scaffold_genes[scaffold] = sorted(scaffold_genes[scaffold],
key=lambda x: x[0])
Expand Down

0 comments on commit 1a6c59a

Please sign in to comment.