Skip to content

Commit

Permalink
Pick correct models for nmt-tRNAs
Browse files Browse the repository at this point in the history
  • Loading branch information
AntonPetrov committed Feb 2, 2025
1 parent 9261c95 commit 334e465
Show file tree
Hide file tree
Showing 11 changed files with 1,828 additions and 12 deletions.
16 changes: 16 additions & 0 deletions examples/gtrnadb-mito-vert.fasta
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,19 @@ AAGAAAGUGGCAGAGUGGUUAUGUGUUUGGCUUGAAACCAAUUUACGGGGGUUCAAUUCCCUCCUUUCUCG
AGCCCUGAGGUGUAUUUAACAUUUCAGAUUGCAAAUCUGAAGAAGCAGAUUAACGUCUGCCGGGGCUU
>URS0002616B70_9606 Ser NNN/GCT
GAGAAAGCTCACAAGGCCATGCCCCCATGTCTAACAACATGGCTTTCTCACCA
>URS00006B5A23
ACTTTTAAAGGATTAGAGTTAACCATTGGTCTAAGGAACCAAAAACACTGGTGCAACTCCAAATAAAAGTA
>URS000066635D
ACTTTTAAAGGATAAGAGTTATCCATTGGTCTCAGGAACCAAAAACATCGGTGCAACTCCAAATAAAAGTA
>URS00019DBB15
ACTTTTAAATAATAGAAGTAATACATTGGCCTTAAAAGCCAAGAAAATTGGTGCAGCTCCAAATAAAAGTA
>chr1_46824277_46824348
GTTAAGGTGACAGAGCTGGTaATTGTGTAGAACTTAGACTTATAAtCAGAGGTGCAACTCTTCTGCTTAACC
>URS000066B91E
ACUUUUAGAGGAUAGUAGUUAUGCAUUGGUCUCAGGAACCAAAAACUUUGGUGCAACUCCAAAUAAAAGGA
>URS0000C88790
AGUUGUAGAGGAUAAGAGCUAUCCAUUGGUCUCAGGAACCAAAAACAUUGGUGCAACUCCAAAUACAAGUA
>URS00006B0D4F
ACUUUUAAAGGAUAAGAGUUAUCCAUUGGUCUCAGGAACCAAAAACAUUGGUGCAACUCCAAAUAAAAGUA
>URS000068492E
ACUUUUAAAGGAUAAGAGUUAUCCGUUGGUCUCAGGAACCAAAAACAUUGGUGCAACUCCAAAUAAAAGUA
223 changes: 223 additions & 0 deletions tests/examples/gtrnadb/mito-vert/URS000066635D-M_LeuTAG.colored.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
221 changes: 221 additions & 0 deletions tests/examples/gtrnadb/mito-vert/URS000066B91E-M_LeuTAG.colored.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
224 changes: 224 additions & 0 deletions tests/examples/gtrnadb/mito-vert/URS000068492E-M_LeuTAG.colored.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
224 changes: 224 additions & 0 deletions tests/examples/gtrnadb/mito-vert/URS00006B0D4F-M_LeuTAG.colored.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
222 changes: 222 additions & 0 deletions tests/examples/gtrnadb/mito-vert/URS00006B5A23-M_LeuTAG.colored.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
223 changes: 223 additions & 0 deletions tests/examples/gtrnadb/mito-vert/URS0000C88790-M_LeuTAG.colored.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
222 changes: 222 additions & 0 deletions tests/examples/gtrnadb/mito-vert/URS00019DBB15-M_LeuTAG.colored.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 8 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,14 @@ class TestGtrnadbMitoVert(R2dtTestCase):
"URS0000043FFB_392897-M_SerGCT.colored.svg",
"URS0000247C4D_392897-M_Cys.colored.svg",
"URS0002616B70_9606-M_SerGCT.colored.svg",
"chr1_46824277_46824348-M_LeuTAA.colored.svg",
"URS0000C88790-M_LeuTAG.colored.svg",
"URS00006B0D4F-M_LeuTAG.colored.svg",
"URS00006B5A23-M_LeuTAG.colored.svg",
"URS00019DBB15-M_LeuTAG.colored.svg",
"URS000066B91E-M_LeuTAG.colored.svg",
"URS000066635D-M_LeuTAG.colored.svg",
"URS000068492E-M_LeuTAG.colored.svg",
]

def test_examples(self):
Expand Down
38 changes: 26 additions & 12 deletions utils/gtrnadb.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def setup():
get_trnascan_cm(domain, isotype)


def verify_anticodon(isotype, anticodon, start, end):
# pylint: disable=too-many-arguments
def verify_anticodon(isotype, anticodon, start, end, note, domain):
"""
When multiple models are possible, select the most likely one.
Expand All @@ -66,8 +67,23 @@ def verify_anticodon(isotype, anticodon, start, end):
>= 70 bp are usually SerTGA (with a D-arm).
< 65 are usually SerGCT (without a D-arm).
in between can be either.
Special Case: nmt-tRNAs (nuclear-embedded mitochondrial DNA sequences).
nmt-tRNAs can accumulate mutations and may have different anticodons.
In these cases, the note field may contain "inconsistent_ac (anticodon)".
For example, "inconsistent_ac (TAG)".
When this note is present, the anticodon specified in the note field
should be used instead of the one in the anticodon field.
"""
if anticodon != "NNN":
if domain != "M vert": # only mitochondria have multiple models
return anticodon
if anticodon != "NNN" and "inconsistent_ac" not in note:
return anticodon # use the anticodon from the tRNAScan output
match = re.match(r"inconsistent_ac \(([A-Z]{3})\)", note) # inconsistent_ac (TAG)
if match:
anticodon = match.group(1)
return anticodon
seq_length = abs(end - start) + 1
if isotype == "Leu":
Expand All @@ -82,7 +98,7 @@ def verify_anticodon(isotype, anticodon, start, end):
return anticodon


def parse_trnascan_output(filename):
def parse_trnascan_output(filename, domain):
"""
Sequence tRNA Bounds tRNA Anti Intron Bounds Inf
Name tRNA # Begin End Type Codon Begin End Score Note
Expand All @@ -95,7 +111,7 @@ def parse_trnascan_output(filename):
if i in [0, 1, 2]:
continue # skip 3 header lines
parts = [x.strip() for x in line.split("\t")]
seq_id, _, start, end, isotype, anticodon, _, _, score, note = parts
seq_id, _, start, end, isotype, anticodon, _, _, score, *_, note = parts
score = float(score)
start = int(start)
end = int(end)
Expand All @@ -106,7 +122,9 @@ def parse_trnascan_output(filename):
data[seq_id] = {
"score": score,
"isotype": isotype,
"anticodon": verify_anticodon(isotype, anticodon, start, end),
"anticodon": verify_anticodon(
isotype, anticodon, start, end, note, domain
),
"note": note.lower(),
"start": start,
"end": end,
Expand All @@ -116,18 +134,14 @@ def parse_trnascan_output(filename):

def run_trnascan(fasta_input, output_folder, domain):
"""Launch tRNAscan-SE and return parsed results."""
_, extension = os.path.splitext(fasta_input)
output_file = os.path.join(
output_folder,
domain + "-" + os.path.basename(fasta_input).replace(extension, ".txt"),
)
output_file = os.path.join(output_folder, f"{domain}-trnascan.txt")
if domain == "M":
domain = "M vert"
if not os.path.exists(output_file):
runner.run(
f"tRNAscan-SE -c {TRNASCAN_CONF} -q -{domain} -o {output_file} {fasta_input}"
f"tRNAscan-SE --detail -c {TRNASCAN_CONF} -q -{domain} -o {output_file} {fasta_input}"
)
return parse_trnascan_output(output_file)
return parse_trnascan_output(output_file, domain)


def skip_trna(entry):
Expand Down

0 comments on commit 334e465

Please sign in to comment.