forked from geohot/corona
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlib.py
59 lines (56 loc) · 1.34 KB
/
lib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
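# Rows not included in the table below; their codons already appear there
# under Asn/Asp (N, D), Gln/Glu (Q, E) and Met (M) respectively:
# Asn or Asp / B AAU, AAC; GAU, GAC
# Gln or Glu / Z CAA, CAG; GAA, GAG
# START AUG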
# Codon table, one row per amino acid:
#   "<three-letter name> / <one-letter code> <RNA codons separated by , or ;>"
# The final row lists the stop codons and has no "/ <code>" part.
tt = """Ala / A GCU, GCC, GCA, GCG
Ile / I AUU, AUC, AUA
Arg / R CGU, CGC, CGA, CGG; AGA, AGG
Leu / L CUU, CUC, CUA, CUG; UUA, UUG
Asn / N AAU, AAC
Lys / K AAA, AAG
Asp / D GAU, GAC
Met / M AUG
Phe / F UUU, UUC
Cys / C UGU, UGC
Pro / P CCU, CCC, CCA, CCG
Gln / Q CAA, CAG
Ser / S UCU, UCC, UCA, UCG; AGU, AGC
Glu / E GAA, GAG
Thr / T ACU, ACC, ACA, ACG
Trp / W UGG
Gly / G GGU, GGC, GGA, GGG
Tyr / Y UAU, UAC
His / H CAU, CAC
Val / V GUU, GUC, GUA, GUG
STOP UAA, UGA, UAG
""".strip()


def _parse_codon_table(table):
    """Build a codon -> one-letter symbol mapping from a table shaped like ``tt``.

    Rows are parsed by field rather than by fixed column, so the result does
    not depend on how much whitespace separates the label from the codons.

    Args:
        table: Text with one row per line, in the format described above ``tt``.

    Returns:
        A dict whose keys are lowercase codons with "u" rewritten to "t" and
        whose values are the one-letter code of the row, or "*" for the STOP
        row.
    """
    mapping = {}
    for row in table.splitlines():
        _, slash, after_slash = row.partition("/")
        # Rows with a slash carry the one-letter code right after it; the
        # STOP row has no slash, so its label is simply the first field.
        body = after_slash if slash else row
        fields = body.replace(",", " ").replace(";", " ").split()
        if not fields:
            continue
        symbol = fields[0].replace("STOP", "*")
        for raw_codon in fields[1:]:
            codon = raw_codon.lower().replace("u", "t")
            # A codon listed twice would silently overwrite the earlier row;
            # report it the same way the table builder always has.
            if codon in mapping:
                print("dup", codon)
            mapping[codon] = symbol
    return mapping


# Lookup table used for translation: lowercase DNA codon -> amino-acid symbol.
dec = _parse_codon_table(tt)
def translate(x, protein=False):
    """Translate a nucleotide sequence into a string of one-letter symbols.

    The sequence is lowercased and read as consecutive 3-base codons starting
    at offset 0; each codon is looked up in the module-level ``dec`` table.
    One or two leftover bases at the end of the sequence are ignored.

    Args:
        x: Nucleotide sequence (any case) whose codons are keys of ``dec``.
        protein: When true, require the translation to start with "M" and end
            with the stop symbol "*"; the trailing "*" is then removed from
            the returned string.

    Returns:
        The translated string. When ``protein`` is true and the translation
        is empty, does not start with "M", or does not end with "*", the
        problem is printed and None is returned instead.

    Raises:
        KeyError: If a codon is not present in ``dec``.
    """
    x = x.lower()
    # Stop at len(x) - 2 so a trailing partial codon is never looked up.
    aa = ''.join(dec[x[i:i + 3]] for i in range(0, len(x) - 2, 3))
    if protein:
        # startswith/endswith are simply False on an empty string, so input
        # shorter than one codon is reported as a bad protein rather than
        # raising IndexError from aa[0].
        if not (aa.startswith("M") and aa.endswith("*")):
            print("BAD PROTEIN")
            print(aa)
            return None
        aa = aa[:-1]
    return aa
import pathlib
import os
import json
# Read the sequence collection from data/allseq.json, resolved relative to
# the directory containing this file rather than the current working directory.
with (pathlib.Path(__file__).parent.absolute() / "data" / "allseq.json").open() as f:
    allseq = json.loads(f.read())

# Convenience handle on the entry stored under the key 'MN908947'.
# NOTE(review): presumably a sequence accession id - confirm against the data file.
cc = allseq['MN908947']