-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfr3d_2d.py
executable file
·98 lines (80 loc) · 2.96 KB
/
fr3d_2d.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
"""
Generate a FASTA file with secondary structure in dot bracket notation
based on FR3D annotations of an RNA 3D structure.
Usage:
python fr3d_2d.py 2QUS_B
>2QUS_B
GGGAGCCCUGUCACCGGAUGUGCUUUCCGGUCUGAUGAGUCCGUGAGGACAAAACAGGGCUCCCGAAUU
.((((((((((.((((((.....{.))))))(....).((((...}))))...))))))))))......
"""
import json
import os
import sys
BASEPAIRS_ALL = 'data/basepairs/all'
BASEPAIRS_NESTED = 'data/basepairs/nested'
def get_rna3dhub_annotations(pdb_id):
"""
Get FR3D annotations in JSON format.
"""
if not os.path.exists(BASEPAIRS_ALL):
os.system('mkdir -p {}'.format(BASEPAIRS_ALL))
if not os.path.exists(BASEPAIRS_NESTED):
os.system('mkdir -p {}'.format(BASEPAIRS_NESTED))
bps_all = os.path.join(BASEPAIRS_ALL, '{}.json'.format(pdb_id))
bps_nested = os.path.join(BASEPAIRS_NESTED, '{}.json'.format(pdb_id))
pdb_code, chain = pdb_id.split('_')
if not os.path.exists(bps_all):
cmd = "curl -s -o {} 'http://rna.bgsu.edu/rna3dhub/rest/getSequenceBasePairs?pdb_id={}&chain={}&only_nested=False'".format(bps_all, pdb_code, chain)
os.system(cmd)
if not os.path.exists(bps_nested):
cmd = "curl -s -o {} 'http://rna.bgsu.edu/rna3dhub/rest/getSequenceBasePairs?pdb_id={}&chain={}&only_nested=True'".format(bps_nested, pdb_code, chain)
os.system(cmd)
def parse_json(filename):
"""
Some JSON files do not exist because the NMR structures are not annotated
(e.g. 1ju7).
"""
if not os.path.exists(filename):
print('File {} does not exist'.format(filename))
return None
with open(filename, 'r') as f:
data = json.load(f)
if data['sequence'] == 'No sequence was found for the given id':
return None
sequence = data['sequence']
structure = list('.' * len(sequence))
for annotation in data['annotations']:
if annotation['bp'] == 'cWW':
structure[int(annotation['seq_id1'])-1] = '('
structure[int(annotation['seq_id2'])-1] = ')'
return {
'1d': sequence,
'2d': structure,
}
def generate_output(pdb_id, bp_all, bp_nested):
bp_all_list = list(bp_all['2d'])
bp_nested_list = list(bp_nested['2d'])
final = []
for i, character in enumerate(bp_all_list):
if character in ['(', ')'] and bp_nested_list[i] == '.':
if character == '(':
final.append('{')
elif character == ')':
final.append('}')
else:
final.append(character)
output = """>{}
{}
{}
""".format(pdb_id, bp_all['1d'], ''.join(final))
return output
def fr3d_2d(pdb_id):
get_rna3dhub_annotations(pdb_id)
bp_all = parse_json(os.path.join(BASEPAIRS_ALL, pdb_id + '.json'))
bp_nested = parse_json(os.path.join(BASEPAIRS_NESTED, pdb_id + '.json'))
if not bp_all or not bp_nested:
return None
return generate_output(pdb_id, bp_all, bp_nested)
if __name__ == '__main__':
print(fr3d_2d(sys.argv[1]))