-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
337 lines (279 loc) · 12.8 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
'''
By M.F.L. Meersman.
In order to use this model, please first contact m.f.l.meersman@student.tudelft.nl to ask for my permission.
If you just want to have a look at the processed data, please check out file 'dataframe.csv'.
All the produced graphs can be viewed and downloaded as '.PNG' images. It is however recommended to download the '.html'
files since these allow you to interact with the figures such that you can find and look up specific molecules.
In order to process the data yourself, please uncomment '# main()' of line 336 in this file, then run this file.
Going to 'https://rcbs.org' leads you to the international protein databank. From here, you can copy the
respective 'pdb-code-names' and paste them into one of three 'xxxxx_code_list.csv' files to process those respective
molecules.
This model simulaties the time required for the disintegration of possible lifeforms on Enceladus.
Going to 'dataframe.csv'-file will lead you to an overview of all the simulated molecule structures.
You can open this file best in Excel or any spreadsheet viewer. The meaning of all columns is:
- Identifier: This indicates whether the molecule is DNA, fatty-acid or proten/amino-acid
- code: This is the universal naming of the molecule that is simulated. Typing in this code in Google can give you additional info about the molecule.
- Required Height [m]: This is the distance you need to be elevated above the surface in order to capture the molecule before it brakes down.
- id: binary indication for Identifier
- Area [Å2]: This is the geometric area of the molecule structure, i.e. the area that will capture radiation flux.
- Time required [s]: This is the time from exiting the vent untill the molecule breaks down.
- URL: the link that leads to all the information of the molecule.
- Molecular complexity [-]: expressed as a function of number of atoms and variations of atoms.
'''
import csv
import os
import urllib.request
import webbrowser
from os.path import isfile, join
import freesasa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.graph_objs as go
from Bio.PDB import PDBParser
from scipy.interpolate import make_interp_spline
def make_code_list(filename):
file = open(filename)
csvreader = csv.reader(file)
rows = []
for row in csvreader:
rows.append(row)
return np.array(rows).flatten()
def download_pdb(pdbcode, datadir, downloadurl="https://files.rcsb.org/download/"):
pdbfn = pdbcode + ".pdb"
url = downloadurl + pdbfn
outfnm = os.path.join(datadir, pdbfn)
print("Downloading molecular data files... ", outfnm)
try:
urllib.request.urlretrieve(url, outfnm)
return outfnm
except Exception as err:
print("Couldn't find this molecule :(")
return None
def url_list(pdbcode_list, viewurl= "https://www.rcsb.org/structure/"):
lst = []
for code in pdbcode_list:
url = viewurl + code
lst.append(url)
return lst
def parser(name):
parser = PDBParser()
structure = parser.get_structure(name)
result, sasa_classes = freesasa.calcBioPDB(structure)
print('Calculating geometric area of molecule:', name)
return result, sasa_classes
def smooth(x, y):
X_Y_Spline = make_interp_spline(x, y)
X_ = np.linspace(x.min(), x.max(), 20)
Y_ = X_Y_Spline(X_)
return(X_, Y_)
def do_click(trace, points, state):
if points.point_inds:
ind = points.point_inds[0]
url = df.link.iloc[ind]
webbrowser.open_new_tab(url)
def complexity(directory):
lst = []
for file in os.listdir(directory):
b = os.path.getsize(directory + '/' +file)
lst.append(b)
return lst
def sasa(filename):
structure = freesasa.Structure(filename)
result = freesasa.calc(structure)
area_classes = freesasa.classifyResults(result, structure)
print("Total : %.2f A2" % result.totalArea())
for key in area_classes:
print (key, ": %.2f A2" % area_classes[key])
return result.totalArea()
def integral(xarr, yarr):
area = 0
for i in range(5):
avg = (yarr[i]+yarr[i+1])*0.5
block = avg*(xarr[i+1]-xarr[i])
area += block
return area
def H(list):
heights = []
avg_cov = 4.094 #eV
rad_per_sec_cm = 144675998.37885764
for area in list:
height = (avg_cov * 1000) / ((area/(10**16)) * rad_per_sec_cm * 10**6)
heights.append(height)
return heights
def main():
# Enceladus
pro = np.array([10e12, 10e5, 10e2, 10e1, 10e-1, 10e-3]) # [#/cm^2/day]
el = np.array([10e12, 10e5, 10e2, 10e-1, 10e-3, 10e-6]) # [#/cm^2/day]
x1 = np.array([0, 15, 75, 100, 300, 1000]) # [MeV]
x2 = np.array([0, 10, 50, 100, 300, 1000]) # [MeV]
daily_pro = integral(x1, pro)
daily_el = integral(x2, el)
daily_rad = daily_pro + daily_el
rad_per_sec_cm = daily_rad/(24*60**2)
plt.plot(x1, pro) #enceladus proton
plt.plot(x2, el) #enceladus electron
plt.title("Enceladus integral photon and electron")
plt.ylabel("Integral Flux, #/cm2/day")
plt.xlabel("Particle Energy [MeV]")
plt.yscale("log")
plt.grid()
plt.figure()
#plt.show()
fig = go.Figure()
fig.add_trace(go.Scatter(x=x1, y=pro,
mode='lines+markers',
name='proton radiation', fill='tonexty'))
fig.add_trace(go.Scatter(x=x2, y=el,
mode='lines+markers',
name='electron radiation', fill='tonexty'))
fig.update_yaxes(type="log")
fig.update_layout(title='Enceladus integral photon and electron by SATRAD model',
xaxis_title='Particle Energy [MeV]',
yaxis_title='Particle Energy [MeV]')
#fig.show()
# Molecule Areas
avg_cov = 4.094 #[eV], average covalent bond strenghth for organic molecules with temperature correction to 200K
## DNA ##
print("\nDNA")
dna_code_lst = make_code_list('dna_code_list.csv')
for code in dna_code_lst:
download_pdb(code, 'datadirdna', downloadurl="https://files.rcsb.org/download/")
DNA_area_lst = []
for file in os.listdir('datadirdna'):
f = os.path.join('datadirdna', file)
# checking if it is a file
if os.path.isfile(f):
DNA_area = sasa(f)
DNA_area_lst.append(DNA_area)
DNA_area = np.average(DNA_area_lst)
DNA_area_min = min(DNA_area_lst)
## Lipids ##
print("\nLipids")
lipid_code_lst = make_code_list('lipid_code_list.csv')
for code in lipid_code_lst:
download_pdb(code, 'datadirlipid', downloadurl="https://files.rcsb.org/download/")
lipid_area_lst = []
for file in os.listdir('datadirlipid'):
f = os.path.join('datadirlipid', file)
# checking if it is a file
if os.path.isfile(f):
lipid_area = sasa(f)
lipid_area_lst.append(lipid_area)
lipid_area = np.average(lipid_area_lst)
lipid_area_min = min(lipid_area_lst)
## Amino-acids ##
print("\nAmino-Acids")
amino_code_lst = make_code_list('amino_code_list.csv')
for code in amino_code_lst:
download_pdb(code, 'datadiramino', downloadurl="https://files.rcsb.org/download/")
amino_area_lst = []
for file in os.listdir('datadiramino'):
f = os.path.join('datadiramino', file)
# checking if it is a file
if os.path.isfile(f):
amino_area = sasa(f)
amino_area_lst.append(amino_area)
amino_area = np.average(amino_area_lst)
amino_area_min = min(amino_area_lst)
## Requirements on time and height
print("Radiation energy (Electron and proton) per second per cm2 [MeV]:", rad_per_sec_cm)
print('\nDNA')
DNA_energy_received = (DNA_area/(10**16)) * rad_per_sec_cm * 10**6
print('Energy received per second [eV]: ', DNA_energy_received)
t_dna = avg_cov/DNA_energy_received
print('Time req. for dissociation [s]: ', t_dna)
print('Height requirement [m]', t_dna * 1000)
DNA_height_min = (avg_cov * 1000) / ((DNA_area_min/(10**16)) * rad_per_sec_cm * 10**6)
print('Height requirement var [m]', t_dna*1000 + DNA_height_min, t_dna*1000 - DNA_height_min)
print('\nLIPIDS')
lipid_energy_received = (lipid_area/(10**16))*rad_per_sec_cm * 10**6
print('Energy received per second [eV]: ', lipid_energy_received)
t_lipid = avg_cov/lipid_energy_received
print('Time req. for dissociation [s]: ', t_lipid)
print('Height requirement [m]', t_lipid*1000)
lipid_height_min = (avg_cov * 1000) / ((lipid_area_min/(10**16)) * rad_per_sec_cm * 10**6)
print('Height requirement var [m]', t_lipid*1000 + lipid_height_min, t_lipid*1000 - lipid_height_min)
print('\nAMINO-ACIDS')
amino_energy_received = (amino_area/(10**16))*rad_per_sec_cm * 10**6
print('Energy received per second [eV]: ', amino_energy_received)
t_amino = avg_cov/amino_energy_received
print('Time req. for dissociation [s]: ', t_amino)
print('Height requirement [m]', t_amino * 1000)
amino_height_min = (avg_cov * 1000) / ((amino_area_min/(10**16)) * rad_per_sec_cm * 10**6)
print('Height requirement var [m]', t_amino*1000 + amino_height_min, t_amino*1000 - amino_height_min)
dna_names = ['DNA'] * len(DNA_area_lst)
dna_id = [0]*len(dna_names)
lipid_names = ['lipid'] * len(lipid_area_lst)
lipid_id = [0.1]*len(lipid_names)
amino_names = ['amino'] * len(amino_area_lst)
amino_id = [0.2] *len(amino_names)
dna_codes = [f[:-4] for f in os.listdir('datadirdna') if isfile(join('datadirdna', f))]
dna_urls = url_list(dna_codes, viewurl="https://www.rcsb.org/structure/")
lipid_codes = [f[:-4] for f in os.listdir('datadirlipid') if isfile(join('datadirlipid', f))]
lipid_urls = url_list(lipid_codes, viewurl="https://www.rcsb.org/structure/")
amino_codes = [f[:-4] for f in os.listdir('datadiramino') if isfile(join('datadiramino', f))]
amino_urls = url_list(amino_codes, viewurl="https://www.rcsb.org/structure/")
urls = dna_urls + lipid_urls + amino_urls
codes = dna_codes + lipid_codes + amino_codes
dna_comp = complexity('datadirdna')
lipid_comp = complexity('datadirlipid')
amino_comp = complexity('datadiramino')
d = {'Identifier': dna_names + lipid_names + amino_names, 'code': codes,
'Required Height before dissociation [m]': H(DNA_area_lst) + H(lipid_area_lst) + H(amino_area_lst),
"id": dna_id + lipid_id + amino_id, "Area [Å2]": DNA_area_lst + lipid_area_lst + amino_area_lst,
"Time to measure before dissociation [t]": [i / 1000 for i in H(DNA_area_lst) + H(lipid_area_lst) + H(amino_area_lst)], "URL": urls,
"Molecular Complexity [-]": dna_comp+lipid_comp+amino_comp}
df = pd.DataFrame(data=d)
df.to_csv('dataframe.csv')
def generateresults():
df = pd.read_csv('dataframe.csv')
fig = px.scatter(df, x="Molecular Complexity [-]", y="Required Height before dissociation [m]",
size="Area [Å2]", color="Identifier",
hover_name="code", size_max=35, hover_data=["URL", "Time to measure before dissociation [t]"],
title="Required height above Enceladus to capture molecules before dissociating in a 1 [km/s] flow")
fig.update_yaxes(type="log")
# fig.update_xaxes(type="log")
fig.show()
fig.write_html("Height_Complexity.html")
fig = px.histogram(df, x="Time to measure before dissociation [t]", color="Identifier", marginal="rug",
hover_data=df.columns, title="Dissociation time on Enceladus in a 1 [km/s] flow")
fig.show()
fig.write_html("Time_Histogram.html")
''' This is an interactive application that allows people
to click different points in plots and gets them redirected
to all the information related to the molecule. For now in Alpha state'''
app = dash.Dash(__name__)
app.layout = html.Div(
[
html.A(
children="Please click this link",
id="link",
href="https://dash.plot.ly",
target="_blank",
),
dcc.Graph(
id="figure",
# specify custom_data to be the "urls" column
figure=px.scatter(df, x="molecule_complexity", y="Required Height before dissociation [m]",
size="Area [angstron2]", color="Identifier",
hover_name="Identifier", size_max=35, custom_data=("urls",)),
),
]
)
@app.callback(
Output('link', 'href'),
[Input('figure', 'hoverData')])
def display_hover_data(hoverData):
if hoverData:
target = hoverData['points'][0]['customdata']
return target
else:
raise PreventUpdate
app.run_server(debug=False)
# Run once in order to generate all the data
if __name__ == "__main__":
# main() # Comment this line out if you ran main() the simulations.
generateresults()