
Commit c12e722 (parent 421d456)

Inputs added for classical molecular networking

2 files changed (+45 −16)

pages/1_📁_Data_Preparation.py (+21 −4)
@@ -21,24 +21,41 @@
 )
 ft, md = pd.DataFrame(), pd.DataFrame()

-file_origin = st.radio("File origin", ["Quantification table and meta data files", "GNPS(2) task ID", "Example dataset from publication", "Small example dataset for testing"])
+file_origin = st.radio("File origin",
+                       ["Quantification table and meta data files",
+                        "GNPS(2) task ID",
+                        "Example dataset from publication",
+                        "Small example dataset for testing",
+                        "GNPS2 classical molecular networking (CMN)"])
+
 # b661d12ba88745639664988329c1363e
 if file_origin == "Small example dataset for testing":
     ft, md = load_example()

-if file_origin == "GNPS(2) task ID" or file_origin == "Example dataset from publication":
+if file_origin == "GNPS(2) task ID" or file_origin == "Example dataset from publication" or file_origin == "GNPS2 classical molecular networking (CMN)":
     st.warning("💡 This tool only supports task IDs from GNPS1 and GNPS2, not from Quickstart GNPS1.")
     if file_origin == "Example dataset from publication":
         task_id_default = "b661d12ba88745639664988329c1363e" # 63e8b3da08df41fe95031e4710e0476b
-
+        disabled = True
+    elif file_origin == "GNPS2 classical molecular networking (CMN)":
+        task_id_default = "2a65f90094654235a4c8d337fdca11e1" # 63e8b3da08df41fe95031e4710e0476b
         disabled = True
     else:
         task_id_default = ""
         disabled = False
     task_id = st.text_input("GNPS task ID", task_id_default, disabled=disabled)
     _, c2, _ = st.columns(3)
+
     if c2.button("Load files from GNPS", type="primary", disabled=len(task_id) == 0, use_container_width=True):
-        st.session_state["ft_gnps"], st.session_state["md_gnps"] = load_from_gnps(task_id)
+        st.session_state["ft_gnps"], st.session_state["md_gnps"] = load_from_gnps(task_id, cmn=True)
+
+    if md.empty:
+        st.warning("Meta data is empty. Please upload one.")
+
+        md_file = st.file_uploader("Meta Data Table")
+        if md_file:
+            md = load_md(md_file)
+            st.success("Meta data was loaded successfully!")

 if "ft_gnps" in st.session_state:
     if not st.session_state["ft_gnps"].empty:
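One thing worth flagging in the hunk above: the button handler now passes cmn=True for every file origin, including plain FBMN task IDs. A possible follow-up, shown only as a sketch (the is_cmn name and the conditional are a suggestion, not part of this commit), would derive the flag from the selected radio option, reusing the page's existing st, c2, task_id, file_origin, and load_from_gnps names:

```python
# Sketch only (not in this commit): derive the CMN flag from the radio selection
# so FBMN/GNPS1 task IDs keep the original loader behaviour.
is_cmn = file_origin == "GNPS2 classical molecular networking (CMN)"

if c2.button("Load files from GNPS", type="primary",
             disabled=len(task_id) == 0, use_container_width=True):
    st.session_state["ft_gnps"], st.session_state["md_gnps"] = load_from_gnps(task_id, cmn=is_cmn)
```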

src/fileselection.py (+24 −12)
@@ -54,26 +54,38 @@ def load_example():
     return ft, md

 @st.cache_data
-def load_from_gnps(task_id):
+def load_from_gnps(task_id, cmn=False):
     try: # GNPS2 will run here
         ft = workflow_fbmn.get_quantification_dataframe(task_id, gnps2=True)
         md = workflow_fbmn.get_metadata_dataframe(task_id, gnps2=True).set_index("filename")
         an = taskresult.get_gnps2_task_resultfile_dataframe(task_id, "nf_output/library/merged_results_with_gnps.tsv")[["#Scan#", "Compound_Name"]].set_index("#Scan#")
     except urllib.error.HTTPError: # GNPS1 task IDs can not be retrieved and throw HTTP Error 500
-        ft_url = f"https://proteomics2.ucsd.edu/ProteoSAFe/DownloadResultFile?task={task_id}&file=quantification_table_reformatted/&block=main"
-        md_url = f"https://proteomics2.ucsd.edu/ProteoSAFe/DownloadResultFile?task={task_id}&file=metadata_merged/&block=main"
+        if cmn:
+            ft_url = f"https://gnps2.org/resultfile?task={task_id}&file=nf_output/clustering/featuretable_reformatted_precursorintensity.csv"
+            md_url = f"https://gnps2.org/resultfile?task={task_id}&file=nf_output/metadata/merged_metadata.tsv"
+        else:
+            ft_url = f"https://proteomics2.ucsd.edu/ProteoSAFe/DownloadResultFile?task={task_id}&file=quantification_table_reformatted/&block=main"
+            md_url = f"https://proteomics2.ucsd.edu/ProteoSAFe/DownloadResultFile?task={task_id}&file=metadata_merged/&block=main"
         ft = pd.read_csv(ft_url)
-        md = pd.read_csv(md_url, sep = "\t", index_col="filename")
-        an_url = f"https://proteomics2.ucsd.edu/ProteoSAFe/DownloadResultFile?task={task_id}&file=DB_result/&block=main"
-        an = pd.read_csv(an_url, sep = "\t")[["#Scan#", "Compound_Name"]].set_index("#Scan#")
-
-    index_with_annotations = pd.Index(ft.apply(lambda x: f'{x["row ID"]}_{round(x["row m/z"], 4)}_{round(x["row retention time"], 2)}', axis=1))
-    ft.index = index_with_annotations
-    st.session_state["df_gnps_annotations"].index = index_with_annotations
-    st.session_state["df_gnps_annotations"]["GNPS annotation"] = ft["row ID"].apply(lambda x: an.loc[x, "Compound_Name"] if x in an.index else pd.NA)
-    st.session_state["df_gnps_annotations"].dropna(inplace=True)
+        try:
+            md = pd.read_csv(md_url, sep = "\t", index_col="filename")
+        except pd.errors.EmptyDataError:
+            md = pd.DataFrame()
+        if not cmn:
+            an_url = f"https://proteomics2.ucsd.edu/ProteoSAFe/DownloadResultFile?task={task_id}&file=DB_result/&block=main"
+            an = pd.read_csv(an_url, sep = "\t")[["#Scan#", "Compound_Name"]].set_index("#Scan#")
+    if cmn:
+        ft.index = ft["row ID"]
+        ft = ft.drop(columns=["row m/z", "row retention time", "row ID"])
+    else:
+        index_with_mz_RT = pd.Index(ft.apply(lambda x: f'{x["row ID"]}_{round(x["row m/z"], 4)}_{round(x["row retention time"], 2)}', axis=1))
+        ft.index = index_with_mz_RT
+        st.session_state["df_gnps_annotations"].index = index_with_mz_RT
+        st.session_state["df_gnps_annotations"]["GNPS annotation"] = ft["row ID"].apply(lambda x: an.loc[x, "Compound_Name"] if x in an.index else pd.NA)
+        st.session_state["df_gnps_annotations"].dropna(inplace=True)
     return ft, md

+
 def load_ft(ft_file):
     ft = open_df(ft_file)
     ft = ft.dropna(axis=1)
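For reference, the CMN download path added in this file can be exercised outside Streamlit with plain pandas. The sketch below is not part of the commit; it assumes the example CMN task ID used as the page default (2a65f90094654235a4c8d337fdca11e1) is still available on gnps2.org, and it mirrors the cmn=True branch of load_from_gnps, including the empty-metadata guard:

```python
import pandas as pd

# Example CMN task ID used as the default in the page diff above (assumed still available).
task_id = "2a65f90094654235a4c8d337fdca11e1"

# Result-file URLs as used in load_from_gnps(..., cmn=True).
ft_url = f"https://gnps2.org/resultfile?task={task_id}&file=nf_output/clustering/featuretable_reformatted_precursorintensity.csv"
md_url = f"https://gnps2.org/resultfile?task={task_id}&file=nf_output/metadata/merged_metadata.tsv"

# Feature table: indexed by "row ID"; m/z and RT columns are dropped in the CMN branch.
ft = pd.read_csv(ft_url)
ft.index = ft["row ID"]
ft = ft.drop(columns=["row m/z", "row retention time", "row ID"])

# Metadata: the merged metadata file may be empty for CMN tasks, hence the EmptyDataError guard.
try:
    md = pd.read_csv(md_url, sep="\t", index_col="filename")
except pd.errors.EmptyDataError:
    md = pd.DataFrame()

print(ft.shape, md.shape)
```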
