gnps annotations

axelwalter · axelwalter · commit c1c441e9c9d9 · 2024-03-28T10:34:32.000+01:00
- always fetch annotations
- they are stored in a DataFrame in session state (`df_gnps_annotations`)
- not used anywhere yet
diff --git a/pages/1_📁_Data_Preparation.py b/pages/1_📁_Data_Preparation.py
@@ -36,10 +36,9 @@
             task_id_default = ""
             disabled = False
         task_id = st.text_input("GNPS task ID", task_id_default, disabled=disabled)
-        c1, c2 = st.columns(2)
-        merge_annotations = c1.checkbox("Annotate metabolites", True, help="Merge annotations from GNPS FBMN and analog search if available.")
+        _, c2, _ = st.columns(3)
         if c2.button("Load filed from GNPS", type="primary", disabled=len(task_id) == 0, use_container_width=True):
-            st.session_state["ft_gnps"], st.session_state["md_gnps"] = load_from_gnps(task_id, merge_annotations)
+            st.session_state["ft_gnps"], st.session_state["md_gnps"] = load_from_gnps(task_id)
         
         if "ft_gnps" in st.session_state:
             if not st.session_state["ft_gnps"].empty:
diff --git a/src/common.py b/src/common.py
@@ -14,7 +14,8 @@
                    "df_important_features",
                    "df_oob",
                    "ft_gnps",
-                   "md_gnps")
+                   "md_gnps",
+                   "df_gnps_annotations")
 
 corrections_map = {"Bonferroni": "bonf",
                    "Sidak": "sidak",
diff --git a/src/fileselection.py b/src/fileselection.py
@@ -54,24 +54,24 @@ def load_example():
     return ft, md
 
 @st.cache_data
-def load_from_gnps(task_id, merge_annotations):
+def load_from_gnps(task_id):
     try: # GNPS2 will run here
         ft = workflow_fbmn.get_quantification_dataframe(task_id, gnps2=True)
         md = workflow_fbmn.get_metadata_dataframe(task_id, gnps2=True).set_index("filename")
-        if merge_annotations:
-            an = taskresult.get_gnps2_task_resultfile_dataframe(task_id, "nf_output/library/merged_results_with_gnps.tsv")[["#Scan#", "Compound_Name"]].set_index("#Scan#")
+        an = taskresult.get_gnps2_task_resultfile_dataframe(task_id, "nf_output/library/merged_results_with_gnps.tsv")[["#Scan#", "Compound_Name"]].set_index("#Scan#")
     except urllib.error.HTTPError: # GNPS1 task IDs can not be retrieved and throw HTTP Error 500
         ft_url = f"https://proteomics2.ucsd.edu/ProteoSAFe/DownloadResultFile?task={task_id}&file=quantification_table_reformatted/&block=main"
         md_url = f"https://proteomics2.ucsd.edu/ProteoSAFe/DownloadResultFile?task={task_id}&file=metadata_merged/&block=main"
         ft = pd.read_csv(ft_url)
         md = pd.read_csv(md_url, sep = "\t", index_col="filename")
-        if merge_annotations:
-            an_url = f"https://proteomics2.ucsd.edu/ProteoSAFe/DownloadResultFile?task={task_id}&file=DB_result/&block=main"
-            an = pd.read_csv(an_url, sep = "\t")[["#Scan#", "Compound_Name"]].set_index("#Scan#")
-    if merge_annotations:
-            ft.index = pd.Index(ft.apply(lambda x: f'{an.loc[x["row ID"], "Compound_Name"]}_{round(x["row m/z"], 4)}_{round(x["row retention time"], 2)}' if x["row ID"] in an.index else f'{x["row ID"]}_{round(x["row m/z"], 4)}_{round(x["row retention time"], 2)}', axis=1))
-    else:
-        ft.index = pd.Index(ft.apply(lambda x: f'{x["row ID"]}_{round(x["row m/z"], 4)}_{round(x["row retention time"], 2)}', axis=1))
+        an_url = f"https://proteomics2.ucsd.edu/ProteoSAFe/DownloadResultFile?task={task_id}&file=DB_result/&block=main"
+        an = pd.read_csv(an_url, sep = "\t")[["#Scan#", "Compound_Name"]].set_index("#Scan#")
+    
+    index_with_annotations = pd.Index(ft.apply(lambda x: f'{x["row ID"]}_{round(x["row m/z"], 4)}_{round(x["row retention time"], 2)}', axis=1))
+    ft.index = index_with_annotations
+    st.session_state["df_gnps_annotations"].index = index_with_annotations
+    st.session_state["df_gnps_annotations"]["GNPS annotation"] = ft["row ID"].apply(lambda x: an.loc[x, "Compound_Name"] if x in an.index else pd.NA)
+    st.session_state["df_gnps_annotations"].dropna(inplace=True)
     return ft, md
 
 def load_ft(ft_file):