From e74c2fbe50cc956f8470bb80a6fae77087e549e8 Mon Sep 17 00:00:00 2001
From: Roman Joeres
Date: Thu, 15 Aug 2024 15:52:52 +0200
Subject: [PATCH] Minor code to build atomic pretraining dataset

---
 gifflar/acquisition/__init__.py    |  0
 gifflar/acquisition/candycrunsh.py | 76 ++++++++++++++++++++++++++++++
 gifflar/data.py                    |  2 +-
 requirements.txt                   |  2 +-
 4 files changed, 78 insertions(+), 2 deletions(-)
 create mode 100644 gifflar/acquisition/__init__.py
 create mode 100644 gifflar/acquisition/candycrunsh.py

diff --git a/gifflar/acquisition/__init__.py b/gifflar/acquisition/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gifflar/acquisition/candycrunsh.py b/gifflar/acquisition/candycrunsh.py
new file mode 100644
--- /dev/null
+++ b/gifflar/acquisition/candycrunsh.py
@@ -0,0 +1,76 @@
+"""Build the glycan -> SMILES table for the atomic pretraining dataset.
+
+Loads the glycans collected in a pickle file, looks each one up in a
+``GlycanStorage`` and pickles the resulting ``{glycan: smiles}`` dict.
+
+Run as a script; importing this module does not start the conversion.
+"""
+
+import argparse
+import pickle
+import sys
+from typing import Any, Dict, Iterable, Optional, Sequence
+
+from gifflar.data import GlycanStorage
+
+# Defaults reproduce the paths that used to be hard-coded in this script.
+DEFAULT_INPUT = "collected.pkl"
+DEFAULT_STORAGE = "C:/Users/joere/Desktop"
+DEFAULT_OUTPUT = "glycan_smiles.pkl"
+
+
+def collect_smiles(glycans: Iterable[str], storage: GlycanStorage) -> Dict[str, Any]:
+    """
+    Query the storage for every glycan and collect the SMILES of the hits.
+
+    Args:
+        glycans: Glycan identifiers to look up (presumably IUPAC strings).
+        storage: Storage whose ``query`` result exposes a ``"smiles"`` entry.
+
+    Returns:
+        Mapping from each glycan with a truthy query result to its ``"smiles"`` entry.
+    """
+    smiles = {}
+    for i, iupac in enumerate(glycans):
+        print(f"\r{i}", end="")
+        try:
+            res = storage.query(iupac)
+            if res:
+                smiles[iupac] = res["smiles"]
+        except Exception as e:
+            # Best effort: one glycan that cannot be processed must not abort the
+            # whole run. Start on a fresh line so the progress counter stays legible.
+            print(f"\nSkipping {iupac!r}: {e}", file=sys.stderr)
+    # Terminate the "\r"-driven progress line before anything else is printed.
+    print()
+    return smiles
+
+
+def main(argv: Optional[Sequence[str]] = None) -> None:
+    """
+    Parse the command line and write the glycan -> SMILES pickle.
+
+    Args:
+        argv: Argument list to parse; ``None`` makes argparse read ``sys.argv``.
+    """
+    parser = argparse.ArgumentParser(description="Map collected glycans to SMILES.")
+    parser.add_argument("--input", default=DEFAULT_INPUT, help="input pickle")
+    parser.add_argument("--storage", default=DEFAULT_STORAGE, help="GlycanStorage path")
+    parser.add_argument("--output", default=DEFAULT_OUTPUT, help="output pickle")
+    args = parser.parse_args(argv)
+
+    # NOTE: unpickling executes arbitrary code -- only load files you trust.
+    with open(args.input, "rb") as f:
+        _, unique_glycans, _ = pickle.load(f)
+
+    gs = GlycanStorage(args.storage)
+    print("Loaded GlycanStorage:", len(gs.data))
+
+    data = collect_smiles(unique_glycans, gs)
+    print(len(data))
+    with open(args.output, "wb") as f:
+        pickle.dump(data, f)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/gifflar/data.py b/gifflar/data.py
index aa4049d..8d63826 100644
--- a/gifflar/data.py
+++ b/gifflar/data.py
@@ -224,7 +224,7 @@ def hetero_collate(data: Optional[Union[List[List[HeteroData]], List[HeteroData]
 
 
 class GlycanStorage:
-    def __init__(self, path: Optional[Path] = None):
+    def __init__(self, path: Optional[Path | str] = None):
         """
         Initialize the wrapper around a dict.
 
diff --git a/requirements.txt b/requirements.txt
index f3dabce..8f74bef 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ scikit-learn
 numpy
 pandas
 git+https://github.com/KalininaLab/glyles.git
-git+https://github.com/BojarLab/glycowork.git@ab73b62d7d500d321592cca20cdf3b8507f60026
+git+https://github.com/BojarLab/glycowork.git@3b8a6619ecc4b256cb28b43a47f17ef7df0ecaf3
 jsonargparse
 rich
 pytorch-lightning