Skip to content

Commit

Permalink
Minor code to build atomic pretraining dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
Old-Shatterhand committed Aug 15, 2024
1 parent 4e9327f commit e74c2fb
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 2 deletions.
Empty file added gifflar/acquisition/__init__.py
Empty file.
23 changes: 23 additions & 0 deletions gifflar/acquisition/candycrunsh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pickle

from gifflar.data import GlycanStorage

# Build a glycan -> SMILES lookup and store it as ``glycan_smiles.pkl``.
#
# Steps: read the collected glycans from ``collected.pkl``, look each one up
# in a GlycanStorage, keep the "smiles" entry of every truthy query result,
# and pickle the resulting dict.

# NOTE(security): pickle.load can execute arbitrary code while loading; only
# run this on a ``collected.pkl`` you produced yourself.
with open("collected.pkl", "rb") as f:
    # The pickle holds a 3-tuple; only its middle element is used here.
    _, unique_glycans, _ = pickle.load(f)

# NOTE(review): machine-specific directory; adjust before running elsewhere.
gs = GlycanStorage("C:/Users/joere/Desktop")
print("Loaded GlycanStorage:", len(gs.data))

data = {}
for i, iupac in enumerate(unique_glycans):
    # In-place progress counter: "\r" rewinds to the start of the line.
    print(f"\r{i}", end="")
    try:
        res = gs.query(iupac)
        if res:
            data[iupac] = res["smiles"]
    except Exception as e:
        # Deliberate best effort: one failing glycan must not abort the run.
        # Start on a fresh line so the message is not glued to the progress
        # counter, and name the entry that failed.
        print(f"\n{iupac!r}: {e}")

# Terminate the progress line before printing the summary count.
print()
print(len(data))
with open("glycan_smiles.pkl", "wb") as f:
    pickle.dump(data, f)
2 changes: 1 addition & 1 deletion gifflar/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def hetero_collate(data: Optional[Union[List[List[HeteroData]], List[HeteroData]


class GlycanStorage:
def __init__(self, path: Optional[Path] = None):
def __init__(self, path: Optional[Path | str] = None):
"""
Initialize the wrapper around a dict.
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ scikit-learn
numpy
pandas
git+https://github.com/KalininaLab/glyles.git
git+https://github.com/BojarLab/glycowork.git@ab73b62d7d500d321592cca20cdf3b8507f60026
git+https://github.com/BojarLab/glycowork.git@3b8a6619ecc4b256cb28b43a47f17ef7df0ecaf3
jsonargparse
rich
pytorch-lightning
Expand Down

0 comments on commit e74c2fb

Please sign in to comment.