From a65b301b428ac5b0235708daa11870ea3481fe4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tennur=20K=C4=B1l=C4=B1=C3=A7?=
 <99681146+tnnrklc@users.noreply.github.com>
Date: Fri, 12 Aug 2022 00:10:13 +0300
Subject: [PATCH 01/32] Add files via upload

---
 pypath/inputs/chembl.py   | 278 +++++++++++++++++++++++++
 pypath/inputs/drugbank.py | 418 ++++++++++++++++++++++++++++++++++++++
 pypath/inputs/hpo.py      | 166 +++++++++++++++
 3 files changed, 862 insertions(+)
 create mode 100644 pypath/inputs/chembl.py
 create mode 100644 pypath/inputs/drugbank.py
 create mode 100644 pypath/inputs/hpo.py

diff --git a/pypath/inputs/chembl.py b/pypath/inputs/chembl.py
new file mode 100644
index 000000000..aecdc4eda
--- /dev/null
+++ b/pypath/inputs/chembl.py
@@ -0,0 +1,278 @@
+from typing import List
+
+import json
+import collections
+
+import pypath.share.curl as curl
+import pypath.resources.urls as urls
+
+def chembl_targets() -> List[tuple]:
+    """
+    Retrieves targets data from ChEMBL.
+
+    The listing is downloaded page by page: after each page the
+    ``page_meta.next`` link of the response is followed until it is empty.
+
+    Returns:
+        List of `Target` named tuples with the fields `accession` and
+        `target_chembl_id`. Only the accession of the first target
+        component is used; for targets without components `accession`
+        keeps its default, the string "None".
+    """
+
+    fields_target = ('accession', 'target_chembl_id')
+
+    Target = collections.namedtuple(
+        'Target',
+        fields_target,
+        defaults = ('None',) * len(fields_target),
+    )
+
+    trgtlst = []
+
+    base_url = urls.urls['chembl']['url']
+    page_path = urls.urls['chembl']['target']
+
+    while page_path:
+
+        c = curl.Curl(base_url + page_path, large = True, silent = False)
+
+        # the response is read back from the file behind `c.fileobj`;
+        # the context manager closes the handle after every page
+        with open(c.fileobj.name, encoding = 'utf-8') as fp:
+
+            page = json.load(fp)
+
+        for trgt_attr in page['targets']:
+
+            components = trgt_attr['target_components']
+
+            if components:
+
+                trgtlst.append(
+                    Target(
+                        accession = components[0]['accession'],
+                        target_chembl_id = trgt_attr['target_chembl_id'],
+                    )
+                )
+
+            else:
+
+                trgtlst.append(
+                    Target(
+                        target_chembl_id = trgt_attr['target_chembl_id'],
+                    )
+                )
+
+        # empty on the last page, which ends the loop
+        page_path = page['page_meta']['next']
+
+    return trgtlst
+
+def chembl_assays() -> List[tuple]:
+    """
+    Retrieves assays data from ChEMBL.
+
+    The listing is downloaded page by page: after each page the
+    ``page_meta.next`` link of the response is followed until it is empty.
+
+    Returns:
+        List of `Assay` named tuples with the fields `assay_chembl_id`,
+        `assay_organism`, `assay_type`, `confidence_score` and
+        `target_chembl_id`, one for each assay record.
+    """
+
+    fields_assay = (
+        'assay_chembl_id',
+        'assay_organism',
+        'assay_type',
+        'confidence_score',
+        'target_chembl_id',
+    )
+
+    Assay = collections.namedtuple(
+        'Assay',
+        fields_assay,
+        defaults = ('None',) * len(fields_assay),
+    )
+
+    assylst = []
+
+    base_url = urls.urls['chembl']['url']
+    page_path = urls.urls['chembl']['assay']
+
+    while page_path:
+
+        c = curl.Curl(base_url + page_path, large = True, silent = False)
+
+        # the response is read back from the file behind `c.fileobj`;
+        # the context manager closes the handle after every page
+        with open(c.fileobj.name, encoding = 'utf-8') as fp:
+
+            page = json.load(fp)
+
+        assylst.extend(
+            Assay(
+                assay_chembl_id = assy_attr['assay_chembl_id'],
+                assay_organism = assy_attr['assay_organism'],
+                assay_type = assy_attr['assay_type'],
+                confidence_score = assy_attr['confidence_score'],
+                target_chembl_id = assy_attr['target_chembl_id'],
+            )
+            for assy_attr in page['assays']
+        )
+
+        # empty on the last page, which ends the loop
+        page_path = page['page_meta']['next']
+
+    return assylst
+
+def chembl_molecules() -> List[tuple]:
+    """
+    Retrieves molecules data from ChEMBL.
+
+    The listing is downloaded page by page: after each page the
+    ``page_meta.next`` link of the response is followed until it is empty.
+
+    Returns:
+        List of `Molecule` named tuples. Fields coming from a section
+        that is null in the record (`molecule_hierarchy`,
+        `molecule_properties`, `molecule_structures`, `cross_references`)
+        keep their default, the string "None". `xrefs` is a list of
+        dicts with the keys `xref_id` and `xref_src`.
+
+        The misspelled field names `conanicle_smiles` and `molecul_type`
+        are kept as they are, because code using the records accesses
+        the fields by these names.
+    """
+
+    fields_molecule = (
+        'alogp',
+        'conanicle_smiles',
+        'chirality',
+        'full_mwt',
+        'heavy_atoms',
+        'standard_inchi_key',
+        'molecular_species',
+        'molecul_type',
+        'molecule_chembl_id',
+        'parent_chembl_id',
+        'prodrug',
+        'standard_inchi',
+        'xrefs',
+    )
+
+    Molecule = collections.namedtuple(
+        'Molecule',
+        fields_molecule,
+        defaults = ('None',) * len(fields_molecule),
+    )
+
+    mlcllst = []
+
+    base_url = urls.urls['chembl']['url']
+    page_path = urls.urls['chembl']['molecule']
+
+    while page_path:
+
+        c = curl.Curl(base_url + page_path, large = True, silent = False)
+
+        # the response is read back from the file behind `c.fileobj`;
+        # the context manager closes the handle after every page
+        with open(c.fileobj.name, encoding = 'utf-8') as fp:
+
+            page = json.load(fp)
+
+        for mlcl_attr in page['molecules']:
+
+            # values are collected first, so each record is created once
+            values = {
+                'chirality': mlcl_attr['chirality'],
+                'molecul_type': mlcl_attr['molecule_type'],
+                'prodrug': mlcl_attr['prodrug'],
+            }
+
+            hierarchy = mlcl_attr['molecule_hierarchy']
+
+            if hierarchy is not None:
+
+                values['molecule_chembl_id'] = hierarchy['molecule_chembl_id']
+                values['parent_chembl_id'] = hierarchy['parent_chembl_id']
+
+            properties = mlcl_attr['molecule_properties']
+
+            if properties is not None:
+
+                values['alogp'] = properties['alogp']
+                values['full_mwt'] = properties['full_mwt']
+                values['heavy_atoms'] = properties['heavy_atoms']
+                values['molecular_species'] = properties['molecular_species']
+
+            structures = mlcl_attr['molecule_structures']
+
+            if structures is not None:
+
+                values['conanicle_smiles'] = structures['canonical_smiles']
+                values['standard_inchi_key'] = structures['standard_inchi_key']
+                values['standard_inchi'] = structures['standard_inchi']
+
+            cross_references = mlcl_attr['cross_references']
+
+            if cross_references is not None:
+
+                values['xrefs'] = [
+                    {'xref_id': rec['xref_id'], 'xref_src': rec['xref_src']}
+                    for rec in cross_references
+                ]
+
+            mlcllst.append(Molecule(**values))
+
+        # empty on the last page, which ends the loop
+        page_path = page['page_meta']['next']
+
+    return mlcllst
+
+def chembl_activities(
+        pchembl_value_none: bool = False,
+        standard_relation: str = '=',
+    ) -> List[tuple]:
+    """
+    Retrieves activities data from ChEMBL.
+
+    The listing is downloaded page by page: after each page the
+    ``page_meta.next`` link of the response is followed until it is empty.
+
+    Args:
+        pchembl_value_none (bool): Whether the pchembl value should be
+            none or not; sent to the server as the
+            `pchembl_value__isnull` filter.
+        standard_relation (str): Which standard relation is needed;
+            sent to the server as the `standard_relation__exact` filter.
+
+    Returns:
+        List of `Activity` named tuples.
+            standard_flag and standard_units attributes are not included
+            in the returned named tuples.
+            Only records returned are the ones where
+            data_validity_comment is none.
+    """
+
+    fields_activity = (
+        'assay_chembl_id',
+        'data_validity_comment',
+        'molecule_chembl_id',
+        'pchembl_value',
+        'standard_relation',
+        'standard_value',
+        'target_chembl_id',
+    )
+
+    Activity = collections.namedtuple(
+        'Activity',
+        fields_activity,
+        defaults = ('None',) * len(fields_activity),
+    )
+
+    actvtylst = []
+
+    base_url = urls.urls['chembl']['url']
+
+    # the filters are part of the first URL only; the `next` links
+    # returned by the server are followed as they are
+    page_path = (
+        urls.urls['chembl']['activity'] +
+        '&pchembl_value__isnull=' +
+        ('true' if pchembl_value_none else 'false') +
+        '&standard_relation__exact=' +
+        standard_relation
+    )
+
+    while page_path:
+
+        c = curl.Curl(base_url + page_path, large = True, silent = False)
+
+        # the response is read back from the file behind `c.fileobj`;
+        # the context manager closes the handle after every page
+        with open(c.fileobj.name, encoding = 'utf-8') as fp:
+
+            page = json.load(fp)
+
+        for actvty_attr in page['activities']:
+
+            # records carrying a data validity comment are discarded
+            if actvty_attr['data_validity_comment'] is not None:
+
+                continue
+
+            actvtylst.append(
+                Activity(
+                    assay_chembl_id = actvty_attr['assay_chembl_id'],
+                    data_validity_comment = actvty_attr['data_validity_comment'],
+                    molecule_chembl_id = actvty_attr['molecule_chembl_id'],
+                    pchembl_value = actvty_attr['pchembl_value'],
+                    standard_relation = actvty_attr['standard_relation'],
+                    standard_value = actvty_attr['standard_value'],
+                    target_chembl_id = actvty_attr['target_chembl_id'],
+                )
+            )
+
+        # empty on the last page, which ends the loop
+        page_path = page['page_meta']['next']
+
+    return actvtylst
\ No newline at end of file
diff --git a/pypath/inputs/drugbank.py b/pypath/inputs/drugbank.py
new file mode 100644
index 000000000..9f4ed3ae2
--- /dev/null
+++ b/pypath/inputs/drugbank.py
@@ -0,0 +1,418 @@
+from typing import List
+
+import os
+import csv
+import collections
+import base64
+
+import pypath.resources.urls as urls
+import pypath.share.curl as curl
+import pypath.share.session as session
+import pypath.share.settings as settings
+
+_logger = session.Logger(name = 'drugbank')
+_log = _logger._log
+
+def _drugbank_protein_table(
+        url: str,
+        req_hdrs: List[str],
+        pharma_active: bool,
+    ) -> List[dict]:
+    """
+    Downloads one polypeptide identifier archive from DrugBank and
+    returns the rows of its "all.csv" table as dicts; with `pharma_active`
+    the rows of "pharmacologically_active.csv" are appended after them.
+    """
+
+    c = curl.Curl(
+        url,
+        large = True,
+        silent = False,
+        req_headers = req_hdrs,
+        cache = False,
+    )
+
+    # NOTE(review): presumably `FileOpener` recognises the archive by
+    # the file name extension, hence the rename -- confirm
+    zip_path = c.fileobj.name + '.csv.zip'
+    os.rename(c.fileobj.name, zip_path)
+    opener = curl.FileOpener(zip_path)
+
+    rows = list(csv.DictReader(opener.result['all.csv'], delimiter = ','))
+
+    if pharma_active:
+
+        rows.extend(
+            csv.DictReader(
+                opener.result['pharmacologically_active.csv'],
+                delimiter = ',',
+            )
+        )
+
+    return rows
+
+
+def add_prot_id(
+        user: str,
+        passwd: str,
+        pharma_active: bool = False,
+    ) -> List[tuple]:
+    """
+    Retrieves protein identifiers from Drugbank.
+
+    The enzyme, carrier, transporter and target tables are downloaded
+    in this order, and the UniProt IDs of each table are collected by
+    DrugBank ID.
+
+    Args:
+        user (str): E-mail address for login to DrugBank.
+        passwd (str): Password for login to DrugBank.
+        pharma_active (bool): Whether to include pharmacologically
+            active identifiers: the rows of the
+            "pharmacologically_active.csv" tables are processed after
+            the rows of the "all.csv" tables.
+
+    Returns:
+        List of `ProteinIdentifiers` named tuples, one for each drug,
+        in the order the drugs are first encountered. Within one field
+        the UniProt IDs are separated by semicolons; fields without any
+        identifier are empty strings. Empty DrugBank IDs are skipped.
+    """
+
+    auth_str = base64.b64encode(
+        ('%s:%s' % (user, passwd)).encode()
+    ).decode()
+
+    req_hdrs = [
+        'Authorization: Basic %s' % auth_str,
+        settings.get('user_agent'),
+    ]
+
+    fields = (
+        'DrugBank_ID',
+        'Target_UniProt_ID',
+        'Transporter_UniProt_ID',
+        'Enzym_UniProt_ID',
+        'Carrier_UniProt_ID',
+    )
+
+    ProteinIdentifiers = collections.namedtuple(
+        'ProteinIdentifiers',
+        fields,
+        defaults = ('',) * len(fields),
+    )
+
+    # (field of the record, key of the URL); the order of the tables
+    # determines the order of the records in the result
+    tables = (
+        ('Enzym_UniProt_ID', 'drug_enzym_identifiers'),
+        ('Carrier_UniProt_ID', 'drug_carrier_identifiers'),
+        ('Transporter_UniProt_ID', 'drug_transporter_identifiers'),
+        ('Target_UniProt_ID', 'drug_target_identifiers'),
+    )
+
+    # DrugBank ID -> {field: semicolon separated UniProt IDs};
+    # the dict lookup replaces scanning all records for each ID
+    by_drug = {}
+
+    for field, url_key in tables:
+
+        rows = _drugbank_protein_table(
+            urls.urls['drugbank'][url_key],
+            req_hdrs,
+            pharma_active,
+        )
+
+        for row in rows:
+
+            uniprot = row['UniProt ID']
+
+            for drug_id in row['Drug IDs'].replace(' ', '').split(';'):
+
+                if not drug_id:
+
+                    continue
+
+                values = by_drug.setdefault(drug_id, {})
+                current = values.get(field, '')
+                values[field] = (
+                    uniprot
+                        if current == '' else
+                    current + ';' + uniprot
+                )
+
+    return [
+        ProteinIdentifiers(DrugBank_ID = drug_id, **values)
+        for drug_id, values in by_drug.items()
+    ]
+
+def drug_bank(
+        user: str,
+        passwd: str,
+        addprotid: bool = True,
+        pharma_active: bool = False,
+    ) -> List[tuple]:
+    """
+    Retrieves structures, external links and protein identifiers from Drugbank.
+
+    The "structure links.csv" and "drug links.csv" tables are joined by
+    DrugBank ID; drugs missing from either table are not included.
+
+    Args:
+        user (str): E-mail address for login to DrugBank.
+        passwd (str): Password for login to DrugBank.
+        addprotid (bool): Whether to include protein identifiers from
+            DrugBank. If False, the records do not have the four
+            `*_UniProt_ID` fields at all.
+        pharma_active (bool): Whether to include pharmacologically
+            active identifiers; passed to `add_prot_id`.
+
+    Returns:
+        List of `Combine` named tuples, in the order of the structure
+        links table.
+    """
+
+    fields = (
+        'DrugBank_ID', 'Name', 'CAS_Number', 'Drug_Groups', 'InChIKey',
+        'InChI', 'SMILES', 'Formula', 'KEGG_Compound_ID', 'KEGG_Drug_ID',
+        'PubChem_Compound_ID', 'PubChem_Substance_ID', 'ChEBI_ID',
+        'ChEMBL_ID', 'Drug_Type', 'PharmGKB_ID', 'HET_ID',
+        'Target_UniProt_ID', 'Transporter_UniProt_ID',
+        'Enzym_UniProt_ID', 'Carrier_UniProt_ID',
+    )
+
+    auth_str = base64.b64encode(
+        ('%s:%s' % (user, passwd)).encode()
+    ).decode()
+
+    req_hdrs = [
+        'Authorization: Basic %s' % auth_str,
+        settings.get('user_agent'),
+    ]
+
+    def download_table(url_key, member):
+        # one archive is downloaded, `member` is the table read from it
+
+        c = curl.Curl(
+            urls.urls['drugbank'][url_key],
+            large = True,
+            silent = False,
+            req_headers = req_hdrs,
+            cache = False,
+        )
+
+        # NOTE(review): presumably `FileOpener` recognises the archive
+        # by the file name extension, hence the rename -- confirm
+        zip_path = c.fileobj.name + '.zip'
+        os.rename(c.fileobj.name, zip_path)
+        opener = curl.FileOpener(zip_path)
+
+        return list(csv.DictReader(opener.result[member], delimiter = ','))
+
+    structure_links = download_table('all_structures', 'structure links.csv')
+    drug_links = download_table('all_drug', 'drug links.csv')
+
+    # without protein identifiers only the first 17 fields are used
+    combine_fields = fields if addprotid else fields[:17]
+
+    Combine = collections.namedtuple(
+        'Combine',
+        combine_fields,
+        defaults = ('',) * len(combine_fields),
+    )
+
+    # the rows of the drug links table by DrugBank ID, so that the join
+    # below does not have to scan the whole table for each structure
+    drug_links_by_id = collections.defaultdict(list)
+
+    for drug_attr in drug_links:
+
+        drug_links_by_id[drug_attr['DrugBank ID']].append(drug_attr)
+
+    result = []
+
+    for struct_attr in structure_links:
+
+        for drug_attr in drug_links_by_id.get(struct_attr['DrugBank ID'], ()):
+
+            result.append(
+                Combine(
+                    DrugBank_ID = struct_attr['DrugBank ID'],
+                    Name = struct_attr['Name'],
+                    CAS_Number = struct_attr['CAS Number'],
+                    Drug_Groups = struct_attr['Drug Groups'],
+                    InChIKey = struct_attr['InChIKey'],
+                    InChI = struct_attr['InChI'],
+                    SMILES = struct_attr['SMILES'],
+                    Formula = struct_attr['Formula'],
+                    KEGG_Compound_ID = struct_attr['KEGG Compound ID'],
+                    KEGG_Drug_ID = struct_attr['KEGG Drug ID'],
+                    PubChem_Compound_ID = struct_attr['PubChem Compound ID'],
+                    PubChem_Substance_ID = struct_attr['PubChem Substance ID'],
+                    ChEBI_ID = struct_attr['ChEBI ID'],
+                    ChEMBL_ID = struct_attr['ChEMBL ID'],
+                    Drug_Type = drug_attr['Drug Type'],
+                    PharmGKB_ID = drug_attr['PharmGKB ID'],
+                    HET_ID = drug_attr['HET ID'],
+                )
+            )
+
+    if addprotid:
+
+        # the first record wins if a DrugBank ID occurs more than once
+        identifiers = {}
+
+        for iden_attr in add_prot_id(user, passwd, pharma_active):
+
+            identifiers.setdefault(iden_attr.DrugBank_ID, iden_attr)
+
+        for index, res_attr in enumerate(result):
+
+            iden_attr = identifiers.get(res_attr.DrugBank_ID)
+
+            if iden_attr is not None:
+
+                result[index] = res_attr._replace(
+                    Target_UniProt_ID = iden_attr.Target_UniProt_ID,
+                    Transporter_UniProt_ID = iden_attr.Transporter_UniProt_ID,
+                    Enzym_UniProt_ID = iden_attr.Enzym_UniProt_ID,
+                    Carrier_UniProt_ID = iden_attr.Carrier_UniProt_ID,
+                )
+
+    return result
\ No newline at end of file
diff --git a/pypath/inputs/hpo.py b/pypath/inputs/hpo.py
new file mode 100644
index 000000000..b314a63fa
--- /dev/null
+++ b/pypath/inputs/hpo.py
@@ -0,0 +1,166 @@
+from typing import List, Dict
+
+import csv
+import collections
+
+import pypath.utils.mapping as map
+import pypath.share.curl as curl
+import pypath.resources.urls as urls
+import pypath.formats.obo as obo
+
+def hpo_gene_annotations() -> Dict[str, list]:
+    """
+    Retrieves Gene-HPO relationships from HPO.
+
+    Returns:
+        Dict of lists of `HPOGeneAnnotations` named tuples. The keys
+        are the first identifiers yielded by translating the gene
+        symbols from `genesymbol` to `uniprot`; rows with gene symbols
+        that have no translation are omitted.
+    """
+
+    record_fields = ('entrez_gene_id', 'entrez_gene_symbol', 'HPO_Term_ID')
+
+    HPOGeneAnnotations = collections.namedtuple(
+        'HPOGeneAnnotations',
+        record_fields,
+        defaults = ('',) * len(record_fields),
+    )
+
+    c = curl.Curl(urls.urls['hpo']['gene'], large = True, silent = False)
+    rows = list(csv.DictReader(c.result, delimiter = ','))
+
+    annotations = collections.defaultdict(list)
+
+    for row in rows:
+
+        # only the first comma separated cell of the row is used;
+        # it is split further at tabs (and commas) to get the columns
+        first_cell = list(row.values())[0]
+        columns = first_cell.replace('\t', ',').split(',')
+
+        uniprots = list(map.map_name(columns[1], 'genesymbol', 'uniprot'))
+
+        if not uniprots:
+
+            continue
+
+        annotations[uniprots[0]].append(
+            HPOGeneAnnotations(
+                entrez_gene_id = columns[0],
+                entrez_gene_symbol = columns[1],
+                HPO_Term_ID = columns[2],
+            )
+        )
+
+    return annotations
+
+def hpo_disease_annotations() -> List[tuple]:
+    """
+    Retrieves Disease-HPO relationships from HPO.
+
+    Returns:
+        List of `HPODiseaseAnnotations` named tuples, one for each row
+        of the table after the first four rows.
+    """
+
+    record_fields = (
+        'DatabaseID',
+        'DiseaseName',
+        'Qualifier',
+        'HPO_ID',
+        'Reference',
+        'Evidence',
+        'Aspect',
+    )
+
+    HPODiseaseAnnotations = collections.namedtuple(
+        'HPODiseaseAnnotations',
+        record_fields,
+        defaults = ('',) * len(record_fields),
+    )
+
+    c = curl.Curl(urls.urls['hpo']['disease'], large = True, silent = False)
+    rows = list(csv.DictReader(c.result, delimiter = '\t'))
+
+    result = []
+
+    # NOTE(review): the first four rows are skipped, presumably these
+    # belong to the header of the file -- confirm
+    for row in rows[4:]:
+
+        cells = list(row.values())
+
+        # the first value is the database ID, the second is indexed as
+        # a sequence holding the rest of the columns
+        # NOTE(review): presumably the list `DictReader` creates for the
+        # cells beyond its field names -- confirm
+        result.append(
+            HPODiseaseAnnotations(
+                DatabaseID = cells[0],
+                DiseaseName = cells[1][0],
+                Qualifier = cells[1][1],
+                HPO_ID = cells[1][2],
+                Reference = cells[1][3],
+                Evidence = cells[1][4],
+                Aspect = cells[1][9],
+            )
+        )
+
+    return result
+
+def hpo_ontology() -> List[tuple]:
+    """
+    Retrieves ontology from HPO.
+
+    Returns:
+        List of `Ontology` named tuples, one for each record of the OBO
+        file. `synonyms`, `xrefs` and `is_a` are lists of strings, or
+        empty strings if the record has no such attribute.
+    """
+
+    record_fields = ('hpo_id', 'term_name', 'synonyms', 'xrefs', 'is_a')
+
+    Ontology = collections.namedtuple(
+        'Ontology',
+        record_fields,
+        defaults = ('',) * len(record_fields),
+    )
+
+    terms = list(obo.Obo(urls.urls['hpo']['ontology']))
+
+    result = []
+
+    for rec in terms:
+
+        # the second part of the name is appended only if it is not empty
+        name = rec[2][0] + " " + rec[2][1] if rec[2][1] else rec[2][0]
+
+        # only the attributes present in the record are set,
+        # the rest keep the default value of the named tuple
+        values = {}
+
+        synonym = rec[5].get('synonym')
+
+        if synonym:
+
+            values['synonyms'] = [
+                syn[0] + " " + syn[1]
+                for syn in list(synonym)
+            ]
+
+        xref = rec[5].get('xref')
+
+        if xref:
+
+            values['xrefs'] = [ref[0] for ref in list(xref)]
+
+        is_a = rec[5].get('is_a')
+
+        if is_a:
+
+            values['is_a'] = [
+                parent[0] + " : " + parent[2]
+                for parent in list(is_a)
+            ]
+
+        result.append(
+            Ontology(
+                hpo_id = rec[1][0],
+                term_name = name,
+                **values
+            )
+        )
+
+    return result

From 933a78e2a140cea6327a359a0f978c5797f6e56c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tennur=20K=C4=B1l=C4=B1=C3=A7?=
 <99681146+tnnrklc@users.noreply.github.com>
Date: Fri, 12 Aug 2022 00:13:37 +0300
Subject: [PATCH 02/32] Update urls.py

---
 pypath/resources/urls.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/pypath/resources/urls.py b/pypath/resources/urls.py
index e1fecfdd4..71de13390 100644
--- a/pypath/resources/urls.py
+++ b/pypath/resources/urls.py
@@ -1537,6 +1537,29 @@
         'interactions': 'https://unmtid-shinyapps.net/download/DrugCentral/2021_09_01/drug.target.interaction.tsv.gz',
         'SMILES_InChI' : 'https://unmtid-shinyapps.net/download/DrugCentral/2021_09_01/structures.smiles.tsv',
     },
+    'drugbank': {
+        'label': 'DrugBank database',
+        'all_structures': 'https://go.drugbank.com/releases/5-1-9/downloads/all-structure-links',
+        'all_drug': 'https://go.drugbank.com/releases/5-1-9/downloads/all-drug-links',
+        'drug_target_identifiers' : 'https://go.drugbank.com/releases/5-1-9/downloads/target-all-polypeptide-ids',
+        'drug_enzym_identifiers' : 'https://go.drugbank.com/releases/5-1-9/downloads/enzyme-all-polypeptide-ids',
+        'drug_carrier_identifiers' : 'https://go.drugbank.com/releases/5-1-9/downloads/carrier-all-polypeptide-ids',
+        'drug_transporter_identifiers' : 'https://go.drugbank.com/releases/5-1-9/downloads/transporter-all-polypeptide-ids',
+    }, 
+    'chembl': {
+        'label': 'ChEMBL database',
+        'url': 'https://www.ebi.ac.uk',
+        'target': '/chembl/api/data/target.json?limit=1000',
+        'assay' : '/chembl/api/data/assay.json?limit=1000',
+        'activity' : '/chembl/api/data/activity.json?limit=1000',
+        'molecule' : '/chembl/api/data/molecule.json?limit=1000',
+    },
+    'hpo': {
+        'label': 'HPO database',
+        'ontology': 'https://raw.githubusercontent.com/obophenotype/human-phenotype-ontology/master/hp.obo',
+        'disease' : 'http://purl.obolibrary.org/obo/hp/hpoa/phenotype.hpoa',
+        'gene' : 'http://purl.obolibrary.org/obo/hp/hpoa/genes_to_phenotype.txt',
+    },
 }
 
 

From 0542e76cc27858c4228caca1221b37fa26d9bcc1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tennur=20K=C4=B1l=C4=B1=C3=A7?=
 <99681146+tnnrklc@users.noreply.github.com>
Date: Fri, 12 Aug 2022 00:13:38 +0300
Subject: [PATCH 03/32] Add files via upload

---
 pypath/data/licenses/hpo.json | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 pypath/data/licenses/hpo.json

diff --git a/pypath/data/licenses/hpo.json b/pypath/data/licenses/hpo.json
new file mode 100644
index 000000000..538c5ab81
--- /dev/null
+++ b/pypath/data/licenses/hpo.json
@@ -0,0 +1,7 @@
+{
+    "name": "HPO",
+    "full_name": "HPO License",
+    "url": "https://hpo.jax.org/app/license",
+    "purpose": "academic",
+    "sharing": "alike"
+}
\ No newline at end of file

From f111b607beddefb78491a8cc535f037fc600afa0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tennur=20K=C4=B1l=C4=B1=C3=A7?=
 <99681146+tnnrklc@users.noreply.github.com>
Date: Fri, 12 Aug 2022 00:17:25 +0300
Subject: [PATCH 04/32] Update annot.py

---
 pypath/core/annot.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/pypath/core/annot.py b/pypath/core/annot.py
index 2e7fb4f67..034476831 100644
--- a/pypath/core/annot.py
+++ b/pypath/core/annot.py
@@ -6906,3 +6906,27 @@ def get_db(
         )
 
     return globals()['db']
+
+class HPO(AnnotationBase):
+    """
+    Gene annotations from the HPO database, loaded by the input method
+    `hpo.hpo_gene_annotations`.
+    """
+
+    _eq_fields = ()
+
+
+    def __init__(self, **kwargs):
+        """
+        HPO Gene Annotations from the HPO database.
+
+        Args:
+            kwargs: Passed to `AnnotationBase`, except `ncbi_tax_id`
+                which is discarded.
+        """
+
+        # the organism is fixed below, hence a value from the caller
+        # is removed to avoid passing the argument twice
+        kwargs.pop('ncbi_tax_id', None)
+
+        AnnotationBase.__init__(
+            self,
+            name = 'HPO',
+            ncbi_tax_id = constants.NOT_ORGANISM_SPECIFIC,
+            input_method = 'hpo.hpo_gene_annotations',
+            **kwargs
+        )
+
+
+    def _process_method(self):
+        """
+        Uses the loaded data as annotations without any processing.
+        """
+
+        # the input method already provides the appropriate format
+        self.annot = self.data
+        del self.data

From 60db316403c1ca8a16ec41a96eb6e1f4063eb730 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tennur=20K=C4=B1l=C4=B1=C3=A7?=
 <99681146+tnnrklc@users.noreply.github.com>
Date: Fri, 12 Aug 2022 00:20:32 +0300
Subject: [PATCH 05/32] Update resources.json

---
 pypath/resources/data/resources.json | 34 ++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/pypath/resources/data/resources.json b/pypath/resources/data/resources.json
index e13e4809d..71ff23dae 100644
--- a/pypath/resources/data/resources.json
+++ b/pypath/resources/data/resources.json
@@ -4616,5 +4616,39 @@
         "https://academic.oup.com/nar/article/49/D1/D1160/5957163?login=false"
       ]
     }
+  },
+  "DrugBank": {
+    "license": "CC BY-NC 4.0",
+    "urls": {
+      "webpages": ["https://go.drugbank.com/"],
+      "articles": [
+        "https://pubmed.ncbi.nlm.nih.gov/29126136/",
+        "https://pubmed.ncbi.nlm.nih.gov/24203711/",
+        "https://pubmed.ncbi.nlm.nih.gov/21059682/",
+        "https://pubmed.ncbi.nlm.nih.gov/18048412/",
+        "https://pubmed.ncbi.nlm.nih.gov/16381955/"
+      ]
+    }
+  },
+  "ChEMBL": {
+    "license": "CC BY-SA 3.0",
+    "urls": {
+      "webpages": ["https://www.ebi.ac.uk/chembl/"],
+      "articles": [
+        "http://europepmc.org/article/PMC/5210557",
+        "http://europepmc.org/article/PMC/4489243",
+        "http://europepmc.org/article/MED/24413672"
+      ]
+    }
+  },
+  "HPO": {
+    "full_name": "The Human Phenotype Ontology",
+    "license": "HPO",
+    "urls": {
+      "webpages": ["https://hpo.jax.org/app/"],
+      "articles": [
+        "https://pubmed.ncbi.nlm.nih.gov/33264411/"
+      ]
+    }
   }
 }

From a0f8dd7503e1b255ac423ed837c092a2213c4186 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Fri, 12 Aug 2022 18:41:40 +0200
Subject: [PATCH 06/32] PRs from Tennur: removed trailing spaces, added headers

---
 pypath/inputs/chembl.py      | 88 +++++++++++++++++++++++-------------
 pypath/inputs/drugbank.py    | 67 ++++++++++++++++++---------
 pypath/inputs/drugcentral.py | 65 ++++++++++++++++++--------
 pypath/inputs/hpo.py         | 55 ++++++++++++++++------
 4 files changed, 187 insertions(+), 88 deletions(-)

diff --git a/pypath/inputs/chembl.py b/pypath/inputs/chembl.py
index aecdc4eda..d93818aa7 100644
--- a/pypath/inputs/chembl.py
+++ b/pypath/inputs/chembl.py
@@ -1,4 +1,27 @@
-from typing import List
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#
+#  This file is part of the `pypath` python module
+#
+#  Copyright
+#  2014-2022
+#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
+#
+#  Authors: Dénes Türei (turei.denes@gmail.com)
+#           Nicolàs Palacio
+#           Sebastian Lobentanzer
+#           Erva Ulusoy
+#           Olga Ivanova
+#           Ahmet Rifaioglu
+#           Tennur Kılıç
+#
+#  Distributed under the GPLv3 License.
+#  See accompanying file LICENSE.txt or copy at
+#      http://www.gnu.org/licenses/gpl-3.0.html
+#
+#  Website: http://pypath.omnipathdb.org/
+#
 
 import json
 import collections
@@ -6,11 +29,12 @@
 import pypath.share.curl as curl
 import pypath.resources.urls as urls
 
-def chembl_targets() -> List[tuple] :
+
+def chembl_targets() -> list[tuple]:
     """
     Retrieves targets data from ChEMBL.
-    
-    Returns: 
+
+    Returns:
         namedtuple.
     """
 
@@ -40,10 +64,10 @@ def chembl_targets() -> List[tuple] :
             else:
 
                 break
-        
+
         fileObject = open(c.fileobj.name)
         lst = json.loads(fileObject.read())
-        
+
         for trgt_attr in lst['targets']:
 
             if trgt_attr['target_components']:
@@ -56,7 +80,7 @@ def chembl_targets() -> List[tuple] :
                     )
 
             else:
-                
+
                 trgtlst.append(
                     Target(
                         target_chembl_id = trgt_attr['target_chembl_id'],
@@ -68,8 +92,8 @@ def chembl_targets() -> List[tuple] :
 def chembl_assays() -> List[tuple] :
     """
     Retrieves assays data from ChEMBL.
-    
-    Returns: 
+
+    Returns:
         namedtuple.
     """
 
@@ -99,12 +123,12 @@ def chembl_assays() -> List[tuple] :
             else:
 
                 break
-        
+
         fileObject = open(c.fileobj.name)
         lst = json.loads(fileObject.read())
-        
+
         for assy_attr in lst['assays']:
-            
+
             assylst.append(
                 Assay(
                     assay_chembl_id = assy_attr['assay_chembl_id'],
@@ -120,8 +144,8 @@ def chembl_assays() -> List[tuple] :
 def chembl_molecules() -> List[tuple] :
     """
     Retrieves molecules data from ChEMBL.
-    
-    Returns: 
+
+    Returns:
         namedtuple.
     """
 
@@ -152,10 +176,10 @@ def chembl_molecules() -> List[tuple] :
             else:
 
                 break
-        
+
         fileObject = open(c.fileobj.name)
         lst = json.loads(fileObject.read())
-        
+
         for mlcl_attr in lst['molecules']:
 
             xrefs = []
@@ -166,7 +190,7 @@ def chembl_molecules() -> List[tuple] :
                     prodrug = mlcl_attr['prodrug'],
                     )
                 )
-                
+
             if mlcl_attr['molecule_hierarchy'] != None:
                 mlcllst[-1] = mlcllst[-1]._replace(
                     molecule_chembl_id = mlcl_attr['molecule_hierarchy']['molecule_chembl_id'],
@@ -179,26 +203,26 @@ def chembl_molecules() -> List[tuple] :
                     full_mwt = mlcl_attr['molecule_properties']['full_mwt'],
                     heavy_atoms = mlcl_attr['molecule_properties']['heavy_atoms'],
                     molecular_species = mlcl_attr['molecule_properties']['molecular_species'],
-                )   
-            
+                )
+
             if mlcl_attr['molecule_structures'] != None:
                 mlcllst[-1] = mlcllst[-1]._replace(
                     conanicle_smiles = mlcl_attr['molecule_structures']['canonical_smiles'],
                     standard_inchi_key = mlcl_attr['molecule_structures']['standard_inchi_key'],
                     standard_inchi = mlcl_attr['molecule_structures']['standard_inchi'],
                 )
-            
+
             if mlcl_attr['cross_references'] != None:
 
                 for rec in mlcl_attr['cross_references']:
-                    
+
                     xrefs.append({'xref_id' : rec['xref_id'], 'xref_src': rec['xref_src']})
 
                 mlcllst[-1] = mlcllst[-1]._replace(
                     xrefs = xrefs
                 )
 
-                
+
     return mlcllst
 
 def chembl_activities(
@@ -212,7 +236,7 @@ def chembl_activities(
         pchembl_value_none (bool): Whether the pchembl value should be none or not.
         standard_relation (str): Which standard relation in needed.
 
-    Returns: 
+    Returns:
         namedtuple.
             standard_flag and standard_units attributes are not included in the returned namedtuple.
             Only records returned are the ones where data_validity_comment is none.
@@ -232,11 +256,11 @@ def chembl_activities(
         if flag == 0:
 
             if pchembl_value_none == True:
-                
+
                 url = urls.urls['chembl']['url'] + urls.urls['chembl']['activity']+'&pchembl_value__isnull=true'
-                
+
             else:
-                
+
                 url = urls.urls['chembl']['url'] + urls.urls['chembl']['activity']+'&pchembl_value__isnull=false'
 
             url = url + '&standard_relation__exact='+standard_relation
@@ -253,15 +277,15 @@ def chembl_activities(
             else:
 
                 break
-        
+
         fileObject = open(c.fileobj.name)
         lst = json.loads(fileObject.read())
 
-        
+
         for actvty_attr in lst['activities']:
 
             if actvty_attr['data_validity_comment'] == None:
-                
+
                 actvtylst.append(
                     Activity(
                         assay_chembl_id = actvty_attr['assay_chembl_id'],
@@ -272,7 +296,7 @@ def chembl_activities(
                         standard_value = actvty_attr['standard_value'],
                         target_chembl_id = actvty_attr['target_chembl_id'],
                         )
-                    )  
+                    )
+
 
-                
-    return actvtylst
\ No newline at end of file
+    return actvtylst
diff --git a/pypath/inputs/drugbank.py b/pypath/inputs/drugbank.py
index 9f4ed3ae2..d55a2c146 100644
--- a/pypath/inputs/drugbank.py
+++ b/pypath/inputs/drugbank.py
@@ -1,3 +1,28 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#
+#  This file is part of the `pypath` python module
+#
+#  Copyright
+#  2014-2022
+#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
+#
+#  Authors: Dénes Türei (turei.denes@gmail.com)
+#           Nicolàs Palacio
+#           Sebastian Lobentanzer
+#           Erva Ulusoy
+#           Olga Ivanova
+#           Ahmet Rifaioglu
+#           Tennur Kılıç
+#
+#  Distributed under the GPLv3 License.
+#  See accompanying file LICENSE.txt or copy at
+#      http://www.gnu.org/licenses/gpl-3.0.html
+#
+#  Website: http://pypath.omnipathdb.org/
+#
+
 from typing import List
 
 import os
@@ -14,8 +39,8 @@
 _log = _logger._log
 
 def add_prot_id(
-        user: str, 
-        passwd: str, 
+        user: str,
+        passwd: str,
         pharma_active: bool = False,
     ) -> List[tuple] :
     """
@@ -52,7 +77,7 @@ def add_prot_id(
         silent = False,
         req_headers = req_hdrs,
         cache = False,
-    ) 
+    )
 
     os.rename(c.fileobj.name, c.fileobj.name + ".csv.zip")
     zipfile = curl.FileOpener(c.fileobj.name + ".csv.zip")
@@ -65,7 +90,7 @@ def add_prot_id(
         for rec in active:
 
             enzym.append(rec)
-    
+
     result = []
 
     result.append(
@@ -121,7 +146,7 @@ def add_prot_id(
         silent = False,
         req_headers = req_hdrs,
         cache = False,
-    ) 
+    )
 
     os.rename(c.fileobj.name, c.fileobj.name + ".csv.zip")
     zipfile = curl.FileOpener(c.fileobj.name + ".csv.zip")
@@ -181,7 +206,7 @@ def add_prot_id(
         silent = False,
         req_headers = req_hdrs,
         cache = False,
-    ) 
+    )
 
     os.rename(c.fileobj.name, c.fileobj.name + ".csv.zip")
     zipfile = curl.FileOpener(c.fileobj.name + ".csv.zip")
@@ -240,7 +265,7 @@ def add_prot_id(
         silent = False,
         req_headers = req_hdrs,
         cache = False,
-    ) 
+    )
 
     os.rename(c.fileobj.name, c.fileobj.name + ".csv.zip")
     zipfile = curl.FileOpener(c.fileobj.name + ".csv.zip")
@@ -282,7 +307,7 @@ def add_prot_id(
                     break
 
                 index += 1
-                
+
             if flag == 0:
 
                 result.append(
@@ -295,9 +320,9 @@ def add_prot_id(
     return result
 
 def drug_bank(
-        user: str, 
-        passwd: str, 
-        addprotid: bool = True, 
+        user: str,
+        passwd: str,
+        addprotid: bool = True,
         pharma_active: bool = False,
     ) -> List[tuple] :
     """
@@ -336,7 +361,7 @@ def drug_bank(
         silent = False,
         req_headers = req_hdrs,
         cache = False
-    ) 
+    )
 
     os.rename(c.fileobj.name, c.fileobj.name + ".zip")
     zipfile = curl.FileOpener(c.fileobj.name + ".zip")
@@ -349,7 +374,7 @@ def drug_bank(
         silent = False,
         req_headers = req_hdrs,
         cache = False
-    ) 
+    )
 
     os.rename(c.fileobj.name, c.fileobj.name + ".zip")
     zipfile = curl.FileOpener(c.fileobj.name + ".zip")
@@ -363,7 +388,7 @@ def drug_bank(
         Combine = collections.namedtuple('Combine', fields[:17],defaults = ("",) * len(fields[:17]))
 
     result = []
-    
+
     for struct_attr in structure_links:
 
         for drug_attr in drug_links:
@@ -391,28 +416,28 @@ def drug_bank(
                         HET_ID = drug_attr['HET ID'],
                     )
                 )
-    
+
     if addprotid:
-        
+
         identifiers_list = add_prot_id(user, passwd, pharma_active)
         index = 0
-        
+
         for res_attr in result:
 
             for iden_attr in identifiers_list:
 
                 if res_attr.DrugBank_ID == iden_attr.DrugBank_ID:
-                
+
                     result[index] = result[index]._replace(
                         Target_UniProt_ID = iden_attr.Target_UniProt_ID,
                         Transporter_UniProt_ID = iden_attr.Transporter_UniProt_ID,
                         Enzym_UniProt_ID = iden_attr.Enzym_UniProt_ID,
-                        Carrier_UniProt_ID = iden_attr.Carrier_UniProt_ID,  
+                        Carrier_UniProt_ID = iden_attr.Carrier_UniProt_ID,
                     )
-                
+
                     break
 
             index += 1
 
 
-    return result
\ No newline at end of file
+    return result
diff --git a/pypath/inputs/drugcentral.py b/pypath/inputs/drugcentral.py
index cc1801e0f..edeff5436 100644
--- a/pypath/inputs/drugcentral.py
+++ b/pypath/inputs/drugcentral.py
@@ -1,3 +1,28 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#
+#  This file is part of the `pypath` python module
+#
+#  Copyright
+#  2014-2022
+#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
+#
+#  Authors: Dénes Türei (turei.denes@gmail.com)
+#           Nicolàs Palacio
+#           Sebastian Lobentanzer
+#           Erva Ulusoy
+#           Olga Ivanova
+#           Ahmet Rifaioglu
+#           Tennur Kılıç
+#
+#  Distributed under the GPLv3 License.
+#  See accompanying file LICENSE.txt or copy at
+#      http://www.gnu.org/licenses/gpl-3.0.html
+#
+#  Website: http://pypath.omnipathdb.org/
+#
+
 from typing import List
 
 import csv
@@ -7,9 +32,9 @@
 import pypath.resources.urls as urls
 
 def drug_central(
-        organism: str = "Homo sapiens", 
-        SMILES: bool = False, 
-        InChI: bool = False, 
+        organism: str = "Homo sapiens",
+        SMILES: bool = False,
+        InChI: bool = False,
         CAS_RN: bool = False,
     ) -> List[tuple]:
     """
@@ -24,7 +49,7 @@ def drug_central(
     Returns:
         namedtuple.
     """
-    
+
     fields = ('DRUG_NAME','TARGET_NAME','TARGET_CLASS',
             'TARGET_ACCESSION','GENE','ACT_VALUE','ACT_TYPE',
             'ACTION_TYPE','TDL','ORGANISM','SMILES','InChI',
@@ -35,7 +60,7 @@ def drug_central(
     interactions = list(csv.DictReader(c.result, delimiter = '\t'))
 
     temp_inter = []
-    
+
     for rec in interactions:
 
         if rec not in temp_inter:
@@ -51,9 +76,9 @@ def drug_central(
         url = urls.urls['drugcentral']['SMILES_InChI']
         c = curl.Curl(url, large = True, silent = False)
         structures = list(csv.DictReader(c.result, delimiter = '\t'))
-        
+
         temp_struct = []
-    
+
         for rec in structures:
 
             if rec not in temp_struct:
@@ -111,10 +136,10 @@ def drug_central(
                         ACT_TYPE = inter_attr['ACT_TYPE'],
                         ACTION_TYPE = inter_attr['ACTION_TYPE'],
                         TDL = inter_attr['TDL'],
-                        ORGANISM = inter_attr['ORGANISM'],    
+                        ORGANISM = inter_attr['ORGANISM'],
                         )
                     )
-                  
+
                 for struct_attr in structures:
 
                     if inter_attr['STRUCT_ID'] == struct_attr['ID']:
@@ -125,7 +150,7 @@ def drug_central(
                                 SMILES = struct_attr['SMILES'],
                                 InChI = struct_attr['InChI'],
                                 InChIKey = struct_attr['InChIKey'],
-                                CAS_RN = struct_attr['CAS_RN'],      
+                                CAS_RN = struct_attr['CAS_RN'],
                             )
 
                         elif SMILES == True and InChI == True and CAS_RN == False:
@@ -133,20 +158,20 @@ def drug_central(
                             result[-1] = result[-1]._replace(
                                 SMILES = struct_attr['SMILES'],
                                 InChI = struct_attr['InChI'],
-                                InChIKey = struct_attr['InChIKey'],     
+                                InChIKey = struct_attr['InChIKey'],
                             )
 
                         elif SMILES == True and InChI == False and CAS_RN == True:
 
                             result[-1] = result[-1]._replace(
                                 SMILES = struct_attr['SMILES'],
-                                CAS_RN = struct_attr['CAS_RN'],      
+                                CAS_RN = struct_attr['CAS_RN'],
                             )
 
                         elif SMILES == True and InChI == False and CAS_RN == False:
 
                             result[-1] = result[-1]._replace(
-                                SMILES = struct_attr['SMILES'],   
+                                SMILES = struct_attr['SMILES'],
                             )
 
                         elif SMILES == False and InChI == True and CAS_RN == True:
@@ -154,22 +179,22 @@ def drug_central(
                             result[-1] = result[-1]._replace(
                                 InChI = struct_attr['InChI'],
                                 InChIKey = struct_attr['InChIKey'],
-                                CAS_RN = struct_attr['CAS_RN'],      
+                                CAS_RN = struct_attr['CAS_RN'],
                             )
 
                         elif SMILES == False and InChI == False and CAS_RN == True:
 
                             result[-1] = result[-1]._replace(
-                                CAS_RN = struct_attr['CAS_RN'],      
+                                CAS_RN = struct_attr['CAS_RN'],
                             )
 
                         elif SMILES == False and InChI == True and CAS_RN == False:
 
                             result[-1] = result[-1]._replace(
                                 InChI = struct_attr['InChI'],
-                                InChIKey = struct_attr['InChIKey'],    
+                                InChIKey = struct_attr['InChIKey'],
                             )
-    
+
     else:
 
         DrugTargetInteractions = collections.namedtuple('DrugTargetInteractions', fields[0:10])
@@ -177,7 +202,7 @@ def drug_central(
         for inter_attr in interactions:
 
             if organism == inter_attr['ORGANISM']:
-                
+
                 result.append(
                     DrugTargetInteractions(
                         DRUG_NAME = inter_attr['DRUG_NAME'],
@@ -189,8 +214,8 @@ def drug_central(
                         ACT_TYPE = inter_attr['ACT_TYPE'],
                         ACTION_TYPE = inter_attr['ACTION_TYPE'],
                         TDL = inter_attr['TDL'],
-                        ORGANISM = inter_attr['ORGANISM'],    
+                        ORGANISM = inter_attr['ORGANISM'],
                     )
                 )
 
-    return result
\ No newline at end of file
+    return result
diff --git a/pypath/inputs/hpo.py b/pypath/inputs/hpo.py
index b314a63fa..4a7e8f413 100644
--- a/pypath/inputs/hpo.py
+++ b/pypath/inputs/hpo.py
@@ -1,3 +1,28 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#
+#  This file is part of the `pypath` python module
+#
+#  Copyright
+#  2014-2022
+#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
+#
+#  Authors: Dénes Türei (turei.denes@gmail.com)
+#           Nicolàs Palacio
+#           Sebastian Lobentanzer
+#           Erva Ulusoy
+#           Olga Ivanova
+#           Ahmet Rifaioglu
+#           Tennur Kılıç
+#
+#  Distributed under the GPLv3 License.
+#  See accompanying file LICENSE.txt or copy at
+#      http://www.gnu.org/licenses/gpl-3.0.html
+#
+#  Website: http://pypath.omnipathdb.org/
+#
+
 from typing import List, Dict
 
 import csv
@@ -11,8 +36,8 @@
 def hpo_gene_annotations() -> Dict[str, list]:
     """
     Retrieves Gene-HPO relationships from HPO.
-    
-    Returns: 
+
+    Returns:
         namedtuple.
     """
 
@@ -33,9 +58,9 @@ def hpo_gene_annotations() -> Dict[str, list]:
         values = list(values)[0].replace('\t',',').split(',')
         id = map.map_name(values[1], 'genesymbol', 'uniprot')
         id = list(id)
-        
+
         if id:
-            
+
             annotations[id[0]].append(
                 HPOGeneAnnotations(
                     entrez_gene_id = values[0],
@@ -49,8 +74,8 @@ def hpo_gene_annotations() -> Dict[str, list]:
 def hpo_disease_annotations() -> List[tuple] :
     """
     Retrieves Disease-HPO relationships from HPO.
-    
-    Returns: 
+
+    Returns:
         namedtuple.
     """
 
@@ -64,7 +89,7 @@ def hpo_disease_annotations() -> List[tuple] :
     HPODiseaseAnnotations = collections.namedtuple('HPODiseaseAnnotations', fields,defaults = ("",) * len(fields))
 
     result = []
-    
+
     for i in range(4,len(disease)):
 
         values = disease[i].values()
@@ -88,15 +113,15 @@ def hpo_disease_annotations() -> List[tuple] :
 def hpo_ontology() -> List[tuple] :
     """
     Retrieves ontology from HPO.
-    
-    Returns: 
+
+    Returns:
         namedtuple.
     """
 
     url = urls.urls['hpo']['ontology']
     reader = obo.Obo(url)
     hpo_ontology = [i for i in reader]
-    
+
 
     fields = ('hpo_id','term_name','synonyms','xrefs','is_a')
 
@@ -134,7 +159,7 @@ def hpo_ontology() -> List[tuple] :
 
                 syn = i[0] + " " + i[1]
                 syn_lst.append(syn)
-            
+
             result[-1] = result[-1]._replace(
                 synonyms = syn_lst
             )
@@ -146,7 +171,7 @@ def hpo_ontology() -> List[tuple] :
             for i in xref:
 
                 xref_lst.append(i[0])
-            
+
             result[-1] = result[-1]._replace(
                 xrefs = xref_lst
             )
@@ -156,11 +181,11 @@ def hpo_ontology() -> List[tuple] :
             is_a = list(rec[5].get('is_a'))
 
             for i in is_a:
-                
+
                 isa_lst.append(i[0] + " : " + i[2])
-            
+
             result[-1] = result[-1]._replace(
                 is_a = isa_lst
             )
-            
+
     return result

From 2eccd6d679fafb35ab0bd34d2c858257e174897d Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Fri, 12 Aug 2022 19:51:24 +0200
Subject: [PATCH 07/32] refactored new module `inputs.chembl`

---
 pypath/inputs/chembl.py | 396 ++++++++++++++++++++++------------------
 1 file changed, 216 insertions(+), 180 deletions(-)

diff --git a/pypath/inputs/chembl.py b/pypath/inputs/chembl.py
index d93818aa7..0bfbae8f4 100644
--- a/pypath/inputs/chembl.py
+++ b/pypath/inputs/chembl.py
@@ -23,6 +23,8 @@
 #  Website: http://pypath.omnipathdb.org/
 #
 
+from typing import Literal
+
 import json
 import collections
 
@@ -35,268 +37,302 @@ def chembl_targets() -> list[tuple]:
     Retrieves targets data from ChEMBL.
 
     Returns:
-        namedtuple.
+        List of `ChemblTarget` tuples; `accession` is None without components.
     """
 
-    fields_target = ('accession','target_chembl_id')
-
-    Target = collections.namedtuple('Target', fields_target,defaults = ("None",) * len(fields_target))
+    fields_target = (
+        'accession',
+        'target_chembl_id',
+    )
 
-    trgtlst = []
+    ChemblTarget = collections.namedtuple(
+        'ChemblTarget',
+        fields_target,
+        defaults = (None,) * len(fields_target),
+    )
 
-    flag = 0
+    tgt_lst = []
+    page_dct = {}
 
     while True:
 
-        if flag == 0:
-
-            url = urls.urls['chembl']['url'] + urls.urls['chembl']['target']
-            c = curl.Curl(url, large=True, silent=False)
-            flag = 1
-
-        else:
-
-            if lst['page_meta']['next']:
-
-                url = urls.urls['chembl']['url'] + lst['page_meta']['next']
-                c = curl.Curl(url, large=True, silent=False)
+        if not page_dct:
 
-            else:
+            url = (
+                f"{urls.urls['chembl']['url']}"
+                f"{urls.urls['chembl']['target']}"
+            )
 
-                break
+        elif page_dct['page_meta']['next']:
 
-        fileObject = open(c.fileobj.name)
-        lst = json.loads(fileObject.read())
+            url = (
+                f"{urls.urls['chembl']['url']}"
+                f"{page_dct['page_meta']['next']}"
+            )
 
-        for trgt_attr in lst['targets']:
+        else:
 
-            if trgt_attr['target_components']:
+            break
 
-                trgtlst.append(
-                    Target(
-                        accession = trgt_attr['target_components'][0]['accession'],
-                        target_chembl_id = trgt_attr['target_chembl_id'],
-                        )
-                    )
+        c = curl.Curl(url, large=True, silent=False)
+        with open(c.fileobj.name) as fileobj:
+            page_dct = json.load(fileobj)
 
-            else:
+        tgt_lst.extend(
+            ChemblTarget(
+                accession = (
+                    tgt['target_components'][0]['accession']
+                        if tgt.get('target_components') else  # may be empty
+                    None
+                ),
+                target_chembl_id = tgt['target_chembl_id'],
+            )
+            for tgt in page_dct['targets']
+        )
 
-                trgtlst.append(
-                    Target(
-                        target_chembl_id = trgt_attr['target_chembl_id'],
-                        )
-                    )
+    return tgt_lst
 
-    return trgtlst
 
-def chembl_assays() -> List[tuple] :
+def chembl_assays() -> list[tuple]:
     """
     Retrieves assays data from ChEMBL.
 
     Returns:
-        namedtuple.
+        List of `ChemblAssay` named tuples, one per assay record.
     """
 
-    fields_assay = ('assay_chembl_id','assay_organism','assay_type','confidence_score','target_chembl_id')
+    fields_assay = (
+        'assay_chembl_id',
+        'assay_organism',
+        'assay_type',
+        'confidence_score',
+        'target_chembl_id',
+    )
 
-    Assay = collections.namedtuple('Assay', fields_assay,defaults = ("None",) * len(fields_assay))
+    ChemblAssay = collections.namedtuple(
+        'ChemblAssay',
+        fields_assay,
+        defaults = (None,) * len(fields_assay),
+    )
 
-    assylst = []
-
-    flag = 0
+    assay_lst = []
+    page_dct = {}
 
     while True:
 
-        if flag == 0:
+        if not page_dct:
 
-            url = urls.urls['chembl']['url'] + urls.urls['chembl']['assay']
-            c = curl.Curl(url, large=True, silent=False)
-            flag = 1
+            url = (
+                f"{urls.urls['chembl']['url']}"
+                f"{urls.urls['chembl']['assay']}"
+            )
 
-        else:
+        elif page_dct['page_meta']['next']:
 
-            if lst['page_meta']['next']:
+            url = (
+                f"{urls.urls['chembl']['url']}"
+                f"{page_dct['page_meta']['next']}"
+            )
 
-                url = urls.urls['chembl']['url'] + lst['page_meta']['next']
-                c = curl.Curl(url, large=True, silent=False)
-
-            else:
+        else:
 
-                break
+            break
 
-        fileObject = open(c.fileobj.name)
-        lst = json.loads(fileObject.read())
+        c = curl.Curl(url, large=True, silent=False)
+        with open(c.fileobj.name) as fileobj:
+            page_dct = json.load(fileobj)
 
-        for assy_attr in lst['assays']:
+        assay_lst.extend(
+            ChemblAssay(
+                assay_chembl_id = assy_attr['assay_chembl_id'],
+                assay_organism = assy_attr['assay_organism'],
+                assay_type = assy_attr['assay_type'],
+                confidence_score = assy_attr['confidence_score'],
+                target_chembl_id = assy_attr['target_chembl_id'],
+            )
+            for assy_attr in page_dct['assays']
+        )
 
-            assylst.append(
-                Assay(
-                    assay_chembl_id = assy_attr['assay_chembl_id'],
-                    assay_organism = assy_attr['assay_organism'],
-                    assay_type = assy_attr['assay_type'],
-                    confidence_score = assy_attr['confidence_score'],
-                    target_chembl_id = assy_attr['target_chembl_id'],
-                    )
-                )
+    return assay_lst
 
-    return assylst
 
-def chembl_molecules() -> List[tuple] :
+def chembl_molecules() -> list[tuple]:
     """
     Retrieves molecules data from ChEMBL.
 
     Returns:
-        namedtuple.
+        List of `ChemblMolecule` tuples; absent sub-records give None fields.
     """
 
-    fields_molecule = ('alogp','conanicle_smiles','chirality','full_mwt','heavy_atoms','standard_inchi_key','molecular_species',
-                        'molecul_type','molecule_chembl_id','parent_chembl_id','prodrug','standard_inchi', 'xrefs')
+    def _get(mol, key0, key1):
+        # the `molecule_*` sub-record may be missing or null: both give None
-    Molecule = collections.namedtuple('Molecule', fields_molecule,defaults = ("None",) * len(fields_molecule))
+        return (mol.get(f'molecule_{key0}') or {}).get(key1, None)
 
-    mlcllst = []
 
-    flag = 0
+    fields_molecule = (
+        'alogp',
+        'canonical_smiles',
+        'chirality',
+        'full_mwt',
+        'heavy_atoms',
+        'std_inchi_key',
+        'species',
+        'type',
+        'chembl',
+        'parent_chembl',
+        'prodrug',
+        'std_inchi',
+        'xrefs',
+    )
+
+    ChemblMolecule = collections.namedtuple(
+        'ChemblMolecule',
+        fields_molecule,
+        defaults = (None,) * len(fields_molecule),
+    )
+
+    mol_lst = []
+    page_dct = {}
 
     while True:
 
-        if flag == 0:
+        if not page_dct:
 
             url = urls.urls['chembl']['url'] + urls.urls['chembl']['molecule']
             c = curl.Curl(url, large=True, silent=False)
-            flag = 1
-
-        else:
-
-            if lst['page_meta']['next']:
-
-                url = urls.urls['chembl']['url'] + lst['page_meta']['next']
-                c = curl.Curl(url, large=True, silent=False)
-
-            else:
-
-                break
 
-        fileObject = open(c.fileobj.name)
-        lst = json.loads(fileObject.read())
+        elif page_dct['page_meta']['next']:
 
-        for mlcl_attr in lst['molecules']:
+            url = (
+                f"{urls.urls['chembl']['url']}"
+                f"{page_dct['page_meta']['next']}"
+            )
 
-            xrefs = []
-            mlcllst.append(
-                Molecule(
-                    chirality = mlcl_attr['chirality'],
-                    molecul_type = mlcl_attr['molecule_type'],
-                    prodrug = mlcl_attr['prodrug'],
-                    )
-                )
-
-            if mlcl_attr['molecule_hierarchy'] != None:
-                mlcllst[-1] = mlcllst[-1]._replace(
-                    molecule_chembl_id = mlcl_attr['molecule_hierarchy']['molecule_chembl_id'],
-                    parent_chembl_id = mlcl_attr['molecule_hierarchy']['parent_chembl_id'],
-                )
-
-            if mlcl_attr['molecule_properties'] != None:
-                mlcllst[-1] = mlcllst[-1]._replace(
-                    alogp = mlcl_attr['molecule_properties']['alogp'],
-                    full_mwt = mlcl_attr['molecule_properties']['full_mwt'],
-                    heavy_atoms = mlcl_attr['molecule_properties']['heavy_atoms'],
-                    molecular_species = mlcl_attr['molecule_properties']['molecular_species'],
-                )
-
-            if mlcl_attr['molecule_structures'] != None:
-                mlcllst[-1] = mlcllst[-1]._replace(
-                    conanicle_smiles = mlcl_attr['molecule_structures']['canonical_smiles'],
-                    standard_inchi_key = mlcl_attr['molecule_structures']['standard_inchi_key'],
-                    standard_inchi = mlcl_attr['molecule_structures']['standard_inchi'],
-                )
-
-            if mlcl_attr['cross_references'] != None:
-
-                for rec in mlcl_attr['cross_references']:
-
-                    xrefs.append({'xref_id' : rec['xref_id'], 'xref_src': rec['xref_src']})
+        else:
 
-                mlcllst[-1] = mlcllst[-1]._replace(
-                    xrefs = xrefs
+            break
+        # NOTE(review): on the first page `c` was already created above
+        c = curl.Curl(url, large=True, silent=False)
+        with open(c.fileobj.name) as fileobj:
+            page_dct = json.load(fileobj)
+
+        mol_lst.extend(
+            ChemblMolecule(
+                chirality = mol['chirality'],
+                type = mol['molecule_type'],
+                prodrug = mol['prodrug'],
+
+                chembl = _get(mol, 'hierarchy', 'molecule_chembl_id'),
+                parent_chembl = _get(mol, 'hierarchy', 'parent_chembl_id'),
+
+                alogp = _get(mol, 'properties', 'alogp'),
+                full_mwt = _get(mol, 'properties', 'full_mwt'),
+                heavy_atoms = _get(mol, 'properties', 'heavy_atoms'),
+                species = _get(mol, 'properties', 'molecular_species'),
+
+                canonical_smiles = _get(mol, 'structures', 'canonical_smiles'),
+                std_inchi_key = _get(mol, 'structures', 'standard_inchi_key'),
+                std_inchi = _get(mol, 'structures', 'standard_inchi'),
+
+                xrefs = (
+                    [
+                        {
+                            'xref_id': rec['xref_id'],
+                            'xref_src': rec['xref_src'],
+                        }
+                        for rec in mol['cross_references']
+                    ]
+                        if mol['cross_references'] else
+                    None
                 )
+            )
+            for mol in page_dct['molecules']
+        )
 
+    return mol_lst
 
-    return mlcllst
 
 def chembl_activities(
         pchembl_value_none: bool = False,
-        standard_relation: bool = '=',
-    ) -> List[tuple] :
+        #TODO: are these below all the allowed values?
+        standard_relation: Literal['=', '>', '<', '>=', '<='],
+    ) -> list[tuple] :
     """
     Retrieves activities data from ChEMBL.
 
     Args:
-        pchembl_value_none (bool): Whether the pchembl value should be none or not.
-        standard_relation (str): Which standard relation in needed.
+        pchembl_value_none:
+            # TODO: it is allowed to be None or must be None?
+            Whether the pchembl value should be none or not.
+        standard_relation:
+            Which standard relation in needed.
 
     Returns:
-        namedtuple.
-            standard_flag and standard_units attributes are not included in the returned namedtuple.
-            Only records returned are the ones where data_validity_comment is none.
+        List of activity records as named tuples. `standard_flag` and
+        `standard_units` attributes are not included in the returned records.
+        # TODO: then why the data_validity_comment is part of the records?
+        Only records without `data_validity_comment` are returned.
     """
 
-    fields_activity = ('assay_chembl_id','data_validity_comment','molecule_chembl_id','pchembl_value',
-                        'standard_relation','standard_value','target_chembl_id')
-
-    Activity = collections.namedtuple('Activity', fields_activity,defaults = ("None",) * len(fields_activity))
-
-    actvtylst = []
-
-    flag = 0
+    fields_activity = (
+        'assay_chembl',
+        'data_validity_comment',
+        'chembl',
+        'pchembl',
+        'standard_relation',
+        'standard_value',
+        'target_chembl',
+    )
+
+    ChemblActivity = collections.namedtuple(
+        'ChemblActivity',
+        fields_activity,
+        defaults = (None,) * len(fields_activity),
+    )
+
+    activity_lst = []
+    page_dct = {}
 
     while True:
 
-        if flag == 0:
+        if not page_lst:
 
-            if pchembl_value_none == True:
 
-                url = urls.urls['chembl']['url'] + urls.urls['chembl']['activity']+'&pchembl_value__isnull=true'
+            url = (
+                f"{urls.urls['chembl']['url']}"
+                f"{urls.urls['chembl']['activity']}"
+                f"&pchembl_value__isnull={str(pchembl_value_none).lower()}"
+                f"&standard_relation__exact={standard_relation}"
+            )
 
-            else:
+        elif page_dct['page_meta']['next']:
 
-                url = urls.urls['chembl']['url'] + urls.urls['chembl']['activity']+'&pchembl_value__isnull=false'
-
-            url = url + '&standard_relation__exact='+standard_relation
-            c = curl.Curl(url, large=True, silent=False)
-            flag = 1
+            url = (
+                f"{urls.urls['chembl']['url']}"
+                f"{lst['page_meta']['next']}"
+            )
 
         else:
 
-            if lst['page_meta']['next']:
-
-                url = urls.urls['chembl']['url'] + lst['page_meta']['next']
-                c = curl.Curl(url, large=True, silent=False)
-
-            else:
-
-                break
-
-        fileObject = open(c.fileobj.name)
-        lst = json.loads(fileObject.read())
-
-
-        for actvty_attr in lst['activities']:
+            break
 
-            if actvty_attr['data_validity_comment'] == None:
+        c = curl.Curl(url, large=True, silent=False)
+        fileobj = open(c.fileobj.name)
+        page_dct = json.loads(fileobj.read())
 
-                actvtylst.append(
-                    Activity(
-                        assay_chembl_id = actvty_attr['assay_chembl_id'],
-                        data_validity_comment = actvty_attr['data_validity_comment'],
-                        molecule_chembl_id = actvty_attr['molecule_chembl_id'],
-                        pchembl_value = actvty_attr['pchembl_value'],
-                        standard_relation = actvty_attr['standard_relation'],
-                        standard_value = actvty_attr['standard_value'],
-                        target_chembl_id = actvty_attr['target_chembl_id'],
-                        )
-                    )
 
+        activity_lst.extend(
+            ChemblActivity(
+                assay_chembl = act['assay_chembl_id'],
+                data_validity_comment = act['data_validity_comment'],
+                chembl = act['molecule_chembl_id'],
+                pchembl = act['pchembl_value'],
+                standard_relation = act['standard_relation'],
+                standard_value = act['standard_value'],
+                target_chembl = act['target_chembl_id'],
+            )
+            for act in page_dct['activities']
+            if act['data_validity_comment'] is None
+        )
 
-    return actvtylst
+    return activity_lst

From cb1b2d9411af82ba6ba2f53afb49aba5e4a43984 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Wed, 17 Aug 2022 16:01:47 +0200
Subject: [PATCH 08/32] drugbank & drugcentral: trivial refactoring

---
 pypath/inputs/drugbank.py    | 50 ++++++++++++++++++++++--------------
 pypath/inputs/drugcentral.py | 35 +++++++++++++------------
 2 files changed, 50 insertions(+), 35 deletions(-)

diff --git a/pypath/inputs/drugbank.py b/pypath/inputs/drugbank.py
index d55a2c146..852db9804 100644
--- a/pypath/inputs/drugbank.py
+++ b/pypath/inputs/drugbank.py
@@ -23,8 +23,6 @@
 #  Website: http://pypath.omnipathdb.org/
 #
 
-from typing import List
-
 import os
 import csv
 import collections
@@ -38,37 +36,51 @@
 _logger = session.Logger(name = 'drugbank')
 _log = _logger._log
 
-def add_prot_id(
+
+def drugbank_proteins(
         user: str,
         passwd: str,
         pharma_active: bool = False,
-    ) -> List[tuple] :
+    ) -> list[tuple] :
     """
     Retrieves protein identifiers from Drugbank.
 
     Args:
-        user (str): E-mail address for login to DrugBank.
-        passwd (str): Password for login to DrugBank.
-        pharma_active (bool): Wheter to include pharmacologically active identifiers.
+        user:
+            E-mail address with registered DrugBank account.
+        passwd:
+            Password for the DrugBank account.
+        pharma_active:
+            Wheter to include only pharmacologically active identifiers.
 
     Returns:
-        namedtuple.
+        List of protein records as named tuples.
     """
 
     credentials = {'user': user, 'passwd': passwd}
 
     auth_str = base64.b64encode(
-        ('%s:%s' % (credentials['user'], credentials['passwd'])).encode()
-    ).decode()
-
-    decoded = 'Basic %s' % auth_str
-
-    req_hdrs = ['Authorization: %s' % decoded]
-    req_hdrs.extend([settings.get('user_agent')])
+        f"{credentials['user']}:{credentials['passwd']}".encode()
+    )
 
-    fields = ('DrugBank_ID','Target_UniProt_ID','Transporter_UniProt_ID','Enzym_UniProt_ID','Carrier_UniProt_ID')
+    req_hdrs = [
+        f'Authorization: Basic {auth.decode()}',
+        settings.get('user_agent'),
+    ]
+
+    fields = (
+        'DrugBank_ID',
+        'Target_UniProt_ID',
+        'Transporter_UniProt_ID',
+        'Enzym_UniProt_ID',
+        'Carrier_UniProt_ID',
+    )
 
-    ProteinIdentifiers = collections.namedtuple('ProteinIndetifiers', fields,defaults = ("",) * len(fields))
+    DrugbankProtein = collections.namedtuple(
+        'DrugbankProtein',
+        fields,
+        defaults = (None,) * len(fields),
+    )
 
     url = urls.urls['drugbank']['drug_enzym_identifiers']
     c = curl.Curl(
@@ -319,12 +331,13 @@ def add_prot_id(
 
     return result
 
+
 def drug_bank(
         user: str,
         passwd: str,
         addprotid: bool = True,
         pharma_active: bool = False,
-    ) -> List[tuple] :
+    ) -> list[tuple] :
     """
     Retrieves structures, external links and protein identifiers from Drugbank.
 
@@ -439,5 +452,4 @@ def drug_bank(
 
             index += 1
 
-
     return result
diff --git a/pypath/inputs/drugcentral.py b/pypath/inputs/drugcentral.py
index edeff5436..8e2c49414 100644
--- a/pypath/inputs/drugcentral.py
+++ b/pypath/inputs/drugcentral.py
@@ -23,20 +23,19 @@
 #  Website: http://pypath.omnipathdb.org/
 #
 
-from typing import List
-
 import csv
 import collections
 
 import pypath.share.curl as curl
 import pypath.resources.urls as urls
+import pypath.share.common as common
 
 def drug_central(
         organism: str = "Homo sapiens",
         SMILES: bool = False,
         InChI: bool = False,
         CAS_RN: bool = False,
-    ) -> List[tuple]:
+    ) -> list[tuple]:
     """
     Retrieves drug-target interactions datasets from Drug Central.
 
@@ -50,24 +49,28 @@ def drug_central(
         namedtuple.
     """
 
-    fields = ('DRUG_NAME','TARGET_NAME','TARGET_CLASS',
-            'TARGET_ACCESSION','GENE','ACT_VALUE','ACT_TYPE',
-            'ACTION_TYPE','TDL','ORGANISM','SMILES','InChI',
-            'InChIKey','CAS_RN',)
+    fields = (
+        'DRUG_NAME',
+        'TARGET_NAME',
+        'TARGET_CLASS',
+        'TARGET_ACCESSION',
+        'GENE',
+        'ACT_VALUE',
+        'ACT_TYPE',
+        'ACTION_TYPE',
+        'TDL',
+        'ORGANISM',
+        'SMILES',
+        'InChI',
+        'InChIKey',
+        'CAS_RN',
+    )
 
     url = urls.urls['drugcentral']['interactions']
     c = curl.Curl(url, large = True, silent = False)
     interactions = list(csv.DictReader(c.result, delimiter = '\t'))
 
-    temp_inter = []
-
-    for rec in interactions:
-
-        if rec not in temp_inter:
-
-            temp_inter.append(rec)
-
-    interactions = temp_inter
+    interactions = common.unique_list(interactions)
 
     result = []
 

From 56f2ee1731576bc14a585a7a810f5ded3d0b2183 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Fri, 19 Aug 2022 21:18:44 +0200
Subject: [PATCH 09/32] urls: long lines

---
 pypath/resources/urls.py | 37 ++++++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/pypath/resources/urls.py b/pypath/resources/urls.py
index 71de13390..a23d01148 100644
--- a/pypath/resources/urls.py
+++ b/pypath/resources/urls.py
@@ -1528,23 +1528,32 @@
     },
     'interpro': {
         'label': 'Protein families, domains and functional sites',
-        'entries': 'https://ftp.ebi.ac.uk/pub/databases/interpro/interpro.xml.gz',
-        'annotations': 'https://www.ebi.ac.uk/interpro/api/entry/InterPro/protein/'
-            '%s/taxonomy/uniprot/%u?page_size=%u',
+        'entries': 'https://ftp.ebi.ac.uk/pub/databases/'
+            'interpro/interpro.xml.gz',
+        'annotations': 'https://www.ebi.ac.uk/interpro/api/entry/InterPro/'
+            'protein/%s/taxonomy/uniprot/%u?page_size=%u',
     },
     'drugcentral': {
         'label': 'Drug-target interactions',
-        'interactions': 'https://unmtid-shinyapps.net/download/DrugCentral/2021_09_01/drug.target.interaction.tsv.gz',
-        'SMILES_InChI' : 'https://unmtid-shinyapps.net/download/DrugCentral/2021_09_01/structures.smiles.tsv',
+        'interactions': 'https://unmtid-shinyapps.net/download/DrugCentral'
+            '/2021_09_01/drug.target.interaction.tsv.gz',
+        'SMILES_InChI' : 'https://unmtid-shinyapps.net/download/DrugCentral'
+            '/2021_09_01/structures.smiles.tsv',
     },
     'drugbank': {
         'label': 'DrugBank database',
-        'all_structures': 'https://go.drugbank.com/releases/5-1-9/downloads/all-structure-links',
-        'all_drug': 'https://go.drugbank.com/releases/5-1-9/downloads/all-drug-links',
-        'drug_target_identifiers' : 'https://go.drugbank.com/releases/5-1-9/downloads/target-all-polypeptide-ids',
-        'drug_enzym_identifiers' : 'https://go.drugbank.com/releases/5-1-9/downloads/enzyme-all-polypeptide-ids',
-        'drug_carrier_identifiers' : 'https://go.drugbank.com/releases/5-1-9/downloads/carrier-all-polypeptide-ids',
-        'drug_transporter_identifiers' : 'https://go.drugbank.com/releases/5-1-9/downloads/transporter-all-polypeptide-ids',
+        'all_structures': 'https://go.drugbank.com/releases/5-1-9/'
+            'downloads/all-structure-links',
+        'all_drug': 'https://go.drugbank.com/releases/5-1-9/downloads/'
+            'all-drug-links',
+        'drug_target_identifiers' : 'https://go.drugbank.com/releases/'
+            '5-1-9/downloads/target-all-polypeptide-ids',
+        'drug_enzym_identifiers' : 'https://go.drugbank.com/releases/5-1-9/'
+            'downloads/enzyme-all-polypeptide-ids',
+        'drug_carrier_identifiers' : 'https://go.drugbank.com/releases/5-1-9/'
+            'downloads/carrier-all-polypeptide-ids',
+        'drug_transporter_identifiers' : 'https://go.drugbank.com/releases/'
+            '5-1-9/downloads/transporter-all-polypeptide-ids',
     }, 
     'chembl': {
         'label': 'ChEMBL database',
@@ -1556,9 +1565,11 @@
     },
     'hpo': {
         'label': 'HPO database',
-        'ontology': 'https://raw.githubusercontent.com/obophenotype/human-phenotype-ontology/master/hp.obo',
+        'ontology': 'https://raw.githubusercontent.com/obophenotype/'
+            'human-phenotype-ontology/master/hp.obo',
         'disease' : 'http://purl.obolibrary.org/obo/hp/hpoa/phenotype.hpoa',
-        'gene' : 'http://purl.obolibrary.org/obo/hp/hpoa/genes_to_phenotype.txt',
+        'gene' : 'http://purl.obolibrary.org/obo/hp/hpoa/'
+            'genes_to_phenotype.txt',
     },
 }
 

From 90e37df6e0692da3fcf71ae2237eebcca1680ea8 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Fri, 19 Aug 2022 22:35:20 +0200
Subject: [PATCH 10/32] `inputs.drugbank` - refactoring 1.

---
 pypath/inputs/drugbank.py | 328 +++++++-------------------------------
 pypath/resources/urls.py  |   2 +-
 2 files changed, 63 insertions(+), 267 deletions(-)

diff --git a/pypath/inputs/drugbank.py b/pypath/inputs/drugbank.py
index 852db9804..217c368dc 100644
--- a/pypath/inputs/drugbank.py
+++ b/pypath/inputs/drugbank.py
@@ -23,7 +23,6 @@
 #  Website: http://pypath.omnipathdb.org/
 #
 
-import os
 import csv
 import collections
 import base64
@@ -37,7 +36,27 @@
 _log = _logger._log
 
 
-def drugbank_proteins(
+def _drugbank_download(user: str, passwd: str, *args, **kwargs):
+
+    defaults = {
+        'large': True,
+        'silent': False,
+        'compr': 'zip',
+    }
+
+    defaults.update(kwargs)
+
+    auth_str = base64.b64encode(f"{user}:{passwd}".encode())
+
+    defaults['req_headers'] = [
+        f'Authorization: Basic {auth.decode()}',
+        settings.get('user_agent'),
+    ]
+
+    return curl.Curl(*args, **defaults)
+
+
+def drugbank_raw_interactions(
         user: str,
         passwd: str,
         pharma_active: bool = False,
@@ -51,29 +70,18 @@ def drugbank_proteins(
         passwd:
             Password for the DrugBank account.
         pharma_active:
-            Wheter to include only pharmacologically active identifiers.
+            Only pharmacologically active relations.
 
     Returns:
-        List of protein records as named tuples.
+        List of drug-protein relations.
     """
 
-    credentials = {'user': user, 'passwd': passwd}
-
-    auth_str = base64.b64encode(
-        f"{credentials['user']}:{credentials['passwd']}".encode()
-    )
-
-    req_hdrs = [
-        f'Authorization: Basic {auth.decode()}',
-        settings.get('user_agent'),
-    ]
+    csv_name = 'pharmacologically_active.csv' if pharma_active else 'all.csv'
 
     fields = (
-        'DrugBank_ID',
-        'Target_UniProt_ID',
-        'Transporter_UniProt_ID',
-        'Enzym_UniProt_ID',
-        'Carrier_UniProt_ID',
+        'drugbank_id',
+        'uniprot_id',
+        'relation',
     )
 
     DrugbankProtein = collections.namedtuple(
@@ -82,257 +90,37 @@ def drugbank_proteins(
         defaults = (None,) * len(fields),
     )
 
-    url = urls.urls['drugbank']['drug_enzym_identifiers']
-    c = curl.Curl(
-        url,
-        large = True,
-        silent = False,
-        req_headers = req_hdrs,
-        cache = False,
-    )
-
-    os.rename(c.fileobj.name, c.fileobj.name + ".csv.zip")
-    zipfile = curl.FileOpener(c.fileobj.name + ".csv.zip")
-    enzym = list(csv.DictReader(zipfile.result["all.csv"], delimiter = ','))
-
-    if pharma_active:
-
-        active = list(csv.DictReader(zipfile.result["pharmacologically_active.csv"], delimiter = ','))
-
-        for rec in active:
-
-            enzym.append(rec)
-
     result = []
 
-    result.append(
-        ProteinIdentifiers(
-            DrugBank_ID = "",
-            )
-        )
-
-    for enzym_attr in enzym:
-
-        DrugBank_IDs = [i for i in enzym_attr['Drug IDs'].replace(" ","").split(';')]
-
-        for id in DrugBank_IDs:
-
-            index = 0
-            flag = 0
-
-            for res_attr in result:
-
-                if id == res_attr.DrugBank_ID:
-
-                    flag = 1
-
-                    if res_attr.Enzym_UniProt_ID == "":
-
-                        result[index] = result[index]._replace(
-                        Enzym_UniProt_ID = enzym_attr['UniProt ID'],)
-
-                    else:
-
-                        result[index] = result[index]._replace(
-                        Enzym_UniProt_ID = result[index].Enzym_UniProt_ID + ";" + enzym_attr['UniProt ID'],)
-
-                    break
-
-                index += 1
-
-            if flag == 0:
-
-                result.append(
-                    ProteinIdentifiers(
-                        DrugBank_ID = id,
-                        Enzym_UniProt_ID = enzym_attr['UniProt ID'],
-                        )
-                    )
-
-    del result[0]
-
-    url = urls.urls['drugbank']['drug_carrier_identifiers']
-    c = curl.Curl(
-        url,
-        large = True,
-        silent = False,
-        req_headers = req_hdrs,
-        cache = False,
-    )
-
-    os.rename(c.fileobj.name, c.fileobj.name + ".csv.zip")
-    zipfile = curl.FileOpener(c.fileobj.name + ".csv.zip")
-    carrier = list(csv.DictReader(zipfile.result["all.csv"], delimiter = ','))
-
-    if pharma_active:
-
-        active = list(csv.DictReader(zipfile.result["pharmacologically_active.csv"], delimiter = ','))
-
-        for rec in active:
-
-            carrier.append(rec)
-
-    for carrier_attr in carrier:
-
-        DrugBank_IDs = [i for i in carrier_attr['Drug IDs'].replace(" ","").split(';')]
-
-        for id in DrugBank_IDs:
-
-            index = 0
-            flag = 0
-
-            for res_attr in result:
-
-                if id == res_attr.DrugBank_ID:
-
-                    flag = 1
-
-                    if res_attr.Carrier_UniProt_ID == "":
-
-                        result[index] = result[index]._replace(
-                        Carrier_UniProt_ID = carrier_attr['UniProt ID'],)
+    for rel in ('carrier', 'enzyme', 'target', 'transporter'):
 
-                    else:
+        url = urls.urls['drugbank'][f'drug_{rel}_identifiers']
 
-                        result[index] = result[index]._replace(
-                        Carrier_UniProt_ID = result[index].Carrier_UniProt_ID + ";" + carrier_attr['UniProt ID'],)
-
-                    break
-
-                index += 1
-
-            if flag == 0:
-
-                result.append(
-                    ProteinIdentifiers(
-                        DrugBank_ID = id,
-                        Carrier_UniProt_ID = carrier_attr['UniProt ID'],
-                        )
-                    )
-
-
-    url = urls.urls['drugbank']['drug_transporter_identifiers']
-    c = curl.Curl(
-        url,
-        large = True,
-        silent = False,
-        req_headers = req_hdrs,
-        cache = False,
-    )
-
-    os.rename(c.fileobj.name, c.fileobj.name + ".csv.zip")
-    zipfile = curl.FileOpener(c.fileobj.name + ".csv.zip")
-    transporter = list(csv.DictReader(zipfile.result["all.csv"], delimiter = ','))
-
-    if pharma_active:
-
-        active = list(csv.DictReader(zipfile.result["pharmacologically_active.csv"], delimiter = ','))
-
-        for rec in active:
-
-            transporter.append(rec)
-
-    for transporter_attr in transporter:
-
-        DrugBank_IDs = [i for i in transporter_attr['Drug IDs'].replace(" ","").split(';')]
-
-        for id in DrugBank_IDs:
-
-            index = 0
-            flag = 0
-
-            for res_attr in result:
-
-                if id == res_attr.DrugBank_ID:
-
-                    flag = 1
-
-                    if res_attr.Transporter_UniProt_ID == "":
-
-                        result[index] = result[index]._replace(
-                        Transporter_UniProt_ID = transporter_attr['UniProt ID'],)
-
-                    else:
-
-                        result[index] = result[index]._replace(
-                        Transporter_UniProt_ID = result[index].Transporter_UniProt_ID + ";" + transporter_attr['UniProt ID'],)
-
-                    break
-
-                index += 1
-
-            if flag == 0:
-
-                result.append(
-                    ProteinIdentifiers(
-                        DrugBank_ID = id,
-                        Transporter_UniProt_ID = transporter_attr['UniProt ID'],
-                        )
-                    )
-
-    url = urls.urls['drugbank']['drug_target_identifiers']
-    c = curl.Curl(
-        url,
-        large = True,
-        silent = False,
-        req_headers = req_hdrs,
-        cache = False,
-    )
-
-    os.rename(c.fileobj.name, c.fileobj.name + ".csv.zip")
-    zipfile = curl.FileOpener(c.fileobj.name + ".csv.zip")
-    target = list(csv.DictReader(zipfile.result["all.csv"], delimiter = ','))
-
-    if pharma_active:
-
-        active = list(csv.DictReader(zipfile.result["pharmacologically_active.csv"], delimiter = ','))
-
-        for rec in active:
-
-            target.append(rec)
-
-    for target_attr in target:
-
-        DrugBank_IDs = [i for i in target_attr['Drug IDs'].replace(" ","").split(';')]
-
-        for id in DrugBank_IDs:
-
-            index = 0
-            flag = 0
-
-            for res_attr in result:
-
-                if id == res_attr.DrugBank_ID:
-
-                    flag = 1
-
-                    if res_attr.Target_UniProt_ID == "":
-
-                        result[index] = result[index]._replace(
-                        Target_UniProt_ID = target_attr['UniProt ID'],)
-
-                    else:
-
-                        result[index] = result[index]._replace(
-                        Target_UniProt_ID = result[index].Target_UniProt_ID + ";" + target_attr['UniProt ID'],)
+        c = _drugbank_download(
+            user = user,
+            passwd = passwd,
+            files_needed = (csv_name,),
+        )
 
-                    break
+        _ = next(c.result[csv_name])
 
-                index += 1
+        for l in c.result[csv_name]:
 
-            if flag == 0:
+            drugs, uniprot = l.strip().split(',')
 
-                result.append(
-                    ProteinIdentifiers(
-                        DrugBank_ID = id,
-                        Target_UniProt_ID = target_attr['UniProt ID'],
-                        )
-                    )
+            result.extend(
+                DrugbankProtein(
+                    drugbank_id = drug,
+                    uniprot_id = uniprot,
+                    relation = rel,
+                )
+                for drug in drugs
+            )
 
     return result
 
 
-def drug_bank(
+def drugbank(
         user: str,
         passwd: str,
         addprotid: bool = True,
@@ -373,12 +161,16 @@ def drug_bank(
         large = True,
         silent = False,
         req_headers = req_hdrs,
-        cache = False
+        compr = 'zip',
+        files_needed = ('structure links.csv',),
     )
 
-    os.rename(c.fileobj.name, c.fileobj.name + ".zip")
-    zipfile = curl.FileOpener(c.fileobj.name + ".zip")
-    structure_links = list(csv.DictReader(zipfile.result["structure links.csv"], delimiter = ','))
+    structure_links = list(
+        csv.DictReader(
+            c.result['structure links.csv'],
+            delimiter = ',',
+        )
+    )
 
     url = urls.urls['drugbank']['all_drug']
     c = curl.Curl(
@@ -386,12 +178,16 @@ def drug_bank(
         large = True,
         silent = False,
         req_headers = req_hdrs,
-        cache = False
+        compr = 'zip',
+        files_needed = ('drug links.csv',),
     )
 
-    os.rename(c.fileobj.name, c.fileobj.name + ".zip")
-    zipfile = curl.FileOpener(c.fileobj.name + ".zip")
-    drug_links = list(csv.DictReader(zipfile.result["drug links.csv"], delimiter = ','))
+    drug_links = list(
+        csv.DictReader(
+            c.result['drug links.csv'],
+            delimiter = ',',
+        )
+    )
 
     if addprotid:
 
diff --git a/pypath/resources/urls.py b/pypath/resources/urls.py
index a23d01148..4245fbc55 100644
--- a/pypath/resources/urls.py
+++ b/pypath/resources/urls.py
@@ -1548,7 +1548,7 @@
             'all-drug-links',
         'drug_target_identifiers' : 'https://go.drugbank.com/releases/'
             '5-1-9/downloads/target-all-polypeptide-ids',
-        'drug_enzym_identifiers' : 'https://go.drugbank.com/releases/5-1-9/'
+        'drug_enzyme_identifiers' : 'https://go.drugbank.com/releases/5-1-9/'
             'downloads/enzyme-all-polypeptide-ids',
         'drug_carrier_identifiers' : 'https://go.drugbank.com/releases/5-1-9/'
             'downloads/carrier-all-polypeptide-ids',

From 6bab27d889df86c07ce4bd89034ed26195c1c33e Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Fri, 19 Aug 2022 23:17:35 +0200
Subject: [PATCH 11/32] `inputs.drugbank` - refactoring 2.

---
 pypath/inputs/drugbank.py | 185 +++++++++++++++-----------------------
 1 file changed, 73 insertions(+), 112 deletions(-)

diff --git a/pypath/inputs/drugbank.py b/pypath/inputs/drugbank.py
index 217c368dc..e7e9150b0 100644
--- a/pypath/inputs/drugbank.py
+++ b/pypath/inputs/drugbank.py
@@ -84,8 +84,8 @@ def drugbank_raw_interactions(
         'relation',
     )
 
-    DrugbankProtein = collections.namedtuple(
-        'DrugbankProtein',
+    DrugbankRawInteraction = collections.namedtuple(
+        'DrugbankRawInteraction',
         fields,
         defaults = (None,) * len(fields),
     )
@@ -97,6 +97,7 @@ def drugbank_raw_interactions(
         url = urls.urls['drugbank'][f'drug_{rel}_identifiers']
 
         c = _drugbank_download(
+            url = url,
             user = user,
             passwd = passwd,
             files_needed = (csv_name,),
@@ -109,7 +110,7 @@ def drugbank_raw_interactions(
             drugs, uniprot = l.strip().split(',')
 
             result.extend(
-                DrugbankProtein(
+                DrugbankRawInteraction(
                     drugbank_id = drug,
                     uniprot_id = uniprot,
                     relation = rel,
@@ -120,132 +121,92 @@ def drugbank_raw_interactions(
     return result
 
 
-def drugbank(
-        user: str,
-        passwd: str,
-        addprotid: bool = True,
-        pharma_active: bool = False,
-    ) -> list[tuple] :
+def drugbank_drugs(user: str, passwd: str) -> list[tuple] :
     """
-    Retrieves structures, external links and protein identifiers from Drugbank.
+    Retrieves drug identifiers from Drugbank.
+
+    Each drug is annotated by its various database cross-references.
 
     Args:
-        user (str): E-mail address for login to DrugBank.
-        passwd (str): Password for login to DrugBank.
-        addprotid (bool): Wheter to include protein identifiers from DrugBank.
-        pharma_active (bool): Wheter to include pharmacologically active identifiers.
+        user:
+            E-mail address with registered DrugBank account.
+        passwd:
+            Password for the DrugBank account.
 
     Returns:
-        namedtuple.
+        List of named tuples, each field corresponding to various identifiers.
     """
 
-    fields = ('DrugBank_ID','Name','CAS_Number','Drug_Groups','InChIKey','InChI','SMILES','Formula',
-                'KEGG_Compound_ID','KEGG_Drug_ID','PubChem_Compound_ID','PubChem_Substance_ID','ChEBI_ID',
-                'ChEMBL_ID','Drug_Type','PharmGKB_ID','HET_ID','Target_UniProt_ID','Transporter_UniProt_ID',
-                'Enzym_UniProt_ID','Carrier_UniProt_ID')
-
-    credentials = {'user': user, 'passwd': passwd}
+    fields = (
+        'drugbank',
+        'name',
+        'type',
+        'groups',
+        'cas',
+        'inchikey',
+        'inchi',
+        'smiles',
+        'formula',
+        'kegg_compound',
+        'kegg_drug',
+        'pubchem_cid',
+        'pubchem_sid',
+        'chebi',
+        'chembl',
+        'pharmgkb',
+        'het',
+    )
 
-    auth_str = base64.b64encode(
-        ('%s:%s' % (credentials['user'], credentials['passwd'])).encode()
-    ).decode()
+    raw = {}
 
-    decoded = 'Basic %s' % auth_str
+    for table in ('drug', 'structure'):
 
-    req_hdrs = ['Authorization: %s' % decoded]
-    req_hdrs.extend([settings.get('user_agent')])
+        csv = f'{table} links.csv'
 
-    url = urls.urls['drugbank']['all_structures']
-    c = curl.Curl(
-        url,
-        large = True,
-        silent = False,
-        req_headers = req_hdrs,
-        compr = 'zip',
-        files_needed = ('structure links.csv',),
-    )
-
-    structure_links = list(
-        csv.DictReader(
-            c.result['structure links.csv'],
-            delimiter = ',',
+        c = _drugbank_download(
+            url = urls.urls['drugbank'][f'all_{table}s'],
+            user = user,
+            passwd = passwd,
+            files_needed = (csv,),
         )
-    )
 
-    url = urls.urls['drugbank']['all_drug']
-    c = curl.Curl(
-        url,
-        large = True,
-        silent = False,
-        req_headers = req_hdrs,
-        compr = 'zip',
-        files_needed = ('drug links.csv',),
-    )
-
-    drug_links = list(
-        csv.DictReader(
-            c.result['drug links.csv'],
-            delimiter = ',',
+        raw[table] = dict(
+            (rec['DrugBank ID'], rec)
+            for rec in csv.DictReader(c.result[csv], delimiter = ',')
         )
-    )
-
-    if addprotid:
 
-        Combine = collections.namedtuple('Combine', fields,defaults = ("",) * len(fields))
-
-    else:
-        Combine = collections.namedtuple('Combine', fields[:17],defaults = ("",) * len(fields[:17]))
+    DrugbankDrug = collections.namedtuple(
+        'DrugbankDrug',
+        fields,
+        defaults = (None,) * len(fields),
+    )
 
     result = []
 
-    for struct_attr in structure_links:
-
-        for drug_attr in drug_links:
-
-            if struct_attr['DrugBank ID'] == drug_attr['DrugBank ID']:
-
-                result.append(
-                    Combine(
-                        DrugBank_ID = struct_attr['DrugBank ID'],
-                        Name = struct_attr['Name'],
-                        CAS_Number = struct_attr['CAS Number'],
-                        Drug_Groups = struct_attr['Drug Groups'],
-                        InChIKey = struct_attr['InChIKey'],
-                        InChI = struct_attr['InChI'],
-                        SMILES = struct_attr['SMILES'],
-                        Formula = struct_attr['Formula'],
-                        KEGG_Compound_ID = struct_attr['KEGG Compound ID'],
-                        KEGG_Drug_ID = struct_attr['KEGG Drug ID'],
-                        PubChem_Compound_ID = struct_attr['PubChem Compound ID'],
-                        PubChem_Substance_ID = struct_attr['PubChem Substance ID'],
-                        ChEBI_ID = struct_attr['ChEBI ID'],
-                        ChEMBL_ID = struct_attr['ChEMBL ID'],
-                        Drug_Type = drug_attr['Drug Type'],
-                        PharmGKB_ID = drug_attr['PharmGKB ID'],
-                        HET_ID = drug_attr['HET ID'],
-                    )
-                )
-
-    if addprotid:
-
-        identifiers_list = add_prot_id(user, passwd, pharma_active)
-        index = 0
-
-        for res_attr in result:
-
-            for iden_attr in identifiers_list:
-
-                if res_attr.DrugBank_ID == iden_attr.DrugBank_ID:
-
-                    result[index] = result[index]._replace(
-                        Target_UniProt_ID = iden_attr.Target_UniProt_ID,
-                        Transporter_UniProt_ID = iden_attr.Transporter_UniProt_ID,
-                        Enzym_UniProt_ID = iden_attr.Enzym_UniProt_ID,
-                        Carrier_UniProt_ID = iden_attr.Carrier_UniProt_ID,
-                    )
-
-                    break
-
-            index += 1
+    for dbid, struct in raw['structure'].items():
+
+        drug = raw['drug'].get(dbid, {})
+
+        result.append(
+            DrugbankDrug(
+                drugbank = dbid,
+                name = struct['Name'],
+                type = drug.get('Drug Type', None),
+                groups = struct['Drug Groups'],
+                cas = struct['CAS Number'],
+                inchikey = struct['InChIKey'],
+                inchi = struct['InChI'],
+                smiles = struct['SMILES'],
+                formula = struct['Formula'],
+                kegg_compound = struct['KEGG Compound ID'],
+                kegg_drug = struct['KEGG Drug ID'],
+                pubchem_cid = struct['PubChem Compound ID'],
+                pubchem_sid = struct['PubChem Substance ID'],
+                chebi = struct['ChEBI ID'],
+                chembl = struct['ChEMBL ID'],
+                pharmgkb = drug.get('PharmGKB ID', None)
+                het = drug.get('HET ID', None),
+            )
+        )
 
     return result

From f0de019d0bde1357a4e8b2576015d7a8af4e6537 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Sat, 20 Aug 2022 00:56:01 +0200
Subject: [PATCH 12/32] `drugbank_interactions`

---
 pypath/inputs/drugbank.py | 67 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 66 insertions(+), 1 deletion(-)

diff --git a/pypath/inputs/drugbank.py b/pypath/inputs/drugbank.py
index e7e9150b0..c24b5dd34 100644
--- a/pypath/inputs/drugbank.py
+++ b/pypath/inputs/drugbank.py
@@ -23,6 +23,7 @@
 #  Website: http://pypath.omnipathdb.org/
 #
 
+import re
 import csv
 import collections
 import base64
@@ -32,7 +33,7 @@
 import pypath.share.session as session
 import pypath.share.settings as settings
 
-_logger = session.Logger(name = 'drugbank')
+_logger = session.Logger(name = 'drugbank_input')
 _log = _logger._log
 
 
@@ -121,6 +122,70 @@ def drugbank_raw_interactions(
     return result
 
 
+def drugbank_interactions(
+        user: str,
+        passwd: str,
+        pharma_active: bool = False,
+    ) -> list[tuple] :
+    """
+    Drug-protein and protein-drug interactions from Drugbank.
+
+    Args:
+        user:
+            E-mail address with registered DrugBank account.
+        passwd:
+            Password for the DrugBank account.
+        pharma_active:
+            Only pharmacologically active interactions.
+
+    Returns:
+        List of drug-protein and protein-drug interactions.
+    """
+
+    raw = drugbank_raw_interactions(
+        user = user,
+        passwd = passwd,
+        harma_active = pharma_active,
+    )
+
+    drugs = dict(
+        (d.drugbank, d)
+        for d in drugbank_drugs(user = user, passwd = passwd)
+    )
+
+    DrugbankInteraction = collections.namedtuple(
+        'DrugbankInteraction',
+        (
+            'source',
+            'target',
+            'source_entity_type',
+            'target_entity_type',
+            'interaction_type',
+        )
+    )
+
+    result = []
+
+    for r in raw:
+
+        drug = drugs.get(r.drugbank_id, None)
+
+        # TODO: later engage the mapping module here
+        if drug and drug.pubchem_cid:
+
+            src_tgt = reversed if r.relation == 'target' else lambda x: x
+
+            result.append(
+                DrugbankInteraction(
+                    *src_tgt(r.uniprot_id, drug.pubchem_cid),
+                    *src_tgt('protein', 'drug'),
+                    interaction_type = r.relation,
+                )
+            )
+
+    return result
+
+
 def drugbank_drugs(user: str, passwd: str) -> list[tuple] :
     """
     Retrieves drug identifiers from Drugbank.

From 05b98e13a04d470297439998c3710896af7cc179 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Sat, 20 Aug 2022 00:56:21 +0200
Subject: [PATCH 13/32] `drugbank_annotations`

---
 pypath/inputs/drugbank.py | 44 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/pypath/inputs/drugbank.py b/pypath/inputs/drugbank.py
index c24b5dd34..6ba9df7fd 100644
--- a/pypath/inputs/drugbank.py
+++ b/pypath/inputs/drugbank.py
@@ -275,3 +275,47 @@ def drugbank_drugs(user: str, passwd: str) -> list[tuple] :
         )
 
     return result
+
+
+def drugbank_annotations(user: str, passwd: str):
+    """
+    Drug annotations from Drugbank.
+
+    The annotations are restricted to the drug molecule type and drug status.
+
+    Args:
+        user:
+            E-mail address with registered DrugBank account.
+        passwd:
+            Password for the DrugBank account.
+        pharma_active:
+            Only pharmacologically active interactions.
+
+    Returns:
+        List of drug annotations.
+    """
+
+    drugs = drugbank_drugs(user = user, passwd = passwd)
+
+    DrugbankAnnotation = collections.namedtuple(
+        'DrugbankAnnotation',
+        (
+            'type',
+            'status',
+        )
+    )
+
+    result = collections.defaultdict(set)
+
+    for d in drugs:
+
+        if d.pubchem_cid:
+
+            result[d.pubchem_cid].add(
+                DrugbankAnnotation(
+                    type = d.type,
+                    status = re.sub(',\s*', ';', d.groups),
+                )
+            )
+
+    return dict(result)

From e58b2ecc67689fb4c5535d3a9242e79bd8f9a1c3 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Sat, 20 Aug 2022 01:55:53 +0200
Subject: [PATCH 14/32] new module `inputs.credentials`: obtain credentials
 from arguments, settings or file

---
 pypath/inputs/credentials.py | 120 +++++++++++++++++++++++++++++++++++
 pypath/inputs/drugbank.py    |  23 ++++++-
 2 files changed, 140 insertions(+), 3 deletions(-)
 create mode 100644 pypath/inputs/credentials.py

diff --git a/pypath/inputs/credentials.py b/pypath/inputs/credentials.py
new file mode 100644
index 000000000..16388343a
--- /dev/null
+++ b/pypath/inputs/credentials.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#
+#  This file is part of the `pypath` python module
+#
+#  Copyright
+#  2014-2022
+#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
+#
+#  Authors: Dénes Türei (turei.denes@gmail.com)
+#           Nicolàs Palacio
+#           Sebastian Lobentanzer
+#           Erva Ulusoy
+#           Olga Ivanova
+#           Ahmet Rifaioglu
+#
+#  Distributed under the GPLv3 License.
+#  See accompanying file LICENSE.txt or copy at
+#      http://www.gnu.org/licenses/gpl-3.0.html
+#
+#  Website: http://pypath.omnipathdb.org/
+#
+
+from typing import Optional
+
+import os
+
+import pypath.share.settings as settings
+import pypath.share.settings as session
+
+_logger = session.Logger(name = 'credentials')
+_log = _logger._log
+
+
+def credentials(
+        *args: tuple[str, str],
+        resource: Optional[str] = None,
+        from_file: Optional[str] = None,
+        **kwargs: dict[str, str],
+    ) -> dict:
+    """
+    Credentials required for restricted access resources.
+
+    Args:
+        args:
+            Two strings: a user name and password. If only one provided, it
+            is assumed to be a user name; if more provided, apart from the
+            first two, the rest will be ignored.
+        resource:
+            Name of the resource. If the key `<resource>_credentials`
+            exists in the module settings, its value will be returned as
+            credentials.
+        from_file:
+            Path to a file or name of a file that is located in the module's
+            default secrets directory.
+        kwargs:
+            Custom key-value pairs, will be returned unchanged. This is the
+            way to explicitely provide user and password, and any further
+            fields.
+
+    Returns:
+        A dictionary with the credentials. Raises RuntimeError if credentials
+        not provided by any of the available ways.
+    """
+
+    fields = ('user', 'passwd')
+    kwargs.update(dict(zip(fields, args)))
+
+    if all(f in kwargs for f in fields):
+
+        credentials = kwargs
+
+    else:
+
+        credentials = settings.get(f'{resource.lower()}_credentials')
+
+        if not credentials:
+
+            secrets_fname = from_file
+
+            if not os.path.exists(secrets_fname):
+
+                secrets_fname = os.path.join(
+                    settings.get('secrets_dir'),
+                    secrets_fname,
+                )
+
+            if os.path.exists(secrets_fname):
+
+                _log(
+                    f'Reading credentials for `{resource}` '
+                    f'from file `{secrets_fname}`.'
+                )
+
+                with open(secrets_fname, 'r') as fp:
+
+                    lines = fp.read().strip().split(os.linesep)
+
+                keys, values = tuple(zip(*(
+                    ([None] + l.split(':', maxsplit = 1))[-2:]
+                    for l in lines
+                )))
+
+                keys = keys if all(keys) else fields
+                credentials = dict(zip(keys, values))
+                credentials.update(kwargs)
+
+        else:
+
+            _log(f'`{resource}` credentials provided by `settings`.')
+
+        if not credentials:
+
+            msg = f'Failed to obtain credentials for resource `{resource}`'
+            _log(msg)
+
+            raise RuntimeError(msg)
+
+    return credentials
diff --git a/pypath/inputs/drugbank.py b/pypath/inputs/drugbank.py
index 6ba9df7fd..d6ba5d9a6 100644
--- a/pypath/inputs/drugbank.py
+++ b/pypath/inputs/drugbank.py
@@ -23,6 +23,8 @@
 #  Website: http://pypath.omnipathdb.org/
 #
 
+from typing import Optional
+
 import re
 import csv
 import collections
@@ -37,7 +39,16 @@
 _log = _logger._log
 
 
-def _drugbank_download(user: str, passwd: str, *args, **kwargs):
+def _drugbank_credentials(
+        user: Optional[str] = None,
+        passwd: Optional[str] = None,
+    ) -> tuple[str, str]:
+    """
+
+    """
+
+
+def _drugbank_download(user: str, passwd: str, *args, **kwargs) -> curl.Curl:
 
     defaults = {
         'large': True,
@@ -186,7 +197,7 @@ def drugbank_interactions(
     return result
 
 
-def drugbank_drugs(user: str, passwd: str) -> list[tuple] :
+def drugbank_drugs(user: str, passwd: str) -> list[tuple]:
     """
     Retrieves drug identifiers from Drugbank.
 
@@ -277,7 +288,7 @@ def drugbank_drugs(user: str, passwd: str) -> list[tuple] :
     return result
 
 
-def drugbank_annotations(user: str, passwd: str):
+def drugbank_annotations(user: str, passwd: str) -> dict[str, set[tuple]]:
     """
     Drug annotations from Drugbank.
 
@@ -319,3 +330,9 @@ def drugbank_annotations(user: str, passwd: str):
             )
 
     return dict(result)
+
+
+def drugbank_mapping(user: str, passwd: str, ) -> dict[str, set[str]]:
+
+
+

From de70e70f1d1d41dcdc2b85f6d69305534630a181 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Sat, 20 Aug 2022 02:01:46 +0200
Subject: [PATCH 15/32] `inputs.cosmic` uses `inputs.credentials`

---
 pypath/inputs/cosmic.py | 64 ++++++++++++-----------------------------
 1 file changed, 19 insertions(+), 45 deletions(-)

diff --git a/pypath/inputs/cosmic.py b/pypath/inputs/cosmic.py
index d654463e7..461579a97 100644
--- a/pypath/inputs/cosmic.py
+++ b/pypath/inputs/cosmic.py
@@ -33,6 +33,7 @@
 import pypath.share.session as session_mod
 import pypath.share.settings as settings
 import pypath.utils.mapping as mapping
+import pypath.inputs.credentials as credentials
 
 _logger = session_mod.Logger(name = 'cosmic_input')
 _log = _logger._log
@@ -49,54 +50,27 @@ def cancer_gene_census_annotations(
     Returns dict of annotations.
     """
 
-    if not user or not passwd:
+    try:
 
-        credentials = settings.get('cosmic_credentials')
-
-        if not credentials:
-
-            if not os.path.exists(credentials_fname):
-
-                credentials_fname = os.path.join(
-                    settings.get('secrets_dir'),
-                    credentials_fname,
-                )
-
-            if os.path.exists(credentials_fname):
-
-                _log(
-                    'Reading COSMIC credentials '
-                    'from file `%s`.' % credentials_fname
-                )
-
-                with open(credentials_fname, 'r') as fp:
-
-                    credentials = dict(
-                        zip(
-                            ('user', 'passwd'),
-                            fp.read().split('\n')[:2],
-                        )
-                    )
-
-        else:
-            _log('COSMIC credentials provided by `settings`.')
-
-        if not credentials or {'user', 'passwd'} - set(credentials.keys()):
-
-            _log(
-                'No credentials available for the COSMIC website. '
-                'Either set the `cosmic_credentials` key in the `settings` '
-                'module (e.g. `{\'user\': \'myuser\', '
-                '\'passwd\': \'mypassword\'}`), or pass them directly to the '
-                '`pypath.inputs.cosmic.cancer_gene_census_annotations` '
-                'method.'
-            )
+        cosmic_cred = credentials.credentials(
+            user = user,
+            passwd = passwd,
+            resource = 'COSMIC',
+            from_file = credentials_fname,
+        )
 
-            return {}
+    except RuntimeError:
 
-    else:
+        _log(
+            'No credentials available for the COSMIC website. '
+            'Either set the `cosmic_credentials` key in the `settings` '
+            'module (e.g. `{\'user\': \'myuser\', '
+            '\'passwd\': \'mypassword\'}`), or pass them directly to the '
+            '`pypath.inputs.cosmic.cancer_gene_census_annotations` '
+            'method.'
+        )
 
-        credentials = {'user': user, 'passwd': passwd}
+        return {}
 
     CancerGeneCensusAnnotation = collections.namedtuple(
         'CancerGeneCensusAnnotation',
@@ -128,7 +102,7 @@ def multi_field(content):
     url = urls.urls['cgc']['url_new']
 
     auth_str = base64.b64encode(
-        ('%s:%s\n' % (credentials['user'], credentials['passwd'])).encode()
+        ('%s:%s\n' % (cosmic_cred['user'], cosmic_cred['passwd'])).encode()
     )
 
     req_hdrs = ['Authorization: Basic %s' % auth_str.decode()]

From 13b0503bd04e1245efe9e9ca028dd0560f55b5c9 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Sat, 20 Aug 2022 02:04:44 +0200
Subject: [PATCH 16/32] `credentials` removes `None` values from arguments

---
 pypath/inputs/credentials.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pypath/inputs/credentials.py b/pypath/inputs/credentials.py
index 16388343a..c6451cfcf 100644
--- a/pypath/inputs/credentials.py
+++ b/pypath/inputs/credentials.py
@@ -66,6 +66,7 @@ def credentials(
 
     fields = ('user', 'passwd')
     kwargs.update(dict(zip(fields, args)))
+    kwargs = dict(it for kwargs.items() if it[1] is not None)
 
     if all(f in kwargs for f in fields):
 

From f0b28572b53dbbe07c1cad808a6548ed76c349f6 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Sat, 20 Aug 2022 02:43:19 +0200
Subject: [PATCH 17/32] `credentials`: settings key is used also as default
 file name

---
 pypath/inputs/credentials.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pypath/inputs/credentials.py b/pypath/inputs/credentials.py
index c6451cfcf..e8f8a90ff 100644
--- a/pypath/inputs/credentials.py
+++ b/pypath/inputs/credentials.py
@@ -74,11 +74,12 @@ def credentials(
 
     else:
 
-        credentials = settings.get(f'{resource.lower()}_credentials')
+        settings_key = f'{resource.lower()}_credentials'
+        credentials = settings.get(settings_key)
 
         if not credentials:
 
-            secrets_fname = from_file
+            secrets_fname = from_file or settings_key
 
             if not os.path.exists(secrets_fname):
 

From 68b3f218b03c1b5624c13783365d323e8b11aeee Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Sat, 20 Aug 2022 02:43:55 +0200
Subject: [PATCH 18/32] `inputs.drugbank` uses `credentials` +
 `drugbank_mapping`

---
 pypath/inputs/drugbank.py | 133 ++++++++++++++++++++++++++++++++++----
 1 file changed, 119 insertions(+), 14 deletions(-)

diff --git a/pypath/inputs/drugbank.py b/pypath/inputs/drugbank.py
index d6ba5d9a6..fe5afeab4 100644
--- a/pypath/inputs/drugbank.py
+++ b/pypath/inputs/drugbank.py
@@ -34,6 +34,7 @@
 import pypath.share.curl as curl
 import pypath.share.session as session
 import pypath.share.settings as settings
+import pypath.inputs.credentials as credentials
 
 _logger = session.Logger(name = 'drugbank_input')
 _log = _logger._log
@@ -42,13 +43,38 @@
 def _drugbank_credentials(
         user: Optional[str] = None,
         passwd: Optional[str] = None,
+        credentials_fname: Optional[str] = None,
     ) -> tuple[str, str]:
-    """
 
-    """
+    return credentials.credentials(
+        user = user,
+        passwd = passwd,
+        resource = 'DrugBank',
+        from_file = credentials_fname,
+    )
+
+
+def _drugbank_download(
+        *args,
+        user: Optional[str] = None,
+        passwd: Optional[str] = None,
+        credentials_fname: Optional[str] = None,
+        **kwargs
+    ) -> Optional[curl.Curl]:
+
+    try:
+
+        cred = _drugbank_credentials(
+            user = user,
+            passwd = passwd,
+            credentials_fname = credentials_fname,
+        )
+
+    except RuntimeError:
 
+        _log('No credentials available for the DrugBank website.')
 
-def _drugbank_download(user: str, passwd: str, *args, **kwargs) -> curl.Curl:
+        return None
 
     defaults = {
         'large': True,
@@ -58,7 +84,7 @@ def _drugbank_download(user: str, passwd: str, *args, **kwargs) -> curl.Curl:
 
     defaults.update(kwargs)
 
-    auth_str = base64.b64encode(f"{user}:{passwd}".encode())
+    auth_str = base64.b64encode(f"{cred['user']}:{cred['passwd']}".encode())
 
     defaults['req_headers'] = [
         f'Authorization: Basic {auth.decode()}',
@@ -69,8 +95,9 @@ def _drugbank_download(user: str, passwd: str, *args, **kwargs) -> curl.Curl:
 
 
 def drugbank_raw_interactions(
-        user: str,
-        passwd: str,
+        user: Optional[str] = None,
+        passwd: Optional[str] = None,
+        credentials_fname: Optional[str] = None,
         pharma_active: bool = False,
     ) -> list[tuple] :
     """
@@ -112,9 +139,12 @@ def drugbank_raw_interactions(
             url = url,
             user = user,
             passwd = passwd,
+            credentials_fname = credentials_fname,
             files_needed = (csv_name,),
         )
 
+        if not c: continue
+
         _ = next(c.result[csv_name])
 
         for l in c.result[csv_name]:
@@ -134,8 +164,9 @@ def drugbank_raw_interactions(
 
 
 def drugbank_interactions(
-        user: str,
-        passwd: str,
+        user: Optional[str] = None,
+        passwd: Optional[str] = None,
+        credentials_fname: Optional[str] = None,
         pharma_active: bool = False,
     ) -> list[tuple] :
     """
@@ -157,6 +188,7 @@ def drugbank_interactions(
         user = user,
         passwd = passwd,
         harma_active = pharma_active,
+        credentials_fname = credentials_fname,
     )
 
     drugs = dict(
@@ -197,7 +229,11 @@ def drugbank_interactions(
     return result
 
 
-def drugbank_drugs(user: str, passwd: str) -> list[tuple]:
+def drugbank_drugs(
+        user: Optional[str] = None,
+        passwd: Optional[str] = None,
+        credentials_fname: Optional[str] = None,
+    ) -> list[tuple]:
     """
     Retrieves drug identifiers from Drugbank.
 
@@ -243,9 +279,12 @@ def drugbank_drugs(user: str, passwd: str) -> list[tuple]:
             url = urls.urls['drugbank'][f'all_{table}s'],
             user = user,
             passwd = passwd,
+            credentials_fname = credentials_fname,
             files_needed = (csv,),
         )
 
+        if not c: continue
+
         raw[table] = dict(
             (rec['DrugBank ID'], rec)
             for rec in csv.DictReader(c.result[csv], delimiter = ',')
@@ -288,7 +327,11 @@ def drugbank_drugs(user: str, passwd: str) -> list[tuple]:
     return result
 
 
-def drugbank_annotations(user: str, passwd: str) -> dict[str, set[tuple]]:
+def drugbank_annotations(
+        user: Optional[str] = None,
+        passwd: Optional[str] = None,
+        credentials_fname: Optional[str] = None,
+    ) -> dict[str, set[tuple]]:
     """
     Drug annotations from Drugbank.
 
@@ -299,14 +342,16 @@ def drugbank_annotations(user: str, passwd: str) -> dict[str, set[tuple]]:
             E-mail address with registered DrugBank account.
         passwd:
             Password for the DrugBank account.
-        pharma_active:
-            Only pharmacologically active interactions.
 
     Returns:
         List of drug annotations.
     """
 
-    drugs = drugbank_drugs(user = user, passwd = passwd)
+    drugs = drugbank_drugs(
+        user = user,
+        passwd = passwd
+        credentials_fname = credentials_fname,
+    )
 
     DrugbankAnnotation = collections.namedtuple(
         'DrugbankAnnotation',
@@ -332,7 +377,67 @@ def drugbank_annotations(user: str, passwd: str) -> dict[str, set[tuple]]:
     return dict(result)
 
 
-def drugbank_mapping(user: str, passwd: str, ) -> dict[str, set[str]]:
+def drugbank_mapping(
+        id_type: str,
+        target_id_type: str,
+        user: Optional[str] = None,
+        passwd: Optional[str] = None,
+        credentials_fname: Optional[str] = None,
+    ) -> dict[str, set[str]]:
+    """
+    Identifier translation table from DrugBank.
+
+    Available ID types: drugbank, name, type, groups, cas, inchikey,
+    inchi, smiles, formula, kegg_compound, kegg_drug, pubchem_cid,
+    pubchem_sid, chebi, chembl, pharmgkb, het.
+
+    Args:
+        id_type:
+            The identifier type to be used as keys.
+        target_id_type:
+            The identifier type that will be collected into the values.
+        user:
+            E-mail address with registered DrugBank account.
+        passwd:
+            Password for the DrugBank account.
+        credentials_fname:
+            File name or path to a file with DrugBank login credentials.
+
+    Returns:
+        An identifier translation table.
+    """
+
+    synonyms = {
+        'pubchem_compound': 'pubchem_cid',
+        'pubchem_substance': 'pubchem_sid',
+    }
+
 
+    def id_type_proc(_id_type):
 
+        _id_type = re.sub('[^cs]id$', '', _id_type.lower()).replace(' ', '_')
 
+        return synonyms.get(_id_type, _id_type)
+
+
+    drugs = drugbank_drugs(
+        user = user,
+        passwd = passwd
+        credentials_fname = credentials_fname,
+    )
+
+    result = collections.defaultdict(set)
+
+    id_type = id_type_proc(id_type)
+    target_id_type = id_type_proc(id_type)
+
+    for d in drugs:
+
+        the_id = getattr(d, id_type)
+        target_id = getattr(d, target_id_type)
+
+        if the_id and target_id:
+
+            result[the_id].add(target_id)
+
+    return dict(result)

From e1429174553f7723f6910f2840d3b46449e2e960 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Sat, 20 Aug 2022 19:09:50 +0200
Subject: [PATCH 19/32] `drugcentral_drugs`: retrieves drug data

---
 pypath/inputs/drugcentral.py | 99 +++++++++++++++++++++++++-----------
 1 file changed, 69 insertions(+), 30 deletions(-)

diff --git a/pypath/inputs/drugcentral.py b/pypath/inputs/drugcentral.py
index 8e2c49414..158d5829c 100644
--- a/pypath/inputs/drugcentral.py
+++ b/pypath/inputs/drugcentral.py
@@ -27,58 +27,97 @@
 import collections
 
 import pypath.share.curl as curl
+import pypath.share.session as session
 import pypath.resources.urls as urls
-import pypath.share.common as common
+import pypath.utils.taxonomy as taxonomy
+
+_logger = session.Logger(name = 'drugcentral_input')
+_log = _logger._log
+
+
+
+def drugcentral_drugs() -> list[tuple]:
+    """
+    Drug names and structures from Drug Central.
+
+    Returns:
+        List of drugs, each represented by a named tuple.
+    """
+
+    DrugcentralDrug = collections.namedtuple(
+        'DrugcentralDrug',
+        (
+            'drugcentral',
+            'inn',
+            'cas',
+            'smiles',
+            'inchikey',
+            'inchi',
+        )
+    )
+
+    url = urls.urls['drugcentral']['SMILES_InChI']
+    c = curl.Curl(url, large = True, silent = False)
+    drugs = list(csv.DictReader(c.result, delimiter = '\t'))
+
+    result = [
+        DrugcentralDrug(
+            drugcentral = drug['ID'],
+            inn = drug['INN'],
+            cas = drug['CAS_RN'],
+            smiles = drug['SMILES'],
+            inchikey = drug['InChIKey'],
+            inchi = drug['InChI'],
+        )
+        for drug in drugs
+    ]
+
+    return result
+
 
 def drug_central(
-        organism: str = "Homo sapiens",
-        SMILES: bool = False,
-        InChI: bool = False,
-        CAS_RN: bool = False,
+        organism: Union[str, int] = 'Homo sapiens',
     ) -> list[tuple]:
     """
-    Retrieves drug-target interactions datasets from Drug Central.
+    Retrieves drug-target interactions from Drug Central.
 
     Args:
-        organism (str): Which organism to use for processing.
-        SMILES (bool): Whether to include SMILES structures from Drug Central.
-        InChI (bool): Whether to include InChI formats and InChI keys from Drug Central.
-        CAS_RN (bool): Whether to include CAS Registry Number from Drug Central.
+        organism:
+            Organism name or NCBI Taxonomy ID.
 
     Returns:
-        namedtuple.
+        List of drug-target relationships, represented as named tuples.
     """
 
     fields = (
-        'DRUG_NAME',
-        'TARGET_NAME',
-        'TARGET_CLASS',
-        'TARGET_ACCESSION',
-        'GENE',
-        'ACT_VALUE',
-        'ACT_TYPE',
-        'ACTION_TYPE',
-        'TDL',
-        'ORGANISM',
-        'SMILES',
-        'InChI',
-        'InChIKey',
-        'CAS_RN',
+        'drug',
+        'target',
+        'target_class',
+        'target_accession',
+        'gene',
+        'act_value',
+        'act_type',
+        'action_type',
+        'tdl',
+        'organism',
     )
 
     url = urls.urls['drugcentral']['interactions']
     c = curl.Curl(url, large = True, silent = False)
     interactions = list(csv.DictReader(c.result, delimiter = '\t'))
 
-    interactions = common.unique_list(interactions)
+    organism_latin = taxonomy.ensure_latin_name(organism)
+
+    if not organism_latin:
+
+        msg = f'Could not find latin name for organism: `{organism}`.'
+        _log(msg)
+
+        raise ValueError(msg)
 
     result = []
 
-    if SMILES == True or InChI == True or CAS_RN == True:
 
-        url = urls.urls['drugcentral']['SMILES_InChI']
-        c = curl.Curl(url, large = True, silent = False)
-        structures = list(csv.DictReader(c.result, delimiter = '\t'))
 
         temp_struct = []
 

From 2ea48de1ea5cc34416e19477e656374d6d8717af Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Sat, 20 Aug 2022 19:51:32 +0200
Subject: [PATCH 20/32] refactored `inputs.drugcentral` - drug data,
 interactions, id translation

---
 pypath/inputs/drugcentral.py | 224 ++++++++++++-----------------------
 1 file changed, 74 insertions(+), 150 deletions(-)

diff --git a/pypath/inputs/drugcentral.py b/pypath/inputs/drugcentral.py
index 158d5829c..7e6a43fd3 100644
--- a/pypath/inputs/drugcentral.py
+++ b/pypath/inputs/drugcentral.py
@@ -23,11 +23,14 @@
 #  Website: http://pypath.omnipathdb.org/
 #
 
+from typing import Optional, Union
+
 import csv
 import collections
 
 import pypath.share.curl as curl
 import pypath.share.session as session
+import pypath.share.common as common
 import pypath.resources.urls as urls
 import pypath.utils.taxonomy as taxonomy
 
@@ -35,7 +38,6 @@
 _log = _logger._log
 
 
-
 def drugcentral_drugs() -> list[tuple]:
     """
     Drug names and structures from Drug Central.
@@ -75,31 +77,40 @@ def drugcentral_drugs() -> list[tuple]:
     return result
 
 
-def drug_central(
-        organism: Union[str, int] = 'Homo sapiens',
+def drugcentral_interactions(
+        organism: Optional[Union[str, int]] = None,
+        comments: bool = False,
     ) -> list[tuple]:
     """
     Retrieves drug-target interactions from Drug Central.
 
     Args:
         organism:
-            Organism name or NCBI Taxonomy ID.
+            Organism name or NCBI Taxonomy ID. If not provided,
+            all organisms will be retained.
+        comments:
+            Include comments in the result.
 
     Returns:
         List of drug-target relationships, represented as named tuples.
     """
 
-    fields = (
-        'drug',
-        'target',
-        'target_class',
-        'target_accession',
-        'gene',
-        'act_value',
-        'act_type',
-        'action_type',
-        'tdl',
-        'organism',
+    DrugcentralInteraction = collections.namedtuple(
+        'DrugcentralInteraction',
+        (
+            'drug',
+            'drug_name',
+            'uniprot',
+            'target_type',
+            'canonical',
+            'act_value',
+            'act_type',
+            'relation',
+            'effect',
+            'tdl',
+            'organism',
+            'comment',
+        ),
     )
 
     url = urls.urls['drugcentral']['interactions']
@@ -108,156 +119,69 @@ def drug_central(
 
     organism_latin = taxonomy.ensure_latin_name(organism)
 
-    if not organism_latin:
+    if organism and not organism_latin:
 
         msg = f'Could not find latin name for organism: `{organism}`.'
         _log(msg)
 
-        raise ValueError(msg)
-
-    result = []
-
-
-
-        temp_struct = []
-
-        for rec in structures:
-
-            if rec not in temp_struct:
-
-                temp_struct.append(rec)
-
-        structures = temp_struct
-
-        if SMILES == True and InChI == True and CAS_RN == True:
-
-            InteractionsandStructures = collections.namedtuple('InteractionsandStructures', fields[0:], defaults = (None,) * len(fields))
-
-        elif SMILES == True and InChI == True and CAS_RN == False:
-
-            fields = fields[0:13]
-            InteractionsandStructures = collections.namedtuple('InteractionsandStructures', fields, defaults = (None,) * len(fields))
-
-        elif SMILES == True and InChI == False and CAS_RN == True:
-
-            fields = fields[0:11] + fields[13:]
-            InteractionsandStructures = collections.namedtuple('InteractionsandStructures', fields, defaults = (None,) * len(fields))
-
-        elif SMILES == True and InChI == False and CAS_RN == False:
-
-            fields = fields[0:11]
-            InteractionsandStructures = collections.namedtuple('InteractionsandStructures', fields, defaults = (None,) * len(fields))
-
-        elif SMILES == False and InChI == True and CAS_RN == True:
-
-            fields = fields[0:10] + fields[11:]
-            InteractionsandStructures = collections.namedtuple('InteractionsandStructures', fields, defaults = (None,) * len(fields))
-
-        elif SMILES == False and InChI == False and CAS_RN == True:
-
-            fields = fields[13:]
-            InteractionsandStructures = collections.namedtuple('InteractionsandStructures', fields, defaults = (None,) * len(fields))
-
-        elif SMILES == False and InChI == True and CAS_RN == False:
-
-            fields = fields[0:10] + fields[11:13]
-            InteractionsandStructures = collections.namedtuple('InteractionsandStructures', fields, defaults = (None,) * len(fields))
-
-        for inter_attr in interactions:
-
-            if organism == inter_attr['ORGANISM']:
-
-                result.append(
-                    InteractionsandStructures(
-                        DRUG_NAME = inter_attr['DRUG_NAME'],
-                        TARGET_NAME = inter_attr['TARGET_NAME'],
-                        TARGET_CLASS = inter_attr['TARGET_CLASS'],
-                        TARGET_ACCESSION = inter_attr['ACCESSION'],
-                        GENE = inter_attr['GENE'],
-                        ACT_VALUE = inter_attr['ACT_VALUE'],
-                        ACT_TYPE = inter_attr['ACT_TYPE'],
-                        ACTION_TYPE = inter_attr['ACTION_TYPE'],
-                        TDL = inter_attr['TDL'],
-                        ORGANISM = inter_attr['ORGANISM'],
-                        )
-                    )
-
-                for struct_attr in structures:
-
-                    if inter_attr['STRUCT_ID'] == struct_attr['ID']:
-
-                        if SMILES == True and InChI == True and CAS_RN == True:
-
-                            result[-1] = result[-1]._replace(
-                                SMILES = struct_attr['SMILES'],
-                                InChI = struct_attr['InChI'],
-                                InChIKey = struct_attr['InChIKey'],
-                                CAS_RN = struct_attr['CAS_RN'],
-                            )
-
-                        elif SMILES == True and InChI == True and CAS_RN == False:
-
-                            result[-1] = result[-1]._replace(
-                                SMILES = struct_attr['SMILES'],
-                                InChI = struct_attr['InChI'],
-                                InChIKey = struct_attr['InChIKey'],
-                            )
-
-                        elif SMILES == True and InChI == False and CAS_RN == True:
-
-                            result[-1] = result[-1]._replace(
-                                SMILES = struct_attr['SMILES'],
-                                CAS_RN = struct_attr['CAS_RN'],
-                            )
+    drugs = dict(
+        (d.drugcentral, d)
+        for d in drugcentral_drugs()
+    )
 
-                        elif SMILES == True and InChI == False and CAS_RN == False:
+    result = [
+        DrugcentralInteraction(
+            drug = drugs.get(i['STRUCT_ID'], None),
+            drug_name = i['DRUG_NAME'],
+            uniprot = uniprot,
+            target_type = i['TARGET_CLASS'],
+            canonical = i['MOA'] == '1',
+            act_value = common.try_float(i['ACT_VALUE']) or None,
+            act_type = i['ACT_TYPE'],
+            relation = i['RELATION'] or None, # what is relation??
+            effect = i['ACTION_TYPE'] or None,
+            tdl = i['TDL'],
+            organism = i['ORGANISM'],
+            comment = i['ACT_COMMENT'] if comments else None,
+        )
+        for i in interactions
+        for uniprot in i['ACCESSION'].split('|')
+        if not organism_latin or i['ORGANISM'] == organism_latin
+    ]
 
-                            result[-1] = result[-1]._replace(
-                                SMILES = struct_attr['SMILES'],
-                            )
+    return result
 
-                        elif SMILES == False and InChI == True and CAS_RN == True:
 
-                            result[-1] = result[-1]._replace(
-                                InChI = struct_attr['InChI'],
-                                InChIKey = struct_attr['InChIKey'],
-                                CAS_RN = struct_attr['CAS_RN'],
-                            )
+def drugcentral_mapping(
+        id_type: str,
+        target_id_type: str,
+    ) -> dict[str, set[str]]:
+    """
+    Identifier translation table from Drug Central.
 
-                        elif SMILES == False and InChI == False and CAS_RN == True:
+    Available ID types: drugcentral, inn, cas, smiles, inchikey, inchi.
 
-                            result[-1] = result[-1]._replace(
-                                CAS_RN = struct_attr['CAS_RN'],
-                            )
+    Args:
+        id_type:
+            The identifier type to be used as keys.
+        target_id_type:
+            The identifier type that will be collected into the values.
 
-                        elif SMILES == False and InChI == True and CAS_RN == False:
+    Returns:
+        An identifier translation table.
+    """
 
-                            result[-1] = result[-1]._replace(
-                                InChI = struct_attr['InChI'],
-                                InChIKey = struct_attr['InChIKey'],
-                            )
+    drugs = drugcentral_drugs()
 
-    else:
+    result = collections.defaultdict(set)
 
-        DrugTargetInteractions = collections.namedtuple('DrugTargetInteractions', fields[0:10])
+    for d in drugs:
 
-        for inter_attr in interactions:
+        the_id = getattr(d, id_type)
+        target_id = getattr(d, target_id_type)
 
-            if organism == inter_attr['ORGANISM']:
+        if the_id and target_id:
 
-                result.append(
-                    DrugTargetInteractions(
-                        DRUG_NAME = inter_attr['DRUG_NAME'],
-                        TARGET_NAME = inter_attr['TARGET_NAME'],
-                        TARGET_CLASS = inter_attr['TARGET_CLASS'],
-                        TARGET_ACCESSION = inter_attr['ACCESSION'],
-                        GENE = inter_attr['GENE'],
-                        ACT_VALUE = inter_attr['ACT_VALUE'],
-                        ACT_TYPE = inter_attr['ACT_TYPE'],
-                        ACTION_TYPE = inter_attr['ACTION_TYPE'],
-                        TDL = inter_attr['TDL'],
-                        ORGANISM = inter_attr['ORGANISM'],
-                    )
-                )
+            result[the_id].add(target_id)
 
-    return result
+    return dict(result)

From 71a6f1ab94bfcbb3cdfa5f8e4f72f2ae4695c9b0 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Sat, 20 Aug 2022 20:10:30 +0200
Subject: [PATCH 21/32] refactored `hpo_annotations`

---
 pypath/inputs/go.py  |  2 +-
 pypath/inputs/hpo.py | 43 ++++++++++++++++---------------------------
 2 files changed, 17 insertions(+), 28 deletions(-)

diff --git a/pypath/inputs/go.py b/pypath/inputs/go.py
index 2fb82497c..25fa657a4 100644
--- a/pypath/inputs/go.py
+++ b/pypath/inputs/go.py
@@ -68,7 +68,7 @@ def go_annotations_uniprot(organism = 9606, swissprot = 'yes'):
 def go_annotations_goa(
     organism = 'human',
     evidence_codes=False):
-    
+
     """
     Downloads GO annotation from UniProt GOA.
     """
diff --git a/pypath/inputs/hpo.py b/pypath/inputs/hpo.py
index 4a7e8f413..94b69ed70 100644
--- a/pypath/inputs/hpo.py
+++ b/pypath/inputs/hpo.py
@@ -23,55 +23,43 @@
 #  Website: http://pypath.omnipathdb.org/
 #
 
-from typing import List, Dict
-
 import csv
 import collections
 
-import pypath.utils.mapping as map
+import pypath.utils.mapping as mapping
 import pypath.share.curl as curl
 import pypath.resources.urls as urls
 import pypath.formats.obo as obo
 
-def hpo_gene_annotations() -> Dict[str, list]:
+
+def hpo_annotations() -> dict[str, set[str]]:
     """
-    Retrieves Gene-HPO relationships from HPO.
+    Human Phenotype Ontology annotations.
 
     Returns:
-        namedtuple.
+        Dict of proteins as keys and sets of HPO terms as values.
     """
 
     url = urls.urls['hpo']['gene']
     c = curl.Curl(url, large = True, silent = False)
+    _ = next(c.result)
 
-    gene = list(csv.DictReader(c.result, delimiter = ','))
-
-    fields = ('entrez_gene_id','entrez_gene_symbol','HPO_Term_ID')
+    result = collections.defaultdict(set)
 
-    HPOGeneAnnotations = collections.namedtuple('HPOGeneAnnotations', fields,defaults = ("",) * len(fields))
+    for r in c.result:
 
-    annotations = collections.defaultdict(list)
+        r = r.strip().split('\t')
 
-    for rec in gene:
+        uniprots = mapping.map_name(r[0], 'entrez', 'uniprot')
 
-        values = rec.values()
-        values = list(values)[0].replace('\t',',').split(',')
-        id = map.map_name(values[1], 'genesymbol', 'uniprot')
-        id = list(id)
+        for uniprot in uniprots:
 
-        if id:
+            result[uniprot].add(r[2])
 
-            annotations[id[0]].append(
-                HPOGeneAnnotations(
-                    entrez_gene_id = values[0],
-                    entrez_gene_symbol = values[1],
-                    HPO_Term_ID = values[2],
-                    )
-            )
+    return result
 
-    return annotations
 
-def hpo_disease_annotations() -> List[tuple] :
+def hpo_disease_annotations() -> list[tuple] :
     """
     Retrieves Disease-HPO relationships from HPO.
 
@@ -110,7 +98,8 @@ def hpo_disease_annotations() -> List[tuple] :
 
     return result
 
-def hpo_ontology() -> List[tuple] :
+
+def hpo_ontology() -> list[tuple] :
     """
     Retrieves ontology from HPO.
 

From 6a7462b026e157268648aa8b2712f444233122d9 Mon Sep 17 00:00:00 2001
From: deeenes <turei.denes@gmail.com>
Date: Sat, 20 Aug 2022 21:10:33 +0200
Subject: [PATCH 22/32] refactored `inputs.hpo`

---
 pypath/inputs/hpo.py | 165 +++++++++++++++++++++----------------------
 1 file changed, 82 insertions(+), 83 deletions(-)

diff --git a/pypath/inputs/hpo.py b/pypath/inputs/hpo.py
index 94b69ed70..1e64feaea 100644
--- a/pypath/inputs/hpo.py
+++ b/pypath/inputs/hpo.py
@@ -23,7 +23,9 @@
 #  Website: http://pypath.omnipathdb.org/
 #
 
-import csv
+from typing import Union
+
+import re
 import collections
 
 import pypath.utils.mapping as mapping
@@ -59,122 +61,119 @@ def hpo_annotations() -> dict[str, set[str]]:
     return result
 
 
-def hpo_disease_annotations() -> list[tuple] :
+def hpo_terms() -> dict[str, str]:
+    """
+    Human Phenotype Ontology accession to term mapping.
     """
-    Retrieves Disease-HPO relationships from HPO.
+
+    return hpo_ontology()['terms']
+
+
+def hpo_diseases() -> dict[str, set[tuple]]:
+    """
+    HPO term-disease relationships from Human Phenotype Ontology.
 
     Returns:
-        namedtuple.
+        A set of disease records for each HPO term.
     """
 
     url = urls.urls['hpo']['disease']
     c = curl.Curl(url, large = True, silent = False)
 
-    disease = list(csv.DictReader(c.result, delimiter = '\t'))
+    HpoDisease = collections.namedtuple(
+        'HpoDisease',
+        (
+            'omim',
+            'name',
+            'pmid',
+            'evidence',
+            'onset',
+            'frequency',
+            'sex',
+            'modifier',
+            'aspect',
+        ),
+    )
 
-    fields = ('DatabaseID', 'DiseaseName', 'Qualifier', 'HPO_ID', 'Reference', 'Evidence', 'Aspect')
+    result = collections.defaultdict(set)
 
-    HPODiseaseAnnotations = collections.namedtuple('HPODiseaseAnnotations', fields,defaults = ("",) * len(fields))
+    for r in c.result:
 
-    result = []
+        if r[0] == '#': continue
 
-    for i in range(4,len(disease)):
+        r = r.split('\t')
 
-        values = disease[i].values()
-        values = list(values)
+        pmid = re.sub('^PMID:', '', r[4]) if r[4][:4] == 'PMID' else None
 
-        result.append(
-            HPODiseaseAnnotations(
-                DatabaseID = values[0],
-                DiseaseName = values[1][0],
-                Qualifier = values[1][1],
-                HPO_ID = values[1][2],
-                Reference = values[1][3],
-                Evidence = values[1][4],
-                Aspect = values[1][9],
-                )
+        result[r[3]].add(
+            HpoDisease(
+                omim = r[0],
+                name = r[1],
+                pmid = pmid,
+                evidence = r[5] or None,
+                onset = r[6] or None,
+                frequency = r[7] or None,
+                sex = r[8] or None,
+                modifier = r[9] or None,
+                aspect = r[10],
             )
+        )
 
-
-    return result
+    return dict(result)
 
 
-def hpo_ontology() -> list[tuple] :
+def hpo_ontology() -> dict[str, dict[str, Union[str, set[str]]]]:
     """
-    Retrieves ontology from HPO.
+    Ontology data from HPO.
 
     Returns:
-        namedtuple.
+        Five dictionaries with term names, term definitions, parents in the
+        ontology tree, term synonyms and cross references to other databases.
+        The dicts "terms" and "defs" are one-to-one, while "parents",
+        "synonyms" and "xrefs" are one-to-many mappings, the keys are always
+        HPO terms.
     """
 
     url = urls.urls['hpo']['ontology']
     reader = obo.Obo(url)
-    hpo_ontology = [i for i in reader]
-
-
-    fields = ('hpo_id','term_name','synonyms','xrefs','is_a')
 
-    Ontology = collections.namedtuple('Ontology', fields,defaults = ("",) * len(fields))
+    result = {
+        'terms': {},
+        'defs': {},
+        'parents': collections.defaultdict(set),
+        'synonyms': collections.defaultdict(set),
+        'xrefs': collections.defaultdict(set),
+    }
 
+    for r in reader:
 
-    result = []
+        if r.stanza != 'Term': continue
 
-    for rec in hpo_ontology:
+        term = r.id.value
 
-        syn_lst = []
-        xref_lst = []
-        isa_lst = []
+        name = (r.name.value, r.name.modifiers)
+        name = ' '.join(n for n in name if n)
+        result['terms'][term] = name
 
-        if rec[2][1]:
+        result['defs'][term] = r.definition.value if r.definition else None
 
-            name = rec[2][0] + " " + rec[2][1]
+        for key, obokey in (
+            ('parents', 'is_a'),
+            ('synonyms', 'synonym'),
+            ('xrefs', 'xref'),
+        ):
 
-        else:
-
-            name = rec[2][0]
-
-        result.append(
-            Ontology(
-                hpo_id = rec[1][0],
-                term_name = name,
+            proc = (  # parentheses keep the conditional out of the lambda body
+                (lambda x: tuple(x.split(':')))
+                    if key == 'xrefs' else
+                (lambda x: x)
             )
-        )
-
-        if rec[5].get('synonym'):
 
-            synonym = list(rec[5].get('synonym'))
-
-            for i in synonym:
-
-                syn = i[0] + " " + i[1]
-                syn_lst.append(syn)
-
-            result[-1] = result[-1]._replace(
-                synonyms = syn_lst
-            )
-
-        if rec[5].get('xref'):
-
-            xref = list(rec[5].get('xref'))
-
-            for i in xref:
-
-                xref_lst.append(i[0])
-
-            result[-1] = result[-1]._replace(
-                xrefs = xref_lst
-            )
-
-        if rec[5].get('is_a'):
-
-            is_a = list(rec[5].get('is_a'))
-
-            for i in is_a:
-
-                isa_lst.append(i[0] + " : " + i[2])
-
-            result[-1] = result[-1]._replace(
-                is_a = isa_lst
+            result[key][term].update(
+                {
+                    proc(x.value)
+                    for x in r.attrs.get(obokey, ())
+                }
             )
 
-    return result
+    return {k: dict(v) for k, v in result.items()}

From a5d98d2db55faf047af624200ed19ad7d6b3748c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tennur=20K=C4=B1l=C4=B1=C3=A7?=
 <99681146+tnnrklc@users.noreply.github.com>
Date: Mon, 22 Aug 2022 09:52:03 +0300
Subject: [PATCH 23/32] Update urls.py

---
 pypath/resources/urls.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypath/resources/urls.py b/pypath/resources/urls.py
index 4245fbc55..e39e2d371 100644
--- a/pypath/resources/urls.py
+++ b/pypath/resources/urls.py
@@ -1544,7 +1544,7 @@
         'label': 'DrugBank database',
         'all_structures': 'https://go.drugbank.com/releases/5-1-9/'
             'downloads/all-structure-links',
-        'all_drug': 'https://go.drugbank.com/releases/5-1-9/downloads/'
+        'all_drugs': 'https://go.drugbank.com/releases/5-1-9/downloads/'
             'all-drug-links',
         'drug_target_identifiers' : 'https://go.drugbank.com/releases/'
             '5-1-9/downloads/target-all-polypeptide-ids',

From 7b66002c190efc8b9532a3ebdef95d1ce08c8f03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tennur=20K=C4=B1l=C4=B1=C3=A7?=
 <99681146+tnnrklc@users.noreply.github.com>
Date: Mon, 22 Aug 2022 10:02:37 +0300
Subject: [PATCH 24/32] Update credentials.py

---
 pypath/inputs/credentials.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypath/inputs/credentials.py b/pypath/inputs/credentials.py
index e8f8a90ff..c11151e2d 100644
--- a/pypath/inputs/credentials.py
+++ b/pypath/inputs/credentials.py
@@ -27,7 +27,7 @@
 import os
 
 import pypath.share.settings as settings
-import pypath.share.settings as session
+import pypath.share.session as session
 
 _logger = session.Logger(name = 'credentials')
 _log = _logger._log

From 75e6f9ad32a97a03559149738a19c64850bb321d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tennur=20K=C4=B1l=C4=B1=C3=A7?=
 <99681146+tnnrklc@users.noreply.github.com>
Date: Mon, 22 Aug 2022 17:44:05 +0300
Subject: [PATCH 25/32] Update drugbank.py

---
 pypath/inputs/drugbank.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/pypath/inputs/drugbank.py b/pypath/inputs/drugbank.py
index fe5afeab4..3bea4cde5 100644
--- a/pypath/inputs/drugbank.py
+++ b/pypath/inputs/drugbank.py
@@ -87,7 +87,7 @@ def _drugbank_download(
     auth_str = base64.b64encode(f"{cred['user']}:{cred['passwd']}".encode())
 
     defaults['req_headers'] = [
-        f'Authorization: Basic {auth.decode()}',
+        f'Authorization: Basic {auth_str.decode()}',
         settings.get('user_agent'),
     ]
 
@@ -149,11 +149,13 @@ def drugbank_raw_interactions(
 
         for l in c.result[csv_name]:
 
-            drugs, uniprot = l.strip().split(',')
+            drugs, uniprot = l.strip().split(',')[-1], l.strip().split(',')[5]
+
+            drugs = drugs.strip().split(';')
 
             result.extend(
                 DrugbankRawInteraction(
-                    drugbank_id = drug,
+                    drugbank_id = drug.strip(),
                     uniprot_id = uniprot,
                     relation = rel,
                 )
@@ -187,7 +189,7 @@ def drugbank_interactions(
     raw = drugbank_raw_interactions(
         user = user,
         passwd = passwd,
-        harma_active = pharma_active,
+        pharma_active = pharma_active,
         credentials_fname = credentials_fname,
     )
 
@@ -349,7 +351,7 @@ def drugbank_annotations(
 
     drugs = drugbank_drugs(
         user = user,
-        passwd = passwd
+        passwd = passwd,
         credentials_fname = credentials_fname,
     )
 
@@ -422,7 +424,7 @@ def id_type_proc(_id_type):
 
     drugs = drugbank_drugs(
         user = user,
-        passwd = passwd
+        passwd = passwd,
         credentials_fname = credentials_fname,
     )
 

From aebcb69b733a7e6f6c27444dadd327b3db4d24e3 Mon Sep 17 00:00:00 2001
From: Erva Ulusoy <95041228+ervau@users.noreply.github.com>
Date: Sat, 27 Aug 2022 01:19:55 +0300
Subject: [PATCH 26/32] refactored `inputs.drugbank` and `inputs.credentials`

---
 pypath/inputs/credentials.py |  8 ++++----
 pypath/inputs/drugbank.py    | 34 +++++++++++++++++-----------------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/pypath/inputs/credentials.py b/pypath/inputs/credentials.py
index c11151e2d..aff3c26f1 100644
--- a/pypath/inputs/credentials.py
+++ b/pypath/inputs/credentials.py
@@ -22,7 +22,7 @@
 #  Website: http://pypath.omnipathdb.org/
 #
 
-from typing import Optional
+from typing import Optional, Tuple, Dict
 
 import os
 
@@ -34,10 +34,10 @@
 
 
 def credentials(
-        *args: tuple[str, str],
+        *args: Tuple[str, str],
         resource: Optional[str] = None,
         from_file: Optional[str] = None,
-        **kwargs: dict[str, str],
+        **kwargs: Dict[str, str],
     ) -> dict:
     """
     Credentials required for restricted access resources.
@@ -66,7 +66,7 @@ def credentials(
 
     fields = ('user', 'passwd')
     kwargs.update(dict(zip(fields, args)))
-    kwargs = dict(it for kwargs.items() if it[1] is not None)
+    kwargs = dict(it for it in kwargs.items() if it[1] is not None)
 
     if all(f in kwargs for f in fields):
 
diff --git a/pypath/inputs/drugbank.py b/pypath/inputs/drugbank.py
index 3bea4cde5..69df0036a 100644
--- a/pypath/inputs/drugbank.py
+++ b/pypath/inputs/drugbank.py
@@ -23,7 +23,7 @@
 #  Website: http://pypath.omnipathdb.org/
 #
 
-from typing import Optional
+from typing import Optional, Tuple, List, Set, Dict
 
 import re
 import csv
@@ -44,7 +44,7 @@ def _drugbank_credentials(
         user: Optional[str] = None,
         passwd: Optional[str] = None,
         credentials_fname: Optional[str] = None,
-    ) -> tuple[str, str]:
+    ) -> Tuple[str, str]:
 
     return credentials.credentials(
         user = user,
@@ -99,7 +99,7 @@ def drugbank_raw_interactions(
         passwd: Optional[str] = None,
         credentials_fname: Optional[str] = None,
         pharma_active: bool = False,
-    ) -> list[tuple] :
+    ) -> List[tuple] :
     """
     Retrieves protein identifiers from Drugbank.
 
@@ -170,7 +170,7 @@ def drugbank_interactions(
         passwd: Optional[str] = None,
         credentials_fname: Optional[str] = None,
         pharma_active: bool = False,
-    ) -> list[tuple] :
+    ) -> List[tuple] :
     """
     Drug-protein and protein-drug interactions from Drugbank.
 
@@ -222,8 +222,8 @@ def drugbank_interactions(
 
             result.append(
                 DrugbankInteraction(
-                    *src_tgt(r.uniprot_id, drug.pubchem_cid),
-                    *src_tgt('protein', 'drug'),
+                    *src_tgt((r.uniprot_id, drug.pubchem_cid)),
+                    *src_tgt(('protein', 'drug')),
                     interaction_type = r.relation,
                 )
             )
@@ -235,7 +235,7 @@ def drugbank_drugs(
         user: Optional[str] = None,
         passwd: Optional[str] = None,
         credentials_fname: Optional[str] = None,
-    ) -> list[tuple]:
+    ) -> List[tuple]:
     """
     Retrieves drug identifiers from Drugbank.
 
@@ -275,21 +275,21 @@ def drugbank_drugs(
 
     for table in ('drug', 'structure'):
 
-        csv = f'{table} links.csv'
+        csv_ = f'{table} links.csv'
 
         c = _drugbank_download(
             url = urls.urls['drugbank'][f'all_{table}s'],
             user = user,
             passwd = passwd,
             credentials_fname = credentials_fname,
-            files_needed = (csv,),
+            files_needed = (csv_,),
         )
 
         if not c: continue
 
         raw[table] = dict(
             (rec['DrugBank ID'], rec)
-            for rec in csv.DictReader(c.result[csv], delimiter = ',')
+            for rec in csv.DictReader(c.result[csv_], delimiter = ',')
         )
 
     DrugbankDrug = collections.namedtuple(
@@ -321,7 +321,7 @@ def drugbank_drugs(
                 pubchem_sid = struct['PubChem Substance ID'],
                 chebi = struct['ChEBI ID'],
                 chembl = struct['ChEMBL ID'],
-                pharmgkb = drug.get('PharmGKB ID', None)
+                pharmgkb = drug.get('PharmGKB ID', None),
                 het = drug.get('HET ID', None),
             )
         )
@@ -333,7 +333,7 @@ def drugbank_annotations(
         user: Optional[str] = None,
         passwd: Optional[str] = None,
         credentials_fname: Optional[str] = None,
-    ) -> dict[str, set[tuple]]:
+    ) -> Dict[str, Set[tuple]]:
     """
     Drug annotations from Drugbank.
 
@@ -385,13 +385,13 @@ def drugbank_mapping(
         user: Optional[str] = None,
         passwd: Optional[str] = None,
         credentials_fname: Optional[str] = None,
-    ) -> dict[str, set[str]]:
+    ) -> Dict[str, Set[str]]:
     """
     Identifier translation table from DrugBank.
 
     Available ID types: drugbank, name, type, groups, cas, inchikey,
-    inchi, smiles, formula, kegg_compound, kegg_drug, pubchem_cid,
-    pubchem_sid, chebi, chembl, pharmgkb, het.
+    inchi, smiles, formula, kegg_compound, kegg_drug, pubchem_compound,
+    pubchem_substance, chebi, chembl, pharmgkb, het.
 
     Args:
         id_type:
@@ -431,7 +431,7 @@ def id_type_proc(_id_type):
     result = collections.defaultdict(set)
 
     id_type = id_type_proc(id_type)
-    target_id_type = id_type_proc(id_type)
+    target_id_type = id_type_proc(target_id_type)
 
     for d in drugs:
 
@@ -442,4 +442,4 @@ def id_type_proc(_id_type):
 
             result[the_id].add(target_id)
 
-    return dict(result)
+    return dict(result)
\ No newline at end of file

From 475a62cb047ad4ed06969e58a9fa09b98dbf8bac Mon Sep 17 00:00:00 2001
From: Erva Ulusoy <95041228+ervau@users.noreply.github.com>
Date: Sat, 27 Aug 2022 09:46:07 +0300
Subject: [PATCH 27/32] refactored typings back to "drugbank: trivial
 refactoring"

---
 pypath/inputs/drugbank.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pypath/inputs/drugbank.py b/pypath/inputs/drugbank.py
index 69df0036a..2f0c91570 100644
--- a/pypath/inputs/drugbank.py
+++ b/pypath/inputs/drugbank.py
@@ -23,7 +23,7 @@
 #  Website: http://pypath.omnipathdb.org/
 #
 
-from typing import Optional, Tuple, List, Set, Dict
+from typing import Optional
 
 import re
 import csv
@@ -44,7 +44,7 @@ def _drugbank_credentials(
         user: Optional[str] = None,
         passwd: Optional[str] = None,
         credentials_fname: Optional[str] = None,
-    ) -> Tuple[str, str]:
+    ) -> tuple[str, str]:
 
     return credentials.credentials(
         user = user,
@@ -99,7 +99,7 @@ def drugbank_raw_interactions(
         passwd: Optional[str] = None,
         credentials_fname: Optional[str] = None,
         pharma_active: bool = False,
-    ) -> List[tuple] :
+    ) -> list[tuple] :
     """
     Retrieves protein identifiers from Drugbank.
 
@@ -170,7 +170,7 @@ def drugbank_interactions(
         passwd: Optional[str] = None,
         credentials_fname: Optional[str] = None,
         pharma_active: bool = False,
-    ) -> List[tuple] :
+    ) -> list[tuple] :
     """
     Drug-protein and protein-drug interactions from Drugbank.
 
@@ -235,7 +235,7 @@ def drugbank_drugs(
         user: Optional[str] = None,
         passwd: Optional[str] = None,
         credentials_fname: Optional[str] = None,
-    ) -> List[tuple]:
+    ) -> list[tuple]:
     """
     Retrieves drug identifiers from Drugbank.
 
@@ -333,7 +333,7 @@ def drugbank_annotations(
         user: Optional[str] = None,
         passwd: Optional[str] = None,
         credentials_fname: Optional[str] = None,
-    ) -> Dict[str, Set[tuple]]:
+    ) -> dict[str, set[tuple]]:
     """
     Drug annotations from Drugbank.
 
@@ -385,7 +385,7 @@ def drugbank_mapping(
         user: Optional[str] = None,
         passwd: Optional[str] = None,
         credentials_fname: Optional[str] = None,
-    ) -> Dict[str, Set[str]]:
+    ) -> dict[str, set[str]]:
     """
     Identifier translation table from DrugBank.
 
@@ -442,4 +442,4 @@ def id_type_proc(_id_type):
 
             result[the_id].add(target_id)
 
-    return dict(result)
\ No newline at end of file
+    return dict(result)

From 6ca6d40f3b8481709e32f8adad015be6e78668a1 Mon Sep 17 00:00:00 2001
From: Erva Ulusoy <95041228+ervau@users.noreply.github.com>
Date: Sat, 27 Aug 2022 09:48:50 +0300
Subject: [PATCH 28/32] refactored typing back to original in
 `inputs.credentials`

---
 pypath/inputs/credentials.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pypath/inputs/credentials.py b/pypath/inputs/credentials.py
index aff3c26f1..1d9ac4560 100644
--- a/pypath/inputs/credentials.py
+++ b/pypath/inputs/credentials.py
@@ -22,7 +22,7 @@
 #  Website: http://pypath.omnipathdb.org/
 #
 
-from typing import Optional, Tuple, Dict
+from typing import Optional
 
 import os
 
@@ -34,10 +34,10 @@
 
 
 def credentials(
-        *args: Tuple[str, str],
+        *args: tuple[str, str],
         resource: Optional[str] = None,
         from_file: Optional[str] = None,
-        **kwargs: Dict[str, str],
+        **kwargs: dict[str, str],
     ) -> dict:
     """
     Credentials required for restricted access resources.

From 87c768948d35b4a96b432df18b7bac896c569761 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tennur=20K=C4=B1l=C4=B1=C3=A7?=
 <99681146+tnnrklc@users.noreply.github.com>
Date: Sat, 27 Aug 2022 18:12:05 +0300
Subject: [PATCH 29/32] Update chembl.py

---
 pypath/inputs/chembl.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/pypath/inputs/chembl.py b/pypath/inputs/chembl.py
index 0bfbae8f4..9eb79f15c 100644
--- a/pypath/inputs/chembl.py
+++ b/pypath/inputs/chembl.py
@@ -82,7 +82,7 @@ def chembl_targets() -> list[tuple]:
             ChemblTarget(
                 accession = (
                     tgt['target_components'][0]['accession']
-                        if 'target_components' in tgt else
+                        if tgt['target_components'] else
                     None
                 ),
                 target_chembl_id = tgt['target_chembl_id'],
@@ -165,8 +165,16 @@ def chembl_molecules() -> list[tuple]:
     """
 
     def _get(mol, key0, key1):
-
-        return mol.get(f'molecule_{key0}', {}).get(key1, None)
+    
+        molecule_properties = mol.get(f'molecule_{key0}', {})
+        
+        if molecule_properties:
+        
+            return molecule_properties.get(key1, None)
+            
+        else:
+        
+            return None
 
 
     fields_molecule = (
@@ -205,7 +213,7 @@ def _get(mol, key0, key1):
 
             url = (
                 f"{urls.urls['chembl']['url']}"
-                f"{lst['page_meta']['next']}"
+                f"{page_dct['page_meta']['next']}"
             )
 
         else:
@@ -253,9 +261,9 @@ def _get(mol, key0, key1):
 
 
 def chembl_activities(
-        pchembl_value_none: bool = False,
         #TODO: are these below all the allowed values?
         standard_relation: Literal['=', '>', '<', '>=', '<='],
+        pchembl_value_none: bool = False,
     ) -> list[tuple] :
     """
     Retrieves activities data from ChEMBL.
@@ -295,7 +303,7 @@ def chembl_activities(
 
     while True:
 
-        if not page_lst:
+        if not page_dct:
 
 
             url = (
@@ -309,7 +317,7 @@ def chembl_activities(
 
             url = (
                 f"{urls.urls['chembl']['url']}"
-                f"{lst['page_meta']['next']}"
+                f"{page_dct['page_meta']['next']}"
             )
 
         else:

From fd24c5501a384774e0cbe8d7014c7dfb0496964b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tennur=20K=C4=B1l=C4=B1=C3=A7?=
 <99681146+tnnrklc@users.noreply.github.com>
Date: Wed, 31 Aug 2022 13:04:37 +0300
Subject: [PATCH 30/32] Update hpo.py

---
 pypath/inputs/hpo.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/pypath/inputs/hpo.py b/pypath/inputs/hpo.py
index 1e64feaea..e6c062eb0 100644
--- a/pypath/inputs/hpo.py
+++ b/pypath/inputs/hpo.py
@@ -34,17 +34,21 @@
 import pypath.formats.obo as obo
 
 
-def hpo_annotations() -> dict[str, set[str]]:
+def hpo_annotations() -> dict[str, set[tuple]]:
     """
     Human Phenotype Ontology annotations.
 
     Returns:
-        Dict of proteins as keys and sets of HPO terms as values.
+        Dict of proteins as keys and sets of HPO annotations as values.
     """
 
     url = urls.urls['hpo']['gene']
     c = curl.Curl(url, large = True, silent = False)
     _ = next(c.result)
+    
+    fields = ('entrez_gene_id','entrez_gene_symbol','hpo_id')
+
+    HPOAnnotations = collections.namedtuple('HPOAnnotations', fields,defaults = ("",) * len(fields))
 
     result = collections.defaultdict(set)
 
@@ -56,7 +60,13 @@ def hpo_annotations() -> dict[str, set[str]]:
 
         for uniprot in uniprots:
 
-            result[uniprot].add(r[2])
+            result[uniprot].add(
+                HPOAnnotations(
+                    entrez_gene_id = r[0],
+                    entrez_gene_symbol = r[1],
+                    hpo_id = r[2],
+                    )
+            )
 
     return result
 

From 042fa92392738c245572e7d4fe760fdfe1e3195d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tennur=20K=C4=B1l=C4=B1=C3=A7?=
 <99681146+tnnrklc@users.noreply.github.com>
Date: Wed, 31 Aug 2022 13:06:03 +0300
Subject: [PATCH 31/32] Update annot.py

---
 pypath/core/annot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypath/core/annot.py b/pypath/core/annot.py
index 034476831..8bfa606cf 100644
--- a/pypath/core/annot.py
+++ b/pypath/core/annot.py
@@ -6922,7 +6922,7 @@ def __init__(self, **kwargs):
             self,
             name = 'HPO',
             ncbi_tax_id = constants.NOT_ORGANISM_SPECIFIC,
-            input_method = 'hpo.hpo_gene_annotations',
+            input_method = 'hpo.hpo_annotations',
             **kwargs
         )
 

From deb7d434c44a2325444d2e8752dd7923d895a3da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tennur=20K=C4=B1l=C4=B1=C3=A7?=
 <99681146+tnnrklc@users.noreply.github.com>
Date: Sat, 10 Sep 2022 18:45:23 +0300
Subject: [PATCH 32/32] Update hpo.py

---
 pypath/inputs/hpo.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/pypath/inputs/hpo.py b/pypath/inputs/hpo.py
index e6c062eb0..f3d0406c3 100644
--- a/pypath/inputs/hpo.py
+++ b/pypath/inputs/hpo.py
@@ -96,6 +96,7 @@ def hpo_diseases() -> dict[str, set[tuple]]:
             'omim',
             'name',
             'pmid',
+            'qualifier',
             'evidence',
             'onset',
             'frequency',
@@ -120,6 +121,7 @@ def hpo_diseases() -> dict[str, set[tuple]]:
                 omim = r[0],
                 name = r[1],
                 pmid = pmid,
+                qualifier = r[2] or None,
                 evidence = r[5] or None,
                 onset = r[6] or None,
                 frequency = r[7] or None,
@@ -179,11 +181,14 @@ def hpo_ontology() -> dict[str, dict[str, Union[str, set[str]]]]:
                 lambda x: x
             )
 
-            result[key][term].update(
-                {
-                    proc(x.value)
-                    for x in r.attrs.get(obokey, ())
-                }
-            )
-
-    return {k, dict(v) for k, v in result.items()}
+            for x in r.attrs.get(obokey, ()):
+                y = proc(x.value)
+                result[key][term].update(
+                    {
+                        y(x.value)
+                        if type(y) != tuple else
+                        y 
+                    }
+                )
+
+    return {k: dict(v) for k, v in result.items()}