diff --git a/Jenkinsfile b/Jenkinsfile index 31ac1c2..3ecade6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -75,16 +75,13 @@ pipeline { script { // Get the names of all BioPortal ontologies // This saves the list to data/raw/ontologylist.tsv - sh ". venv/bin/activate && kgbioportal get-ontology-list --api_key ${NCBO_API_KEY}" + //sh ". venv/bin/activate && kgbioportal -vvv get-ontology-list --api_key ${NCBO_API_KEY}" + + // For now, we use the pre-built list included with the repo. - // Now download all - // or at least in the future, do them all. - // For now just do a few - sh "printf 'ENVO\nPO\nSEPIO\n' > data/raw/ontologylist.tsv" - // Download the ontologies // This saves them to data/raw/ - sh ". venv/bin/activate && kgbioportal download --api_key ${NCBO_API_KEY} --ontology_file data/raw/ontologylist.tsv" + sh ". venv/bin/activate && kgbioportal download --api_key ${NCBO_API_KEY}" } } @@ -105,8 +102,10 @@ pipeline { dir('./gitrepo') { script { - if (env.GIT_BRANCH != 'origin/main') { - echo "Will not push if not on main branch." + //if (env.GIT_BRANCH != 'origin/main') { + // echo "Will not push if not on main branch." + if (1 == 1) { + echo "TESTING." } else { withCredentials([ file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG'), diff --git a/data/raw/ontologylist.tsv b/data/raw/ontologylist.tsv new file mode 100644 index 0000000..03de534 --- /dev/null +++ b/data/raw/ontologylist.tsv @@ -0,0 +1,4 @@ +id name current_version submission_id +ABA-AMB Allen Brain Atlas (ABA) Adult Mouse Brain Ontology 1 1 +ABD Anthology of Biosurveillance Diseases NA 4 +ACESO Adverse Childhood Experiences Ontology Light 2 diff --git a/src/kg_bioportal/downloader.py b/src/kg_bioportal/downloader.py index 83fe36d..79ab6f5 100644 --- a/src/kg_bioportal/downloader.py +++ b/src/kg_bioportal/downloader.py @@ -2,6 +2,8 @@ import logging import os +import time + import requests ONTOLOGY_LIST_NAME = "ontologylist.tsv" @@ -134,14 +136,12 @@ def get_ontology_list(self) -> None: with open(f"{self.output_dir}/{ONTOLOGY_LIST_NAME}", "w") as outfile: outfile.write(f"id\tname\tcurrent_version\tsubmission_id\n") for acronym in ontologies: - metadata_url = f"https://data.bioontology.org/ontologies/{acronym}" - metadata = requests.get(metadata_url, headers=headers).json() latest_submission_url = f"https://data.bioontology.org/ontologies/{acronym}/latest_submission" latest_submission = requests.get( latest_submission_url, headers=headers ).json() - name = metadata["name"].replace("\n", " ").replace("\t", " ") + name = latest_submission["ontology"]["name"].replace("\n", " ").replace("\t", " ") if len(latest_submission) > 0: if latest_submission["version"]: current_version = " ".join( @@ -160,5 +160,7 @@ def get_ontology_list(self) -> None: outfile.write( f"{acronym}\t{name}\t{current_version}\t{submission_id}\n" ) + # Wait for half a second to avoid rate limiting + time.sleep(0.5) logging.info(f"Wrote to {self.output_dir}/{ONTOLOGY_LIST_NAME}")