From 136c23a85c90b9ccec3fbe984aef5bfb310024ef Mon Sep 17 00:00:00 2001
From: caufieldjh
Date: Fri, 16 Aug 2024 16:11:58 -0400
Subject: [PATCH] Add get_ontology_list function

---
NOTE(review): this patch was recovered from a copy whose whitespace had been
collapsed onto two lines. Line breaks, indentation widths and the position of
blank context lines below are a best-effort reconstruction that agrees with
the hunk line counts and the diffstat; confirm against the upstream commit
before applying.

 src/kg_bioportal/cli.py        | 48 +++++++++++++++++++++++++++++++---
 src/kg_bioportal/downloader.py | 24 +++++++++++++++++
 2 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/src/kg_bioportal/cli.py b/src/kg_bioportal/cli.py
index 57f4a26..00582a8 100644
--- a/src/kg_bioportal/cli.py
+++ b/src/kg_bioportal/cli.py
@@ -32,6 +32,37 @@ def main(verbose: int, quiet: bool):
     logger.info(f"Logger {logger.name} set to level {logger.level}")
 
 
+@main.command()
+@click.option("output_dir", "-o", required=True, default="data/raw")
+@click.option(
+    "api_key",
+    "-k",
+    required=False,
+    type=str,
+    help="API key for BioPortal",
+)
+def get_ontology_list(output_dir, api_key) -> None:
+    """Downloads the list of all BioPortal ontologies and saves to a file in the data directory (default: data/raw).
+
+    Args:
+
+        output_dir: A string pointing to the directory to download data to.
+        Defaults to data/raw.
+
+        api_key: BioPortal / NCBO API key.
+
+    Returns:
+        None.
+
+    """
+
+    dl = Downloader(output_dir=output_dir, api_key=api_key)
+
+    dl.get_ontology_list()
+
+    return None
+
+
 @main.command()
 @click.option(
     "ontologies",
@@ -67,7 +98,9 @@ def main(verbose: int, quiet: bool):
     type=str,
     help="API key for BioPortal",
 )
-def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache, api_key) -> None:
+def download(
+    ontologies, ontology_file, output_dir, snippet_only, ignore_cache, api_key
+) -> None:
     """Downloads specified ontologies into data directory (default: data/raw).
 
     Args:
@@ -81,9 +114,11 @@ def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache,
         output_dir: A string pointing to the directory to download data to.
         Defaults to data/raw.
 
-        snippet_only: Downloads only the first 5 kB of the source, for testing and file checks.
+        snippet_only: (Not yet implemented) Downloads only the first 5 kB of the source, for testing and file checks.
+
+        ignore_cache: (Not yet implemented) If specified, will ignore existing files and download again.
 
-        ignore_cache: If specified, will ignore existing files and download again.
+        api_key: BioPortal / NCBO API key.
 
     Returns:
         None.
@@ -105,7 +140,12 @@ def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache,
 
     logging.info(f"{len(onto_list)} ontologies to retrieve.")
 
-    dl = Downloader(output_dir, snippet_only, ignore_cache, api_key)
+    dl = Downloader(
+        output_dir=output_dir,
+        snippet_only=snippet_only,
+        ignore_cache=ignore_cache,
+        api_key=api_key,
+    )
 
     dl.download(onto_list)
 
diff --git a/src/kg_bioportal/downloader.py b/src/kg_bioportal/downloader.py
index c6f5ffc..1a626e0 100644
--- a/src/kg_bioportal/downloader.py
+++ b/src/kg_bioportal/downloader.py
@@ -4,6 +4,7 @@
 import os
 
 import requests
+ONTOLOGY_LIST_NAME = "ontologylist.tsv"
 
 
 class Downloader:
@@ -89,3 +90,26 @@ def download(self, onto_list: list = []) -> None:
 
         return None
 
+
+    def get_ontology_list(self) -> None:
+        """Get the list of ontologies from BioPortal.
+
+        Args:
+            None.
+
+        Returns:
+            None.
+        """
+        headers = {"Authorization": f"apikey token={self.api_key}"}
+
+        logging.info("Getting set of all ontologies...")
+
+        analytics_url = "https://data.bioontology.org/analytics"
+
+        ontologies = requests.get(analytics_url, headers=headers, allow_redirects=True).json()
+
+        with open(f"{self.output_dir}/{ONTOLOGY_LIST_NAME}", "w") as outfile:
+            for name in ontologies:
+                outfile.write(f"{name}\n")
+
+        logging.info(f"Wrote to {self.output_dir}/{ONTOLOGY_LIST_NAME}")