From 136c23a85c90b9ccec3fbe984aef5bfb310024ef Mon Sep 17 00:00:00 2001
From: caufieldjh <j.harry.caufield@gmail.com>
Date: Fri, 16 Aug 2024 16:11:58 -0400
Subject: [PATCH] Add get_ontology_list function

---
 src/kg_bioportal/cli.py        | 48 +++++++++++++++++++++++++++++++---
 src/kg_bioportal/downloader.py | 24 +++++++++++++++++
 2 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/src/kg_bioportal/cli.py b/src/kg_bioportal/cli.py
index 57f4a26..00582a8 100644
--- a/src/kg_bioportal/cli.py
+++ b/src/kg_bioportal/cli.py
@@ -32,6 +32,37 @@ def main(verbose: int, quiet: bool):
     logger.info(f"Logger {logger.name} set to level {logger.level}")
 
 
+@main.command()
+@click.option(
+    "output_dir",
+    "-o",
+    required=True,
+    default="data/raw",
+    help="Directory to write the ontology list to",
+)
+@click.option(
+    "api_key",
+    "-k",
+    required=False,
+    type=str,
+    help="API key for BioPortal",
+)
+def get_ontology_list(output_dir, api_key) -> None:
+    """Downloads the list of all BioPortal ontologies and saves to a file in the data directory (default: data/raw).
+
+    Args:
+
+        output_dir: A string pointing to the directory to write the list to. Defaults to data/raw.
+
+        api_key: BioPortal / NCBO API key.
+
+    Returns:
+        None.
+    """
+    dl = Downloader(output_dir=output_dir, api_key=api_key)
+    dl.get_ontology_list()
+
+
 @main.command()
 @click.option(
     "ontologies",
@@ -67,7 +98,9 @@ def main(verbose: int, quiet: bool):
     type=str,
     help="API key for BioPortal",
 )
-def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache, api_key) -> None:
+def download(
+    ontologies, ontology_file, output_dir, snippet_only, ignore_cache, api_key
+) -> None:
     """Downloads specified ontologies into data directory (default: data/raw).
 
     Args:
@@ -81,9 +114,11 @@ def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache,
         output_dir: A string pointing to the directory to download data to.
         Defaults to data/raw.
 
-        snippet_only: Downloads only the first 5 kB of the source, for testing and file checks.
+        snippet_only: (Not yet implemented) Downloads only the first 5 kB of the source, for testing and file checks.
+
+        ignore_cache: (Not yet implemented) If specified, will ignore existing files and download again.
 
-        ignore_cache: If specified, will ignore existing files and download again.
+        api_key: BioPortal / NCBO API key.
 
     Returns:
         None.
@@ -105,7 +140,12 @@ def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache,
 
     logging.info(f"{len(onto_list)} ontologies to retrieve.")
 
-    dl = Downloader(output_dir, snippet_only, ignore_cache, api_key)
+    dl = Downloader(
+        output_dir=output_dir,
+        snippet_only=snippet_only,
+        ignore_cache=ignore_cache,
+        api_key=api_key,
+    )
 
     dl.download(onto_list)
 
diff --git a/src/kg_bioportal/downloader.py b/src/kg_bioportal/downloader.py
index c6f5ffc..1a626e0 100644
--- a/src/kg_bioportal/downloader.py
+++ b/src/kg_bioportal/downloader.py
@@ -4,6 +4,7 @@
 import os
 import requests
 
+ONTOLOGY_LIST_NAME = "ontologylist.tsv"  # File name (inside output_dir) written by Downloader.get_ontology_list
 
 class Downloader:
 
@@ -89,3 +90,26 @@ def download(self, onto_list: list = []) -> None:
 
 
         return None
+
+    def get_ontology_list(self) -> None:
+        """Get the list of ontologies from BioPortal and write it to a file.
+
+        Each top-level entry of the analytics JSON response is written, one per
+        line, to ONTOLOGY_LIST_NAME inside self.output_dir.
+
+        Raises:
+            requests.HTTPError: If BioPortal answers with an error status.
+        """
+        headers = {"Authorization": f"apikey token={self.api_key}"}
+        analytics_url = "https://data.bioontology.org/analytics"
+        outpath = f"{self.output_dir}/{ONTOLOGY_LIST_NAME}"
+
+        logging.info("Getting set of all ontologies...")
+        response = requests.get(analytics_url, headers=headers, allow_redirects=True)
+        # Fail loudly rather than writing the keys of an error payload as names.
+        response.raise_for_status()
+        # output_dir may not exist yet (e.g. on a first run), so create it.
+        os.makedirs(self.output_dir, exist_ok=True)
+        with open(outpath, "w", encoding="utf-8") as outfile:
+            outfile.writelines(f"{name}\n" for name in response.json())
+        logging.info(f"Wrote to {outpath}")