Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restructuring - get all ontologies #52

Merged
merged 3 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 44 additions & 4 deletions src/kg_bioportal/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,37 @@ def main(verbose: int, quiet: bool):
logger.info(f"Logger {logger.name} set to level {logger.level}")


@main.command()
@click.option("output_dir", "-o", required=True, default="data/raw")
@click.option("api_key", "-k", type=str, required=False, help="API key for BioPortal")
def get_ontology_list(output_dir, api_key) -> None:
    """Downloads the list of all BioPortal ontologies and saves to a file in the data directory (default: data/raw).

    Args:

        output_dir: A string pointing to the directory to download data to.
        Defaults to data/raw.

        api_key: BioPortal / NCBO API key.

    Returns:
        None.

    """
    # NOTE: click shows the docstring above as this command's --help text,
    # so its wording is user-facing output and is intentionally left as-is.

    # The downloader is only needed for this single call, so it is not
    # bound to a local name; the function implicitly returns None.
    Downloader(output_dir=output_dir, api_key=api_key).get_ontology_list()


@main.command()
@click.option(
"ontologies",
Expand Down Expand Up @@ -67,7 +98,9 @@ def main(verbose: int, quiet: bool):
type=str,
help="API key for BioPortal",
)
def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache, api_key) -> None:
def download(
ontologies, ontology_file, output_dir, snippet_only, ignore_cache, api_key
) -> None:
"""Downloads specified ontologies into data directory (default: data/raw).

Args:
Expand All @@ -81,9 +114,11 @@ def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache,
output_dir: A string pointing to the directory to download data to.
Defaults to data/raw.

snippet_only: Downloads only the first 5 kB of the source, for testing and file checks.
snippet_only: (Not yet implemented) Downloads only the first 5 kB of the source, for testing and file checks.

ignore_cache: (Not yet implemented) If specified, will ignore existing files and download again.

ignore_cache: If specified, will ignore existing files and download again.
api_key: BioPortal / NCBO API key.

Returns:
None.
Expand All @@ -105,7 +140,12 @@ def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache,

logging.info(f"{len(onto_list)} ontologies to retrieve.")

dl = Downloader(output_dir, snippet_only, ignore_cache, api_key)
dl = Downloader(
output_dir=output_dir,
snippet_only=snippet_only,
ignore_cache=ignore_cache,
api_key=api_key,
)

dl.download(onto_list)

Expand Down
26 changes: 26 additions & 0 deletions src/kg_bioportal/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
import os
import requests

ONTOLOGY_LIST_NAME = "ontologylist.tsv"

class Downloader:

# TODO: implement ignore_cache and snippet_only

# Directory to save the downloaded files
output_dir: str = "data/raw"

Expand Down Expand Up @@ -87,3 +90,26 @@ def download(self, onto_list: list = []) -> None:


return None

def get_ontology_list(self) -> None:
    """Get the list of ontologies from BioPortal.

    Requests the BioPortal analytics endpoint and writes every item
    yielded by iterating the decoded JSON response, one per line, to
    ``ONTOLOGY_LIST_NAME`` inside ``self.output_dir``. The items are
    presumably ontology acronyms (top-level keys of the analytics
    object) -- confirm against the BioPortal API.

    Args:
        None.

    Returns:
        None.

    Raises:
        requests.HTTPError: If BioPortal answers with an error status
            (for example when the API key is missing or rejected).
        requests.Timeout: If the server stops responding.
    """
    headers = {"Authorization": f"apikey token={self.api_key}"}

    logging.info("Getting set of all ontologies...")

    analytics_url = "https://data.bioontology.org/analytics"

    # A timeout keeps the command from hanging forever on a stalled
    # connection; requests applies none by default.
    response = requests.get(
        analytics_url,
        headers=headers,
        allow_redirects=True,
        timeout=60,
    )

    # Fail before touching the disk: without this check the keys of an
    # error payload would be written out as if they were ontology names.
    response.raise_for_status()
    ontologies = response.json()

    # The target directory may not exist yet on a fresh checkout.
    os.makedirs(self.output_dir, exist_ok=True)

    outpath = f"{self.output_dir}/{ONTOLOGY_LIST_NAME}"
    with open(outpath, "w", encoding="utf-8") as outfile:
        outfile.writelines(f"{name}\n" for name in ontologies)

    logging.info(f"Wrote to {self.output_dir}/{ONTOLOGY_LIST_NAME}")
Loading