Skip to content

Commit

Permalink
Merge pull request #43 from ncbo/viz
Browse files Browse the repository at this point in the history
Add visualizations to dashboard
  • Loading branch information
caufieldjh authored Nov 28, 2022
2 parents 5b14323 + fb57fa2 commit 84f40b2
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 48 deletions.
6 changes: 5 additions & 1 deletion docs/build_site.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,8 @@ wget -N $ONTO_STATUS_URL

# Append ontology status list
echo "Adding all lists to Jekyll config."
cat $JEKYLL_CONFIG_HEADER_FILE $GRAPH_STATS_FILE $ONTO_STATUS_FILE > $JEKYLL_CONFIG_FILE
cat $JEKYLL_CONFIG_HEADER_FILE $GRAPH_STATS_FILE $ONTO_STATUS_FILE > $JEKYLL_CONFIG_FILE

# Make figures
echo "Producing figures."
python make_viz.py
3 changes: 0 additions & 3 deletions docs/index.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
---
# Feel free to add content and custom Front Matter to this file.
# To modify the layout, see https://jekyllrb.com/docs/themes/#overriding-theme-defaults

layout: default
title: KG-Bioportal
---
Expand Down
41 changes: 41 additions & 0 deletions docs/make_viz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Create static plotly figures for KG-Bioportal"""

import pandas as pd
import plotly.express as px
import yaml
from plotly.subplots import make_subplots

# Label of the single pie slice that pools every ontology below the threshold.
OTHER_LABEL = "All other ontologies"

# Ontologies whose count is below this value are pooled into OTHER_LABEL.
MIN_SLICE_COUNT = 150000


def load_ontology_status(path="onto_status.yaml"):
    """Load the ontology status list into a DataFrame.

    Reads the ``ontologies`` entry of the YAML file at *path* and coerces the
    ``nodecount`` and ``edgecount`` columns to numbers; values that cannot be
    parsed become NaN.
    """
    with open(path, "r") as infile:
        ontos = pd.DataFrame(yaml.safe_load(infile)["ontologies"])
    countcols = ["nodecount", "edgecount"]
    ontos[countcols] = ontos[countcols].apply(pd.to_numeric, errors="coerce")
    return ontos


def collapse_small_slices(
    ontos, count_column, threshold=MIN_SLICE_COUNT, label=OTHER_LABEL
):
    """Return a copy of *ontos* with small ontologies relabelled as *label*.

    Rows whose *count_column* value is below *threshold* get their ``id``
    replaced by *label*, so a pie chart draws them as one combined slice.
    Rows with a NaN count compare as not-below-threshold and keep their id.

    The input frame is never modified. Relabelling in place would leak the
    pooling done for one chart into the next chart drawn from the same data.
    """
    pooled = ontos.copy()
    pooled.loc[pooled[count_column] < threshold, "id"] = label
    return pooled


def write_pie(ontos, count_column, title, outfile):
    """Write a donut chart of *count_column* per ontology id to *outfile*."""
    fig = px.pie(
        collapse_small_slices(ontos, count_column),
        values=count_column,
        names="id",
        title=title,
        hole=.3,
    )
    fig.update_traces(textposition="inside", textinfo="percent+label")
    fig.update_layout(uniformtext_minsize=14, uniformtext_mode="hide")
    # Load plotly.js from the CDN rather than embedding it in the HTML file.
    fig.write_html(outfile, include_plotlyjs="cdn")


def main():
    """Produce fig1.html (node counts) and fig2.html (edge counts)."""
    ontos = load_ontology_status()

    # TODO: consider putting these in subplots
    # https://plotly.com/python/pie-charts/#pie-charts-in-subplots

    # Node counts across all ontologies, unmerged
    write_pie(ontos, "nodecount", "Nodes used to make KG-Bioportal", "fig1.html")

    # Edge counts across all ontologies, unmerged
    write_pie(ontos, "edgecount", "Edges used to make KG-Bioportal", "fig2.html")


if __name__ == "__main__":
    main()

12 changes: 6 additions & 6 deletions kg_bioportal/transform.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
from typing import List
from typing import List, Optional

from kg_bioportal.transform_utils.ontology import OntologyTransform
from kg_bioportal.transform_utils.ontology.ontology_transform import ONTOLOGIES

DATA_SOURCES = {
'ChebiTransform': OntologyTransform,
'EnvoTransform' : OntologyTransform
}
DATA_SOURCES = {"ChebiTransform": OntologyTransform, "EnvoTransform": OntologyTransform}

def transform(input_dir: str, output_dir: str, sources: List[str] = None) -> None:

def transform(
input_dir: str, output_dir: str, sources: Optional[List[str]] = None
) -> None:
"""Call scripts in kg_bioportal/transform/[source name]/ to transform each source into a graph format that
KGX can ingest directly, in either TSV or JSON format:
https://github.com/biolink/kgx/blob/master/specification/kgx-format.md
Expand Down
35 changes: 19 additions & 16 deletions kg_bioportal/transform_utils/ontology/ontology_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,22 @@

from kg_bioportal.transform_utils.transform import Transform
from kg_bioportal.utils.robot_utils import convert_to_json
from kgx.cli.cli_utils import transform # type: ignore
from kgx.cli.cli_utils import transform # type: ignore


ONTOLOGIES = {
'ChebiTransform': 'chebi.owl.gz',
'EnvoTransform': 'envo.json'
}
ONTOLOGIES = {"ChebiTransform": "chebi.owl.gz", "EnvoTransform": "envo.json"}


class OntologyTransform(Transform):
"""
OntologyTransform parses an Obograph JSON form of an Ontology into nodes and edges.
If it isn't in Obograph JSON format, it is transformed with ROBOT.
If it needs to be decompressed, that happens here too.
"""
def __init__(self, input_dir: str = None, output_dir: str = None):

def __init__(
self, input_dir: Optional[str] = None, output_dir: Optional[str] = None
):
source_name = "ontologies"
super().__init__(source_name, input_dir, output_dir)

Expand All @@ -31,8 +32,8 @@ def run(self, data_file: Optional[str] = None) -> None:
Returns:
None.
"""
if data_file: # if we specify a data file
k = data_file.split('.')[0]
if data_file: # if we specify a data file
k = data_file.split(".")[0]
data_file = os.path.join(self.input_base_dir, data_file)
self.parse(k, data_file, k)
else:
Expand All @@ -51,22 +52,24 @@ def parse(self, name: str, data_file: str, source: str) -> None:
None.
"""
print(f"Parsing {data_file}")

# Decompress if needed
if data_file[-3:] == ".gz":
outfile = data_file[:-3]
with gzip.open(data_file, 'rb') as f_in:
with open(outfile, 'wb') as f_out:
with gzip.open(data_file, "rb") as f_in:
with open(outfile, "wb") as f_out:
shutil.copyfileobj(f_in, f_out)
data_file = outfile

# Transform to obojson if needed
# Need to set up ROBOT first
if data_file[-4:] == ".owl":
convert_to_json("data/raw/", "chebi") # Use the downloaded ROBOT
convert_to_json("data/raw/", "chebi") # Use the downloaded ROBOT
data_file = data_file[:-4] + ".json"

transform(inputs=[data_file],
input_format='obojson',
output= os.path.join(self.output_dir, name),
output_format='tsv')
transform(
inputs=[data_file],
input_format="obojson",
output=os.path.join(self.output_dir, name),
output_format="tsv",
)
35 changes: 23 additions & 12 deletions kg_bioportal/transform_utils/transform.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,42 @@
import os
from typing import Optional


class Transform:
"""Parent class for transforms, that sets up a lot of default file info
"""
"""Parent class for transforms, that sets up a lot of default file info"""

DEFAULT_INPUT_DIR = os.path.join("data", "raw")
DEFAULT_OUTPUT_DIR = os.path.join("data", "transformed")

DEFAULT_INPUT_DIR = os.path.join('data', 'raw')
DEFAULT_OUTPUT_DIR = os.path.join('data', 'transformed')

def __init__(self, source_name, input_dir: str = None, output_dir: str = None, nlp: bool = False):
def __init__(
self,
source_name,
input_dir: Optional[str] = None,
output_dir: Optional[str] = None,
nlp: bool = False,
):
# default columns, can be appended to or overwritten as necessary
self.source_name = source_name
self.node_header = ['id', 'name', 'category']
self.edge_header = ['subject', 'edge_label', 'object', 'relation',
'provided_by']
self.node_header = ["id", "name", "category"]
self.edge_header = [
"subject",
"edge_label",
"object",
"relation",
"provided_by",
]

# default dirs
self.input_base_dir = input_dir if input_dir else self.DEFAULT_INPUT_DIR
self.output_base_dir = output_dir if output_dir else self.DEFAULT_OUTPUT_DIR
self.output_dir = os.path.join(self.output_base_dir, source_name)

# default filenames
self.output_node_file = os.path.join(self.output_dir, "nodes.tsv")
self.output_edge_file = os.path.join(self.output_dir, "edges.tsv")
self.output_json_file = os.path.join(self.output_dir, "nodes_edges.json")
self.subset_terms_file = os.path.join(self.input_base_dir,"subset_terms.tsv")
self.subset_terms_file = os.path.join(self.input_base_dir, "subset_terms.tsv")

os.makedirs(self.output_dir, exist_ok=True)

def run(self, data_file: Optional[str] = None):
Expand Down
21 changes: 11 additions & 10 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,21 +57,22 @@ def find_version(*file_paths):
tests_require=test_deps,
# add package dependencies
install_requires=[
'tqdm',
'wget',
'compress_json',
'cat-merge',
'click==8.0.4',
'pyyaml',
'compress_json',
'kghub-downloader',
'kgx',
'networkx',
'pandas',
'parameterized',
'plotly',
'pyyaml',
'recommonmark',
'sphinx',
'sphinx_rtd_theme',
'recommonmark',
'parameterized',
'tqdm',
'validate_version_code',
'pandas',
'networkx',
'kghub-downloader',
'cat-merge'
'wget',
],
extras_require=extras,
)

0 comments on commit 84f40b2

Please sign in to comment.