Skip to content

Commit

Permalink
updated qualities with pato sizes, curated some older pmdco terms and …
Browse files Browse the repository at this point in the history
…removed several obsolete ones, added the scripts curating the qualities
  • Loading branch information
Hanke committed Dec 6, 2023
1 parent 1809ac0 commit 472f314
Show file tree
Hide file tree
Showing 2 changed files with 4,347 additions and 4,209 deletions.
316 changes: 316 additions & 0 deletions modules/curate_qualities.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from rdflib import BNode, URIRef, Literal, Graph, Namespace\n",
"from rdflib.collection import Collection\n",
"from rdflib.util import guess_format\n",
"from rdflib.namespace import RDF, XSD, RDFS, OWL, SKOS, DCTERMS\n",
"from rdflib.plugins.sparql import prepareQuery\n",
"from datetime import datetime\n",
"from urllib.request import urlopen, pathname2url\n",
"from urllib.parse import urlparse, urljoin\n",
"from typing import Dict, List, Tuple\n",
"import logging\n",
"from re import sub\n",
"import os\n",
"\n",
"from deep_translator import GoogleTranslator\n",
"\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.DEBUG)\n",
"\n",
"def path2url(path):\n",
" return urljoin(\n",
" 'file:', pathname2url(os.path.abspath(path)))\n",
"\n",
"dir=os.getcwd()\n",
"PMDCO = Namespace('https://w3id.org/pmd/co/')\n",
"bfo2020_url='http://purl.obolibrary.org/obo/bfo/2020/bfo.owl'\n",
"BFO = Namespace(bfo2020_url+\"/\") \n",
"OBO = Namespace('http://purl.obolibrary.org/obo/')\n",
"PROV= Namespace('http://www.w3.org/ns/prov#')\n",
"IOFAV = Namespace('https://spec.industrialontologies.org/ontology/core/meta/AnnotationVocabulary/')\n",
"PATO = Namespace('http://purl.obolibrary.org/obo/pato/releases/2023-05-18/pato-full.owl')\n",
"\n",
"editor=\"Thomas Hanke\"\n",
"\n",
"filename=\"pmdco-qualities.ttl\"\n",
"this_ontology_url=path2url(filename)\n",
"pato_source=\"pato-full.owl\"\n",
"pato_url=path2url(pato_source)\n",
"# Snake case - your_term\n",
"def snake_case(s):\n",
" return '_'.join(\n",
" sub('([A-Z][a-z]+)', r' \\1',\n",
" sub('([A-Z]+)', r' \\1',\n",
" s.replace('-', ' '))).split()).lower()\n",
"\n",
"# Camel case - yourTerm\n",
"def lower_camel_case(s):\n",
" #print(s)\n",
" s = sub(r\"(_|-)+\", \" \", s).title().replace(\" \", \"\")\n",
" return ''.join([s[0].lower(), s[1:]])\n",
"\n",
"# Pascal case - YourTerm\n",
"def upper_camel_case(s):\n",
" #print(s)\n",
" s = sub(r\"(_|-)+\", \" \", s).title().replace(\" \", \"\")\n",
" return s\n",
"\n",
"def parse_graph(url: str, graph: Graph = Graph(), format: str = \"\") -> Graph:\n",
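" \"\"\"Parse RDF data from an http(s) or file URL into an rdflib Graph object.\n",
" Args:\n",
" url (str): URL of a web resource or of a local file.\n",
" graph (Graph): Existing rdflib Graph object to parse the data into.\n",
" format (str): RDF serialization format; guessed from the URL path if empty.\n",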
" Returns:\n",
" Graph: Rdflib graph Object\n",
" \"\"\"\n",
" logging.debug(\"parsing graph from {}\".format(url))\n",
" parsed_url = urlparse(url)\n",
" META = Namespace(url + \"/\")\n",
" if not format:\n",
" format = guess_format(parsed_url.path)\n",
" if parsed_url.scheme in [\"https\", \"http\"]:\n",
" graph.parse(urlopen(parsed_url.geturl()).read(), format=format)\n",
" elif parsed_url.scheme == \"file\":\n",
" graph.parse(parsed_url.path, format=format)\n",
" graph.bind(\"meta\", META)\n",
" return graph\n",
"\n",
"def add_ontology_header(g):\n",
" g.bind('owl',OWL)\n",
" g.bind('bfo',BFO)\n",
" g.bind('obo',OBO)\n",
" g.bind('skos',SKOS)\n",
" g.bind('dcterms',DCTERMS)\n",
" g.bind('iof-av',IOFAV)\n",
" g.bind('pmdco',PMDCO)\n",
" g.bind('prov',PROV)\n",
" return g\n",
"\n",
"sub_classes = prepareQuery(\"SELECT ?entity WHERE {?entity rdfs:subClassOf* ?parent}\")\n",
"\n",
"all_labels = prepareQuery(\"SELECT ?entity ?label WHERE {?entity rdfs:label ?label}\")\n",
"\n",
"def get_all_sub_classes(superclass: URIRef, ontology: Graph, authorization=None) -> List[URIRef]:\n",
" \"\"\"Gets all subclasses of a given class.\n",
"\n",
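" Args:\n",
" superclass (URIRef): Rdflib URIRef of the superclass\n",
" ontology (Graph): Rdflib Graph that is queried for the subclasses\n",
"\n",
" Returns:\n",
" List[URIRef]: All direct and indirect subclasses, including the superclass itself (the query uses rdfs:subClassOf*)\n",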
" \"\"\"\n",
" # run the prepared subclass query with ?parent bound to the superclass\n",
" results = list(\n",
" ontology.query(\n",
" sub_classes,\n",
" initBindings={\"parent\": superclass},\n",
" # initNs={'cco': CCO, 'mseo': MSEO},\n",
" ),\n",
" )\n",
" # print(list(ontology[ : RDFS.subClassOf]))\n",
" classes = [result[0] for result in results]\n",
" logging.info(\"Found following subclasses of {}: {}\".format(superclass, classes))\n",
" return classes\n",
"\n",
"def add_morphologic_shape_qualities(g: Graph, pato_graph: Graph):\n",
" pato_shape_class=OBO.PATO_0000052\n",
" pmd_shape=g.value(predicate=RDFS.label,object=Literal(\"Shape\", lang=\"en\"))\n",
" pato_shapes=get_all_sub_classes(pato_shape_class,pato_graph)\n",
" i=0\n",
" for shape in pato_shapes:\n",
" label=None\n",
" definition=None\n",
" #skip shape class\n",
" if str(shape)==str(pato_shape_class):\n",
" g.add((pmd_shape,OWL.equivalentClass,pato_shape_class))\n",
" continue\n",
" else:\n",
" i+=1\n",
" for s,p, o in pato_graph.triples((shape,None,None)):\n",
" #print(s,p,o)\n",
" if p==RDFS.label:\n",
" label=str(o)\n",
" #label_de=togerman.translate(label)\n",
" if p==OBO.IAO_0000115:\n",
" definition=o\n",
" if label:\n",
" iri=URIRef(PMDCO+upper_camel_case(label))\n",
" g.add((iri,RDF.type,OWL.Class))\n",
" g.add((iri,OWL.equivalentClass,shape))\n",
" g.add((iri,RDFS.label,Literal(label,lang='en')))\n",
" #g.add((iri,RDFS.label,Literal(label_de,lang='de')))\n",
" if definition:\n",
" g.add((iri,SKOS.definition,definition))\n",
" g.add((iri,OBO.IAO_0000117,Literal(\"PERSON: \" + editor )))\n",
" logging.info(\"added {} shape entities from pato\".format(i))\n",
" return g\n",
"\n",
"# copies subclass relations from the PATO classes linked via owl:equivalentClass\n",
"def copy_subclass_relations(g: Graph):\n",
" i=0\n",
" for s,p, o in g.triples((None,OWL.equivalentClass,None)):\n",
" if isinstance(o,URIRef):\n",
" subclassof=list(pato.objects(o,RDFS.subClassOf))\n",
" for item in subclassof:\n",
" pmd_class = g.value(predicate=OWL.equivalentClass, object=item, any=False)\n",
" if pmd_class:\n",
" #print(s,RDFS.subClassOf,pmd_class)\n",
" g.add((s,RDFS.subClassOf,pmd_class))\n",
" i+=1\n",
" logging.info(\"added {} subclass relations from equivalent pato entities\".format(i))\n",
" return g\n",
"\n",
"def translate_labels(g: Graph, language: str='de'):\n",
" translator=GoogleTranslator(source='auto', target=language)\n",
" res=dict()\n",
" labels=g.query(all_labels) \n",
" for thing, label in labels:\n",
" if thing not in res.keys():\n",
" res[thing]={}\n",
" res[thing][label.language]=label\n",
" \n",
" for thing, labels in res.items():\n",
" if not all(lang in labels.keys() for lang in (\"en\",\"de\")):\n",
" #entities missing an English or a German label\n",
" #print(thing,labels)\n",
" #translate and add triple\n",
" if labels.get('en',None):\n",
" label_de=Literal(translator.translate(labels['en']),lang='de')\n",
" logging.info('adding german label {} for entity {}'.format(label_de, thing))\n",
" g.add((thing,RDFS.label,label_de))\n",
" #add curation status - requires discussion\n",
" g.add((thing,OBO.IAO_0000114,OBO.IAO_0000428))\n",
" return g\n",
"\n",
"def entitle_all_labels(g: Graph):\n",
" res=dict()\n",
" labels=g.query(all_labels) \n",
" for thing, label in labels:\n",
" if thing not in res.keys():\n",
" res[thing]={}\n",
" res[thing][label.language]=label\n",
" \n",
" for thing, labels in res.items():\n",
" for lang, label in labels.items():\n",
" entitled_label=str(label).title()\n",
" if not str(label)==entitled_label:\n",
" logging.info('replacing label [{}] with [{}] on {}'.format(str(label), entitled_label, thing))\n",
" #remove old label\n",
" g.remove((thing,RDFS.label,label))\n",
" #add capitalized one\n",
" g.add((thing,RDFS.label,Literal(entitled_label,lang=getattr(label,'language',\"\"))))\n",
" #add curation status - requires discussion\n",
" g.add((thing,OBO.IAO_0000114,OBO.IAO_0000428))\n",
" return g\n",
"\n",
"def add_morphologic_size_qualities(g: Graph, pato_graph: Graph):\n",
" pato_size_class=OBO.PATO_0000117\n",
" #also sub classes will be added\n",
" #pato_size_classes_toadd=[OBO.PATO_0001708, OBO.PATO_0001709, OBO.PATO_0001710]\n",
" pato_size_classes_toadd=[OBO.PATO_0000117]\n",
" pmd_size=g.value(predicate=RDFS.label,object=Literal(\"Size\", lang=\"en\"))\n",
" pato_shapes=[pato_size_class,]\n",
" [pato_shapes.extend(get_all_sub_classes(size_class,pato_graph)) for size_class in pato_size_classes_toadd]\n",
" #print(pato_shapes)\n",
" i=0\n",
" for shape in pato_shapes:\n",
" label=None\n",
" definition=None\n",
" #root size class: only link it as equivalent, do not copy it\n",
" if str(shape)==str(pato_size_class):\n",
" g.add((pmd_size,OWL.equivalentClass,pato_size_class))\n",
" continue\n",
" else:\n",
" i+=1\n",
" for s,p, o in pato_graph.triples((shape,None,None)):\n",
" #print(s,p,o)\n",
" if p==RDFS.label:\n",
" label=str(o)\n",
" #label_de=togerman.translate(label)\n",
" if p==OBO.IAO_0000115:\n",
" definition=o\n",
" if label:\n",
" iri=URIRef(PMDCO+upper_camel_case(label))\n",
" g.add((iri,RDF.type,OWL.Class))\n",
" g.add((iri,OWL.equivalentClass,shape))\n",
" g.add((iri,RDFS.label,Literal(label,lang='en')))\n",
" #g.add((iri,RDFS.label,Literal(label_de,lang='de')))\n",
" if definition:\n",
" g.add((iri,SKOS.definition,definition))\n",
" g.add((iri,OBO.IAO_0000117,Literal(\"PERSON: \" + editor )))\n",
" logging.info(\"added {} size entities from pato\".format(i))\n",
" return g\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pato=parse_graph(pato_url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"print(this_ontology_url)\n",
"onto=Graph()\n",
"onto=parse_graph(this_ontology_url,graph=onto)\n",
"onto=add_ontology_header(onto)\n",
"onto=add_morphologic_shape_qualities(onto,pato_graph=pato)\n",
"onto=add_morphologic_size_qualities(onto,pato_graph=pato)\n",
"onto=copy_subclass_relations(onto)\n",
"onto=translate_labels(onto,language='de')\n",
"onto=entitle_all_labels(onto)\n",
"\n",
"onto.serialize(\"curated_\"+filename,format='turtle')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "p3.11-jupyter",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "61eede6994971b58f8144333c24a1e0b10c06d738f28fb47725492fa949c2ec5"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 472f314

Please sign in to comment.