-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updated qualities with PATO sizes, curated some older PMDCO terms and …
…removed several obsolete ones, added the scripts curating the qualities
- Loading branch information
Hanke
committed
Dec 6, 2023
1 parent
1809ac0
commit 472f314
Showing
2 changed files
with
4,347 additions
and
4,209 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,316 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from rdflib import BNode, URIRef, Literal, Graph, Namespace\n", | ||
"from rdflib.collection import Collection\n", | ||
"from rdflib.util import guess_format\n", | ||
"from rdflib.namespace import RDF, XSD, RDFS, OWL, SKOS, DCTERMS\n", | ||
"from rdflib.plugins.sparql import prepareQuery\n", | ||
"from datetime import datetime\n", | ||
"from urllib.request import urlopen, pathname2url\n", | ||
"from urllib.parse import urlparse, urljoin\n", | ||
"from typing import Dict, List, Tuple\n", | ||
"import logging\n", | ||
"from re import sub\n", | ||
"import os\n", | ||
"\n", | ||
"from deep_translator import GoogleTranslator\n", | ||
"\n", | ||
"logger = logging.getLogger()\n", | ||
"logger.setLevel(logging.DEBUG)\n", | ||
"\n", | ||
"def path2url(path):\n", | ||
" return urljoin(\n", | ||
" 'file:', pathname2url(os.path.abspath(path)))\n", | ||
"\n", | ||
"dir=os.getcwd()\n", | ||
"PMDCO = Namespace('https://w3id.org/pmd/co/')\n", | ||
"bfo2020_url='http://purl.obolibrary.org/obo/bfo/2020/bfo.owl'\n", | ||
"BFO = Namespace(bfo2020_url+\"/\") \n", | ||
"OBO = Namespace('http://purl.obolibrary.org/obo/')\n", | ||
"PROV= Namespace('http://www.w3.org/ns/prov#')\n", | ||
"IOFAV = Namespace('https://spec.industrialontologies.org/ontology/core/meta/AnnotationVocabulary/')\n", | ||
"PATO = Namespace('http://purl.obolibrary.org/obo/pato/releases/2023-05-18/pato-full.owl')\n", | ||
"\n", | ||
"editor=\"Thomas Hanke\"\n", | ||
"\n", | ||
"filename=\"pmdco-qualities.ttl\"\n", | ||
"this_ontology_url=path2url(filename)\n", | ||
"pato_source=\"pato-full.owl\"\n", | ||
"pato_url=path2url(pato_source)\n", | ||
"# Snake case - your_term\n", | |
"def snake_case(s):\n", | ||
" return '_'.join(\n", | ||
" sub('([A-Z][a-z]+)', r' \\1',\n", | ||
" sub('([A-Z]+)', r' \\1',\n", | ||
" s.replace('-', ' '))).split()).lower()\n", | ||
"\n", | ||
"# Camel case - yourTerm\n", | |
"def lower_camel_case(s):\n", | ||
" #print(s)\n", | ||
" s = sub(r\"(_|-)+\", \" \", s).title().replace(\" \", \"\")\n", | ||
" return ''.join([s[0].lower(), s[1:]])\n", | ||
"\n", | ||
"# Pascal case - YourTerm\n", | |
"def upper_camel_case(s):\n", | ||
" #print(s)\n", | ||
" s = sub(r\"(_|-)+\", \" \", s).title().replace(\" \", \"\")\n", | ||
" return s\n", | ||
"\n", | ||
"def parse_graph(url: str, graph: Graph = Graph(), format: str = \"\") -> Graph:\n", | ||
" \"\"\"Parse RDF data from a URL into an rdflib Graph object\n", | |
" Args:\n", | |
" url (str): URL of a web (http/https) or local file resource\n", | |
" graph (Graph): Existing rdflib Graph object to parse data into.\n", | |
" Returns:\n", | ||
" Graph: Rdflib graph Object\n", | ||
" \"\"\"\n", | ||
" logging.debug(\"parsing graph from {}\".format(url))\n", | ||
" parsed_url = urlparse(url)\n", | ||
" META = Namespace(url + \"/\")\n", | ||
" if not format:\n", | ||
" format = guess_format(parsed_url.path)\n", | ||
" if parsed_url.scheme in [\"https\", \"http\"]:\n", | ||
" graph.parse(urlopen(parsed_url.geturl()).read(), format=format)\n", | ||
" elif parsed_url.scheme == \"file\":\n", | ||
" graph.parse(parsed_url.path, format=format)\n", | ||
" graph.bind(\"meta\", META)\n", | ||
" return graph\n", | ||
"\n", | ||
"def add_ontology_header(g):\n", | ||
" g.bind('owl',OWL)\n", | ||
" g.bind('bfo',BFO)\n", | ||
" g.bind('obo',OBO)\n", | ||
" g.bind('skos',SKOS)\n", | ||
" g.bind('dcterms',DCTERMS)\n", | ||
" g.bind('iof-av',IOFAV)\n", | ||
" g.bind('pmdco',PMDCO)\n", | ||
" g.bind('prov',PROV)\n", | ||
" return g\n", | ||
"\n", | ||
"sub_classes = prepareQuery(\"SELECT ?entity WHERE {?entity rdfs:subClassOf* ?parent}\")\n", | ||
"\n", | ||
"all_labels = prepareQuery(\"SELECT ?entity ?label WHERE {?entity rdfs:label ?label}\")\n", | ||
"\n", | ||
"def get_all_sub_classes(superclass: URIRef, ontology: Graph, authorization=None) -> List[URIRef]:\n", | ||
" \"\"\"Gets all subclasses of a given class.\n", | ||
"\n", | ||
" Args:\n", | ||
" superclass (URIRef): Rdflib URIRef of the superclass\n", | ||
"\n", | ||
" Returns:\n", | ||
" List[URIRef]: List of all subclasses\n", | ||
" \"\"\"\n", | ||
" # run the prepared sub_classes query with ?parent bound to superclass\n", | |
" results = list(\n", | ||
" ontology.query(\n", | ||
" sub_classes,\n", | ||
" initBindings={\"parent\": superclass},\n", | ||
" # initNs={'cco': CCO, 'mseo': MSEO},\n", | ||
" ),\n", | ||
" )\n", | ||
" # print(list(ontology[ : RDFS.subClassOf]))\n", | ||
" classes = [result[0] for result in results]\n", | ||
" logging.info(\"Found following subclasses of {}: {}\".format(superclass, classes))\n", | ||
" return classes\n", | ||
"\n", | ||
"def add_morphologic_shape_qualities(g: Graph, pato_graph: Graph):\n", | ||
" pato_shape_class=OBO.PATO_0000052\n", | ||
" pmd_shape=g.value(predicate=RDFS.label,object=Literal(\"Shape\", lang=\"en\"))\n", | ||
" pato_shapes=get_all_sub_classes(pato_shape_class,pato_graph)\n", | ||
" i=0\n", | ||
" for shape in pato_shapes:\n", | ||
" label=None\n", | ||
" definition=None\n", | ||
" #skip shape class\n", | ||
" if str(shape)==str(pato_shape_class):\n", | ||
" g.add((pmd_shape,OWL.equivalentClass,pato_shape_class))\n", | ||
" continue\n", | ||
" else:\n", | ||
" i+=1\n", | ||
" for s,p, o in pato_graph.triples((shape,None,None)):\n", | ||
" #print(s,p,o)\n", | ||
" if p==RDFS.label:\n", | ||
" label=str(o)\n", | ||
" #label_de=togerman.translate(label)\n", | ||
" if p==OBO.IAO_0000115:\n", | ||
" definition=o\n", | ||
" if label:\n", | ||
" iri=URIRef(PMDCO+upper_camel_case(label))\n", | ||
" g.add((iri,RDF.type,OWL.Class))\n", | ||
" g.add((iri,OWL.equivalentClass,shape))\n", | ||
" g.add((iri,RDFS.label,Literal(label,lang='en')))\n", | ||
" #g.add((iri,RDFS.label,Literal(label_de,lang='de')))\n", | ||
" if definition:\n", | ||
" g.add((iri,SKOS.definition,definition))\n", | ||
" g.add((iri,OBO.IAO_0000117,Literal(\"PERSON: \" + editor )))\n", | ||
" logging.info(\"added {} shape entities from pato\".format(i))\n", | ||
" return g\n", | ||
"\n", | ||
"# copies subclass relations from the equivalent classes in PATO\n", | |
"def copy_subclass_relations(g: Graph):\n", | ||
" i=0\n", | ||
" for s,p, o in g.triples((None,OWL.equivalentClass,None)):\n", | ||
" if isinstance(o,URIRef):\n", | ||
" subclassof=list(pato.objects(o,RDFS.subClassOf))\n", | ||
" for item in subclassof:\n", | ||
" pmd_class = g.value(predicate=OWL.equivalentClass, object=item, any=False)\n", | ||
" if pmd_class:\n", | ||
" #print(s,RDFS.subClassOf,pmd_class)\n", | ||
" g.add((s,RDFS.subClassOf,pmd_class))\n", | ||
" i+=1\n", | ||
" logging.info(\"added {} subclass relations from equivalent pato entities\".format(i))\n", | ||
" return g\n", | ||
"\n", | ||
"def translate_labels(g: Graph, language: str='de'):\n", | ||
" translator=GoogleTranslator(source='auto', target=language)\n", | ||
" res=dict()\n", | ||
" labels=g.query(all_labels) \n", | ||
" for thing, label in labels:\n", | ||
" if thing not in res.keys():\n", | ||
" res[thing]={}\n", | ||
" res[thing][label.language]=label\n", | ||
" \n", | ||
" for thing, labels in res.items():\n", | ||
" if not all(lang in labels.keys() for lang in (\"en\",\"de\")):\n", | ||
" #entities missing an 'en' or a 'de' label\n", | |
" #print(thing,labels)\n", | ||
" #translate and add triple\n", | ||
" if labels.get('en',None):\n", | ||
" label_de=Literal(translator.translate(labels['en']),lang='de')\n", | ||
" logging.info('adding german label {} for entity {}'.format(label_de, thing))\n", | ||
" g.add((thing,RDFS.label,label_de))\n", | ||
" #add curation status - requires discussion\n", | ||
" g.add((thing,OBO.IAO_0000114,OBO.IAO_0000428))\n", | ||
" return g\n", | ||
"\n", | ||
"def entitle_all_labels(g: Graph):\n", | ||
" res=dict()\n", | ||
" labels=g.query(all_labels) \n", | ||
" for thing, label in labels:\n", | ||
" if thing not in res.keys():\n", | ||
" res[thing]={}\n", | ||
" res[thing][label.language]=label\n", | ||
" \n", | ||
" for thing, labels in res.items():\n", | ||
" for lang, label in labels.items():\n", | ||
" entitled_label=str(label).title()\n", | ||
" if not str(label)==entitled_label:\n", | ||
" logging.info('replacing label [{}] with [{}] on {}'.format(str(label), entitled_label, thing))\n", | ||
" #remove old label\n", | ||
" g.remove((thing,RDFS.label,label))\n", | ||
" #add capitalized one\n", | ||
" g.add((thing,RDFS.label,Literal(entitled_label,lang=getattr(label,'language',\"\"))))\n", | ||
" #add curation status - requires discussion\n", | ||
" g.add((thing,OBO.IAO_0000114,OBO.IAO_0000428))\n", | ||
" return g\n", | ||
"\n", | ||
"def add_morphologic_size_qualities(g: Graph, pato_graph: Graph):\n", | ||
" pato_size_class=OBO.PATO_0000117\n", | ||
" #also sub classes will be added\n", | ||
" #pato_size_classes_toadd=[OBO.PATO_0001708, OBO.PATO_0001709, OBO.PATO_0001710]\n", | ||
" pato_size_classes_toadd=[OBO.PATO_0000117]\n", | ||
" pmd_size=g.value(predicate=RDFS.label,object=Literal(\"Size\", lang=\"en\"))\n", | ||
" pato_shapes=[pato_size_class,]\n", | ||
" [pato_shapes.extend(get_all_sub_classes(size_class,pato_graph)) for size_class in pato_size_classes_toadd]\n", | ||
" #print(pato_shapes)\n", | ||
" i=0\n", | ||
" for shape in pato_shapes:\n", | ||
" label=None\n", | ||
" definition=None\n", | ||
" #skip size class\n", | |
" if str(shape)==str(pato_size_class):\n", | ||
" g.add((pmd_size,OWL.equivalentClass,pato_size_class))\n", | ||
" continue\n", | ||
" else:\n", | ||
" i+=1\n", | ||
" for s,p, o in pato_graph.triples((shape,None,None)):\n", | ||
" #print(s,p,o)\n", | ||
" if p==RDFS.label:\n", | ||
" label=str(o)\n", | ||
" #label_de=togerman.translate(label)\n", | ||
" if p==OBO.IAO_0000115:\n", | ||
" definition=o\n", | ||
" if label:\n", | ||
" iri=URIRef(PMDCO+upper_camel_case(label))\n", | ||
" g.add((iri,RDF.type,OWL.Class))\n", | ||
" g.add((iri,OWL.equivalentClass,shape))\n", | ||
" g.add((iri,RDFS.label,Literal(label,lang='en')))\n", | ||
" #g.add((iri,RDFS.label,Literal(label_de,lang='de')))\n", | ||
" if definition:\n", | ||
" g.add((iri,SKOS.definition,definition))\n", | ||
" g.add((iri,OBO.IAO_0000117,Literal(\"PERSON: \" + editor )))\n", | ||
" logging.info(\"added {} size entities from pato\".format(i))\n", | ||
" return g\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"pato=parse_graph(pato_url)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"\n", | ||
"print(this_ontology_url)\n", | ||
"onto=Graph()\n", | ||
"onto=parse_graph(this_ontology_url,graph=onto)\n", | ||
"onto=add_ontology_header(onto)\n", | ||
"onto=add_morphologic_shape_qualities(onto,pato_graph=pato)\n", | ||
"onto=add_morphologic_size_qualities(onto,pato_graph=pato)\n", | ||
"onto=copy_subclass_relations(onto)\n", | ||
"onto=translate_labels(onto,language='de')\n", | ||
"onto=entitle_all_labels(onto)\n", | ||
"\n", | ||
"onto.serialize(\"curated_\"+filename,format='turtle')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "p3.11-jupyter", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.4" | ||
}, | ||
"orig_nbformat": 4, | ||
"vscode": { | ||
"interpreter": { | ||
"hash": "61eede6994971b58f8144333c24a1e0b10c06d738f28fb47725492fa949c2ec5" | ||
} | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.