-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvalidator.py
129 lines (98 loc) · 4 KB
/
validator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import sys
from rdflib import Graph, RDF
from collections import defaultdict
from error import ValidationError
from vocabulary import DcatApVocab
# Script entry: the first command-line argument is the location of the RDF
# catalog to validate. It is parsed as Turtle into the module-level ``graph``
# that the validation loop at the bottom of the file reads.
if len(sys.argv) > 1:
    CATALOG = sys.argv[1]
else:
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Please provide an RDF graph URI as an argument.")
    # Use sys.exit rather than the site-provided ``exit`` helper, and return
    # a non-zero status so calling scripts can detect the failure.
    sys.exit(1)

print("Obtaining RDF graph...\n")
try:
    graph = Graph()
    graph.parse(CATALOG, format="turtle")
except Exception:
    # Top-level boundary: any fetch or parse failure ends the run with the
    # same user-facing message.
    print("Oops, either the URL gives a 404 or it is not a valid RDF graph.")
    sys.exit(1)
"""
Checks whether the graph contains instances of a (mandatory)
class as specified in DCAT-AP v1.1.
Args:
graph: The rdflib.Graph to check.
klass: The URIRef (representing an RDFS class) to check.
Returns:
A defaultdict (errors) that contains error codes mapped to the resources that are not valid
according to the DCAT-AP spec. For example:
{'1': ['dcat.Catalog']}
The error code signifies the type of the validation error, for more see ValidationError class.
"""
def check_mandatory_class(graph, klass):
    """Report a mandatory class that has no instances in the graph.

    Args:
        graph: The rdflib.Graph to check.
        klass: The URIRef (representing an RDFS class) that must have at
            least one instance.

    Returns:
        A defaultdict(list) keyed by error code. It maps
        ValidationError.ERROR_MANDATORY_CLASS_MISSING to ``[klass]`` when no
        resource in the graph is typed as ``klass``; otherwise it is empty.
    """
    errors = defaultdict(list)
    # Ask the graph directly whether any (?s, rdf:type, klass) triple exists.
    # graph.subjects() hands back a lazy iterator, which is always truthy, so
    # testing it with ``not`` can never detect a missing class.
    if (None, RDF.type, klass) not in graph:
        errors[ValidationError.ERROR_MANDATORY_CLASS_MISSING].append(klass)
    return errors
"""
Checks whether all instances of a class have the mandatory properties defined in DCAT-AP
Args:
graph: The rdflib.Graph to check.
klass: The URIRef (representing an RDFS class) to check the presence of mandatory properties
associated with this class.
Returns:
A defaultdict (errors) that contains error codes mapped to the resources that are not valid
according to the DCAT-AP spec. For example:
{'11': [('dcat.dataset', 'dcat.Catalog')]}
The error code signifies the type of the validation error, for more see ValidationError class.
"""
def check_mandatory_properties(graph, klass):
    """Report mandatory properties missing from instances of a class.

    Args:
        graph: The rdflib.Graph to check.
        klass: The URIRef (representing an RDFS class) whose instances are
            checked for their mandatory properties.

    Returns:
        A defaultdict(list) keyed by error code. For every instance of
        ``klass`` and every mandatory predicate that instance lacks, a
        ``(predicate, klass)`` tuple is appended under
        ValidationError.ERROR_MANDATORY_PROPERTY_FOR_CLASS_MISSING.
        The mapping is empty when nothing is missing or when the graph has
        no instances of ``klass``.
    """
    errors = defaultdict(list)
    for instance in graph.subjects(predicate=RDF.type, object=klass):
        # Collect the instance's predicates into a set first.
        # graph.predicates() is a lazy iterator; running ``in`` against it
        # consumes it, so every mandatory predicate after the first would be
        # checked against an exhausted iterator and wrongly reported missing.
        present = set(graph.predicates(subject=instance, object=None))
        # NOTE(review): MANDATORY_PROPERTIES_FOR_CLASSES is indexed by klass
        # only once an instance exists, as before; confirm every mandatory
        # class has an entry in that mapping.
        for predicate in DcatApVocab.MANDATORY_PROPERTIES_FOR_CLASSES[klass]:
            if predicate not in present:
                errors[
                    ValidationError.ERROR_MANDATORY_PROPERTY_FOR_CLASS_MISSING
                ].append((predicate, klass))
    return errors
"""
Checks whether the graph follows the DCAT-AP regarding
the mandatory classes and properties for a given class.
Args:
graph: The rdflib.Graph to check.
klass: The URIRef (representing an RDFS class) to check the presence of mandatory properties
associated with this class.
Returns:
A combined list of error messages that show where the validity breaks.
"""
def check_class_validity(graph, klass):
    """Run both mandatory-class and mandatory-property checks for a class.

    Args:
        graph: The rdflib.Graph to check.
        klass: The URIRef (representing an RDFS class) to validate.

    Returns:
        A list with the human readable messages of the class check followed
        by those of the property check.
    """
    messages = []
    # Order matters for the result: class-level findings come first.
    for check in (check_mandatory_class, check_mandatory_properties):
        found = check(graph, klass)
        messages.extend(get_error_messages(found))
    return messages
"""
Converts an error dict to a human readable message.
Args:
errors: A dictionary mapping error codes to the respective resources.
Returns:
A list of human readable sentences that show where the graph breaks a DCAT-AP constraint.
"""
def get_error_messages(errors):
    """Turn an error mapping into a flat list of human readable messages.

    Args:
        errors: A mapping from error code to the list of resources that
            triggered that error.

    Returns:
        A list holding one message per (error code, resource) pair, produced
        by ValidationError.to_human_readable, in the mapping's iteration
        order.
    """
    formatter = ValidationError()
    return [
        formatter.to_human_readable(code, resource)
        for code in errors
        for resource in errors[code]
    ]
"""Check if some mandatory classes defined in DCAT-AP are valid with respect to the spec.
Prints error messages to show where the validity breaks.
"""
# Validate every mandatory DCAT-AP class against the loaded graph and print
# each distinct message once; set() removes the duplicates that arise when
# several instances of a class miss the same property.
for klass in DcatApVocab.MANDATORY_CLASSES:
    messages = check_class_validity(graph, klass)
    for message in set(messages):
        print(message)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("\nValidation completed.")