Skip to content

Commit

Permalink
Merge pull request #174 from vrk-kpa/REKDAT-94_update-frequency-select
Browse files Browse the repository at this point in the history
REKDAT-94: Change update frequency to a controlled vocabulary
  • Loading branch information
bzar authored Mar 21, 2024
2 parents 7de2638 + 279212f commit 25ee9b2
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,44 @@
XSD, RDF, DCT, DCAT, FOAF, ADMS, VCARD,
Literal, URIRef, BNode)

from rdflib.namespace import Namespace

FREQUENCY = Namespace("http://publications.europa.eu/resource/authority/frequency/")

# Lookup table from an ``update_frequency`` value stored on a dataset to the
# local name of the matching concept in the FREQUENCY namespace.
# Entries are kept in alphabetical order of the concept code (right-hand side).
FREQUENCY_MAP = {
    "annual": "ANNUAL",
    "semiannual": "ANNUAL_2",
    "three_times_a_year": "ANNUAL_3",
    "bidecennial": "BIDECENNIAL",
    "biennial": "BIENNIAL",
    "bihourly": "BIHOURLY",
    "bimonthly": "BIMONTHLY",
    "biweekly": "BIWEEKLY",
    "continuous": "CONT",
    "daily": "DAILY",
    "twice_a_day": "DAILY_2",
    "decennial": "DECENNIAL",
    "hourly": "HOURLY",
    "irregular": "IRREG",
    "monthly": "MONTHLY",
    "semimonthly": "MONTHLY_2",
    "three_times_a_month": "MONTHLY_3",
    "never": "NEVER",
    "provisional_data": "OP_DATPRO",
    "other": "OTHER",
    "quadrennial": "QUADRENNIAL",
    "quarterly": "QUARTERLY",
    "quinquennial": "QUINQUENNIAL",
    "tridecennial": "TRIDECENNIAL",
    "triennial": "TRIENNIAL",
    "trihourly": "TRIHOURLY",
    "unknown": "UNKNOWN",
    "continuously_updated": "UPDATE_CONT",
    "weekly": "WEEKLY",
    "semiweekly": "WEEKLY_2",
    "three_times_a_week": "WEEKLY_3",
}


class RestrictedDataDCATAPProfile(EuropeanDCATAP2Profile):
def parse_dataset(self, dataset_dict, dataset_ref):
Expand All @@ -18,7 +56,6 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
('notes_translated', DCT.description),
('rights_translated', DCT.rights),
('keywords', DCAT.keyword),
('update_frequency', DCT.accrualPeriodicity),
])

maintainer_website = dataset_dict.get('maintainer_website')
Expand All @@ -41,6 +78,14 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
self._add_triple_from_dict(dataset_dict, temporal, DCAT.startDate, 'valid_from', date_value=True)
self._add_triple_from_dict(dataset_dict, temporal, DCAT.endDate, 'valid_till', date_value=True)

update_frequency = dataset_dict.get('update_frequency')
if update_frequency:
self.g.bind("frequency", FREQUENCY)

# A value missing from FREQUENCY_MAP is treated as an error: the lookup below raises KeyError
frequency = FREQUENCY[FREQUENCY_MAP[update_frequency]]
self.g.add((dataset_ref, DCT.accrualPeriodicity, URIRef(frequency)))

distributions = list(self.g.subjects(predicate=RDF.type, object=DCAT.Distribution))
for distribution in distributions:
resource_dict = next((r for r in dataset_dict.get('resources', [])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -959,7 +959,7 @@
<https://purl.eu/ns/shacl#message> "The range of geographical coverage must be of type <http://purl.org/dc/terms/Location>."@en .

<https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetSeriesShape/04d197dc40f01e87093bdd0446a9fdb1a9d44319> rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetSeries.frequency";
# disabled to allow literals: shacl:class dc:Frequency;
# disabled to allow URIRefs: shacl:class dc:Frequency;
shacl:description "The frequency at which the Dataset Series is updated."@en;
shacl:name "frequency"@en;
shacl:path dc:accrualPeriodicity;
Expand Down Expand Up @@ -989,7 +989,7 @@
<https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetSeriesShape/63a8cf23801ef605734507c621693524f22476dd> rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetSeries.frequency";
shacl:description "The frequency at which the Dataset Series is updated."@en;
shacl:name "frequency"@en;
# disabled to allow literals: shacl:nodeKind shacl:BlankNodeOrIRI;
shacl:nodeKind shacl:BlankNodeOrIRI;
shacl:path dc:accrualPeriodicity;
<https://purl.eu/ns/shacl#message> "The expected value for frequency is a rdfs:Resource (URI or blank node)"@en .

Expand Down Expand Up @@ -1044,7 +1044,7 @@

<https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetSeriesShape/84229f2224810ba9c70b4b27f756414cc0353324> rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetSeries.frequency";
shacl:description "The frequency at which the Dataset Series is updated."@en;
# disabled to allow multilingual content: shacl:maxCount 1;
shacl:maxCount 1;
shacl:name "frequency"@en;
shacl:path dc:accrualPeriodicity;
<https://purl.eu/ns/shacl#message> "Maximally 1 values allowed for frequency"@en .
Expand Down Expand Up @@ -1237,7 +1237,7 @@
<https://purl.eu/ns/shacl#message> "The expected value for other identifier is a rdfs:Resource (URI or blank node)"@en .

<https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetShape/04d197dc40f01e87093bdd0446a9fdb1a9d44319> rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#Dataset.frequency";
# disabled to allow literals: shacl:class dc:Frequency;
# disabled to allow URIRefs: shacl:class dc:Frequency;
shacl:description "The frequency at which the Dataset is updated."@en;
shacl:name "frequency"@en;
shacl:path dc:accrualPeriodicity;
Expand Down Expand Up @@ -1428,7 +1428,7 @@
<https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetShape/63a8cf23801ef605734507c621693524f22476dd> rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#Dataset.frequency";
shacl:description "The frequency at which the Dataset is updated."@en;
shacl:name "frequency"@en;
# disabled to allow literals: shacl:nodeKind shacl:BlankNodeOrIRI;
shacl:nodeKind shacl:BlankNodeOrIRI;
shacl:path dc:accrualPeriodicity;
<https://purl.eu/ns/shacl#message> "The expected value for frequency is a rdfs:Resource (URI or blank node)"@en .

Expand Down Expand Up @@ -1504,7 +1504,7 @@

<https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetShape/84229f2224810ba9c70b4b27f756414cc0353324> rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#Dataset.frequency";
shacl:description "The frequency at which the Dataset is updated."@en;
# disabled to allow multilingual content: shacl:maxCount 1;
shacl:maxCount 1;
shacl:name "frequency"@en;
shacl:path dc:accrualPeriodicity;
<https://purl.eu/ns/shacl#message> "Maximally 1 values allowed for frequency"@en .
Expand Down Expand Up @@ -1736,7 +1736,7 @@
shacl:targetClass dcat:Dataset .

<https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetmemberofaDatasetSeriesShape/04d197dc40f01e87093bdd0446a9fdb1a9d44319> rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetmemberofaDatasetSeries.frequency";
# disabled to allow literals: shacl:class dc:Frequency;
# disabled to allow URIRefs: shacl:class dc:Frequency;
shacl:description "The frequency at which the Dataset is updated."@en;
shacl:name "frequency"@en;
shacl:path dc:accrualPeriodicity;
Expand Down Expand Up @@ -1773,7 +1773,7 @@
<https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetmemberofaDatasetSeriesShape/63a8cf23801ef605734507c621693524f22476dd> rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetmemberofaDatasetSeries.frequency";
shacl:description "The frequency at which the Dataset is updated."@en;
shacl:name "frequency"@en;
# disabled to allow literals: shacl:nodeKind shacl:BlankNodeOrIRI;
shacl:nodeKind shacl:BlankNodeOrIRI;
shacl:path dc:accrualPeriodicity;
<https://purl.eu/ns/shacl#message> "The expected value for frequency is a rdfs:Resource (URI or blank node)"@en .

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,20 +183,25 @@
{
"field_name": "update_frequency",
"label": "Update frequency",
"form_placeholder": "e.g. monthly",
"form_languages": [
"fi",
"sv",
"en"
],
"preset": "fluent_vocabulary_with_autocomplete",
"validators": "fluent_tags create_fluent_tags(update_frequency)",
"form_attrs": {
"data-module": "autocomplete",
"data-module-tags": "",
"data-module-source": "/api/2/util/tag/autocomplete?incomplete=?&vocabulary_id=update_frequency"
},
"description": "Describe how often your data is updated"
"description": "Describe how often your data is updated",
"preset": "select",
"choices": [
{"value": "biennial", "label": "Biennial"},
{"value": "annual", "label": "Annual"},
{"value": "semiannual", "label": "Semiannual"},
{"value": "quarterly", "label": "Quarterly"},
{"value": "monthly", "label": "Monthly"},
{"value": "semimonthly", "label": "Semimonthly"},
{"value": "biweekly", "label": "Biweekly"},
{"value": "weekly", "label": "Weekly"},
{"value": "daily", "label": "Daily"},
{"value": "twice_a_day", "label": "Twice a day"},
{"value": "bihourly", "label": "Bihourly"},
{"value": "hourly", "label": "Hourly"},
{"value": "continuous", "label": "Continuous"},
{"value": "irregular", "label": "Irregular"},
{"value": "never", "label": "Never"}
]
},
{
"field_name": "valid_from",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,27 +152,19 @@ def test_dcat_dataset_external_urls(app):
@pytest.mark.usefixtures("clean_db", "clean_index", "with_plugins")
def test_dcat_dataset_update_frequency(app):
dataset_fields = minimal_dataset_with_one_resource_fields(Sysadmin())
dataset_fields['update_frequency'] = {lang: [f'update frequency {lang} {x}' for x in range(2)]
for lang in ['fi', 'sv', 'en']}
dataset_fields['update_frequency'] = 'quarterly'
Dataset(**dataset_fields)

result = fetch_catalog_graph(app).query('''
SELECT ?updateFrequencyFi ?updateFrequencySv ?updateFrequencyEn
SELECT ?updateFrequency
WHERE {
?a a dcat:Dataset
. ?a dcterms:accrualPeriodicity ?updateFrequencyFi
FILTER ( lang(?updateFrequencyFi) = "fi")
. ?a dcterms:accrualPeriodicity ?updateFrequencySv
FILTER ( lang(?updateFrequencySv) = "sv")
. ?a dcterms:accrualPeriodicity ?updateFrequencyEn
FILTER ( lang(?updateFrequencyEn) = "en")
. ?a dcterms:accrualPeriodicity ?updateFrequency
}
''')

results = [r for row in result for r in row]
for lang, values in dataset_fields['update_frequency'].items():
for value in values:
assert Literal(value, lang=lang) in results
[(update_frequency,)] = result
assert update_frequency == URIRef('http://publications.europa.eu/resource/authority/frequency/QUARTERLY')


@pytest.mark.usefixtures("clean_db", "clean_index", "with_plugins")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_some_action():
import pytest

# import ckanext.restricteddata.plugin as plugin
from ckan.plugins import plugin_loaded
from ckan.plugins import plugin_loaded, toolkit
from ckan.tests.factories import Dataset, Sysadmin, Organization, User, Group
from ckan.tests.helpers import call_action
from .utils import minimal_dataset_with_one_resource_fields
Expand Down Expand Up @@ -125,11 +125,15 @@ def test_dataset_with_external_ursl():
@pytest.mark.usefixtures("clean_db", "with_plugins")
def test_dataset_with_update_frequency():
dataset_fields = minimal_dataset_with_one_resource_fields(Sysadmin())
dataset_fields['update_frequency'] = {'fi': ['Test'], 'sv': ['Test']}
dataset_fields['update_frequency'] = 'quarterly'
d = Dataset(**dataset_fields)
dataset = call_action('package_show', id=d['name'])
assert dataset['update_frequency'] == dataset_fields['update_frequency']

dataset_fields['update_frequency'] = 'invalid value'
with pytest.raises(toolkit.ValidationError):
d = Dataset(**dataset_fields)


@pytest.mark.usefixtures("clean_db", "with_plugins")
def test_dataset_with_valid_from():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,22 @@ def _translations():
_("to")
_("Show metadata diff")

_("Biennial")
_("Annual")
_("Semiannual")
_("Quarterly")
_("Monthly")
_("Semimonthly")
_("Biweekly")
_("Weekly")
_("Daily")
_("Twice a day")
_("Bihourly")
_("Hourly")
_("Continuous")
_("Irregular")
_("Never")

# Resource
_("Data resource title")
_("Give a short and descriptive name for the distribution. If the data covers a specific time frame, mention that in the name.") # noqa: E501
Expand Down

0 comments on commit 25ee9b2

Please sign in to comment.