From 279212f19fb957b8755d5e5b28c179612baabe77 Mon Sep 17 00:00:00 2001 From: Teemu Erkkola Date: Wed, 20 Mar 2024 13:04:03 +0200 Subject: [PATCH] REKDAT-94: Change update frequency to a controlled vocabulary --- .../ckanext/restricteddata/dcat.py | 47 ++++++++++++++++++- .../dcat-ap/restricteddata_dcat-ap_shacl.ttl | 16 +++---- .../restricteddata/schemas/dataset.json | 33 +++++++------ .../ckanext/restricteddata/tests/test_dcat.py | 18 ++----- .../restricteddata/tests/test_plugin.py | 8 +++- .../ckanext/restricteddata/translations.py | 16 +++++++ 6 files changed, 100 insertions(+), 38 deletions(-) diff --git a/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/dcat.py b/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/dcat.py index e60bf097..4d8fd230 100644 --- a/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/dcat.py +++ b/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/dcat.py @@ -4,6 +4,44 @@ XSD, RDF, DCT, DCAT, FOAF, ADMS, VCARD, Literal, URIRef, BNode) +from rdflib.namespace import Namespace + +FREQUENCY = Namespace("http://publications.europa.eu/resource/authority/frequency/") + +FREQUENCY_MAP = { + "annual": "ANNUAL", + "semiannual": "ANNUAL_2", + "three_times_a_year": "ANNUAL_3", + "bidecennial": "BIDECENNIAL", + "biennial": "BIENNIAL", + "bihourly": "BIHOURLY", + "bimonthly": "BIMONTHLY", + "biweekly": "BIWEEKLY", + "continuous": "CONT", + "daily": "DAILY", + "twice_a_day": "DAILY_2", + "decennial": "DECENNIAL", + "hourly": "HOURLY", + "irregular": "IRREG", + "monthly": "MONTHLY", + "semimonthly": "MONTHLY_2", + "three_times_a_month": "MONTHLY_3", + "never": "NEVER", + "provisional_data": "OP_DATPRO", + "other": "OTHER", + "quadrennial": "QUADRENNIAL", + "quarterly": "QUARTERLY", + "quinquennial": "QUINQUENNIAL", + "tridecennial": "TRIDECENNIAL", + "triennial": "TRIENNIAL", + "trihourly": "TRIHOURLY", + "unknown": "UNKNOWN", + "continuously_updated": "UPDATE_CONT", + "weekly": "WEEKLY", + "semiweekly": "WEEKLY_2", + "three_times_a_week": "WEEKLY_3", +} + class RestrictedDataDCATAPProfile(EuropeanDCATAP2Profile): def parse_dataset(self, dataset_dict, dataset_ref): @@ -18,7 +56,6 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): ('notes_translated', DCT.description), ('rights_translated', DCT.rights), ('keywords', DCAT.keyword), - ('update_frequency', DCT.accrualPeriodicity), ]) maintainer_website = dataset_dict.get('maintainer_website') @@ -41,6 +78,14 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): self._add_triple_from_dict(dataset_dict, temporal, DCAT.startDate, 'valid_from', date_value=True) self._add_triple_from_dict(dataset_dict, temporal, DCAT.endDate, 'valid_till', date_value=True) + update_frequency = dataset_dict.get('update_frequency') + if update_frequency: + self.g.bind("frequency", FREQUENCY) + + # update_frequency not existing in FREQUENCY_MAP is an error + frequency = FREQUENCY[FREQUENCY_MAP[update_frequency]] + self.g.add((dataset_ref, DCT.accrualPeriodicity, URIRef(frequency))) + distributions = list(self.g.subjects(predicate=RDF.type, object=DCAT.Distribution)) for distribution in distributions: resource_dict = next((r for r in dataset_dict.get('resources', []) diff --git a/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/doc/dcat-ap/restricteddata_dcat-ap_shacl.ttl b/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/doc/dcat-ap/restricteddata_dcat-ap_shacl.ttl index ea72ac59..4b739fcb 100644 --- a/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/doc/dcat-ap/restricteddata_dcat-ap_shacl.ttl +++ b/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/doc/dcat-ap/restricteddata_dcat-ap_shacl.ttl @@ -959,7 +959,7 @@ "The range of geographical coverage must be of type ."@en . rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetSeries.frequency"; - # disabled to allow literals: shacl:class dc:Frequency; + # disabled to allow URIRefs: shacl:class dc:Frequency; shacl:description "The frequency at which the Dataset Series is updated."@en; shacl:name "frequency"@en; shacl:path dc:accrualPeriodicity; @@ -989,7 +989,7 @@ rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetSeries.frequency"; shacl:description "The frequency at which the Dataset Series is updated."@en; shacl:name "frequency"@en; - # disabled to allow literals: shacl:nodeKind shacl:BlankNodeOrIRI; + shacl:nodeKind shacl:BlankNodeOrIRI; shacl:path dc:accrualPeriodicity; "The expected value for frequency is a rdfs:Resource (URI or blank node)"@en . @@ -1044,7 +1044,7 @@ rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetSeries.frequency"; shacl:description "The frequency at which the Dataset Series is updated."@en; - # disabled to allow multilingual content: shacl:maxCount 1; + shacl:maxCount 1; shacl:name "frequency"@en; shacl:path dc:accrualPeriodicity; "Maximally 1 values allowed for frequency"@en . @@ -1237,7 +1237,7 @@ "The expected value for other identifier is a rdfs:Resource (URI or blank node)"@en . rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#Dataset.frequency"; - # disabled to allow literals: shacl:class dc:Frequency; + # disabled to allow URIRefs: shacl:class dc:Frequency; shacl:description "The frequency at which the Dataset is updated."@en; shacl:name "frequency"@en; shacl:path dc:accrualPeriodicity; @@ -1428,7 +1428,7 @@ rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#Dataset.frequency"; shacl:description "The frequency at which the Dataset is updated."@en; shacl:name "frequency"@en; - # disabled to allow literals: shacl:nodeKind shacl:BlankNodeOrIRI; + shacl:nodeKind shacl:BlankNodeOrIRI; shacl:path dc:accrualPeriodicity; "The expected value for frequency is a rdfs:Resource (URI or blank node)"@en . @@ -1504,7 +1504,7 @@ rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#Dataset.frequency"; shacl:description "The frequency at which the Dataset is updated."@en; - # disabled to allow multilingual content: shacl:maxCount 1; + shacl:maxCount 1; shacl:name "frequency"@en; shacl:path dc:accrualPeriodicity; "Maximally 1 values allowed for frequency"@en . @@ -1736,7 +1736,7 @@ shacl:targetClass dcat:Dataset . rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetmemberofaDatasetSeries.frequency"; - # disabled to allow literals: shacl:class dc:Frequency; + # disabled to allow URIRefs: shacl:class dc:Frequency; shacl:description "The frequency at which the Dataset is updated."@en; shacl:name "frequency"@en; shacl:path dc:accrualPeriodicity; @@ -1773,7 +1773,7 @@ rdfs:seeAlso "https://semiceu.github.io//DCAT-AP/releases/3.0.0#DatasetmemberofaDatasetSeries.frequency"; shacl:description "The frequency at which the Dataset is updated."@en; shacl:name "frequency"@en; - # disabled to allow literals: shacl:nodeKind shacl:BlankNodeOrIRI; + shacl:nodeKind shacl:BlankNodeOrIRI; shacl:path dc:accrualPeriodicity; "The expected value for frequency is a rdfs:Resource (URI or blank node)"@en . diff --git a/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/schemas/dataset.json b/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/schemas/dataset.json index 9c14a3fa..dee5c9f5 100644 --- a/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/schemas/dataset.json +++ b/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/schemas/dataset.json @@ -183,20 +183,25 @@ { "field_name": "update_frequency", "label": "Update frequency", - "form_placeholder": "e.g. monthly", - "form_languages": [ - "fi", - "sv", - "en" - ], - "preset": "fluent_vocabulary_with_autocomplete", - "validators": "fluent_tags create_fluent_tags(update_frequency)", - "form_attrs": { - "data-module": "autocomplete", - "data-module-tags": "", - "data-module-source": "/api/2/util/tag/autocomplete?incomplete=?&vocabulary_id=update_frequency" - }, - "description": "Describe how often your data is updated" + "description": "Describe how often your data is updated", + "preset": "select", + "choices": [ + {"value": "biennial", "label": "Biennial"}, + {"value": "annual", "label": "Annual"}, + {"value": "semiannual", "label": "Semiannual"}, + {"value": "quarterly", "label": "Quarterly"}, + {"value": "monthly", "label": "Monthly"}, + {"value": "semimonthly", "label": "Semimonthly"}, + {"value": "biweekly", "label": "Biweekly"}, + {"value": "weekly", "label": "Weekly"}, + {"value": "daily", "label": "Daily"}, + {"value": "twice_a_day", "label": "Twice a day"}, + {"value": "bihourly", "label": "Bihourly"}, + {"value": "hourly", "label": "Hourly"}, + {"value": "continuous", "label": "Continuous"}, + {"value": "irregular", "label": "Irregular"}, + {"value": "never", "label": "Never"} + ] }, { "field_name": "valid_from", diff --git a/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/tests/test_dcat.py b/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/tests/test_dcat.py index 54271903..026bf855 100644 --- a/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/tests/test_dcat.py +++ b/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/tests/test_dcat.py @@ -152,27 +152,19 @@ def test_dcat_dataset_external_urls(app): @pytest.mark.usefixtures("clean_db", "clean_index", "with_plugins") def test_dcat_dataset_update_frequency(app): dataset_fields = minimal_dataset_with_one_resource_fields(Sysadmin()) - dataset_fields['update_frequency'] = {lang: [f'update frequency {lang} {x}' for x in range(2)] - for lang in ['fi', 'sv', 'en']} + dataset_fields['update_frequency'] = 'quarterly' Dataset(**dataset_fields) result = fetch_catalog_graph(app).query(''' - SELECT ?updateFrequencyFi ?updateFrequencySv ?updateFrequencyEn + SELECT ?updateFrequency WHERE { ?a a dcat:Dataset - . ?a dcterms:accrualPeriodicity ?updateFrequencyFi - FILTER ( lang(?updateFrequencyFi) = "fi") - . ?a dcterms:accrualPeriodicity ?updateFrequencySv - FILTER ( lang(?updateFrequencySv) = "sv") - . ?a dcterms:accrualPeriodicity ?updateFrequencyEn - FILTER ( lang(?updateFrequencyEn) = "en") + . ?a dcterms:accrualPeriodicity ?updateFrequency } ''') - results = [r for row in result for r in row] - for lang, values in dataset_fields['update_frequency'].items(): - for value in values: - assert Literal(value, lang=lang) in results + [(update_frequency,)] = result + assert update_frequency == URIRef('http://publications.europa.eu/resource/authority/frequency/QUARTERLY') @pytest.mark.usefixtures("clean_db", "clean_index", "with_plugins") diff --git a/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/tests/test_plugin.py b/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/tests/test_plugin.py index eb7e1244..8f3e22a1 100644 --- a/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/tests/test_plugin.py +++ b/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/tests/test_plugin.py @@ -50,7 +50,7 @@ def test_some_action(): import pytest # import ckanext.restricteddata.plugin as plugin -from ckan.plugins import plugin_loaded +from ckan.plugins import plugin_loaded, toolkit from ckan.tests.factories import Dataset, Sysadmin, Organization, User, Group from ckan.tests.helpers import call_action from .utils import minimal_dataset_with_one_resource_fields @@ -125,11 +125,15 @@ def test_dataset_with_external_ursl(): @pytest.mark.usefixtures("clean_db", "with_plugins") def test_dataset_with_update_frequency(): dataset_fields = minimal_dataset_with_one_resource_fields(Sysadmin()) - dataset_fields['update_frequency'] = {'fi': ['Test'], 'sv': ['Test']} + dataset_fields['update_frequency'] = 'quarterly' d = Dataset(**dataset_fields) dataset = call_action('package_show', id=d['name']) assert dataset['update_frequency'] == dataset_fields['update_frequency'] + dataset_fields['update_frequency'] = 'invalid value' + with pytest.raises(toolkit.ValidationError): + d = Dataset(**dataset_fields) + @pytest.mark.usefixtures("clean_db", "with_plugins") def test_dataset_with_valid_from(): diff --git a/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/translations.py b/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/translations.py index 447115c5..c39a66fb 100644 --- a/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/translations.py +++ b/ckan/ckanext/ckanext-restricteddata/ckanext/restricteddata/translations.py @@ -83,6 +83,22 @@ def _translations(): _("to") _("Show metadata diff") + _("Biennial") + _("Annual") + _("Semiannual") + _("Quarterly") + _("Monthly") + _("Semimonthly") + _("Biweekly") + _("Weekly") + _("Daily") + _("Twice a day") + _("Bihourly") + _("Hourly") + _("Continuous") + _("Irregular") + _("Never") + # Resource _("Data resource title") _("Give a short and descriptive name for the distribution. If the data covers a specific time frame, mention that in the name.") # noqa: E501