Skip to content

Commit

Permalink
fix: Improve Editor error handling and entity deletion
Browse files Browse the repository at this point in the history
This commit enhances the Editor class with better error handling and more thorough entity deletion:

- Added validation to check if triples exist before attempting to update or delete them
- Improved entity deletion to remove triples where the entity appears as an object, not just as a subject
- Added appropriate exception raising when attempting to modify or delete non-existent triples or entities
- Added comprehensive error messages to help identify the specific triple or entity that doesn't exist

These changes prevent silent failures when working with non-existent data and ensure complete cleanup when deleting entities from the graph.
  • Loading branch information
arcangelo7 committed Feb 27, 2025
1 parent 1bd0f82 commit d406748
Show file tree
Hide file tree
Showing 14 changed files with 814 additions and 221 deletions.
27 changes: 24 additions & 3 deletions .github/workflows/python-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,34 @@ jobs:
retention-days: 14

- name: Generate coverage badge
if: matrix.python-version == '3.10'
run: |
# Extract coverage percentage as a number
COVERAGE_NUM=$(echo ${{ env.COVERAGE }} | sed 's/%//')
# Determine color based on coverage
if (( $(echo "$COVERAGE_NUM >= 90" | bc -l) )); then
COLOR="brightgreen"
elif (( $(echo "$COVERAGE_NUM >= 80" | bc -l) )); then
COLOR="green"
elif (( $(echo "$COVERAGE_NUM >= 70" | bc -l) )); then
COLOR="yellowgreen"
elif (( $(echo "$COVERAGE_NUM >= 60" | bc -l) )); then
COLOR="yellow"
else
COLOR="red"
fi
echo "BADGE_COLOR=$COLOR" >> $GITHUB_ENV
- name: Create badge
if: matrix.python-version == '3.10'
uses: RubbaBoy/BYOB@v1.3.0
with:
name: opencitations-heritrace-coverage-${{ github.ref_name }}
label: "Coverage"
status: "${{ env.COVERAGE }}%"
color: green
status: "${{ env.COVERAGE }}"
color: ${{ env.BADGE_COLOR }}
github_token: ${{ secrets.GIST_PAT }}
repository: arcangelo7/badges
actor: arcangelo7
actor: arcangelo7
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ database/
prov_database/
cache.json
heritrace/static/dist/
.vscode/

# Unit test / coverage reports
.coverage
Expand Down
116 changes: 52 additions & 64 deletions heritrace/editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,24 @@ def update(
if isinstance(graph, Graph)
else URIRef(graph) if graph else None
)

# Check if the triple exists before updating
if self.dataset_is_quadstore and graph:
if not (subject, predicate, old_value, graph) in self.g_set:
raise Exception(
f"Triple ({subject}, {predicate}, {old_value}, {graph}) does not exist"
)
self.g_set.remove((subject, predicate, old_value, graph))
self.g_set.add(
(subject, predicate, new_value, graph),
resp_agent=self.resp_agent,
primary_source=self.source,
)
else:
if not (subject, predicate, old_value) in self.g_set:
raise Exception(
f"Triple ({subject}, {predicate}, {old_value}) does not exist"
)
self.g_set.remove((subject, predicate, old_value))
self.g_set.add(
(subject, predicate, new_value),
Expand All @@ -105,30 +115,63 @@ def delete(

if predicate is None:
# Delete the entire entity
# Check if the entity exists
if self.dataset_is_quadstore:
for quad in list(self.g_set.quads((subject, None, None, None))):
quads = list(self.g_set.quads((subject, None, None, None)))
if not quads:
raise Exception(f"Entity {subject} does not exist")
for quad in quads:
self.g_set.remove(quad)

# Also remove any triples where this entity is the object
object_quads = list(self.g_set.quads((None, None, subject, None)))
for quad in object_quads:
self.g_set.remove(quad)
else:
for triple in list(self.g_set.triples((subject, None, None))):
triples = list(self.g_set.triples((subject, None, None)))
if not triples:
raise Exception(f"Entity {subject} does not exist")
for triple in triples:
self.g_set.remove(triple)

# Also remove any triples where this entity is the object
object_triples = list(self.g_set.triples((None, None, subject)))
for triple in object_triples:
self.g_set.remove(triple)
# Mark the entity as deleted
self.g_set.mark_as_deleted(subject)
else:
if value:
# Remove the specific triple/quad directly
# Check if the specific triple/quad exists before removing it
value = URIRef(value) if isinstance(value, str) else value
if self.dataset_is_quadstore and graph:
if not (subject, predicate, value, graph) in self.g_set:
raise Exception(
f"Triple ({subject}, {predicate}, {value}, {graph}) does not exist"
)
self.g_set.remove((subject, predicate, value, graph))
else:
if not (subject, predicate, value) in self.g_set:
raise Exception(
f"Triple ({subject}, {predicate}, {value}) does not exist"
)
self.g_set.remove((subject, predicate, value))
else:
# Remove all triples with the given subject and predicate
# Check if any triples with the given subject and predicate exist
if self.dataset_is_quadstore and graph:
for quad in list(
self.g_set.quads((subject, predicate, None, graph))
):
quads = list(self.g_set.quads((subject, predicate, None, graph)))
if not quads:
raise Exception(
f"No triples found with subject {subject} and predicate {predicate} in graph {graph}"
)
for quad in quads:
self.g_set.remove(quad)
else:
for triple in list(self.g_set.triples((subject, predicate, None))):
triples = list(self.g_set.triples((subject, predicate, None)))
if not triples:
raise Exception(
f"No triples found with subject {subject} and predicate {predicate}"
)
for triple in triples:
self.g_set.remove(triple)

# Check if the entity is now empty and mark it as deleted if so
Expand Down Expand Up @@ -199,61 +242,6 @@ def import_entity_from_triplestore(self, res_list: list):
triple, resp_agent=self.resp_agent, primary_source=self.source
)

def execute(self, sparql_query: str) -> None:
parsed = parseUpdate(sparql_query)
translated = translateUpdate(parsed).algebra
entities_added = set()

def extract_entities(operation):
if hasattr(operation, "quads") and isinstance(operation.quads, defaultdict):
for graph, triples in operation.quads.items():
for triple in triples:
yield triple[0]
else:
for triple in operation.triples:
yield triple[0]

for operation in translated:
for entity in extract_entities(operation):
if entity not in entities_added:
Reader.import_entities_from_triplestore(
self.g_set, self.dataset_endpoint, [entity]
)
entities_added.add(entity)

self.g_set.preexisting_finished(self.resp_agent, self.source, self.c_time)

for operation in translated:
if operation.name == "DeleteData":
if hasattr(operation, "quads") and isinstance(
operation.quads, defaultdict
):
for graph, triples in operation.quads.items():
for triple in triples:
self.g_set.remove((triple[0], triple[1], triple[2], graph))
else:
for triple in operation.triples:
self.g_set.remove(triple)
elif operation.name == "InsertData":
if hasattr(operation, "quads") and isinstance(
operation.quads, defaultdict
):
for graph, triples in operation.quads.items():
for triple in triples:
self.g_set.add((triple[0], triple[1], triple[2], graph))
else:
for triple in operation.triples:
self.g_set.add(triple)

if isinstance(self.g_set, OCDMConjunctiveGraph):
for subject in self.g_set.subjects(unique=True):
if len(list(self.g_set.quads((subject, None, None, None)))) == 0:
self.g_set.mark_as_deleted(subject)
else:
for subject in self.g_set.subjects(unique=True):
if len(list(self.g_set.triples((subject, None, None)))) == 0:
self.g_set.mark_as_deleted(subject)

def import_entity(self, subject):
Reader.import_entities_from_triplestore(
self.g_set, self.dataset_endpoint, [subject]
Expand Down
71 changes: 40 additions & 31 deletions heritrace/meta_counter_handler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sqlite3
import urllib.parse


class MetaCounterHandler:
def __init__(self, database: str) -> None:
"""
Expand All @@ -18,34 +19,38 @@ def __init__(self, database: str) -> None:
self.supplier_prefix = "060"

self.entity_type_abbr = {
'http://purl.org/spar/fabio/Expression': 'br',
'http://purl.org/spar/fabio/Article': 'br',
'http://purl.org/spar/fabio/JournalArticle': 'br',
'http://purl.org/spar/fabio/Book': 'br',
'http://purl.org/spar/fabio/BookChapter': 'br',
'http://purl.org/spar/fabio/JournalIssue': 'br',
'http://purl.org/spar/fabio/JournalVolume': 'br',
'http://purl.org/spar/fabio/Journal': 'br',
'http://purl.org/spar/fabio/AcademicProceedings': 'br',
'http://purl.org/spar/fabio/ProceedingsPaper': 'br',
'http://purl.org/spar/fabio/ReferenceBook': 'br',
'http://purl.org/spar/fabio/Review': 'br',
'http://purl.org/spar/fabio/ReviewArticle': 'br',
'http://purl.org/spar/fabio/Series': 'br',
'http://purl.org/spar/fabio/Thesis': 'br',
'http://purl.org/spar/pro/RoleInTime': 'ar',
'http://purl.org/spar/fabio/Manifestation': 're',
'http://xmlns.com/foaf/0.1/Agent': 'ra',
'http://purl.org/spar/datacite/Identifier': 'id'
"http://purl.org/spar/fabio/Expression": "br",
"http://purl.org/spar/fabio/Article": "br",
"http://purl.org/spar/fabio/JournalArticle": "br",
"http://purl.org/spar/fabio/Book": "br",
"http://purl.org/spar/fabio/BookChapter": "br",
"http://purl.org/spar/fabio/JournalIssue": "br",
"http://purl.org/spar/fabio/JournalVolume": "br",
"http://purl.org/spar/fabio/Journal": "br",
"http://purl.org/spar/fabio/AcademicProceedings": "br",
"http://purl.org/spar/fabio/ProceedingsPaper": "br",
"http://purl.org/spar/fabio/ReferenceBook": "br",
"http://purl.org/spar/fabio/Review": "br",
"http://purl.org/spar/fabio/ReviewArticle": "br",
"http://purl.org/spar/fabio/Series": "br",
"http://purl.org/spar/fabio/Thesis": "br",
"http://purl.org/spar/pro/RoleInTime": "ar",
"http://purl.org/spar/fabio/Manifestation": "re",
"http://xmlns.com/foaf/0.1/Agent": "ra",
"http://purl.org/spar/datacite/Identifier": "id",
}

# Create tables
self.cur.execute("""CREATE TABLE IF NOT EXISTS data_counters(
self.cur.execute(
"""CREATE TABLE IF NOT EXISTS data_counters(
entity TEXT PRIMARY KEY,
count INTEGER)""")
self.cur.execute("""CREATE TABLE IF NOT EXISTS prov_counters(
count INTEGER)"""
)
self.cur.execute(
"""CREATE TABLE IF NOT EXISTS prov_counters(
entity TEXT PRIMARY KEY,
count INTEGER)""")
count INTEGER)"""
)
self.con.commit()

def _process_entity_name(self, entity_name: str) -> tuple:
Expand All @@ -59,9 +64,9 @@ def _process_entity_name(self, entity_name: str) -> tuple:
"""
entity_name_str = str(entity_name)
if entity_name_str in self.entity_type_abbr:
return ('data_counters', self.entity_type_abbr[entity_name_str])
return ("data_counters", self.entity_type_abbr[entity_name_str])
else:
return ('prov_counters', urllib.parse.quote(entity_name_str))
return ("prov_counters", urllib.parse.quote(entity_name_str))

def set_counter(self, new_value: int, entity_name: str) -> None:
"""
Expand All @@ -76,10 +81,12 @@ def set_counter(self, new_value: int, entity_name: str) -> None:
"""
if new_value < 0:
raise ValueError("new_value must be a non negative integer!")

table, processed_entity_name = self._process_entity_name(entity_name)
self.cur.execute(f"INSERT OR REPLACE INTO {table} (entity, count) VALUES (?, ?)",
(processed_entity_name, new_value))
self.cur.execute(
f"INSERT OR REPLACE INTO {table} (entity, count) VALUES (?, ?)",
(processed_entity_name, new_value),
)
self.con.commit()

def read_counter(self, entity_name: str) -> int:
Expand All @@ -91,9 +98,11 @@ def read_counter(self, entity_name: str) -> int:
:return: The requested counter value.
"""
table, processed_entity_name = self._process_entity_name(entity_name)
self.cur.execute(f"SELECT count FROM {table} WHERE entity=?", (processed_entity_name,))
self.cur.execute(
f"SELECT count FROM {table} WHERE entity=?", (processed_entity_name,)
)
result = self.cur.fetchone()

if result:
return result[0]
else:
Expand All @@ -117,4 +126,4 @@ def close(self):
Closes the database connection.
"""
if self.con:
self.con.close()
self.con.close()
12 changes: 7 additions & 5 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@
This file contains fixtures and configuration for the test suite.
"""

import os
from typing import Generator

import pytest
from flask import Flask
from tests.test_config import TestConfig
from flask.testing import FlaskClient, FlaskCliRunner
from heritrace import create_app
from tests.test_config import TestConfig


@pytest.fixture
def app():
def app() -> Generator[Flask, None, None]:
"""Create and configure a Flask application for testing."""
app = create_app(TestConfig)

Expand All @@ -21,12 +23,12 @@ def app():


@pytest.fixture
def client(app):
def client(app: Flask) -> FlaskClient:
"""A test client for the app."""
return app.test_client()


@pytest.fixture
def runner(app):
def runner(app: Flask) -> FlaskCliRunner:
"""A test CLI runner for the app."""
return app.test_cli_runner()
8 changes: 8 additions & 0 deletions tests/test_dataset_db/virtuoso.log
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,11 @@
14:02:28 Checkpoint finished, log reused
14:02:28 HTTP/WebDAV server online at 8890
14:02:28 Server online at 1111 (pid 1)
15:02:29 Checkpoint started
15:02:29 Checkpoint finished, log reused
16:02:31 Checkpoint started
16:02:31 Checkpoint finished, log reused
17:02:31 Checkpoint started
17:02:31 Checkpoint finished, log reused
18:02:32 Checkpoint started
18:02:32 Checkpoint finished, log reused
Binary file modified tests/test_dataset_db/virtuoso.trx
Binary file not shown.
8 changes: 8 additions & 0 deletions tests/test_provenance_db/virtuoso.log
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,11 @@
14:02:28 Checkpoint finished, log reused
14:02:30 HTTP/WebDAV server online at 8890
14:02:30 Server online at 1111 (pid 1)
15:02:32 Checkpoint started
15:02:32 Checkpoint finished, log reused
16:02:33 Checkpoint started
16:02:33 Checkpoint finished, log reused
17:02:34 Checkpoint started
17:02:34 Checkpoint finished, log reused
18:02:34 Checkpoint started
18:02:34 Checkpoint finished, log reused
Binary file modified tests/test_provenance_db/virtuoso.trx
Binary file not shown.
Loading

0 comments on commit d406748

Please sign in to comment.