From 279869bd85e402839e4a1a938d4b070f4ad13aaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20=C5=A0tamcar?= Date: Fri, 27 Sep 2024 19:21:00 +0200 Subject: [PATCH 1/4] Add new menu URL formats --- API/gimvicurnik/updaters/menu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/API/gimvicurnik/updaters/menu.py b/API/gimvicurnik/updaters/menu.py index d5affe2..80df918 100644 --- a/API/gimvicurnik/updaters/menu.py +++ b/API/gimvicurnik/updaters/menu.py @@ -90,7 +90,7 @@ def get_document_effective(self, document: DocumentInfo) -> datetime.date: # jedilnik-kosilo-YYYY-MM-DD(-popravek).pdf # jedilnik-malica-YYYY-MM-DD(-popravek).pdf date = re.search( - r"jedilnik-(?:kosilo|malica)-(\d+)-(\d+)-(\d+)(?:-[\w-]*)?\.(?:pdf|xlsx)", document.url + r"jedilnik-(?:kosilo|malica|K|M)-(\d+)-(\d+)-(\d+)(?:-[\w-]*)?\.(?:pdf|xlsx)", document.url ) # The specified date is commonly Monday of the effective week From 082a62848f7192e11daca73b901fcefc9679e770 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20=C5=A0tamcar?= Date: Fri, 27 Sep 2024 20:45:19 +0200 Subject: [PATCH 2/4] Retrieve documents also based on effective date --- API/gimvicurnik/updaters/base.py | 35 +++++++++++++++------------ API/gimvicurnik/updaters/timetable.py | 4 +-- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/API/gimvicurnik/updaters/base.py b/API/gimvicurnik/updaters/base.py index a17e6b1..af99bf6 100644 --- a/API/gimvicurnik/updaters/base.py +++ b/API/gimvicurnik/updaters/base.py @@ -163,15 +163,22 @@ def handle_document(self, document: DocumentInfo, span: Span) -> None: span.set_tag("document.modified", document.modified) span.set_tag("document.action", "crashed") + # == DOCUMENT EFFECTIVE + + # Get the document's effective date using the subclassed method + # This may return none for documents without an effective date + # If this fails, we can't do anything other than to skip the document + effective = self.get_document_effective(document) + # == DOCUMENT RECORD (GET) # Try to find an existing document record - record = self.retrieve_document(document) + record = self.retrieve_document(document, effective) # == DOCUMENT PROCESSING # Get the modified time if it is set, otherwise use the current time - created = document.created or datetime.datetime.utcnow() + created = document.created or datetime.datetime.now(datetime.UTC) modified = document.modified or created # Check if the document has changed without downloading it and comparing hashes @@ -193,8 +200,8 @@ def handle_document(self, document: DocumentInfo, span: Span) -> None: # If this fails, we can't do anything other than to skip the document stream, new_hash = self.download_document(document) - # Check if the document hash has changed - if record and record.parsed and record.hash == new_hash: + # Check if the document hash or document URL have changed + if record and record.parsed and record.hash == new_hash and record.url == document.url: changed = False else: action = "updated" @@ -233,11 +240,6 @@ def handle_document(self, document: DocumentInfo, span: Span) -> None: return - # Get the document's effective date using the subclassed method - # This may return none for documents without an effective date - # If this fails, we can't do anything other than to skip the document - effective = self.get_document_effective(document) - if parsable: # If there is no date, we can't do anything other than to skip the document if not effective: @@ -320,14 +322,17 @@ def handle_document(self, document: DocumentInfo, span: Span) -> None: self.logger.info("Skipped because the %s document for %s is already stored", document.type.value, effective) # fmt: on - def retrieve_document(self, document: DocumentInfo) -> Document | None: + def retrieve_document(self, document: DocumentInfo, effective: datetime.date | None) -> Document | None: """Get a document record from the database. May be set by subclasses.""" - return ( - self.session.query(Document) - .filter(Document.type == document.type, Document.url == document.url) - .first() - ) + # Normally, the document URL should match + criterion = Document.url == document.url + + if effective: + # If effective date is set, it may also match instead of the URL + criterion |= Document.effective == effective + + return self.session.query(Document).filter(Document.type == document.type, criterion).first() @with_span(op="download") def download_document(self, document: DocumentInfo) -> tuple[BytesIO, str]: diff --git a/API/gimvicurnik/updaters/timetable.py b/API/gimvicurnik/updaters/timetable.py index 11c2a81..6419829 100644 --- a/API/gimvicurnik/updaters/timetable.py +++ b/API/gimvicurnik/updaters/timetable.py @@ -4,7 +4,7 @@ import re import typing from collections import defaultdict -from datetime import datetime +from datetime import datetime, UTC from hashlib import sha256 import requests @@ -167,7 +167,7 @@ def _parse(self, document: Document | None, raw_data: str, new_hash: str, span: created = False document.type = DocumentType.TIMETABLE - document.modified = datetime.utcnow() + document.modified = datetime.now(UTC) document.url = self.config.url document.hash = new_hash self.session.add(document) From 2483dfd9e5746f091d2d8cf2434494f02ca37e28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20=C5=A0tamcar?= Date: Fri, 27 Sep 2024 21:11:08 +0200 Subject: [PATCH 3/4] Make current date and time overlay reactive --- website/src/components/TimetableDisplay.vue | 19 +++++++++++++------ website/src/utils/days.ts | 11 +++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/website/src/components/TimetableDisplay.vue b/website/src/components/TimetableDisplay.vue index 10749bb..715fa36 100644 --- a/website/src/components/TimetableDisplay.vue +++ b/website/src/components/TimetableDisplay.vue @@ -1,13 +1,14 @@