From c6642227e0f33ce347e229b874a9cd29007685f6 Mon Sep 17 00:00:00 2001 From: Valentin Ambroise <113367796+vaamb@users.noreply.github.com> Date: Sat, 1 Feb 2025 14:18:38 +0100 Subject: [PATCH] Store wiki article versions as "compact diff"s (#169) * Make `WikiArticleModification` a `CRUDMixin` * Cleanup `WikiArticle` paths related vars * Add a `path` parameter to `WikiArticleModification` * Store article versions as "compact diff"s --- src/ouranos/core/database/models/app.py | 161 +++++++++--------- .../web_server/routes/services/wiki.py | 6 +- 2 files changed, 86 insertions(+), 81 deletions(-) diff --git a/src/ouranos/core/database/models/app.py b/src/ouranos/core/database/models/app.py index a5fcdda7..fd517277 100644 --- a/src/ouranos/core/database/models/app.py +++ b/src/ouranos/core/database/models/app.py @@ -1,6 +1,7 @@ from __future__ import annotations -from datetime import datetime, timezone +from datetime import datetime +import difflib import enum from enum import Enum, IntFlag, StrEnum import re @@ -1068,7 +1069,7 @@ def topic_name(self) -> str: # Needed for response formatting return self.topic.name @property - def absolute_path(self) -> ioPath: + def abs_path(self) -> ioPath: return self.root_dir() / self.path @property @@ -1080,17 +1081,20 @@ def content_path(self) -> ioPath: return self.path / self.content_name @property - def _abs_content_path(self) -> ioPath: - return self.absolute_path / self.content_name + def abs_content_path(self) -> ioPath: + return self.abs_path / self.content_name async def set_content(self, content: str) -> None: - async with await self._abs_content_path.open("w") as f: + async with await self.abs_content_path.open("w") as f: await f.write(content) - async def get_content(self) -> str: - async with await self._abs_content_path.open("r") as f: - content = await f.read() - return content + async def get_content(self) -> str | None: + try: + async with await self.abs_content_path.open("r") as f: + content = await f.read() + return content + except IOError: + return None async def attach_tags(self, session:AsyncSession, tags_name: list[str]) -> None: # Clear all tags @@ -1112,6 +1116,25 @@ async def attach_tags(self, session:AsyncSession, tags_name: list[str]) -> None: await session.execute(stmt) await session.commit() + async def compute_diff(self, session, next_content: str) -> tuple[str, list[str]]: + modifications = await WikiArticleModification.get_multiple( + session, article_id=self.id, limit=2, + order_by=WikiArticleModification.version.desc()) + if len(modifications) == 1: + current_name = f"EMPTY" + next_name = f"diff_{modifications[0].version:03}.md" + elif len(modifications) == 2: + current_name = f"diff_{modifications[1].version:03}.md" + next_name = f"diff_{modifications[0].version:03}.md" + else: + raise ValueError + current_content: str = await self.get_content() or "" + current_content: list[str] = current_content.splitlines(True) + next_content: list[str] = next_content.splitlines(True) + diff = difflib.unified_diff( + current_content, next_content, current_name, next_name, n=0) + return next_name, list(diff) + @classmethod async def create( cls, @@ -1131,10 +1154,10 @@ async def create( article_dir = topic_obj.absolute_path / name await article_dir.mkdir(parents=True, exist_ok=True) rel_path = article_dir.relative_to(current_app.static_dir) + values["path"] = str(rel_path) # Create the article info content = values.pop("content") author_id = values.pop("author_id") - values["path"] = str(rel_path) tags_name: list[str] = values.pop("tags_name", []) await super().create(session, values=values, **lookup_keys) article = await cls.get(session, topic_name=topic_name, name=name) @@ -1143,10 +1166,17 @@ async def create( # Create the article modification await WikiArticleModification.create( session, - article=article, + article_id=article.id, author_id=author_id, - modification=ModificationType.creation, + values = { + "modification_type": ModificationType.creation, + }, ) + # Save the diff + diff_name, diff = await article.compute_diff(session, content) + diff_path = article.abs_path / diff_name + async with await diff_path.open("w") as f: + await f.writelines(diff) # Save the article content await article.set_content(content) @@ -1201,6 +1231,12 @@ async def get_multiple( result = await session.execute(stmt) return result.scalars().all() + @classmethod + async def get_by_id(cls, session: AsyncSession, article_id: int) -> Self | None: + stmt = select(cls).where(cls.id == article_id) + result = await session.execute(stmt) + return result.scalar_one_or_none() + @classmethod async def get_history( cls, @@ -1213,7 +1249,7 @@ async def get_history( article = await cls.get(session, topic_name=topic, name=name) if not article: raise ValueError("Article not found") - history = await WikiArticleModification.get_for_article( + history = await WikiArticleModification.get_multiple( session, article_id=article.id, limit=limit) return history @@ -1243,10 +1279,17 @@ async def update( # Create the article modification await WikiArticleModification.create( session, - article=article, + article_id=article.id, author_id=author_id, - modification=ModificationType.update, + values={ + "modification_type": ModificationType.update, + }, ) + # Save the diff + diff_name, diff = await article.compute_diff(session, content) + diff_path = article.abs_path / diff_name + async with await diff_path.open("w") as f: + await f.writelines(diff) # Save the article content await article.set_content(content) @@ -1273,27 +1316,30 @@ async def delete( # Create the article modification await WikiArticleModification.create( session, - article=article, + article_id=article.id, author_id=author_id, - modification=ModificationType.deletion, + values={ + "modification_type": ModificationType.deletion, + }, ) -class WikiArticleModification(Base): +class WikiArticleModification(Base, CRUDMixin): __tablename__ = "wiki_articles_modifications" __bind_key__ = "app" __table_args__ = ( UniqueConstraint( - "article_id", "version", "modification_type", + "article_id", "version", name="uq_wiki_articles_modifications" ), ) - _lookup_keys = ["topic_name", "article_name", "version"] + _lookup_keys = ["article_id", "version"] id: Mapped[int] = mapped_column(primary_key=True) article_id: Mapped[int] = mapped_column(sa.ForeignKey("wiki_articles.id")) version: Mapped[int] = mapped_column() modification_type: Mapped[ModificationType] = mapped_column() + path: Mapped[ioPath] = mapped_column(PathType(length=512)) timestamp: Mapped[datetime] = mapped_column(UtcDateTime, default=func.current_timestamp()) author_id: Mapped[str] = mapped_column(sa.ForeignKey("users.id")) @@ -1320,39 +1366,20 @@ async def create( cls, session: AsyncSession, /, - article: WikiArticle, - author_id: int, - modification: ModificationType, + values: dict | None = None, # modification_type, author_id + **lookup_keys: lookup_keys_type, # article_id ) -> None: - history = await cls.get_latest_version(session, article_id=article.id) - version: int = history.version + 1 if history is not None else 1 - stmt = ( - insert(cls) - .values({ - "article_id": article.id, - "version": version, - "modification_type": modification, - "author_id": author_id, - }) - ) - await session.execute(stmt) - - @classmethod - async def get_for_article( - cls, - session: AsyncSession, - /, - article_id: int, - limit: int = 50, - ) -> Sequence[Self]: - stmt = ( - select(cls) - .where(cls.article_id == article_id) - .order_by(cls.version.desc()) - .limit(limit) - ) - result = await session.execute(stmt) - return result.scalars().all() + # Get version number + history = await cls.get_latest_version( + session, article_id=lookup_keys["article_id"]) + version = history.version + 1 if history is not None else 1 + lookup_keys["version"] = version + # Get path info + article = await WikiArticle.get_by_id(session, lookup_keys["article_id"]) + rel_path = article.path / f"diff_{version:03}.md" + values["path"] = str(rel_path) + # Create the entry + await super().create(session, values=values, **lookup_keys) @classmethod async def get_latest_version( @@ -1361,31 +1388,9 @@ async def get_latest_version( /, article_id: int, ) -> Self | None: - stmt = ( - select(cls) - .where(cls.article_id == article_id) - .order_by(cls.version.desc()) - .limit(1) - ) - result = await session.execute(stmt) - return result.scalars().one_or_none() - - @classmethod - async def delete( - cls, - session: AsyncSession, - /, - article_id: int, - version, - ) -> None: - stmt = ( - delete(cls) - .where( - (cls.article_id == article_id) - & (cls.version == version) - ) - ) - await session.execute(stmt) + return await cls.get( + session, article_id=article_id, limit=1, + order_by=WikiArticleModification.version.desc()) class WikiArticlePicture(Base, WikiObject): @@ -1451,7 +1456,7 @@ async def create( session, topic_name=topic_name, name=article_name) if article is None: raise WikiArticleNotFound - picture_path = article.absolute_path / name + picture_path = article.abs_path / name rel_path = cls.get_rel_path(picture_path) # Create the picture info stmt = ( diff --git a/src/ouranos/web_server/routes/services/wiki.py b/src/ouranos/web_server/routes/services/wiki.py index aab7fa88..507b2f61 100644 --- a/src/ouranos/web_server/routes/services/wiki.py +++ b/src/ouranos/web_server/routes/services/wiki.py @@ -368,7 +368,7 @@ async def create_article( raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=( - f"Failed to create a new wiki template. Error msg: " + f"Failed to create a new wiki article. Error msg: " f"`{e.__class__.__name__}: {e}`", ), ) @@ -500,8 +500,8 @@ async def get_article_history( session: Annotated[AsyncSession, Depends(get_session)], ): article = await article_or_abort(session, topic=topic_name, name=article_name) - history = await WikiArticleModification.get_for_article( - session, article_id=article.id) + history = await WikiArticleModification.get_multiple( + session, article_id=article.id, order_by=WikiArticleModification.version.desc()) return history