Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New reporting view for web & add spex version attribute to stage 1 and 2 docs #52

Merged
merged 2 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/spex/htmlspec/htmlrenderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from gcgen.api import Section, write_file
from lxml.etree import _Element

from spex import __version__
from spex.htmlspec import css
from spex.htmlspec.docx import Document
from spex.htmlspec.parser import (
Expand Down Expand Up @@ -137,6 +138,7 @@ def generate(self, yield_progress: bool = False) -> Generator[int, None, None]:
if revision is not None:
s.emit(f' data-revision="{revision}"')
s.emitln("/>")
s.emitln(f'<meta name="spexVersion" content="{__version__}" />')
s.emitln(f"<title>{self._fname}</title>")
s.add_section(self._html_head)
s.emitln(f'<link rel="stylesheet" href="{self._fname}.css"/>')
Expand Down Expand Up @@ -198,7 +200,10 @@ def _render_list(self, s: Section, lst: List) -> None:
s.dedent().emitln(f"</{lst.tag}>")

def _render_table(self, s: Section, tbl: Table) -> None:
s.emitln("<table>").indent()
if tbl.id:
s.emitln(f"<table id='{tbl.id}'>").indent()
else:
s.emitln("<table>").indent()
for rndx, row in enumerate(tbl.rows):
s.emitln("<tr>").indent()
for cndx, cell in enumerate(row):
Expand Down
33 changes: 31 additions & 2 deletions src/spex/htmlspec/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from spex.htmlspec.docx import AbstractNumLvl, Document, RunProperties, TableWrap, Tag
from spex.htmlspec.stream import Stream
from spex.jsonspec.extractors.regular_expressions import TABLE_ID_REGEX
from spex.xml import Xpath


Expand Down Expand Up @@ -57,10 +58,13 @@ class Point:
y: int


HTMLUnits = Union[List, Paragraph, Span, "Table"]


@dataclass(frozen=True)
class TableCell:
tag: str
elems: TList[Union[List, Paragraph, Span, "Table"]]
elems: TList[HTMLUnits]
span: Point
origin: Point
tc_pr: Optional[TcPr]
Expand All @@ -69,6 +73,30 @@ class TableCell:
@dataclass(frozen=True)
class Table:
    """A parsed spec table: a grid of cells plus an optional figure id."""

    # Grid of cells, row-major; each inner list is one table row.
    rows: TList[TList[TableCell]]
    # Figure number parsed from the table's first cell ("Figure <n>: ..."),
    # or None when no such caption is present.
    # NOTE(review): `id` shadows the builtin, but it is part of the public
    # interface (read by the HTML renderer) and cannot be renamed here.
    id: Optional[str] = None


def find_id(rows: TList[TList[TableCell]]) -> Optional[str]:
    """Extract a table's figure id from its first cell.

    Inspects the first cell of the first row: if it is a ``td`` whose first
    element is a Paragraph matching TABLE_ID_REGEX ("Figure <n>: ..."), the
    captured number is returned.

    :param rows: the table's cells, row-major.
    :return: the figure number as a string, or None when the table has no
        recognizable "Figure <n>:" caption cell.
    """

    def extract_text(elem: Paragraph) -> str:
        # Concatenate the raw text of every span in the paragraph.
        return "".join(span.text for span in elem.spans)

    # Guard clauses: bail out early whenever there is no usable first cell.
    if not rows or not rows[0]:
        return None
    cell = rows[0][0]
    if cell.tag != "td" or not cell.elems:
        return None
    first_elem = cell.elems[0]
    if not isinstance(first_elem, Paragraph):
        return None
    text = extract_text(first_elem)
    if not text:
        return None
    maybe_id = TABLE_ID_REGEX.match(text)
    return maybe_id.group("id") if maybe_id else None


class SpexParser:
Expand Down Expand Up @@ -252,4 +280,5 @@ def _parse_table(self, stream: Stream[_Element]) -> Optional[Table]:
cell_cache[p] = tcell
cells.append(tcell)
rows.append(cells)
return Table(rows=rows)
table = Table(rows=rows, id=find_id(rows))
return table
2 changes: 2 additions & 0 deletions src/spex/jsonspec/extractors/regular_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,5 @@ def match(self, text: str) -> Tuple[Optional[str], Optional[str], Optional[str]]
ELLIPSIS_LABEL_REGEX = compile(r"^[\s]*[\.\.\.]|[…][\s]*$")

LABEL_VALIDATION_REGEX = compile(r"^[a-zA-Z][\w]*$")

TABLE_ID_REGEX = compile(r"^Figure (?P<id>\d+):.*")
2 changes: 2 additions & 0 deletions src/spex/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
from typing import Generator, Optional, Set, cast

from spex import __version__
from spex.htmlspec.htmlrenderer import SpexHtmlRenderer
from spex.jsonspec import parse
from spex.jsonspec.defs import JSON
Expand Down Expand Up @@ -45,6 +46,7 @@ def parse_spec(
w.write_meta("specification", sdoc.key)
w.write_meta("revision", sdoc.rev)
w.write_meta("format version", 1) # TODO define elsewhere
w.write_meta("spex version", __version__) # TODO define elsewhere
dparser = sdoc.get_parser(args)
if yield_progress:
num_figures = dparser.num_figures
Expand Down
4 changes: 4 additions & 0 deletions src/spex/resources/stage2.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
"type": "number",
"minimum": 1
},
"spex version": {
"description": "version of the NVMe Spex tool",
"type": "string"
},
"lint": {
"description": "linting issues encountered during parsing",
"type": "array",
Expand Down
74 changes: 51 additions & 23 deletions src/spexsrv/application/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,18 @@
from contextlib import contextmanager
from os import environ
from pathlib import Path
from typing import Any, AsyncIterator, Dict, Iterator, Tuple
from typing import Any, AsyncIterator, Dict, Iterator, Set, Tuple

import quart
from quart import Quart, abort, make_response, redirect, request, url_for
from quart import Quart, abort, make_response, request, url_for
from quart.wrappers.response import Response

from spex import __version__
from spex.jsonspec.defs import JSON
from spex.jsonspec.lint import Code
from spex.jsonspec.parserargs import ParserArgs
from spex.parse import parse_spec
from spexsrv.application.report_view import get_erroneous_figures

SPEX_CACHE = environ.get("SPEX_CACHE", "true").lower() in ("1", "y", "yes", "true")

Expand Down Expand Up @@ -58,9 +60,9 @@ def json_to_sse(data: JSON) -> bytes:
@contextmanager
def temp_dir() -> Iterator[Path]:
    """Yield a freshly created temporary directory as a Path.

    The directory and its contents are removed when the ``with`` block
    exits, whether normally or via an exception.
    """
    tmp = tempfile.TemporaryDirectory()
    tmp_path = Path(tmp.name)
    try:
        yield tmp_path
    finally:
        # Always remove the directory, even if the caller raised.
        tmp.cleanup()

Expand Down Expand Up @@ -109,26 +111,52 @@ async def index() -> str:
async def report(hash: str) -> str:
    """Render the HTML report page for a previously parsed document.

    :param hash: md5 of the uploaded spec; key into the report cache.
    :return: rendered ``report.html`` template.

    Responds 404 when the hash is unknown or its cached JSON is gone.
    """
    # TODO: enable this part again this actually looks for the document requested
    json_fpath = app.config[SPEX_CACHE_LOOKUP].get(hash)

    if json_fpath is None or not json_fpath.is_file():
        abort(404)

    # Derive the HTML sibling only after the None-check above — doing it
    # earlier would raise TypeError (Path(None)) for unknown hashes.
    html_fpath = Path(json_fpath).with_suffix(".html")

    with open(json_fpath) as fh:
        report_json = json.load(fh)

    # Lint entries carry figure ids like "42_0"; the table id is the prefix.
    erroneous_figure_ids: Set[str] = {
        report["fig"].split("_")[0] for report in report_json["meta"]["lint"]
    }
    report_html = get_erroneous_figures(list(erroneous_figure_ids), html_fpath)

    # "bundle" query arg present (any value) means render for offline bundling:
    # suppress the self/json/html links.
    bundle = request.args.get("bundle", default=None, type=str) is not None
    tpl_ctx = {
        "title": "Report",
        "report_json": report_json,
        "report_html": report_html,
        "erroneous_figure_ids": list(erroneous_figure_ids),
        "lint_codes": lint_codes,
        "link_self": url_for("report", hash=hash, bundle=1) if not bundle else None,
        "link_json": (
            url_for("json_report", hash=hash, bundle=1) if not bundle else None
        ),
        "link_html": (
            url_for("html_report", hash=hash, bundle=1) if not bundle else None
        ),
        "bundle": bundle,
        "spex_version": __version__,
    }

    return await render_template("report.html", **tpl_ctx)


@app.route("/json/<hash>")
async def json_report(hash: str) -> Response:
    """Serve the cached stage-2 JSON report for ``hash`` as a file.

    Responds 404 when the hash is unknown — previously an unknown hash
    raised TypeError from ``Path(None)`` (HTTP 500).
    """
    json_fpath = app.config[SPEX_CACHE_LOOKUP].get(hash)
    if json_fpath is None or not Path(json_fpath).is_file():
        abort(404)
    return await quart.helpers.send_file(Path(json_fpath))


@app.route("/html/<hash>")
async def html_report(hash: str) -> Response:
    """Serve the cached HTML rendering stored next to the JSON report.

    Responds 404 when the hash is unknown or the HTML file is missing —
    previously an unknown hash raised TypeError from ``Path(None)``.
    """
    json_fpath = app.config[SPEX_CACHE_LOOKUP].get(hash)
    if json_fpath is None:
        abort(404)
    html_fpath = Path(json_fpath).with_suffix(".html")
    if not html_fpath.is_file():
        abort(404)
    return await quart.helpers.send_file(html_fpath)


@app.route("/parse", methods=["POST"])
async def spec_parse() -> Response | Tuple[str, int, Dict[Any, Any]]:
files = await request.files
Expand All @@ -141,26 +169,22 @@ async def spec_parse() -> Response | Tuple[str, int, Dict[Any, Any]]:

spec = files["document"]

tdir = tempfile.TemporaryDirectory()
tdir_path = Path(tdir.name)
dst = tdir_path / spec.filename
await spec.save(dst)
hash = md5sum(dst)
report_url = url_for("report", hash=hash)
json_path = app.config[SPEX_CACHE_LOOKUP].get(hash)
if SPEX_CACHE and json_path:
# redirect immediately to the existing report
return redirect(report_url) # type: ignore
temp_dir = tempfile.TemporaryDirectory()
temp_dir_path = Path(temp_dir.name)
destination = temp_dir_path / spec.filename

await spec.save(destination)
hash = md5sum(destination)
report_url = url_for("report", hash=hash)
pargs = ParserArgs(
output_dir=Path(tdir_path),
output_dir=Path(temp_dir_path),
skip_fig_on_error=True, # required for caching making sense
lint_codes_ignore=[],
)

async def sse_events() -> AsyncIterator[bytes]:
try:
gen = parse_spec(dst, pargs, yield_progress=True)
gen = parse_spec(destination, pargs, yield_progress=True)
try:
while True:
phase, fig_ndx, num_figs = next(gen)
Expand All @@ -174,23 +198,27 @@ async def sse_events() -> AsyncIterator[bytes]:
)
except StopIteration as e:
json_fpath = e.value
html_path = Path(json_fpath.with_suffix(".html"))

cache_entry = Path(app.config[SPEX_CACHE_TMPDIR].name) / json_fpath.name
shutil.copy(json_fpath, cache_entry)
print(f"hash({hash}) -> {cache_entry}")
shutil.copy(
html_path, Path(app.config[SPEX_CACHE_TMPDIR].name) / html_path.name
)
app.config[SPEX_CACHE_LOOKUP][hash] = cache_entry
yield json_to_sse({"type": "report-completed", "url": report_url})
finally:
tdir.cleanup()
temp_dir.cleanup()

# skip parsing..
# then generate, then return big ass report doc
rsp = await make_response(
response = await make_response(
sse_events(),
{
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
"Transfer-Encoding": "chunked",
},
)
rsp.timeout = 60 # type: ignore
return rsp # type: ignore
response.timeout = 60 # type: ignore
return response # type: ignore
26 changes: 26 additions & 0 deletions src/spexsrv/application/report_view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# SPDX-FileCopyrightText: 2023 Samsung Electronics Co., Ltd
#
# SPDX-License-Identifier: BSD-3-Clause

from pathlib import Path
from typing import Dict, List

import lxml
import lxml.etree

from spex.xml import XmlUtils, Xpath, etree


def get_erroneous_figures(figures: List[str], html_doc: Path) -> Dict[str, str]:
    """Map each figure id in ``figures`` to the HTML markup of its table.

    Parses ``html_doc`` and looks up ``<table id="...">`` elements under
    ``<body>``. Figures whose table cannot be located are silently omitted
    from the result (best-effort by design).

    :param figures: figure ids, possibly with a "_<sub>" suffix as emitted
        by the linter (e.g. "42_0"); the table id is the numeric prefix.
    :param html_doc: path to the rendered HTML spec.
    :return: mapping of original figure id -> formatted table markup.
    """
    doc = etree.parse(html_doc)
    err_figures: Dict[str, str] = {}
    for figure in figures:
        figure_id = figure.split("_")[0]
        try:
            # Quote the id so the predicate is an exact string comparison.
            # Unquoted, a non-numeric id is an XPath syntax error and a
            # numeric one triggers number coercion instead of string match.
            table = Xpath.elem_first(doc, f"./body/table[@id='{figure_id}']")
            if table is not None:
                err_figures[figure] = XmlUtils.fmt(table)
        except lxml.etree.XPathEvalError:
            # Best-effort: skip figures whose lookup fails.
            ...

    return err_figures
Loading