Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New reporting view for web & add spex version attribute to stage 1 and 2 docs #52

Merged
merged 2 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/spex/htmlspec/htmlrenderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from gcgen.api import Section, write_file
from lxml.etree import _Element

from spex import __version__
from spex.htmlspec import css
from spex.htmlspec.docx import Document
from spex.htmlspec.parser import (
Expand Down Expand Up @@ -137,6 +138,7 @@ def generate(self, yield_progress: bool = False) -> Generator[int, None, None]:
if revision is not None:
s.emit(f' data-revision="{revision}"')
s.emitln("/>")
s.emitln(f'<meta name="spexVersion" content="{__version__}" />')
s.emitln(f"<title>{self._fname}</title>")
s.add_section(self._html_head)
s.emitln(f'<link rel="stylesheet" href="{self._fname}.css"/>')
Expand Down Expand Up @@ -198,7 +200,10 @@ def _render_list(self, s: Section, lst: List) -> None:
s.dedent().emitln(f"</{lst.tag}>")

def _render_table(self, s: Section, tbl: Table) -> None:
s.emitln("<table>").indent()
if tbl.id:
s.emitln(f"<table id='{tbl.id}'>").indent()
else:
s.emitln("<table>").indent()
for rndx, row in enumerate(tbl.rows):
s.emitln("<tr>").indent()
for cndx, cell in enumerate(row):
Expand Down
33 changes: 31 additions & 2 deletions src/spex/htmlspec/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from spex.htmlspec.docx import AbstractNumLvl, Document, RunProperties, TableWrap, Tag
from spex.htmlspec.stream import Stream
from spex.jsonspec.extractors.regular_expressions import TABLE_ID_REGEX
from spex.xml import Xpath


Expand Down Expand Up @@ -57,10 +58,13 @@ class Point:
y: int


HTMLUnits = Union[List, Paragraph, Span, "Table"]


@dataclass(frozen=True)
class TableCell:
tag: str
elems: TList[Union[List, Paragraph, Span, "Table"]]
elems: TList[HTMLUnits]
span: Point
origin: Point
tc_pr: Optional[TcPr]
Expand All @@ -69,6 +73,30 @@ class TableCell:
@dataclass(frozen=True)
class Table:
    """A parsed spec table: a grid of cells plus an optional figure id."""

    # Grid of cells, row-major; each inner list is one table row.
    rows: TList[TList[TableCell]]
    # Figure number parsed from the table's first cell ("Figure <n>: ..."),
    # or None when no such caption is present.
    # NOTE(review): `id` shadows the builtin, but it is part of the public
    # interface (read by the HTML renderer) and cannot be renamed here.
    id: Optional[str] = None


def find_id(rows: TList[TList[TableCell]]) -> Optional[str]:
    """Extract a table's figure id from its first cell.

    Inspects the first cell of the first row: if it is a ``td`` whose first
    element is a Paragraph matching TABLE_ID_REGEX ("Figure <n>: ..."), the
    captured number is returned.

    :param rows: the table's cells, row-major.
    :return: the figure number as a string, or None when the table has no
        recognizable "Figure <n>:" caption cell.
    """

    def extract_text(elem: Paragraph) -> str:
        # Concatenate the raw text of every span in the paragraph.
        return "".join(span.text for span in elem.spans)

    # Guard clauses: bail out early whenever there is no usable first cell.
    if not rows or not rows[0]:
        return None
    cell = rows[0][0]
    if cell.tag != "td" or not cell.elems:
        return None
    first_elem = cell.elems[0]
    if not isinstance(first_elem, Paragraph):
        return None
    text = extract_text(first_elem)
    if not text:
        return None
    maybe_id = TABLE_ID_REGEX.match(text)
    return maybe_id.group("id") if maybe_id else None


class SpexParser:
Expand Down Expand Up @@ -252,4 +280,5 @@ def _parse_table(self, stream: Stream[_Element]) -> Optional[Table]:
cell_cache[p] = tcell
cells.append(tcell)
rows.append(cells)
return Table(rows=rows)
table = Table(rows=rows, id=find_id(rows))
return table
2 changes: 2 additions & 0 deletions src/spex/jsonspec/extractors/regular_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,5 @@ def match(self, text: str) -> Tuple[Optional[str], Optional[str], Optional[str]]
ELLIPSIS_LABEL_REGEX = compile(r"^[\s]*[\.\.\.]|[…][\s]*$")

LABEL_VALIDATION_REGEX = compile(r"^[a-zA-Z][\w]*$")

TABLE_ID_REGEX = compile(r"^Figure (?P<id>\d+):.*")
2 changes: 2 additions & 0 deletions src/spex/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
from typing import Generator, Optional, Set, cast

from spex import __version__
from spex.htmlspec.htmlrenderer import SpexHtmlRenderer
from spex.jsonspec import parse
from spex.jsonspec.defs import JSON
Expand Down Expand Up @@ -45,6 +46,7 @@ def parse_spec(
w.write_meta("specification", sdoc.key)
w.write_meta("revision", sdoc.rev)
w.write_meta("format version", 1) # TODO define elsewhere
w.write_meta("spex version", __version__) # TODO define elsewhere
dparser = sdoc.get_parser(args)
if yield_progress:
num_figures = dparser.num_figures
Expand Down
4 changes: 4 additions & 0 deletions src/spex/resources/stage2.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
"type": "number",
"minimum": 1
},
"spex version": {
"description": "version of the NVMe Spex tool",
"type": "string"
},
"lint": {
"description": "linting issues encountered during parsing",
"type": "array",
Expand Down
74 changes: 51 additions & 23 deletions src/spexsrv/application/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,18 @@
from contextlib import contextmanager
from os import environ
from pathlib import Path
from typing import Any, AsyncIterator, Dict, Iterator, Tuple
from typing import Any, AsyncIterator, Dict, Iterator, Set, Tuple

import quart
from quart import Quart, abort, make_response, redirect, request, url_for
from quart import Quart, abort, make_response, request, url_for
from quart.wrappers.response import Response

from spex import __version__
from spex.jsonspec.defs import JSON
from spex.jsonspec.lint import Code
from spex.jsonspec.parserargs import ParserArgs
from spex.parse import parse_spec
from spexsrv.application.report_view import get_erroneous_figures

SPEX_CACHE = environ.get("SPEX_CACHE", "true").lower() in ("1", "y", "yes", "true")

Expand Down Expand Up @@ -58,9 +60,9 @@ def json_to_sse(data: JSON) -> bytes:
@contextmanager
def temp_dir() -> Iterator[Path]:
    """Yield a freshly created temporary directory as a Path.

    The directory and its contents are removed when the ``with`` block
    exits, whether normally or via an exception.
    """
    tmp = tempfile.TemporaryDirectory()
    tmp_path = Path(tmp.name)
    try:
        yield tmp_path
    finally:
        # Always remove the directory, even if the caller raised.
        tmp.cleanup()

Expand Down Expand Up @@ -109,26 +111,52 @@ async def index() -> str:
async def report(hash: str) -> str:
    """Render the HTML report page for a previously parsed document.

    :param hash: md5 of the uploaded spec; key into the report cache.
    :return: rendered ``report.html`` template.

    Responds 404 when the hash is unknown or its cached JSON is gone.
    """
    # TODO: enable this part again this actually looks for the document requested
    json_fpath = app.config[SPEX_CACHE_LOOKUP].get(hash)

    if json_fpath is None or not json_fpath.is_file():
        abort(404)

    # Derive the HTML sibling only after the None-check above — doing it
    # earlier would raise TypeError (Path(None)) for unknown hashes.
    html_fpath = Path(json_fpath).with_suffix(".html")

    with open(json_fpath) as fh:
        report_json = json.load(fh)

    # Lint entries carry figure ids like "42_0"; the table id is the prefix.
    erroneous_figure_ids: Set[str] = {
        report["fig"].split("_")[0] for report in report_json["meta"]["lint"]
    }
    report_html = get_erroneous_figures(list(erroneous_figure_ids), html_fpath)

    # "bundle" query arg present (any value) means render for offline bundling:
    # suppress the self/json/html links.
    bundle = request.args.get("bundle", default=None, type=str) is not None
    tpl_ctx = {
        "title": "Report",
        "report_json": report_json,
        "report_html": report_html,
        "erroneous_figure_ids": list(erroneous_figure_ids),
        "lint_codes": lint_codes,
        "link_self": url_for("report", hash=hash, bundle=1) if not bundle else None,
        "link_json": (
            url_for("json_report", hash=hash, bundle=1) if not bundle else None
        ),
        "link_html": (
            url_for("html_report", hash=hash, bundle=1) if not bundle else None
        ),
        "bundle": bundle,
        "spex_version": __version__,
    }

    return await render_template("report.html", **tpl_ctx)


@app.route("/json/<hash>")
async def json_report(hash: str) -> Response:
    """Serve the cached stage-2 JSON report for ``hash`` as a file.

    Responds 404 when the hash is unknown — previously an unknown hash
    raised TypeError from ``Path(None)`` (HTTP 500).
    """
    json_fpath = app.config[SPEX_CACHE_LOOKUP].get(hash)
    if json_fpath is None or not Path(json_fpath).is_file():
        abort(404)
    return await quart.helpers.send_file(Path(json_fpath))


@app.route("/html/<hash>")
async def html_report(hash: str) -> Response:
    """Serve the cached HTML rendering stored next to the JSON report.

    Responds 404 when the hash is unknown or the HTML file is missing —
    previously an unknown hash raised TypeError from ``Path(None)``.
    """
    json_fpath = app.config[SPEX_CACHE_LOOKUP].get(hash)
    if json_fpath is None:
        abort(404)
    html_fpath = Path(json_fpath).with_suffix(".html")
    if not html_fpath.is_file():
        abort(404)
    return await quart.helpers.send_file(html_fpath)


@app.route("/parse", methods=["POST"])
async def spec_parse() -> Response | Tuple[str, int, Dict[Any, Any]]:
files = await request.files
Expand All @@ -141,26 +169,22 @@ async def spec_parse() -> Response | Tuple[str, int, Dict[Any, Any]]:

spec = files["document"]

tdir = tempfile.TemporaryDirectory()
tdir_path = Path(tdir.name)
dst = tdir_path / spec.filename
await spec.save(dst)
hash = md5sum(dst)
report_url = url_for("report", hash=hash)
json_path = app.config[SPEX_CACHE_LOOKUP].get(hash)
if SPEX_CACHE and json_path:
# redirect immediately to the existing report
return redirect(report_url) # type: ignore
temp_dir = tempfile.TemporaryDirectory()
temp_dir_path = Path(temp_dir.name)
destination = temp_dir_path / spec.filename

await spec.save(destination)
hash = md5sum(destination)
report_url = url_for("report", hash=hash)
pargs = ParserArgs(
output_dir=Path(tdir_path),
output_dir=Path(temp_dir_path),
skip_fig_on_error=True, # required for caching making sense
lint_codes_ignore=[],
)

async def sse_events() -> AsyncIterator[bytes]:
try:
gen = parse_spec(dst, pargs, yield_progress=True)
gen = parse_spec(destination, pargs, yield_progress=True)
try:
while True:
phase, fig_ndx, num_figs = next(gen)
Expand All @@ -174,23 +198,27 @@ async def sse_events() -> AsyncIterator[bytes]:
)
except StopIteration as e:
json_fpath = e.value
html_path = Path(json_fpath.with_suffix(".html"))

cache_entry = Path(app.config[SPEX_CACHE_TMPDIR].name) / json_fpath.name
shutil.copy(json_fpath, cache_entry)
print(f"hash({hash}) -> {cache_entry}")
shutil.copy(
html_path, Path(app.config[SPEX_CACHE_TMPDIR].name) / html_path.name
)
app.config[SPEX_CACHE_LOOKUP][hash] = cache_entry
yield json_to_sse({"type": "report-completed", "url": report_url})
finally:
tdir.cleanup()
temp_dir.cleanup()

# skip parsing..
# then generate, then return big ass report doc
rsp = await make_response(
response = await make_response(
sse_events(),
{
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
"Transfer-Encoding": "chunked",
},
)
rsp.timeout = 60 # type: ignore
return rsp # type: ignore
response.timeout = 60 # type: ignore
return response # type: ignore
26 changes: 26 additions & 0 deletions src/spexsrv/application/report_view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# SPDX-FileCopyrightText: 2023 Samsung Electronics Co., Ltd
#
# SPDX-License-Identifier: BSD-3-Clause

from pathlib import Path
from typing import Dict, List

import lxml
import lxml.etree

from spex.xml import XmlUtils, Xpath, etree


def get_erroneous_figures(figures: List[str], html_doc: Path) -> Dict[str, str]:
    """Map each figure id in ``figures`` to the HTML markup of its table.

    Parses ``html_doc`` and looks up ``<table id="...">`` elements under
    ``<body>``. Figures whose table cannot be located are silently omitted
    from the result (best-effort by design).

    :param figures: figure ids, possibly with a "_<sub>" suffix as emitted
        by the linter (e.g. "42_0"); the table id is the numeric prefix.
    :param html_doc: path to the rendered HTML spec.
    :return: mapping of original figure id -> formatted table markup.
    """
    doc = etree.parse(html_doc)
    err_figures: Dict[str, str] = {}
    for figure in figures:
        figure_id = figure.split("_")[0]
        try:
            # Quote the id so the predicate is an exact string comparison.
            # Unquoted, a non-numeric id is an XPath syntax error and a
            # numeric one triggers number coercion instead of string match.
            table = Xpath.elem_first(doc, f"./body/table[@id='{figure_id}']")
            if table is not None:
                err_figures[figure] = XmlUtils.fmt(table)
        except lxml.etree.XPathEvalError:
            # Best-effort: skip figures whose lookup fails.
            ...

    return err_figures
Loading