From 54e63bd01891ac47ca41e748cb55e7f1925aa1bd Mon Sep 17 00:00:00 2001 From: David Vogt Date: Wed, 15 Jan 2025 15:20:18 +0100 Subject: [PATCH] refactor(forms): rewrite structure / jexl evaluator The new structure / jexl evaluator works a bit differently: Instead of trying to replace evaluation contexts during recursive evaluation (for example is_hidden checks), we now have a local JEXL runtime for each field. Also, the JEXL expressions (or their results, rather) are heavily cached and should speed things up significantly. Note this is WIP, many tests are still failing, but many are already succeeding as well. We're trying to keep the test cases 100% unchanged - the only modifications currently are some improved assertion messages, so debugging becomes easier. --- caluma/caluma_core/jexl.py | 5 + caluma/caluma_form/domain_logic.py | 14 + caluma/caluma_form/jexl2.py | 138 ++++++ caluma/caluma_form/structure2.py | 499 +++++++++++++++++++++ caluma/caluma_form/tests/test_jexl.py | 27 +- caluma/caluma_form/tests/test_structure.py | 112 +++++ caluma/caluma_form/utils.py | 9 + caluma/caluma_form/validators.py | 243 ++++------ 8 files changed, 893 insertions(+), 154 deletions(-) create mode 100644 caluma/caluma_form/jexl2.py create mode 100644 caluma/caluma_form/structure2.py create mode 100644 caluma/caluma_form/tests/test_structure.py diff --git a/caluma/caluma_core/jexl.py b/caluma/caluma_core/jexl.py index 27c797bc9..ba3b04420 100644 --- a/caluma/caluma_core/jexl.py +++ b/caluma/caluma_core/jexl.py @@ -167,6 +167,11 @@ def _length_transform(self, value, *options): return None def evaluate(self, expression, context=None): + # log.info( + # "JEXL: evaluating expression <<< %s >>> in context: %s", + # str(expression), + # str(dict(context)), + # ) self._expr_stack.append(expression) try: return super().evaluate(expression, context) diff --git a/caluma/caluma_form/domain_logic.py b/caluma/caluma_form/domain_logic.py index 83c255618..0bc379f3b 100644 --- a/caluma/caluma_form/domain_logic.py +++ b/caluma/caluma_form/domain_logic.py @@ -1,4 +1,5 @@ from graphlib import TopologicalSorter +from logging import getLogger from typing import Optional from django.db import transaction @@ -12,6 +13,8 @@ from caluma.caluma_user.models import BaseUser from caluma.utils import update_model +log = getLogger(__name__) + class BaseLogic: @staticmethod @@ -156,6 +159,7 @@ def post_save(answer: models.Answer) -> models.Answer: def update_calc_dependents(answer): if not answer.question.calc_dependents: return + log.debug("update_calc_dependents(%s)", answer) root_doc = utils.prefetch_document(answer.document.family_id) struc = structure.FieldSet(root_doc, root_doc.form) @@ -163,6 +167,16 @@ def update_calc_dependents(answer): for question in models.Question.objects.filter( pk__in=answer.question.calc_dependents ): + log.debug( + "update_calc_dependents(%s): updating question %s", answer, question.pk + ) + # FIXME: update_or_create_calc_answer() does not properly + # deal with table rows: we start recalculating from the root doc, + # but if a recalculation was triggered inside a table row, we need to + # do *that* recalc properly as well (we search for dependents here, but + # if those dependents are in a row doc, we can't find all of them and) + # don't properly update them either (get_field() returns None, because the + # affected question is not in the root form) update_or_create_calc_answer(question, root_doc, struc) @classmethod diff --git a/caluma/caluma_form/jexl2.py b/caluma/caluma_form/jexl2.py new file mode 100644 index 000000000..a09d74555 --- /dev/null +++ b/caluma/caluma_form/jexl2.py @@ -0,0 +1,138 @@ +from collections import ChainMap +from contextlib import contextmanager +from functools import partial + +from pyjexl.analysis import ValidatingAnalyzer + +from caluma.caluma_form.jexl import QuestionMissing +from caluma.caluma_form.structure2 import BaseField + +from ..caluma_core.jexl import ( + JEXL, + ExtractTransformArgumentAnalyzer, + ExtractTransformSubjectAnalyzer, + ExtractTransformSubjectAndArgumentsAnalyzer, +) +from .structure import Field + +""" +Rewrite of the JEXL handling code. + +Design principles: + +* The JEXL classes do not deal with context switching between questions anymore +* The QuestionJexl class only sets up the "runtime", any context is used from the + structure2 code +* We only deal with the *evaluation*, no transform/extraction is happening here - that code + is mostly fine and doesn't need a rewrite +* Caching is done by the structure2 code, not here +* JEXL evaluation happens lazily, but the results are cached. +""" + + +class QuestionValidatingAnalyzer(ValidatingAnalyzer): + def visit_Transform(self, transform): + if transform.name == "answer" and not isinstance(transform.subject.value, str): + yield f"{transform.subject.value} is not a valid question slug." + + yield from super().visit_Transform(transform) + + +class QuestionJexl2(JEXL): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self.current_structure = [] + + self.add_transform("answer", self.answer_transform) + + def get_structure(self): + return self.current_structure[-1] + + @contextmanager + def use_structure(self, context): + self.current_structure.append(context) + try: + yield + finally: + self.current_structure.pop() + + def answer_transform(self, question_slug, *args): + context = self.get_structure() + field = context.get_field(question_slug) + + def _default_or_none(self): + if len(args): + return args[0] + return None + + if not field: + raise QuestionMissing( + f"Question `{question_slug}` could not be found in form {context.get_form()}" + ) + # TODO: should this be an exception? JEXL is referencing a + # non-existing *field* not just a missing answer to an existing + # field + return _default_or_none() + + if field.is_hidden(): + # Hidden fields *always* return the empty value, even if we have + # a default + return field.question.empty_value() + elif field.is_empty(): + # not hidden, but empty + return _default_or_none() + + return field.get_value() + + def validate(self, expression, **kwargs): + return super().validate(expression, QuestionValidatingAnalyzer) + + def extract_referenced_questions(self, expr): + transforms = ["answer"] + yield from self.analyze( + expr, partial(ExtractTransformSubjectAnalyzer, transforms=transforms) + ) + + def extract_referenced_questions_with_arguments(self, expr): + transforms = ["answer"] + yield from self.analyze( + expr, + partial(ExtractTransformSubjectAndArgumentsAnalyzer, transforms=transforms), + ) + + def extract_referenced_mapby_questions(self, expr): + transforms = ["mapby"] + yield from self.analyze( + expr, partial(ExtractTransformArgumentAnalyzer, transforms=transforms) + ) + + def _get_referenced_fields(self, field: Field, expr: str): + deps = list(self.extract_referenced_questions_with_arguments(expr)) + referenced_fields = [self._structure.get_field(slug) for slug, _ in deps] + + referenced_slugs = [ref.question.slug for ref in referenced_fields if ref] + + for slug, args in deps: + required = len(args) == 0 + if slug not in referenced_slugs and required: + raise QuestionMissing( + f"Question `{slug}` could not be found in form {field.form}" + ) + + return [field for field in referenced_fields if field] + + def evaluate(self, expr, context: BaseField, raise_on_error=True): + try: + with self.use_structure(context): + # Sadly, some expressions (such as the answer transform) + # need the current context but don't regularly have it available. + # Therefore we use this context manager so it can do it's job + # Also, combine the global context (self.context) with + return super().evaluate( + expr, ChainMap(self.context, context.get_context()) + ) + except (TypeError, ValueError, ZeroDivisionError, AttributeError): + if raise_on_error: + raise + return None diff --git a/caluma/caluma_form/structure2.py b/caluma/caluma_form/structure2.py new file mode 100644 index 000000000..8ed3a8c2e --- /dev/null +++ b/caluma/caluma_form/structure2.py @@ -0,0 +1,499 @@ +""" +Structure2 - Fast and correct form structure representation. + +Design requirements: + +* Fast initialisation from a document. Use preload if needed to reduce + number of queries +* Eager loading of a full document structure. No lazy-loading with questionable + performance expectations +* Correct navigation via question slugs (JEXL references) from any context +* Fast lookups once initialized +* Extensible for caching JEXL expression results + +* No properties. Code is always in methods +""" + +import collections +import copy +import typing +import weakref +from abc import ABC +from collections.abc import Iterable +from dataclasses import dataclass, field +from functools import singledispatch +from typing import Optional + +from django.db.models import QuerySet + +if typing.TYPE_CHECKING: + from caluma.caluma_form.jexl2 import QuestionJexl2 + +from caluma.caluma_form.models import ( + Answer, + AnswerDocument, + FormQuestion, + Question, +) +from caluma.caluma_form.structure import object_local_memoise + +# Implementation details: +# * Each sub-context (for example row document) has a correct representation +# of each "visible" answer + + +class Context(collections.ChainMap): + """Context container for field lookups. + + The context differs from the base ChainMap such that it supports updating + the *latest* entry as well as updating the parent dicts as well. + """ + + # Requirements: + # * In regular subforms (form questions), we create a sub-context, but need + # to update the parent context as well, because fields in those subforms + # should be visible everywhere. + # * In table rows however, we do not update the parents, and this is why we + # need to differentiate. + + def __init__(self, *maps, update_parent=True): + super().__init__(*maps) + self._update_parent = update_parent + + def new_child(self, m=None, update_parent=False, **kwargs): + new = super().new_child(m, **kwargs) + new._update_parent = update_parent + return new + + def __setitem__(self, key, value): + super().__setitem__(key, value) + if self._update_parent and len(self.maps) > 1: + self.maps[1].__setitem__(key, value) + + +@dataclass +class BaseField(ABC): + """Base class for the field types. This is the interface we aim to provide.""" + + parent: Optional["FieldSet"] = field(default=None) + + question: Optional[Question] = field(default=None) + answer: Optional[Answer] = field(default=None) + + @object_local_memoise + def get_evaluator(self) -> "QuestionJexl2": + """Return the JEXL evaluator for this field.""" + + # JEXL is implemented such that it has one context in the engine, and + # trying to swap it out to switch context is just problematic. So we use + # one JEXL instance per field. + + # deferred import to avoid circular dependency + from caluma.caluma_form.jexl2 import QuestionJexl2 + + # The "info" block is provided by the global context, but we need + # to patch it to represent some local information: The "form" and + # "document" bits should point to the current context, not to the global + # structure. This is a bit unfortunate, but we *have* to fill this in + # two separate places to avoid breaking compatibility + context = collections.ChainMap( + # We need a deep copy of the global context, so we can + # extend the info block without leaking + copy.deepcopy(self.get_global_context()), + self.get_context(), + ) + context["info"].update(self.get_local_info_context()) + + return QuestionJexl2(context=context) + + def _get_grandparent(self): + """Return the grandparent if it exists, None otherwise.""" + try: + return self.parent.parent + except (NameError, AttributeError): + return None + + def _get_root(self): + return self.parent._get_root() if self.parent else self + + @object_local_memoise + def get_local_info_context(self): + form = self.get_form() + + # FIXME: This is a bit ugly, but: In JEXL, the "parent" does not map + # to our internal hierarchy 100%: In our structure, the hierarchy for + # tables goes: root fieldset -> table (rowset) -> fielset (row) -> value + # (column) field, so the parent of the (column) valuefield is the row, + # and it's parent in turn is the rowset. However JEXL (or better, our + # previous implementation) assumes the row fieldset to be virtually + # transparent. + # Therefore, here's the ugly hack: If we're in a fieldset within a rowset, + # the parent info should actually be the grandparent info instead. + if isinstance(self, ValueField) and isinstance(self._get_grandparent(), RowSet): + parent_info = self._get_grandparent().get_local_info_context() + else: + parent_info = self.parent.get_local_info_context() if self.parent else None + return { + "_type": type(self).__qualname__, + "question": self.question.slug if self.question else None, + "form": form and form.slug or None, + "formMeta": form and form.meta or None, + "parent": parent_info, + # TODO how is "root" expected to behave if we're *already* on root? + "root": self._get_root().get_local_info_context() if self.parent else None, + } + + # @object_local_memoise + def is_required(self) -> bool: + """Return True if the field is required. + + Evaluate the `is_required` expression of the field. But if the field + is hidden, it is considered not-required regardless of what the JEXL + expression says. + """ + + if self.is_hidden(): + # hidden can never be required + return False + return self.evaluate_jexl(self.question.is_required) + + # @object_local_memoise + def is_visible(self) -> bool: + """Just return the opposite of is_hidden() for convenience.""" + return not self.is_hidden() + + # @object_local_memoise + def is_hidden(self) -> bool: + """Return True if the field is hidden. + + A field is hidden if either it's parent is hidden, or it's `is_hidden` + JEXL expression evaluates to `True`. + """ + parent_hidden = self.parent and self.parent.is_hidden() + + # If the parent is hidden, then *we* are implicitly also hidden, + # without even evaluating our own is_hidden expression + # + if not self.parent: + # We are the root form. No question attached, so we're always + # visible + return False + return parent_hidden or self.evaluate_jexl( + self.question.is_hidden, raise_on_error=False + ) + + def slug(self): + return self.question and self.question.slug or None + + def get_context(self) -> Context: ... + + def get_field(self, slug): + return self.get_context().get(slug) + + @object_local_memoise + def calculate(self): + return self.evaluate_jexl(self.question.calc_expression) + + def evaluate_jexl(self, expression: str, raise_on_error=True): + # Some stupid shortcuts to avoid building up an evaluation context for + # ~90% of cases where the expression is a simple "true" or "false" + fast_results = {"true": True, "false": False} + if fast_result := fast_results.get(expression) is not None: + return fast_result + + eval = self.get_evaluator() + result = eval.evaluate(expression, self, raise_on_error) + return result + + def get_global_context(self) -> dict: + return self._global_context + + def get_form(self): + parent_form = self.parent.form if self.parent else None + return parent_form or self.form or None + + +@dataclass +class ValueField(BaseField): + """Represent a field in the form. + + This is roughly 1:1 with a question, but repeated in case of a table + row for example. + """ + + @object_local_memoise + def get_value(self): + if self.answer and not self.is_hidden(): + return self.answer.value or self.answer.date + return self.question.empty_value() + + def get_context(self) -> Context: + return self.parent.get_context() + + def is_empty(self): + if not self.answer: + return True + return ( + not bool(self.answer.value or self.answer.date or self.answer.files) + # Being hidden makes you empty even if an answer exists + or self.is_hidden() + ) + + def get_global_context(self) -> dict: + return self.parent.get_global_context() + + def __str__(self): + return f"Field({self.question.slug}, {self.get_value()})" + + +class FieldSet(BaseField): + def __init__( + self, document, form=None, parent=None, question=None, global_context=None + ): + # TODO: prefetch document once we have the structure built up + # + self.question = question + self._document = document + self.form = form or document.form + + self._global_context = global_context + + # uplinks always weak + self.parent = weakref.proxy(parent) if parent else None + + self._own_fields = {} + + if parent: + # Our evaluation context needs to update the parent context, because + # all fields are visible globally, *except* within a table row, where + # the *outside* cannot have a look *inside*. + self._context = self.parent.get_context().new_child( + # Only update parent's context if it's a fieldset. If it's a RowSet, + # we don't want to "leak" out our fields + self._own_fields, + update_parent=isinstance(parent, FieldSet), + ) + else: + self._context = Context(self._own_fields) + + self._build_context() + + def is_empty(self): + # Fieldset: If *any* field is non-empty, we consider ourselves also + # non-empty. + # We reverse the lookup here to speed up (don't need to see if *all* are + # empty, just if *one* has a value) + if self.is_hidden(): + # Hidden makes us empty, even if there's theoretically a value. + # We do the "cheap" check first, then iterate over the children. + return True + has_at_least_one_value = any(not child.is_empty() for child in self.children()) + return not has_at_least_one_value + + def get_context(self): + return self._context + + def get_value(self): + if self.is_hidden(): + return {} + return { + formfield.question.slug: formfield.get_value() + for formfield in self._own_fields.values() + } + + def is_required(self) -> bool: + # Fieldsets (in other words - subforms) should never be required. + # TODO: Verify this assumption + + return False + + def get_all_fields(self) -> Iterable[BaseField]: + """Return all fields in the structure, as an iterator. + + Yields (slug,field) tuples. But note that a slug may be repeated + as we iterate over table rows. + + NOTE: For tables, the same *question* may be repeated in each row. This + is intended and normal behaviour. + """ + for formfield in self._own_fields.values(): + yield formfield + if isinstance(formfield, FieldSet): + yield from formfield.get_all_fields() + if isinstance(formfield, RowSet): + # row sets *must* have fieldsets as children. Let's loop + # over them here + for child in formfield.children(): + yield child + yield from child.get_all_fields() + + def find_all_fields_by_slug(self, slug: str) -> list[BaseField]: + """Return all fields with the given question slug. + + This may return multiple fields, as tables are traversed as well. + + Should be used for debugging or analytics only. If you need the one + field that the `answer` transform would return in this context, use + `.get_field()` instead. + """ + result = [] + for formfield in self.get_all_fields(): + if formfield.question and formfield.slug() == slug: + result.append(formfield) + return result + + def children(self): + # This should already be sorted, as the context buildup + # is doing that for us. TODO: Verify this claim + return list(self._own_fields.values()) + + def _build_context(self): + # context inheritance: The ChainMap allows lookups in "parent" + # contexts, so a row context will be able to look "out". We implement + # form questions the same way, even though not strictly neccessary + + root_answers = self._document.answers.all().select_related("question") + answers_by_q_slug = {ans.question_id: ans for ans in root_answers} + + formquestions: QuerySet[FormQuestion] = FormQuestion.objects.filter( + form=self.form + ).order_by("-sort") + + for fq in formquestions: + question = fq.question + if question.type == Question.TYPE_FORM: + self._context[question.slug] = FieldSet( + document=self._document, + # question=question, + form=question.sub_form, + parent=self, + question=question, + global_context=self.get_global_context(), + ) + elif question.type == Question.TYPE_TABLE: + self._context[question.slug] = RowSet( + question=question, + answer=answers_by_q_slug.get(question.slug), + parent=self, + ) + else: + # "leaf" question + self._context[question.slug] = ValueField( + question=question, + answer=answers_by_q_slug.get(question.slug), + parent=self, + ) + + def __str__(self): + return f"FieldSet({self.form.slug})" + + +class RowSet(BaseField): + rows: list[FieldSet] + + def __init__(self, question, parent, answer: Optional[Answer] = None): + self.form = question.row_form + self.question = question + if not answer: + self.rows = [] + return + self.answer = answer + + # our own context will not contain anything useful, but we're using it + # here to chain the outer context in a clean way down to the rows + self.parent = weakref.proxy(parent) + self._own_fields = {} + # The RowSet context differs from the FieldSet context such that any + # fields within the rowset shall not "leak" outside + self._context = self.parent.get_context().new_child( + self._own_fields, update_parent=False + ) + + self.rows = [ + FieldSet( + document=row_doc.document, + question=question, + form=question.row_form, + parent=self, + global_context=self.get_global_context(), + ) + for row_doc in AnswerDocument.objects.all() + .filter(answer=answer) + .order_by("-sort") + ] or [] + + def get_value(self): + if self.is_hidden(): + return [] + + return [row.get_value() for row in self.children()] + + def get_context(self): + return self._context + + def children(self): + return self.rows + + def get_global_context(self) -> dict: + return self.parent.get_global_context() + + def is_empty(self): + # Table is considered empty if it has no rows. + # Hidden implies empty, even if there *theoretically* is an answer + # present + return self.is_hidden() or not bool(self.rows) + + def __str__(self): + return f"RowSet({self.form.slug})" + + +def print_document_structure(fieldset: FieldSet, print_fn=None): # pragma: no cover + """Print a document's structure. + + Intended halfway as an example on how to use the structure + classes, and a debugging utility. + """ + ind = {"i": 0} + + print_fn = print_fn or print + + @singledispatch + def visit(vis): + raise Exception(f"generic visit(): {vis}") + + @visit.register(FieldSet) + def _(vis: FieldSet): + print_fn(" " * ind["i"], vis) + ind["i"] += 1 + for sub in vis.children(): + visit(sub) + ind["i"] -= 1 + + @visit.register(RowSet) + def _(vis: RowSet): + print_fn(" " * ind["i"], vis) + ind["i"] += 1 + for sub in vis.children(): + visit(sub) + ind["i"] -= 1 + + @visit.register(ValueField) + def _(vis): + print_fn(" " * ind["i"], vis) + ind["i"] += 1 + ind["i"] -= 1 + + visit(fieldset) + + +def list_document_structure(fieldset): + out_lines = [] + + def fake_print(*args): + out_lines.append(" ".join([str(x) for x in args])) + + print_document_structure( + fieldset, + print_fn=fake_print, + ) + return out_lines diff --git a/caluma/caluma_form/tests/test_jexl.py b/caluma/caluma_form/tests/test_jexl.py index f86edf57c..a7233204b 100644 --- a/caluma/caluma_form/tests/test_jexl.py +++ b/caluma/caluma_form/tests/test_jexl.py @@ -272,9 +272,13 @@ def test_new_jexl_expressions( elif document_owner == "root_case": root_case = case_factory(workflow__slug="main-case-workflow", document=document) - # expression test method: we delete an answer and set it's is_hidden + document.refresh_from_db() + + # Expression test method: we delete an answer and set it's is_hidden # to an expression to be tested. If the expression evaluates to True, - # we won't have a ValidationError. + # the question is hidden and the missing answer is not a problem, thus + # we don't get a validation error. If it evaluates to False, the missing answer + # causes an exception, and we will know. answers[question].delete() questions[question].is_hidden = expr questions[question].save() @@ -287,7 +291,24 @@ def do_check(): except validators.CustomValidationError: # pragma: no cover return False - assert do_check() == expectation + # if do_check() != expectation: + # breakpoint() + context = [] + from caluma.caluma_form import structure2 + + fieldset = structure2.FieldSet(document) + structure2.print_document_structure( + fieldset, + print_fn=lambda *x: context.append(" ".join([str(f) for f in x])), + ) + ctx_str = "\n".join(context) + result = do_check() + relevant_field = fieldset.find_all_fields_by_slug(question)[0] + assert result == expectation, ( + f"Expected JEXL({expr}) on question '{question}' to evaluate " + f"to {expectation} but got {result}; context was: \n{ctx_str};\n" + f"field-local `info` context: {relevant_field.get_local_info_context()}" + ) def test_answer_transform_on_hidden_question(info, form_and_document): diff --git a/caluma/caluma_form/tests/test_structure.py b/caluma/caluma_form/tests/test_structure.py new file mode 100644 index 000000000..196594f09 --- /dev/null +++ b/caluma/caluma_form/tests/test_structure.py @@ -0,0 +1,112 @@ +# Test cases for the (NEW!) structure utility class + +from datetime import datetime + +import pytest + +from caluma.caluma_form import structure2 +from caluma.caluma_form.models import Question + + +@pytest.fixture() +def simple_form_structure( + db, form_factory, form_question_factory, answer_factory, document_factory +): + form = form_factory(slug="root") + root_leaf1 = form_question_factory( + form=form, question__type=Question.TYPE_TEXT, question__slug="leaf1" + ).question + root_leaf2 = form_question_factory( + form=form, question__type=Question.TYPE_INTEGER, question__slug="leaf2" + ).question + + root_formquestion = form_question_factory( + form=form, question__type=Question.TYPE_FORM, question__slug="subform" + ).question + assert root_formquestion.sub_form + + form_question_factory( + form=root_formquestion.sub_form, + question__type=Question.TYPE_TEXT, + question__slug="sub_leaf1", + ) + form_question_factory( + form=root_formquestion.sub_form, + question__type=Question.TYPE_INTEGER, + question__slug="sub_leaf2", + ) + + sub_table = form_question_factory( + form=root_formquestion.sub_form, + question__type=Question.TYPE_TABLE, + question__slug="sub_table", + ).question + + row_field_1 = form_question_factory( + form=sub_table.row_form, + question__type=Question.TYPE_DATE, + question__slug="row_field_1", + ).question + row_field_2 = form_question_factory( + form=sub_table.row_form, + question__type=Question.TYPE_FLOAT, + question__slug="row_field_2", + ).question + + # row field has a dependency *outside* the row, and one *inside* + form_question_factory( + form=sub_table.row_form, + question__type=Question.TYPE_CALCULATED_FLOAT, + question__calc_expression=f"'{root_leaf2.slug}'|answer + '{row_field_2.slug}'|answer", + question__slug="row_calc", + ) + + root_doc = document_factory(form=form) + + answer_factory(document=root_doc, question=root_leaf1, value="Some Value") + answer_factory(document=root_doc, question=root_leaf2, value=33) + table_ans = answer_factory(document=root_doc, question=sub_table) + + row_doc1 = document_factory(form=sub_table.row_form) + answer_factory(document=row_doc1, question=row_field_1, date=datetime(2025, 1, 13)) + answer_factory(document=row_doc1, question=row_field_2, value=99.5) + + row_doc2 = document_factory(form=sub_table.row_form) + answer_factory(document=row_doc2, question=row_field_1, date=datetime(2025, 1, 10)) + answer_factory(document=row_doc2, question=row_field_2, value=23.0) + + table_ans.documents.add(row_doc1) + table_ans.documents.add(row_doc2) + + return root_doc + + +def test_printing_structure(simple_form_structure): + out_lines = [] + + def fake_print(*args): + out_lines.append(" ".join([str(x) for x in args])) + + fieldset = structure2.FieldSet(simple_form_structure) + structure2.print_document_structure( + fieldset, + print_fn=fake_print, + ) + + assert out_lines == [ + " FieldSet(root)", + " Field(leaf1, Some Value)", + " Field(leaf2, 33)", + " FieldSet(measure-evening)", + " Field(sub_leaf1, None)", + " Field(sub_leaf2, None)", + " RowSet(too-wonder-option)", + " FieldSet(too-wonder-option)", + " Field(row_field_1, 2025-01-13)", + " Field(row_field_2, 99.5)", + " Field(row_calc, None)", + " FieldSet(too-wonder-option)", + " Field(row_field_1, 2025-01-10)", + " Field(row_field_2, 23.0)", + " Field(row_calc, None)", + ] diff --git a/caluma/caluma_form/utils.py b/caluma/caluma_form/utils.py index f7ac560bd..d626f183a 100644 --- a/caluma/caluma_form/utils.py +++ b/caluma/caluma_form/utils.py @@ -1,8 +1,12 @@ +from logging import getLogger + from django.db.models import Prefetch from caluma.caluma_form import models, structure from caluma.caluma_form.jexl import QuestionJexl +log = getLogger(__name__) + def prefetch_document(document_id): """Fetch a document while prefetching the entire structure. @@ -117,6 +121,11 @@ def update_or_create_calc_answer( struc = structure.FieldSet(root_doc, root_doc.form) field = struc.get_field(question.slug) + log.debug( + "update_or_create_calc_answer(%s): updating field %s", + question.pk, + field.answer, + ) # skip if question doesn't exist in this document structure if field is None: diff --git a/caluma/caluma_form/validators.py b/caluma/caluma_form/validators.py index a57a14431..70b6ca1b2 100644 --- a/caluma/caluma_form/validators.py +++ b/caluma/caluma_form/validators.py @@ -8,10 +8,11 @@ from rest_framework import exceptions from caluma.caluma_data_source.data_source_handlers import get_data_sources +from caluma.caluma_form import jexl2, structure2 +from caluma.caluma_workflow.models import Case from . import jexl, models, structure from .format_validators import get_format_validators -from .jexl import QuestionJexl from .models import DynamicOption, Question log = getLogger() @@ -97,7 +98,7 @@ def _validation_context(document): # `self.visible_questions()` already needs a context to evaluate # `is_hidden` expressions intermediate_context = { - "form": document.family.form, + "form": document.family.form_id, "document": document.family, "visible_questions": None, "jexl_cache": defaultdict(dict), @@ -116,7 +117,7 @@ def _validation_context(document): validation_context = validation_context or _validation_context(document) - jexl = QuestionJexl(validation_context) + jexl = jexl2.QuestionJexl2(validation_context) with jexl.use_field_context( validation_context["structure"].get_field(question.slug) ): @@ -218,11 +219,22 @@ def _remove_unused_dynamic_options(self, question, document, used_values): ).delete() def _validate_question_table( - self, question, value, document, user, instance=None, origin=False, **kwargs + self, + question, + value, + document, + user, + validation_context: structure2.RowSet, + instance=None, + origin=False, + **kwargs, ): if not origin: - for row_doc in value: - DocumentValidator().validate(row_doc, user=user, **kwargs) + # Rowsets should always be validated in context, so we can use that + for row in validation_context.children(): + DocumentValidator().validate( + row._document, user=user, **kwargs, validation_context=row + ) return # this answer was the entry-point of the validation @@ -312,165 +324,94 @@ def validate( if not validation_context: validation_context = self._validation_context(document) - self._validate_required(validation_context) + required_but_empty = [] - for answer in document.answers.filter( - question_id__in=validation_context["visible_questions"] - ): - validator = AnswerValidator() - validator.validate( - document=document, - question=answer.question, - value=answer.value, - documents=answer.documents.all(), - user=user, - validation_context=validation_context, - data_source_context=data_source_context, + if len(__import__("traceback").extract_stack()) > 200: + breakpoint() + + all_fields = list(validation_context.get_all_fields()) + for field in all_fields: + if field.is_required() and field.is_empty(): + required_but_empty.append(field) + + if required_but_empty: + affected_slugs = [f.slug() for f in required_but_empty if f.slug()] + raise CustomValidationError( + f"Question{'' if len(affected_slugs) == 1 else 's'} " + f"{', '.join(affected_slugs)} are required but not provided.", + slugs=affected_slugs, ) + for field in all_fields: + if field.is_visible() and field.answer: + # if answer is not given, the required_but_empty check above would + # already have raised an exception + validator = AnswerValidator() + validator.validate( + document=field.answer.document, + question=field.question, + value=field.answer.value, + documents=field.answer.documents.all(), + user=user, + # TODO those contexts should probably be dropped, + # or passed along as the correct field + # form the structure2 object + validation_context=field, + data_source_context=data_source_context, + ) + def _validation_context(self, document): - # we need to build the context in two steps (for now), as - # `self.visible_questions()` already needs a context to evaluate - # `is_hidden` expressions - intermediate_context = { - "form": document.form, - "document": document, - "visible_questions": None, - "jexl_cache": defaultdict(dict), - "structure": structure.FieldSet(document, document.form), - } + relevant_case: Case = ( + getattr(document, "case", None) + or getattr(getattr(document, "work_item", None), "case", None) + or None + ) - intermediate_context["visible_questions"] = self.visible_questions( - document, intermediate_context + case_info = ( + { + # Why are we even represent this in context of the case? + # The form does not belong there, it's not even there in + # the model + "form": relevant_case.document.form_id, + "workflow": relevant_case.workflow_id, + "root": { + "form": relevant_case.family.document.form_id, + "workflow": relevant_case.family.workflow_id, + }, + } + if relevant_case + else None + ) + workitem_info = ( + document.work_item.__dict__ if hasattr(document, "work_item") else None ) - return intermediate_context + context = { + # TODO: Build the global context so it's the same as it was before + # the refactoring + "info": { + "document": document.family, + "form": document.form, + "case": case_info, + "work_item": workitem_info, + } + } - def visible_questions(self, document, validation_context=None): + return structure2.FieldSet(document, global_context=context) + + def visible_questions(self, document) -> list[Question]: """Evaluate the visibility of the questions for the given context. This evaluates the `is_hidden` expression for each question to decide if the question is visible. Return a list of question slugs that are visible. """ - if not validation_context: - validation_context = self._validation_context(document) - visible_questions = [] - - q_jexl = jexl.QuestionJexl(validation_context) - for field in validation_context["structure"].children(): - question = field.question - try: - is_hidden = q_jexl.is_hidden(field) - - if is_hidden: - # no need to descend further - continue - - visible_questions.append(question.slug) - if question.type == Question.TYPE_FORM: - # answers to questions in subforms are still in - # the top level document - sub_context = {**validation_context, "structure": field} - visible_questions.extend( - self.visible_questions(document, sub_context) - ) - - elif question.type == Question.TYPE_TABLE: - row_visibles = set() - # make a copy of the validation context, so we - # can reuse it for each row - row_context = {**validation_context} - for row in field.children(): - sub_context = {**row_context, "structure": row} - row_visibles.update( - self.visible_questions(document, sub_context) - ) - visible_questions.extend(row_visibles) - - except (jexl.QuestionMissing, exceptions.ValidationError): - raise - except Exception as exc: - log.error( - f"Error while evaluating `is_hidden` expression on question {question.slug}: " - f"{question.is_hidden}: {str(exc)}" - ) - raise RuntimeError( - f"Error while evaluating `is_hidden` expression on question {question.slug}: " - f"{question.is_hidden}. The system log contains more information" - ) - return visible_questions - - def _validate_required(self, validation_context): # noqa: C901 - """Validate the 'requiredness' of the given answers. - - Raise exceptions if a required question is not answered. - - Since we're iterating and evaluating `is_hidden` as well for this - purpose, we help our call site by returning a list of *non-hidden* - question slugs. - """ - required_but_empty = [] - - q_jexl = jexl.QuestionJexl(validation_context) - for field in validation_context["structure"].children(): - question = field.question - try: - is_hidden = question.slug not in validation_context["visible_questions"] - if is_hidden: - continue - - # The above `is_hidden` is globally cached per question, mainly to optimize DB access. - # This means a question could be marked "visible" as it would be visible - # in another row, but would still be hidden in the local row, if this is a - # table question context. Thus, in this case we need to re-evaluate it's - # hiddenness. Luckily, the JEXL evaluator caches those values (locally). - with q_jexl.use_field_context(field): - if q_jexl.is_hidden(field): - continue - - is_required = q_jexl.is_required(field) - - if question.type == Question.TYPE_FORM: - # form questions's answers are still in the top level document - sub_context = {**validation_context, "structure": field} - self._validate_required(sub_context) - - elif question.type == Question.TYPE_TABLE: - # We need to validate presence in at least one row, but only - # if the table question is required. - if is_required and not field.children(): - raise CustomValidationError( - f"no rows in {question.slug}", slugs=[question.slug] - ) - for row in field.children(): - sub_context = {**validation_context, "structure": row} - self._validate_required(sub_context) - else: - value = field.value() - if value in EMPTY_VALUES and is_required: - required_but_empty.append(question.slug) - - except CustomValidationError as exc: - required_but_empty.extend(exc.slugs) - - except (jexl.QuestionMissing, exceptions.ValidationError): - raise - except Exception as exc: - log.error( - f"Error while evaluating `is_required` expression on question {question.slug}: " - f"{question.is_required}: {str(exc)}" - ) + validation_context = self._validation_context(document) - raise RuntimeError( - f"Error while evaluating `is_required` expression on question {question.slug}: " - f"{question.is_required}. The system log contains more information" - ) - - if required_but_empty: - raise CustomValidationError( - f"Questions {','.join(required_but_empty)} are required but not provided.", - slugs=required_but_empty, - ) + return [ + field.question + for field in validation_context.get_all_fields() + if field.is_visible() + ] class QuestionValidator: