Commit
WIP: decouple public query types from the old ones, rework interfaces.
TallJimbo committed Jan 12, 2024
1 parent 560646d commit bb5a22a
Showing 14 changed files with 841 additions and 801 deletions.
43 changes: 23 additions & 20 deletions python/lsst/daf/butler/direct_query_driver/_driver.py
@@ -39,11 +39,22 @@
from .._dataset_type import DatasetType
from ..dimensions import DataIdValue, DimensionElement, DimensionGroup, DimensionUniverse
from ..queries import tree as qt
from ..queries.data_coordinate_results import DataCoordinateResultPage, DataCoordinateResultSpec
from ..queries.dataset_results import DatasetRefResultPage, DatasetRefResultSpec
from ..queries.dimension_record_results import DimensionRecordResultPage, DimensionRecordResultSpec
from ..queries.driver import PageKey, QueryDriver, ResultPage, ResultSpec
from ..queries.general_results import GeneralResultPage, GeneralResultSpec
from ..queries.driver import (
DataCoordinateResultPage,
DatasetRefResultPage,
DimensionRecordResultPage,
GeneralResultPage,
PageKey,
QueryDriver,
ResultPage,
)
from ..queries.result_specs import (
DataCoordinateResultSpec,
DatasetRefResultSpec,
DimensionRecordResultSpec,
GeneralResultSpec,
ResultSpec,
)
from ..registry.managers import RegistryManagerInstances

if TYPE_CHECKING:
@@ -147,7 +158,7 @@ def count(
tree: qt.QueryTree,
*,
dimensions: DimensionGroup,
datasets: frozenset[str],
datasets: Set[str],
exact: bool,
discard: bool,
) -> int:
@@ -194,14 +205,8 @@ def _build_sql_select(
else:
sql_builder, postprocessing = self._make_vanilla_sql_builder(tree, columns_to_select)
# TODO: make results unique over columns_to_select, while taking into
# account postprocessing columns
return sql_builder.sql_select(
columns_to_select,
postprocessing,
order_by=[self._build_sql_order_by_expression(sql_builder, term) for term in tree.order_terms],
limit=tree.limit,
offset=tree.offset,
)
# account postprocessing columns.
return sql_builder.sql_select(columns_to_select, postprocessing)

def _make_vanilla_sql_builder(
self,
@@ -324,15 +329,13 @@ def _make_find_first_sql_builder(
#

# We'll start with the Common Table Expression (CTE) at the top, which
# we mostly get from _build_sql_selet. Note that we need to use
# 'columns_required' to populate the SELECT clause list, because this
# isn't the outermost query, and hence we need to propagate columns
# we'll use but not return to the user through it.
# we mostly get from _make_vanilla_sql_builder. Note that we need to
# use 'columns_required' to populate the SELECT clause list, because
# this isn't the outermost query, and hence we need to propagate
# columns we'll use but not return to the user through it.
rank = qt.DatasetFieldReference.model_construct(dataset_type=dataset_type, field="rank")
internal_columns = set(columns_to_select)
internal_columns.add(rank)
for term in tree.order_terms:
internal_columns.update(term.gather_required_columns())
base_sql_builder, postprocessing = self._make_vanilla_sql_builder(tree, internal_columns)
base_select, postprocessing = base_sql_builder.sql_select(internal_columns, postprocessing)
internal_columns.update(postprocessing.gather_columns_required())
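The find-first comment above describes a window-function pattern that is easier to see in isolation. The sketch below is illustrative only and uses hypothetical table and column names (the real builder assembles this from the query tree and the collection search path): a CTE attaches a per-data-ID "rank" that follows the collection search order, and the outer query keeps only rank 1.

# Illustrative sketch only: hypothetical table and column names, not butler's
# real schema or query builder.
import sqlalchemy

metadata = sqlalchemy.MetaData()
dataset_search = sqlalchemy.Table(
    "dataset_search",
    metadata,
    sqlalchemy.Column("data_id", sqlalchemy.BigInteger),
    sqlalchemy.Column("dataset_id", sqlalchemy.BigInteger),
    sqlalchemy.Column("collection_position", sqlalchemy.Integer),
)

# CTE: attach a window-function 'rank' that orders each data ID's matches by
# their collection's position in the search path.
ranked = sqlalchemy.select(
    dataset_search.c.data_id,
    dataset_search.c.dataset_id,
    sqlalchemy.func.row_number()
    .over(
        partition_by=dataset_search.c.data_id,
        order_by=dataset_search.c.collection_position,
    )
    .label("rank"),
).cte("ranked")

# Outer SELECT keeps only the first match per data ID.
find_first = sqlalchemy.select(ranked.c.data_id, ranked.c.dataset_id).where(ranked.c.rank == 1)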
6 changes: 6 additions & 0 deletions python/lsst/daf/butler/queries/__init__.py
@@ -24,3 +24,9 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from ._base import *
from ._data_coordinate_query_results import *
from ._dataset_query_results import *
from ._dimension_record_query_results import *
from ._query import *
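These wildcard imports re-export each submodule's public names (its __all__) at the package level, so downstream code can import the new query interfaces directly from lsst.daf.butler.queries. The names in the example below are illustrative assumptions about what those submodules export; this diff does not show them.

# Hypothetical import, assuming the submodules export these names via __all__.
from lsst.daf.butler.queries import Query, DataCoordinateQueryResults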
217 changes: 217 additions & 0 deletions python/lsst/daf/butler/queries/_base.py
@@ -0,0 +1,217 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("ResultSpecBase", "QueryBase", "HomogeneousQueryBase", "CountableQueryBase", "QueryResultsBase")

from abc import ABC, abstractmethod
from collections.abc import Iterable, Set
from typing import Any, Self

import pydantic

from ..dimensions import DimensionGroup
from .convert_args import convert_order_by_args
from .driver import QueryDriver
from .expression_factory import ExpressionProxy
from .tree import OrderExpression, QueryTree


class ResultSpecBase(pydantic.BaseModel):
"""Base class for all query-result specification objects."""

order_by: tuple[OrderExpression, ...] = ()
"""Expressions to sort the rows by."""

offset: int = 0
"""Index of the first row to return."""

limit: int | None = None
"""Maximum number of rows to return, or `None` for no bound."""


class QueryBase(ABC):
@abstractmethod
def any(self, *, execute: bool = True, exact: bool = True) -> bool:
"""Test whether the query would return any rows.
Parameters
----------
execute : `bool`, optional
If `True`, execute at least a ``LIMIT 1`` query if it cannot be
determined prior to execution that the query would return no rows.
exact : `bool`, optional
If `True`, run the full query and perform post-query filtering if
needed, until at least one result row is found. If `False`, the
returned result does not account for post-query filtering, and
hence may be `True` even when all result rows would be filtered
out.
Returns
-------
any : `bool`
`True` if the query would (or might, depending on arguments) yield
result rows. `False` if it definitely would not.
"""
raise NotImplementedError()

@abstractmethod
def explain_no_results(self, execute: bool = True) -> Iterable[str]:
"""Return human-readable messages that may help explain why the query
yields no results.
Parameters
----------
execute : `bool`, optional
If `True` (default) execute simplified versions (e.g. ``LIMIT 1``)
of aspects of the tree to more precisely determine where rows were
filtered out.
Returns
-------
messages : `~collections.abc.Iterable` [ `str` ]
String messages that describe reasons the query might not yield any
results.
"""
raise NotImplementedError()
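
# Illustrative usage sketch, not part of this commit ('results' stands for any
# concrete QueryBase object): the usual diagnostic idiom is a cheap emptiness
# check followed by explain_no_results() when nothing comes back.
#
#     if not results.any(execute=True, exact=False):
#         for message in results.explain_no_results():
#             print(message)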


class HomogeneousQueryBase(QueryBase):
def __init__(self, driver: QueryDriver, tree: QueryTree):
self._driver = driver
self._tree = tree

@property
def dimensions(self) -> DimensionGroup:
"""All dimensions included in the query's columns."""
return self._tree.dimensions

def any(self, *, execute: bool = True, exact: bool = True) -> bool:
# Docstring inherited.
return self._driver.any(self._tree, execute=execute, exact=exact)

def explain_no_results(self, execute: bool = True) -> Iterable[str]:
# Docstring inherited.
return self._driver.explain_no_results(self._tree, execute=execute)


class CountableQueryBase(QueryBase):
@abstractmethod
def count(self, *, exact: bool = True, discard: bool = False) -> int:
"""Count the number of rows this query would return.
Parameters
----------
exact : `bool`, optional
If `True`, run the full query and perform post-query filtering if
needed to account for that filtering in the count. If `False`, the
result may be an upper bound.
discard : `bool`, optional
If `True`, compute the exact count even if it would require running
the full query and then throwing away the result rows after
counting them. If `False`, this is an error, as the user would
usually be better off executing the query first to fetch its rows
into a new query (or passing ``exact=False``). Ignored if
``exact=False``.
Returns
-------
count : `int`
The number of rows the query would return, or an upper bound if
``exact=False``.
"""
raise NotImplementedError()
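
# Illustrative usage sketch, not part of this commit ('results' stands for any
# concrete CountableQueryBase object): an inexact count is cheap but may be an
# upper bound, while an exact count that needs post-query filtering must opt
# in to running the full query with discard=True.
#
#     upper_bound = results.count(exact=False)
#     exact_count = results.count(exact=True, discard=True)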


class QueryResultsBase(HomogeneousQueryBase, CountableQueryBase):
def count(self, *, exact: bool = True, discard: bool = False) -> int:
# Docstring inherited.
return self._driver.count(
self._tree,
dimensions=self.dimensions,
datasets=self._get_datasets(),
exact=exact,
discard=discard,
)

def order_by(self, *args: str | OrderExpression | ExpressionProxy) -> Self:
"""Return a new query that yields ordered results.
Parameters
----------
*args : `str`
Names of the columns/dimensions to use for ordering. Column name
can be prefixed with minus (``-``) to use descending ordering.
Returns
-------
result : `QueryResultsBase`
An ordered version of this query results object.
Notes
-----
If this method is called multiple times, the new sort terms replace
the old ones.
"""
return self._copy(
self._tree, order_by=convert_order_by_args(self.dimensions, self._get_datasets(), *args)
)

def limit(self, limit: int | None = None, offset: int = 0) -> Self:
"""Return a new query that slices its result rows positionally.
Parameters
----------
limit : `int` or `None`, optional
Upper limit on the number of returned records.
offset : `int`, optional
The number of records to skip before returning at most ``limit``
records.
Returns
-------
result : `QueryResultsBase`
A sliced version of this query results object.
Notes
-----
If this method is called multiple times, the new slice parameters
replace the old ones. Slicing always occurs after sorting, even if
`limit` is called before `order_by`.
"""
return self._copy(self._tree, limit=limit, offset=offset)
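
# Illustrative usage sketch, not part of this commit ('results' stands for a
# concrete QueryResultsBase object; 'band' and 'visit' are example dimension
# names): each call returns a new object, later calls replace earlier sort or
# slice settings, and slicing is always applied after sorting.
#
#     page = results.order_by("band", "-visit").limit(100, offset=200)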

@abstractmethod
def _get_datasets(self) -> Set[str]:
"""Return all dataset types included in the query's result rows."""
raise NotImplementedError()

@abstractmethod
def _copy(self, tree: QueryTree, **kwargs: Any) -> Self:
"""Return a modified copy of ``self``."""
raise NotImplementedError()
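
To make the two abstract hooks concrete, here is a minimal sketch of what a subclass could look like. It is illustrative only and assumes the subclass carries a ResultSpecBase-derived spec; it reuses the names imported at the top of _base.py. The real result classes added elsewhere in this commit (for data coordinates, dataset refs, and dimension records) are more elaborate.

# Illustrative only; not the implementation added by this commit.
class ExampleQueryResults(QueryResultsBase):
    def __init__(self, driver: QueryDriver, tree: QueryTree, spec: ResultSpecBase):
        super().__init__(driver, tree)
        self._spec = spec

    def _get_datasets(self) -> Set[str]:
        # This example result type carries no dataset columns.
        return frozenset()

    def _copy(self, tree: QueryTree, **kwargs: Any) -> Self:
        # order_by() and limit() funnel their settings here via **kwargs;
        # fold them into a copy of the pydantic spec.
        return type(self)(self._driver, tree, self._spec.model_copy(update=kwargs))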