From acefb48ee5775da222a31efaf80e7c00dc0173b0 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Tue, 20 Sep 2022 16:26:36 -0400 Subject: [PATCH 01/18] Deprecate component dataset support in Registry query methods. See RFC-879. --- python/lsst/daf/butler/registry/interfaces/_datasets.py | 4 ++++ python/lsst/daf/butler/registry/queries/_query_backend.py | 4 ++++ python/lsst/daf/butler/registry/queries/_results.py | 4 ++-- python/lsst/daf/butler/registry/tests/_registry.py | 3 +-- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/python/lsst/daf/butler/registry/interfaces/_datasets.py b/python/lsst/daf/butler/registry/interfaces/_datasets.py index 59822c6741..a63f2b0def 100644 --- a/python/lsst/daf/butler/registry/interfaces/_datasets.py +++ b/python/lsst/daf/butler/registry/interfaces/_datasets.py @@ -558,6 +558,10 @@ def resolve_wildcard( datasets were not matched by the expression. Fully-specified component datasets (`str` or `DatasetType` instances) are always included. + + Values other than `False` are deprecated, and only `False` will be + supported after v26. After v27 this argument will be removed + entirely. missing : `list` of `str`, optional String dataset type names that were explicitly given (i.e. not regular expression patterns) but not found will be appended to this diff --git a/python/lsst/daf/butler/registry/queries/_query_backend.py b/python/lsst/daf/butler/registry/queries/_query_backend.py index 1156860174..c5b6338d1b 100644 --- a/python/lsst/daf/butler/registry/queries/_query_backend.py +++ b/python/lsst/daf/butler/registry/queries/_query_backend.py @@ -220,6 +220,10 @@ def resolve_single_dataset_type_wildcard( datasets were not matched by the expression. Fully-specified component datasets (`str` or `DatasetType` instances) are always included. + + Values other than `False` are deprecated, and only `False` will be + supported after v26. After v27 this argument will be removed + entirely. explicit_only : `bool`, optional If `True`, require explicit `DatasetType` instances or `str` names, with `re.Pattern` instances deprecated and ``...`` prohibited. diff --git a/python/lsst/daf/butler/registry/queries/_results.py b/python/lsst/daf/butler/registry/queries/_results.py index 038c5c7f3f..74fce928bc 100644 --- a/python/lsst/daf/butler/registry/queries/_results.py +++ b/python/lsst/daf/butler/registry/queries/_results.py @@ -229,12 +229,12 @@ def subset( def findDatasets( self, - datasetType: Any, + datasetType: DatasetType | str, collections: Any, *, findFirst: bool = True, components: bool | None = None, - ) -> DatasetQueryResults: + ) -> ParentDatasetQueryResults: """Find datasets using the data IDs identified by this query. Parameters diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py index 0bc7a79637..1c0c7b77be 100644 --- a/python/lsst/daf/butler/registry/tests/_registry.py +++ b/python/lsst/daf/butler/registry/tests/_registry.py @@ -632,8 +632,7 @@ def testDatasetTypeComponentQueries(self): # that component dataset type if components=None. with self.assertWarns(FutureWarning): self.assertEqual( - {"bias.wcs"}, - NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=None)).names, + {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names ) self.assertEqual( set(), From d9ed29aaa5be8976c0d63e48322a1a9da0cd6fe1 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Wed, 28 Sep 2022 12:54:11 -0400 Subject: [PATCH 02/18] Remove CollectionSearch. 
This involves some guesses about how we'll update the RemoteRegistry server, which I'm not attempting to change right now since I don't know how to test it. --- python/lsst/daf/butler/__init__.py | 1 - python/lsst/daf/butler/registry/__init__.py | 1 - python/lsst/daf/butler/registry/wildcards.py | 109 +------------------ 3 files changed, 1 insertion(+), 110 deletions(-) diff --git a/python/lsst/daf/butler/__init__.py b/python/lsst/daf/butler/__init__.py index 13196509ad..f15193ce87 100644 --- a/python/lsst/daf/butler/__init__.py +++ b/python/lsst/daf/butler/__init__.py @@ -83,7 +83,6 @@ # Do not import or lift symbols from 'server' or 'server_models'. # Import the registry subpackage directly for other symbols. from .registry import ( - CollectionSearch, CollectionType, MissingCollectionError, MissingDatasetTypeError, diff --git a/python/lsst/daf/butler/registry/__init__.py b/python/lsst/daf/butler/registry/__init__.py index 740ce8ae67..57e4788175 100644 --- a/python/lsst/daf/butler/registry/__init__.py +++ b/python/lsst/daf/butler/registry/__init__.py @@ -33,7 +33,6 @@ from ._exceptions import * from ._registry import * from ._registry_factory import * -from .wildcards import CollectionSearch # Some modules intentionally not imported, either because they are purely # internal (e.g. nameShrinker.py) or they contain implementations that are diff --git a/python/lsst/daf/butler/registry/wildcards.py b/python/lsst/daf/butler/registry/wildcards.py index bbfc834b16..c885af6f4c 100644 --- a/python/lsst/daf/butler/registry/wildcards.py +++ b/python/lsst/daf/butler/registry/wildcards.py @@ -29,20 +29,17 @@ __all__ = ( "CategorizedWildcard", "CollectionWildcard", - "CollectionSearch", "DatasetTypeWildcard", ) import contextlib import dataclasses import re -from collections.abc import Callable, Iterable, Iterator, Mapping +from collections.abc import Callable, Iterable, Mapping from types import EllipsisType from typing import Any -from deprecated.sphinx import deprecated from lsst.utils.iteration import ensure_iterable -from pydantic import RootModel from .._dataset_type import DatasetType from ..utils import globToRegex @@ -265,110 +262,6 @@ def process(element: Any, alreadyCoerced: bool = False) -> EllipsisType | None: """ -class _CollectionSearch(RootModel): - root: tuple[str, ...] - - -@deprecated( - reason="Tuples of string collection names are now preferred. Will be removed after v26.", - version="v25.0", - category=FutureWarning, -) -class CollectionSearch(_CollectionSearch): - """An ordered search path of collections. - - The `fromExpression` method should almost always be used to construct - instances, as the regular constructor performs no checking of inputs (and - that can lead to confusing error messages downstream). - - Notes - ----- - A `CollectionSearch` is used to find a single dataset (or set of datasets - with different dataset types or data IDs) according to its dataset type and - data ID, giving preference to collections in the order in which they are - specified. A `CollectionWildcard` can be constructed from a broader range - of expressions but does not order the collections to be searched. - - `CollectionSearch` is an immutable sequence of `str` collection names. - - A `CollectionSearch` instance constructed properly (e.g. 
via - `fromExpression`) is a unique representation of a particular search path; - it is exactly the same internally and compares as equal to any - `CollectionSearch` constructed from an equivalent expression, regardless of - how different the original expressions appear. - """ - - @classmethod - def fromExpression(cls, expression: Any) -> CollectionSearch: - """Process a general expression to construct a `CollectionSearch` - instance. - - Parameters - ---------- - expression : `~typing.Any` - May be: - - - a `str` collection name; - - an iterable of `str` collection names; - - another `CollectionSearch` instance (passed through unchanged). - - Duplicate entries will be removed (preserving the first appearance - of each collection name). - - Returns - ------- - collections : `CollectionSearch` - A `CollectionSearch` instance. - """ - # First see if this is already a CollectionSearch; just pass that - # through unchanged. This lets us standardize expressions (and turn - # single-pass iterators into multi-pass iterables) in advance and pass - # them down to other routines that accept arbitrary expressions. - if isinstance(expression, cls): - return expression - try: - wildcard = CategorizedWildcard.fromExpression( - expression, - allowAny=False, - allowPatterns=False, - ) - except TypeError as err: - raise CollectionExpressionError(str(err)) from None - assert wildcard is not ... - assert not wildcard.patterns - assert not wildcard.items - deduplicated = [] - for name in wildcard.strings: - if name not in deduplicated: - deduplicated.append(name) - model = cls(tuple(deduplicated)) - return model - - def explicitNames(self) -> Iterator[str]: - """Iterate over collection names that were specified explicitly.""" - yield from self.root - - def __iter__(self) -> Iterator[str]: # type: ignore - yield from self.root - - def __len__(self) -> int: - return len(self.root) - - def __getitem__(self, index: Any) -> str: - return self.root[index] - - def __eq__(self, other: Any) -> bool: - if isinstance(other, CollectionSearch): - return self.root == other.root - return False - - def __str__(self) -> str: - return "[{}]".format(", ".join(self)) - - def __repr__(self) -> str: - return f"CollectionSearch({self.root!r})" - - @dataclasses.dataclass(frozen=True) class CollectionWildcard: """A validated wildcard for collection names. From 3cb054a1a597fb2fd87e5610f36919352b9d151b Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Wed, 28 Sep 2022 17:02:35 -0400 Subject: [PATCH 03/18] Remove Registry support for component dataset types. --- python/lsst/daf/butler/cli/opt/options.py | 5 +- python/lsst/daf/butler/registry/_registry.py | 52 ++---- .../datasets/byDimensions/_manager.py | 108 +++--------- .../butler/registry/interfaces/_datasets.py | 26 +-- .../butler/registry/queries/_query_backend.py | 66 +------- .../daf/butler/registry/queries/_results.py | 30 ++-- .../registry/queries/_sql_query_backend.py | 6 +- .../lsst/daf/butler/registry/sql_registry.py | 156 +++++++----------- .../daf/butler/registry/tests/_registry.py | 142 ---------------- tests/test_cliCmdQueryDatasetTypes.py | 8 +- 10 files changed, 129 insertions(+), 470 deletions(-) diff --git a/python/lsst/daf/butler/cli/opt/options.py b/python/lsst/daf/butler/cli/opt/options.py index 61290ec987..fb8258bb98 100644 --- a/python/lsst/daf/butler/cli/opt/options.py +++ b/python/lsst/daf/butler/cli/opt/options.py @@ -115,9 +115,8 @@ def makeCollectionTypes( help=unwrap( """For --components, apply all expression patterns to component dataset type names as well. 
For --no-components, - never apply patterns to components. Default is False. - Fully-specified component datasets (`str` or `DatasetType` - instances) are always included.""" + never apply patterns to components. Only --no-components + is now supported.""" ), ) diff --git a/python/lsst/daf/butler/registry/_registry.py b/python/lsst/daf/butler/registry/_registry.py index a35744f5f2..78c1536ade 100644 --- a/python/lsst/daf/butler/registry/_registry.py +++ b/python/lsst/daf/butler/registry/_registry.py @@ -1010,7 +1010,7 @@ def queryDatasetTypes( self, expression: Any = ..., *, - components: bool | None = False, + components: bool = False, missing: list[str] | None = None, ) -> Iterable[DatasetType]: """Iterate over the dataset types whose names match an expression. @@ -1024,16 +1024,8 @@ def queryDatasetTypes( default. See :ref:`daf_butler_dataset_type_expressions` for more information. components : `bool`, optional - If `True`, apply all expression patterns to component dataset type - names as well. If `False`, never apply patterns to components. - If `None`, apply patterns to components only if their - parent datasets were not matched by the expression. - Fully-specified component datasets (`str` or `DatasetType` - instances) are always included. - - Values other than `False` are deprecated, and only `False` will be - supported after v26. After v27 this argument will be removed - entirely. + Must be `False`. Provided only for backwards compatibility. After + v27 this argument will be removed entirely. missing : `list` of `str`, optional String dataset type names that were explicitly given (i.e. not regular expression patterns) but not found will be appended to this @@ -1117,7 +1109,7 @@ def queryDatasets( dataId: DataId | None = None, where: str = "", findFirst: bool = False, - components: bool | None = False, + components: bool = False, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, @@ -1163,16 +1155,8 @@ def queryDatasets( ``collections`` must not contain regular expressions and may not be ``...``. components : `bool`, optional - If `True`, apply all dataset expression patterns to component - dataset type names as well. If `False`, never apply patterns to - components. If `None`, apply patterns to components only - if their parent datasets were not matched by the expression. - Fully-specified component datasets (`str` or `DatasetType` - instances) are always included. - - Values other than `False` are deprecated, and only `False` will be - supported after v26. After v27 this argument will be removed - entirely. + Must be `False`. Provided only for backwards compatibility. After + v27 this argument will be removed entirely. bind : `~collections.abc.Mapping`, optional Mapping containing literal values that should be injected into the ``where`` expression, keyed by the identifiers they replace. @@ -1239,7 +1223,7 @@ def queryDataIds( datasets: Any = None, collections: CollectionArgType | None = None, where: str = "", - components: bool | None = False, + components: bool = False, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, @@ -1284,16 +1268,8 @@ def queryDataIds( key column of a dimension table) dimension name. See :ref:`daf_butler_dimension_expressions` for more information. components : `bool`, optional - If `True`, apply all dataset expression patterns to component - dataset type names as well. If `False`, never apply patterns to - components. 
If `None`, apply patterns to components only - if their parent datasets were not matched by the expression. - Fully-specified component datasets (`str` or `DatasetType` - instances) are always included. - - Values other than `False` are deprecated, and only `False` will be - supported after v26. After v27 this argument will be removed - entirely. + Must be `False`. Provided only for backwards compatibility. After + v27 this argument will be removed entirely. bind : `~collections.abc.Mapping`, optional Mapping containing literal values that should be injected into the ``where`` expression, keyed by the identifiers they replace. @@ -1351,7 +1327,7 @@ def queryDimensionRecords( datasets: Any = None, collections: CollectionArgType | None = None, where: str = "", - components: bool | None = False, + components: bool = False, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, @@ -1384,12 +1360,8 @@ def queryDimensionRecords( `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more information. components : `bool`, optional - Whether to apply dataset expressions to components as well. - See `queryDataIds` for more information. - - Values other than `False` are deprecated, and only `False` will be - supported after v26. After v27 this argument will be removed - entirely. + Must be `False`. Provided only for backwards compatibility. After + v27 this argument will be removed entirely. bind : `~collections.abc.Mapping`, optional Mapping containing literal values that should be injected into the ``where`` expression, keyed by the identifiers they replace. diff --git a/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py b/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py index 5d0fea5777..91b4e98ccb 100644 --- a/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py +++ b/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py @@ -6,18 +6,20 @@ import dataclasses import logging -import warnings -from collections import defaultdict from collections.abc import Iterable, Mapping from typing import TYPE_CHECKING, Any import sqlalchemy -from lsst.utils.introspection import find_outside_stacklevel from ...._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, DatasetType from ....dimensions import DimensionUniverse from ..._collection_summary import CollectionSummary -from ..._exceptions import ConflictingDefinitionError, DatasetTypeError, OrphanedRecordError +from ..._exceptions import ( + ConflictingDefinitionError, + DatasetTypeError, + DatasetTypeExpressionError, + OrphanedRecordError, +) from ...interfaces import DatasetRecordStorage, DatasetRecordStorageManager, VersionTuple from ...wildcards import DatasetTypeWildcard from ._storage import ByDimensionsDatasetRecordStorage, ByDimensionsDatasetRecordStorageUUID @@ -382,115 +384,49 @@ def register(self, datasetType: DatasetType) -> bool: def resolve_wildcard( self, expression: Any, - components: bool | None = False, missing: list[str] | None = None, explicit_only: bool = False, - components_deprecated: bool = True, - ) -> dict[DatasetType, list[str | None]]: + ) -> list[DatasetType]: wildcard = DatasetTypeWildcard.from_expression(expression) - result: defaultdict[DatasetType, set[str | None]] = defaultdict(set) - # This message can be transformed into an error on DM-36303 after v26, - # and the components and components_deprecated arguments can be merged - # into one on DM-36457 after v27. 
- deprecation_message = ( - "Querying for component datasets via Registry query methods is deprecated in favor of using " - "DatasetRef and DatasetType methods on parent datasets. Only components=False will be supported " - "after v26, and the components argument will be removed after v27." - ) + result: list[DatasetType] = [] for name, dataset_type in wildcard.values.items(): parent_name, component_name = DatasetType.splitDatasetTypeName(name) - if component_name is not None and components_deprecated: - warnings.warn( - deprecation_message, FutureWarning, stacklevel=find_outside_stacklevel("lsst.daf.butler") + if component_name is not None: + raise DatasetTypeError( + "Component dataset types are not supported in Registry methods; use DatasetRef or " + "DatasetType methods to obtain components from parents instead." ) if (found_storage := self.find(parent_name)) is not None: - found_parent = found_storage.datasetType - if component_name is not None: - found = found_parent.makeComponentDatasetType(component_name) - else: - found = found_parent + resolved_dataset_type = found_storage.datasetType if dataset_type is not None: - if dataset_type.is_compatible_with(found): + if dataset_type.is_compatible_with(resolved_dataset_type): # Prefer the given dataset type to enable storage class # conversions. - if component_name is not None: - found_parent = dataset_type.makeCompositeDatasetType() - else: - found_parent = dataset_type + resolved_dataset_type = dataset_type else: raise DatasetTypeError( f"Dataset type definition in query expression {dataset_type} is " - f"not compatible with the registered type {found}." + f"not compatible with the registered type {resolved_dataset_type}." ) - result[found_parent].add(component_name) + result.append(resolved_dataset_type) elif missing is not None: missing.append(name) - already_warned = False if wildcard.patterns is ...: if explicit_only: raise TypeError( "Universal wildcard '...' is not permitted for dataset types in this context." ) for datasetType in self._fetch_dataset_types(): - result[datasetType].add(None) - if components: - try: - result[datasetType].update(datasetType.storageClass.allComponents().keys()) - if ( - datasetType.storageClass.allComponents() - and not already_warned - and components_deprecated - ): - warnings.warn( - deprecation_message, - FutureWarning, - stacklevel=find_outside_stacklevel("lsst.daf.butler"), - ) - already_warned = True - except KeyError as err: - _LOG.warning( - f"Could not load storage class {err} for {datasetType.name}; " - "if it has components they will not be included in query results.", - ) + result.append(datasetType) elif wildcard.patterns: if explicit_only: - # After v26 this should raise DatasetTypeExpressionError, to - # be implemented on DM-36303. 
- warnings.warn( - "Passing wildcard patterns here is deprecated and will be prohibited after v26.", - FutureWarning, - stacklevel=find_outside_stacklevel("lsst.daf.butler"), - ) + raise DatasetTypeExpressionError("Wildcard patterns are not supported when explicit only.") dataset_types = self._fetch_dataset_types() for datasetType in dataset_types: if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): - result[datasetType].add(None) - if components is not False: - for datasetType in dataset_types: - if components is None and datasetType in result: - continue - try: - components_for_parent = datasetType.storageClass.allComponents().keys() - except KeyError as err: - _LOG.warning( - f"Could not load storage class {err} for {datasetType.name}; " - "if it has components they will not be included in query results." - ) - continue - for component_name in components_for_parent: - if any( - p.fullmatch(DatasetType.nameWithComponent(datasetType.name, component_name)) - for p in wildcard.patterns - ): - result[datasetType].add(component_name) - if not already_warned and components_deprecated: - warnings.warn( - deprecation_message, - FutureWarning, - stacklevel=find_outside_stacklevel("lsst.daf.butler"), - ) - already_warned = True - return {k: list(v) for k, v in result.items()} + result.append(datasetType) + + return result def getDatasetRef(self, id: DatasetId) -> DatasetRef | None: # Docstring inherited from DatasetRecordStorageManager. diff --git a/python/lsst/daf/butler/registry/interfaces/_datasets.py b/python/lsst/daf/butler/registry/interfaces/_datasets.py index a63f2b0def..fac1d17134 100644 --- a/python/lsst/daf/butler/registry/interfaces/_datasets.py +++ b/python/lsst/daf/butler/registry/interfaces/_datasets.py @@ -539,11 +539,9 @@ def remove(self, name: str) -> None: def resolve_wildcard( self, expression: Any, - components: bool | None = False, missing: list[str] | None = None, explicit_only: bool = False, - components_deprecated: bool = True, - ) -> dict[DatasetType, list[str | None]]: + ) -> list[DatasetType]: """Resolve a dataset type wildcard expression. Parameters @@ -551,17 +549,6 @@ def resolve_wildcard( expression : `~typing.Any` Expression to resolve. Will be passed to `DatasetTypeWildcard.from_expression`. - components : `bool`, optional - If `True`, apply all expression patterns to component dataset type - names as well. If `False`, never apply patterns to components. If - `None`, apply patterns to components only if their parent - datasets were not matched by the expression. Fully-specified - component datasets (`str` or `DatasetType` instances) are always - included. - - Values other than `False` are deprecated, and only `False` will be - supported after v26. After v27 this argument will be removed - entirely. missing : `list` of `str`, optional String dataset type names that were explicitly given (i.e. not regular expression patterns) but not found will be appended to this @@ -569,18 +556,11 @@ def resolve_wildcard( explicit_only : `bool`, optional If `True`, require explicit `DatasetType` instances or `str` names, with `re.Pattern` instances deprecated and ``...`` prohibited. - components_deprecated : `bool`, optional - If `True`, this is a context in which component dataset support is - deprecated. This will result in a deprecation warning when - ``components=True`` or ``components=None`` and a component dataset - is matched. In the future this will become an error. 
Returns ------- - dataset_types : `dict` [ `DatasetType`, `list` [ `None`, `str` ] ] - A mapping with resolved dataset types as keys and lists of - matched component names as values, where `None` indicates the - parent composite dataset type was matched. + dataset_types : `list` [ `DatasetType` ] + A list of resolved dataset types. """ raise NotImplementedError() diff --git a/python/lsst/daf/butler/registry/queries/_query_backend.py b/python/lsst/daf/butler/registry/queries/_query_backend.py index c5b6338d1b..1bfc778ebd 100644 --- a/python/lsst/daf/butler/registry/queries/_query_backend.py +++ b/python/lsst/daf/butler/registry/queries/_query_backend.py @@ -158,11 +158,9 @@ def resolve_collection_wildcard( def resolve_dataset_type_wildcard( self, expression: Any, - components: bool | None = None, missing: list[str] | None = None, explicit_only: bool = False, - components_deprecated: bool = True, - ) -> dict[DatasetType, list[str | None]]: + ) -> list[DatasetType]: """Return the dataset types that match a wildcard expression. Parameters @@ -170,13 +168,6 @@ def resolve_dataset_type_wildcard( expression : `~typing.Any` Names and/or patterns for dataset types; will be passed to `DatasetTypeWildcard.from_expression`. - components : `bool`, optional - If `True`, apply all expression patterns to component dataset type - names as well. If `False`, never apply patterns to components. If - `None` (default), apply patterns to components only if their parent - datasets were not matched by the expression. Fully-specified - component datasets (`str` or `DatasetType` instances) are always - included. missing : `list` of `str`, optional String dataset type names that were explicitly given (i.e. not regular expression patterns) but not found will be appended to this @@ -184,28 +175,19 @@ def resolve_dataset_type_wildcard( explicit_only : `bool`, optional If `True`, require explicit `DatasetType` instances or `str` names, with `re.Pattern` instances deprecated and ``...`` prohibited. - components_deprecated : `bool`, optional - If `True`, this is a context in which component dataset support is - deprecated. This will result in a deprecation warning when - ``components=True`` or ``components=None`` and a component dataset - is matched. In the future this will become an error. Returns ------- - dataset_types : `dict` [ `DatasetType`, `list` [ `None`, `str` ] ] - A mapping with resolved dataset types as keys and lists of - matched component names as values, where `None` indicates the - parent composite dataset type was matched. + dataset_types : `list` [ `DatasetType` ] + A list of resolved dataset types. """ raise NotImplementedError() def resolve_single_dataset_type_wildcard( self, expression: Any, - components: bool | None = None, explicit_only: bool = False, - components_deprecated: bool = True, - ) -> tuple[DatasetType, list[str | None]]: + ) -> DatasetType: """Return a single dataset type that matches a wildcard expression. Parameters @@ -213,48 +195,18 @@ def resolve_single_dataset_type_wildcard( expression : `~typing.Any` Names and/or patterns for the dataset type; will be passed to `DatasetTypeWildcard.from_expression`. - components : `bool`, optional - If `True`, apply all expression patterns to component dataset type - names as well. If `False`, never apply patterns to components. If - `None` (default), apply patterns to components only if their parent - datasets were not matched by the expression. Fully-specified - component datasets (`str` or `DatasetType` instances) are always - included. 
- - Values other than `False` are deprecated, and only `False` will be - supported after v26. After v27 this argument will be removed - entirely. explicit_only : `bool`, optional If `True`, require explicit `DatasetType` instances or `str` names, with `re.Pattern` instances deprecated and ``...`` prohibited. - components_deprecated : `bool`, optional - If `True`, this is a context in which component dataset support is - deprecated. This will result in a deprecation warning when - ``components=True`` or ``components=None`` and a component dataset - is matched. In the future this will become an error. Returns ------- - single_parent : `DatasetType` - The matched parent dataset type. - single_components : `list` [ `str` | `None` ] - The matched components that correspond to this parent, or `None` if - the parent dataset type itself was matched. - - Notes - ----- - This method really finds a single parent dataset type and any number of - components, because it's only the parent dataset type that's known to - registry at all; many callers are expected to discard the - ``single_components`` return value. + single : `DatasetType` + The matched dataset type. """ missing: list[str] = [] matching = self.resolve_dataset_type_wildcard( - expression, - components=components, - missing=missing, - explicit_only=explicit_only, - components_deprecated=components_deprecated, + expression, missing=missing, explicit_only=explicit_only ) if not matching: if missing: @@ -274,13 +226,13 @@ def resolve_single_dataset_type_wildcard( f"Expression {expression!r} matched multiple parent dataset types: " f"{[t.name for t in matching]}, but only one is allowed." ) - ((single_parent, single_components),) = matching.items() + (single_parent,) = matching if missing: raise DatasetTypeError( f"Expression {expression!r} appears to involve multiple dataset types, even though only " f"one ({single_parent.name}) is registered, and only one is allowed here." ) - return single_parent, single_components + return single_parent @abstractmethod def filter_dataset_collections( diff --git a/python/lsst/daf/butler/registry/queries/_results.py b/python/lsst/daf/butler/registry/queries/_results.py index 74fce928bc..d18a3b64a3 100644 --- a/python/lsst/daf/butler/registry/queries/_results.py +++ b/python/lsst/daf/butler/registry/queries/_results.py @@ -53,6 +53,7 @@ DimensionGroup, DimensionRecord, ) +from .._exceptions import DatasetTypeError from ._query import Query from ._structs import OrderByClause @@ -233,7 +234,7 @@ def findDatasets( collections: Any, *, findFirst: bool = True, - components: bool | None = None, + components: bool = False, ) -> ParentDatasetQueryResults: """Find datasets using the data IDs identified by this query. @@ -254,16 +255,8 @@ def findDatasets( passed in). If `True`, ``collections`` must not contain regular expressions and may not be ``...``. components : `bool`, optional - If `True`, apply all expression patterns to component dataset type - names as well. If `False`, never apply patterns to components. If - `None` (default), apply patterns to components only if their parent - datasets were not matched by the expression. Fully-specified - component datasets (`str` or `DatasetType` instances) are always - included. - - Values other than `False` are deprecated, and only `False` will be - supported after v26. After v27 this argument will be removed - entirely. + Must be `False`. Provided only for backwards compatibility. After + v27 this argument will be removed entirely. 
Returns ------- @@ -278,13 +271,18 @@ def findDatasets( MissingDatasetTypeError Raised if the given dataset type is not registered. """ - parent_dataset_type, components_found = self._query.backend.resolve_single_dataset_type_wildcard( - datasetType, components=components, explicit_only=True + if components is not False: + raise DatasetTypeError( + "Dataset component queries are no longer supported by Registry. Use " + "DatasetType methods to obtain components from parent dataset types instead." + ) + resolved_dataset_type = self._query.backend.resolve_single_dataset_type_wildcard( + datasetType, explicit_only=True ) return ParentDatasetQueryResults( - self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True), - parent_dataset_type, - components_found, + self._query.find_datasets(resolved_dataset_type, collections, find_first=findFirst, defer=True), + resolved_dataset_type, + [None], ) def findRelatedDatasets( diff --git a/python/lsst/daf/butler/registry/queries/_sql_query_backend.py b/python/lsst/daf/butler/registry/queries/_sql_query_backend.py index 4c4fa201f1..ca7413ede1 100644 --- a/python/lsst/daf/butler/registry/queries/_sql_query_backend.py +++ b/python/lsst/daf/butler/registry/queries/_sql_query_backend.py @@ -110,14 +110,14 @@ def resolve_collection_wildcard( def resolve_dataset_type_wildcard( self, expression: Any, - components: bool | None = None, missing: list[str] | None = None, explicit_only: bool = False, - components_deprecated: bool = True, ) -> dict[DatasetType, list[str | None]]: # Docstring inherited. return self._managers.datasets.resolve_wildcard( - expression, components, missing, explicit_only, components_deprecated + expression, + missing, + explicit_only, ) def filter_dataset_collections( diff --git a/python/lsst/daf/butler/registry/sql_registry.py b/python/lsst/daf/butler/registry/sql_registry.py index 3391d94157..21f6c1d8f0 100644 --- a/python/lsst/daf/butler/registry/sql_registry.py +++ b/python/lsst/daf/butler/registry/sql_registry.py @@ -40,7 +40,6 @@ import sqlalchemy from lsst.daf.relation import LeafRelation, Relation from lsst.resources import ResourcePathExpression -from lsst.utils.introspection import find_outside_stacklevel from lsst.utils.iteration import ensure_iterable from .._column_tags import DatasetColumnTag @@ -75,6 +74,7 @@ DatasetTypeError, DimensionNameError, InconsistentDataIdError, + MissingDatasetTypeError, NoDefaultCollectionError, OrphanedRecordError, RegistryConfig, @@ -905,9 +905,7 @@ def findDataset( if collection_wildcard.empty(): return None matched_collections = backend.resolve_collection_wildcard(collection_wildcard) - parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard( - datasetType, components_deprecated=False - ) + parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(datasetType) if len(components) > 1: raise DatasetTypeError( f"findDataset requires exactly one dataset type; got multiple components {components} " @@ -1713,7 +1711,7 @@ def queryDatasetTypes( self, expression: Any = ..., *, - components: bool | None = False, + components: bool = False, missing: list[str] | None = None, ) -> Iterable[DatasetType]: """Iterate over the dataset types whose names match an expression. @@ -1727,16 +1725,8 @@ def queryDatasetTypes( default. See :ref:`daf_butler_dataset_type_expressions` for more information. components : `bool`, optional - If `True`, apply all expression patterns to component dataset type - names as well. 
If `False`, never apply patterns to components. - If `None`, apply patterns to components only if their - parent datasets were not matched by the expression. - Fully-specified component datasets (`str` or `DatasetType` - instances) are always included. - - Values other than `False` are deprecated, and only `False` will be - supported after v26. After v27 this argument will be removed - entirely. + Must be `False`. Provided only for backwards compatibility. After + v27 this argument will be removed entirely. missing : `list` of `str`, optional String dataset type names that were explicitly given (i.e. not regular expression patterns) but not found will be appended to this @@ -1753,19 +1743,13 @@ def queryDatasetTypes( lsst.daf.butler.registry.DatasetTypeExpressionError Raised when ``expression`` is invalid. """ - wildcard = DatasetTypeWildcard.from_expression(expression) - composition_dict = self._managers.datasets.resolve_wildcard( - wildcard, - components=components, - missing=missing, - ) - result: list[DatasetType] = [] - for parent_dataset_type, components_for_parent in composition_dict.items(): - result.extend( - parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type - for c in components_for_parent + if components is not False: + raise DatasetTypeError( + "Dataset component queries are no longer supported by Registry. Use " + "DatasetType methods to obtain components from parent dataset types instead." ) - return result + wildcard = DatasetTypeWildcard.from_expression(expression) + return self._managers.datasets.resolve_wildcard(wildcard, missing=missing) def queryCollections( self, @@ -1914,11 +1898,10 @@ def _standardize_query_dataset_args( self, datasets: Any, collections: CollectionArgType | None, - components: bool | None, mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", *, doomed_by: list[str], - ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]: + ) -> tuple[list[DatasetType], CollectionWildcard | None]: """Preprocess dataset arguments passed to query* methods. Parameters @@ -1929,17 +1912,6 @@ def _standardize_query_dataset_args( collections : `str`, `re.Pattern`, or iterable of these Expression identifying collections to be searched. See `queryCollections` for details. - components : `bool`, optional - If `True`, apply all expression patterns to component dataset type - names as well. If `False`, never apply patterns to components. - If `None` (default), apply patterns to components only if their - parent datasets were not matched by the expression. - Fully-specified component datasets (`str` or `DatasetType` - instances) are always included. - - Values other than `False` are deprecated, and only `False` will be - supported after v26. After v27 this argument will be removed - entirely. mode : `str`, optional The way in which datasets are being used in this query; one of: @@ -1960,13 +1932,12 @@ def _standardize_query_dataset_args( Returns ------- - composition : `defaultdict` [ `DatasetType`, `list` [ `str` ] ] - Dictionary mapping parent dataset type to `list` of components - matched for that dataset type (or `None` for the parent itself). + dataset_types : `list` [ `DatasetType` ] + List of matched dataset types. collections : `CollectionWildcard` Processed collection expression. 
""" - composition: dict[DatasetType, list[str | None]] = {} + dataset_types: list[DatasetType] = [] collection_wildcard: CollectionWildcard | None = None if datasets is not None: if collections is None: @@ -1980,23 +1951,19 @@ def _standardize_query_dataset_args( f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context." ) missing: list[str] = [] - composition = self._managers.datasets.resolve_wildcard( - datasets, components=components, missing=missing, explicit_only=(mode == "constrain") + dataset_types = self._managers.datasets.resolve_wildcard( + datasets, missing=missing, explicit_only=(mode == "constrain") ) if missing and mode == "constrain": - # After v26 this should raise MissingDatasetTypeError, to be - # implemented on DM-36303. - warnings.warn( - f"Dataset type(s) {missing} are not registered; this will be an error after v26.", - FutureWarning, - stacklevel=find_outside_stacklevel("lsst.daf.butler"), + raise MissingDatasetTypeError( + f"Dataset type(s) {missing} are not registered.", ) doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) elif collections: # I think this check should actually be `collections is not None`, # but it looks like some CLI scripts use empty tuple as default. raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") - return composition, collection_wildcard + return dataset_types, collection_wildcard def queryDatasets( self, @@ -2007,7 +1974,7 @@ def queryDatasets( dataId: DataId | None = None, where: str = "", findFirst: bool = False, - components: bool | None = False, + components: bool = False, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, @@ -2053,16 +2020,8 @@ def queryDatasets( ``collections`` must not contain regular expressions and may not be ``...``. components : `bool`, optional - If `True`, apply all dataset expression patterns to component - dataset type names as well. If `False`, never apply patterns to - components. If `None`, apply patterns to components only - if their parent datasets were not matched by the expression. - Fully-specified component datasets (`str` or `DatasetType` - instances) are always included. - - Values other than `False` are deprecated, and only `False` will be - supported after v26. After v27 this argument will be removed - entirely. + Must be `False`. Provided only for backwards compatibility. After + v27 this argument will be removed entirely. bind : `~collections.abc.Mapping`, optional Mapping containing literal values that should be injected into the ``where`` expression, keyed by the identifiers they replace. @@ -2117,12 +2076,16 @@ def queryDatasets( query), and then use multiple (generally much simpler) calls to `queryDatasets` with the returned data IDs passed as constraints. """ + if components is not False: + raise DatasetTypeError( + "Dataset component queries are no longer supported by Registry. Use " + "DatasetType methods to obtain components from parent dataset types instead." 
+ ) doomed_by: list[str] = [] data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) - dataset_composition, collection_wildcard = self._standardize_query_dataset_args( + resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( datasetType, collections, - components, mode="find_first" if findFirst else "find_all", doomed_by=doomed_by, ) @@ -2130,11 +2093,11 @@ def queryDatasets( doomed_by.append("No datasets can be found because collection list is empty.") return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) parent_results: list[queries.ParentDatasetQueryResults] = [] - for parent_dataset_type, components_for_parent in dataset_composition.items(): + for resolved_dataset_type in resolved_dataset_types: # The full set of dimensions in the query is the combination of # those needed for the DatasetType and those explicitly requested, # if any. - dimension_names = set(parent_dataset_type.dimensions.names) + dimension_names = set(resolved_dataset_type.dimensions.names) if dimensions is not None: dimension_names.update(self.dimensions.conform(dimensions).names) # Construct the summary structure needed to construct a @@ -2147,7 +2110,7 @@ def queryDatasets( bind=bind, defaults=self.defaults.dataId, check=check, - datasets=[parent_dataset_type], + datasets=[resolved_dataset_type], ) builder = self._makeQueryBuilder(summary) # Add the dataset subquery to the query, telling the QueryBuilder @@ -2155,12 +2118,12 @@ def queryDatasets( # only if we need to findFirst. Note that if any of the # collections are actually wildcard expressions, and # findFirst=True, this will raise TypeError for us. - builder.joinDataset(parent_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst) + builder.joinDataset( + resolved_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst + ) query = builder.finish() parent_results.append( - queries.ParentDatasetQueryResults( - query, parent_dataset_type, components=components_for_parent - ) + queries.ParentDatasetQueryResults(query, datasetType=resolved_dataset_type, components=[None]) ) if not parent_results: doomed_by.extend( @@ -2183,7 +2146,7 @@ def queryDataIds( datasets: Any = None, collections: CollectionArgType | None = None, where: str = "", - components: bool | None = None, + components: bool = False, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, @@ -2228,16 +2191,8 @@ def queryDataIds( key column of a dimension table) dimension name. See :ref:`daf_butler_dimension_expressions` for more information. components : `bool`, optional - If `True`, apply all dataset expression patterns to component - dataset type names as well. If `False`, never apply patterns to - components. If `None`, apply patterns to components only - if their parent datasets were not matched by the expression. - Fully-specified component datasets (`str` or `DatasetType` - instances) are always included. - - Values other than `False` are deprecated, and only `False` will be - supported after v26. After v27 this argument will be removed - entirely. + Must be `False`. Provided only for backwards compatibility. After + v27 this argument will be removed entirely. bind : `~collections.abc.Mapping`, optional Mapping containing literal values that should be injected into the ``where`` expression, keyed by the identifiers they replace. @@ -2284,11 +2239,16 @@ def queryDataIds( lsst.daf.butler.registry.UserExpressionError Raised when ``where`` expression is invalid. 
""" + if components is not False: + raise DatasetTypeError( + "Dataset component queries are no longer supported by Registry. Use " + "DatasetType methods to obtain components from parent dataset types instead." + ) requested_dimensions = self.dimensions.conform(dimensions) doomed_by: list[str] = [] data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) - dataset_composition, collection_wildcard = self._standardize_query_dataset_args( - datasets, collections, components, doomed_by=doomed_by + resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( + datasets, collections, doomed_by=doomed_by ) if collection_wildcard is not None and collection_wildcard.empty(): doomed_by.append("No data coordinates can be found because collection list is empty.") @@ -2300,10 +2260,10 @@ def queryDataIds( bind=bind, defaults=self.defaults.dataId, check=check, - datasets=dataset_composition.keys(), + datasets=resolved_dataset_types, ) builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) - for datasetType in dataset_composition: + for datasetType in resolved_dataset_types: builder.joinDataset(datasetType, collection_wildcard, isResult=False) query = builder.finish() @@ -2317,7 +2277,7 @@ def queryDimensionRecords( datasets: Any = None, collections: CollectionArgType | None = None, where: str = "", - components: bool | None = None, + components: bool = False, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, @@ -2353,9 +2313,8 @@ def queryDimensionRecords( Whether to apply dataset expressions to components as well. See `queryDataIds` for more information. - Values other than `False` are deprecated, and only `False` will be - supported after v26. After v27 this argument will be removed - entirely. + Must be `False`. Provided only for backwards compatibility. After + v27 this argument will be removed entirely. bind : `~collections.abc.Mapping`, optional Mapping containing literal values that should be injected into the ``where`` expression, keyed by the identifiers they replace. @@ -2393,6 +2352,11 @@ def queryDimensionRecords( lsst.daf.butler.registry.UserExpressionError Raised when ``where`` expression is invalid. """ + if components is not False: + raise DatasetTypeError( + "Dataset component queries are no longer supported by Registry. Use " + "DatasetType methods to obtain components from parent dataset types instead." 
+ ) if not isinstance(element, DimensionElement): try: element = self.dimensions[element] @@ -2402,8 +2366,8 @@ def queryDimensionRecords( ) from e doomed_by: list[str] = [] data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) - dataset_composition, collection_wildcard = self._standardize_query_dataset_args( - datasets, collections, components, doomed_by=doomed_by + resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( + datasets, collections, doomed_by=doomed_by ) if collection_wildcard is not None and collection_wildcard.empty(): doomed_by.append("No dimension records can be found because collection list is empty.") @@ -2415,10 +2379,10 @@ def queryDimensionRecords( bind=bind, defaults=self.defaults.dataId, check=check, - datasets=dataset_composition.keys(), + datasets=resolved_dataset_types, ) builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) - for datasetType in dataset_composition: + for datasetType in resolved_dataset_types: builder.joinDataset(datasetType, collection_wildcard, isResult=False) query = builder.finish().with_record_columns(element.name) return queries.DatabaseDimensionRecordQueryResults(query, element) diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py index 1c0c7b77be..6db91742f6 100644 --- a/python/lsst/daf/butler/registry/tests/_registry.py +++ b/python/lsst/daf/butler/registry/tests/_registry.py @@ -32,7 +32,6 @@ import datetime import itertools -import logging import os import re import unittest @@ -57,7 +56,6 @@ from ..._dataset_association import DatasetAssociation from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef from ..._dataset_type import DatasetType -from ..._named import NamedValueSet from ..._storage_class import StorageClass from ..._timespan import Timespan from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension @@ -597,103 +595,6 @@ def testImportDatasetsUUID(self): # DATAID_TYPE_RUN ref can be imported into a new run (ref2,) = registry._importDatasets([ref]) - def testDatasetTypeComponentQueries(self): - """Test component options when querying for dataset types. - - All of the behavior here is deprecated, so many of these tests are - currently wrapped in a context to check that we get a warning whenever - a component dataset is actually returned. - """ - registry = self.makeRegistry() - self.loadData(registry, "base.yaml") - self.loadData(registry, "datasets.yaml") - # Test querying for dataset types with different inputs. - # First query for all dataset types; components should only be included - # when components=True. - self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names) - self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names) - with self.assertWarns(FutureWarning): - self.assertLess( - {"bias", "flat", "bias.wcs", "flat.photoCalib"}, - NamedValueSet(registry.queryDatasetTypes(components=True)).names, - ) - # Use a pattern that can match either parent or components. Again, - # components are only returned if components=True. 
- self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names) - self.assertEqual( - {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names - ) - with self.assertWarns(FutureWarning): - self.assertLess( - {"bias", "bias.wcs"}, - NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names, - ) - # This pattern matches only a component. In this case we also return - # that component dataset type if components=None. - with self.assertWarns(FutureWarning): - self.assertEqual( - {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names - ) - self.assertEqual( - set(), - NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names, - ) - with self.assertWarns(FutureWarning): - self.assertEqual( - {"bias.wcs"}, - NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names, - ) - # Add a dataset type using a StorageClass that we'll then remove; check - # that this does not affect our ability to query for dataset types - # (though it will warn). - tempStorageClass = StorageClass( - name="TempStorageClass", - components={ - "data1": registry.storageClasses.getStorageClass("StructuredDataDict"), - "data2": registry.storageClasses.getStorageClass("StructuredDataDict"), - }, - ) - registry.storageClasses.registerStorageClass(tempStorageClass) - datasetType = DatasetType( - "temporary", - dimensions=["instrument"], - storageClass=tempStorageClass, - universe=registry.dimensions, - ) - registry.registerDatasetType(datasetType) - registry.storageClasses._unregisterStorageClass(tempStorageClass.name) - datasetType._storageClass = None - del tempStorageClass - # Querying for all dataset types, including components, should include - # at least all non-component dataset types (and I don't want to - # enumerate all of the Exposure components for bias and flat here). - with self.assertWarns(FutureWarning): - with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm: - everything = NamedValueSet(registry.queryDatasetTypes(components=True)) - self.assertIn("TempStorageClass", cm.output[0]) - self.assertLess({"bias", "flat", "temporary"}, everything.names) - # It should not include "temporary.columns", because we tried to remove - # the storage class that would tell it about that. So if the next line - # fails (i.e. "temporary.columns" _is_ in everything.names), it means - # this part of the test isn't doing anything, because the _unregister - # call about isn't simulating the real-life case we want it to - # simulate, in which different versions of daf_butler in entirely - # different Python processes interact with the same repo. - self.assertNotIn("temporary.data", everything.names) - # Query for dataset types that start with "temp". This should again - # not include the component, and also not fail. - with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm: - startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True)) - self.assertIn("TempStorageClass", cm.output[0]) - self.assertEqual({"temporary"}, startsWithTemp.names) - # Querying with no components should not warn at all. - with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm: - startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False)) - # Must issue a warning of our own to be captured. 
- logging.getLogger("lsst.daf.butler.registries").warning("test message") - self.assertEqual(len(cm.output), 1) - self.assertIn("test message", cm.output[0]) - def testComponentLookups(self): """Test searching for component datasets via their parents. @@ -718,36 +619,6 @@ def testComponentLookups(self): # Search for a single dataset with findDataset. childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId) self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs")) - # Search for detector data IDs constrained by component dataset - # existence with queryDataIds. - with self.assertWarns(FutureWarning): - dataIds = registry.queryDataIds( - ["detector"], - datasets=["bias.wcs"], - collections=collection, - ).toSet() - self.assertEqual( - dataIds, - DataCoordinateSet( - { - DataCoordinate.standardize( - instrument="Cam1", detector=d, dimensions=parentType.dimensions - ) - for d in (1, 2, 3) - }, - dimensions=parentType.dimensions, - ), - ) - # Search for multiple datasets of a single type with queryDatasets. - with self.assertWarns(FutureWarning): - childRefs2 = set( - registry.queryDatasets( - "bias.wcs", - collections=collection, - ) - ) - self.assertEqual({ref.datasetType for ref in childRefs2}, {childType}) - self.assertEqual({ref.dataId for ref in childRefs2}, set(dataIds)) def testCollections(self): """Tests for registry methods that manage collections.""" @@ -1510,19 +1381,6 @@ def testQueryResults(self): expectedFlats, ) - # Use a component dataset type. - self.assertCountEqual( - [ - ref.makeComponentRef("image") - for ref in subsetDataIds.findDatasets( - bias, - collections=["imported_r", "imported_g"], - findFirst=False, - ) - ], - [ref.makeComponentRef("image") for ref in expectedAllBiases], - ) - # Use a named dataset type that does not exist and a dataset type # object that does not exist. unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure") diff --git a/tests/test_cliCmdQueryDatasetTypes.py b/tests/test_cliCmdQueryDatasetTypes.py index 3f33e577b7..0a69b61e3b 100644 --- a/tests/test_cliCmdQueryDatasetTypes.py +++ b/tests/test_cliCmdQueryDatasetTypes.py @@ -63,12 +63,12 @@ def test_requiredMissing(self): def test_all(self): """Test all parameters.""" self.run_test( - ["query-dataset-types", "here", "--verbose", "foo*", "--components"], - self.makeExpected(repo="here", verbose=True, glob=("foo*",), components=True), + ["query-dataset-types", "here", "--verbose", "foo*"], + self.makeExpected(repo="here", verbose=True, glob=("foo*",)), ) self.run_test( - ["query-dataset-types", "here", "--verbose", "foo*", "--no-components"], - self.makeExpected(repo="here", verbose=True, glob=("foo*",), components=False), + ["query-dataset-types", "here", "--verbose", "foo*"], + self.makeExpected(repo="here", verbose=True, glob=("foo*",)), ) From 4a472c0b5525d054ef7d1cf966ab5d745fa969a3 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Thu, 29 Sep 2022 11:51:41 -0400 Subject: [PATCH 04/18] Turn more RFC-879 deprecations into errors. Dataset type wildcards are no longer permitted in queryDataIds and queryDimensionRecords, and missing dataset types are now an error in those contexts. 
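A minimal sketch of the new failure modes, assuming a `registry` populated as in
the test suite ("nonexistent" is a deliberately unregistered dataset type name):

    import re

    from lsst.daf.butler.registry import (
        DatasetTypeExpressionError,
        MissingDatasetTypeError,
    )

    # An unregistered dataset type name used to emit a FutureWarning and
    # produce a doomed query; it now raises at call time.
    try:
        registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...)
    except MissingDatasetTypeError:
        pass

    # Wildcard patterns for dataset types were previously deprecated in
    # queryDataIds and queryDimensionRecords; they are now rejected outright.
    try:
        registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
    except DatasetTypeExpressionError:
        pass
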
--- .../datasets/byDimensions/_manager.py | 4 ++- .../daf/butler/registry/tests/_registry.py | 33 +++++-------------- tests/test_cliCmdQueryDataIds.py | 8 ----- 3 files changed, 12 insertions(+), 33 deletions(-) diff --git a/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py b/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py index 91b4e98ccb..7dd14bcc9c 100644 --- a/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py +++ b/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py @@ -420,7 +420,9 @@ def resolve_wildcard( result.append(datasetType) elif wildcard.patterns: if explicit_only: - raise DatasetTypeExpressionError("Wildcard patterns are not supported when explicit only.") + raise DatasetTypeExpressionError( + "Dataset type wildcard expressions are not supported in this context." + ) dataset_types = self._fetch_dataset_types() for datasetType in dataset_types: if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py index 6db91742f6..6b149b3449 100644 --- a/python/lsst/daf/butler/registry/tests/_registry.py +++ b/python/lsst/daf/butler/registry/tests/_registry.py @@ -69,6 +69,7 @@ ConflictingDefinitionError, DataIdValueError, DatasetTypeError, + DatasetTypeExpressionError, InconsistentDataIdError, MissingCollectionError, MissingDatasetTypeError, @@ -2634,26 +2635,12 @@ def testQueryResultSummaries(self): ["potato"], ), ] - # The behavior of these additional queries is slated to change in the - # future, so we also check for deprecation warnings. - with self.assertWarns(FutureWarning): - queries_and_snippets.append( - ( - # Dataset type name doesn't match any existing dataset - # types. - registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...), - ["nonexistent"], - ) - ) - with self.assertWarns(FutureWarning): - queries_and_snippets.append( - ( - # Dataset type name doesn't match any existing dataset - # types. - registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...), - ["nonexistent"], - ) - ) + with self.assertRaises(MissingDatasetTypeError): + # Dataset type name doesn't match any existing dataset types. + registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...) + with self.assertRaises(MissingDatasetTypeError): + # Dataset type name doesn't match any existing dataset types. + registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...) for query, snippets in queries_and_snippets: self.assertFalse(query.any(execute=False, exact=False)) self.assertFalse(query.any(execute=True, exact=False)) @@ -2670,10 +2657,8 @@ def testQueryResultSummaries(self): messages, ) - # This query does yield results, but should also emit a warning because - # dataset type patterns to queryDataIds is deprecated; just look for - # the warning. - with self.assertWarns(FutureWarning): + # Wildcards on dataset types are not permitted in queryDataIds. + with self.assertRaises(DatasetTypeExpressionError): registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...) 
# These queries yield no results due to problems that can be identified diff --git a/tests/test_cliCmdQueryDataIds.py b/tests/test_cliCmdQueryDataIds.py index 18f4b59c76..031b38a865 100644 --- a/tests/test_cliCmdQueryDataIds.py +++ b/tests/test_cliCmdQueryDataIds.py @@ -180,14 +180,6 @@ def testDatasetsAndCollections(self): self.assertIsNone(res) self.assertIn("No dimensions in common", msg) - # Check that we get a reason returned if no dataset type is found. - with self.assertWarns(FutureWarning): - res, msg = self._queryDataIds( - repo=self.root, dimensions=("detector",), collections=("imported_g",), datasets="raw" - ) - self.assertIsNone(res) - self.assertEqual(msg, "Dataset type raw is not registered.") - # Check that we get a reason returned if no dataset is found in # collection. res, msg = self._queryDataIds( From c173d5e4fb64bb45fffe30b9a5ce4672f001483f Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 5 Jan 2024 14:21:48 -0700 Subject: [PATCH 05/18] Force components parameter to False in registry shim --- python/lsst/daf/butler/_registry_shim.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/lsst/daf/butler/_registry_shim.py b/python/lsst/daf/butler/_registry_shim.py index 2a49c161e9..61fd3787b4 100644 --- a/python/lsst/daf/butler/_registry_shim.py +++ b/python/lsst/daf/butler/_registry_shim.py @@ -281,7 +281,7 @@ def queryDatasetTypes( self, expression: Any = ..., *, - components: bool | None = None, + components: bool = False, missing: list[str] | None = None, ) -> Iterable[DatasetType]: # Docstring inherited from a base class. @@ -309,7 +309,7 @@ def queryDatasets( dataId: DataId | None = None, where: str = "", findFirst: bool = False, - components: bool | None = None, + components: bool = False, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, @@ -337,7 +337,7 @@ def queryDataIds( datasets: Any = None, collections: CollectionArgType | None = None, where: str = "", - components: bool | None = None, + components: bool = False, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, @@ -363,7 +363,7 @@ def queryDimensionRecords( datasets: Any = None, collections: CollectionArgType | None = None, where: str = "", - components: bool | None = None, + components: bool = False, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, From e2736ba5d148fe4661afdbc7801c48bc45597be5 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 5 Jan 2024 14:22:37 -0700 Subject: [PATCH 06/18] Fix exception type in test code --- python/lsst/daf/butler/tests/butler_query.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/python/lsst/daf/butler/tests/butler_query.py b/python/lsst/daf/butler/tests/butler_query.py index f895a9e666..9de7342a43 100644 --- a/python/lsst/daf/butler/tests/butler_query.py +++ b/python/lsst/daf/butler/tests/butler_query.py @@ -45,7 +45,7 @@ from .._exceptions import EmptyQueryResultError from ..dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension from ..registry._collection_type import CollectionType -from ..registry._exceptions import DataIdValueError, DatasetTypeError, MissingCollectionError +from ..registry._exceptions import DataIdValueError, DatasetTypeError, DatasetTypeExpressionError, MissingCollectionError, MissingDatasetTypeError from ..transfers import YamlRepoImportBackend from .utils import TestCaseMixin @@ -449,9 +449,8 @@ def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQuer ["potato"], ), ] - 
# The behavior of these additional queries is slated to change in the - # future, so we also check for deprecation warnings. - with self.assertWarns(FutureWarning): + + with self.assertRaises(MissingDatasetTypeError): queries_and_snippets.append( ( # Dataset type name doesn't match any existing dataset @@ -460,7 +459,7 @@ def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQuer ["nonexistent"], ) ) - with self.assertWarns(FutureWarning): + with self.assertRaises(MissingDatasetTypeError): queries_and_snippets.append( ( # Dataset type name doesn't match any existing dataset @@ -488,7 +487,7 @@ def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQuer # This query does yield results, but should also emit a warning because # dataset type patterns to queryDataIds is deprecated; just look for # the warning. - with self.assertWarns(FutureWarning): + with self.assertRaises(DatasetTypeExpressionError): _query_data_ids(["detector"], datasets=re.compile("^nonexistent$"), collections=...) # These queries yield no results due to problems that can be identified From 48a6c4058404b8f89d1cef87b0cb3595d0930d28 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 5 Jan 2024 14:22:54 -0700 Subject: [PATCH 07/18] More fixes for removal of components --- .../lsst/daf/butler/registry/sql_registry.py | 30 +++++++------------ python/lsst/daf/butler/tests/butler_query.py | 8 ++++- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/python/lsst/daf/butler/registry/sql_registry.py b/python/lsst/daf/butler/registry/sql_registry.py index 21f6c1d8f0..d12c3f432c 100644 --- a/python/lsst/daf/butler/registry/sql_registry.py +++ b/python/lsst/daf/butler/registry/sql_registry.py @@ -905,23 +905,17 @@ def findDataset( if collection_wildcard.empty(): return None matched_collections = backend.resolve_collection_wildcard(collection_wildcard) - parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(datasetType) - if len(components) > 1: - raise DatasetTypeError( - f"findDataset requires exactly one dataset type; got multiple components {components} " - f"for parent dataset type {parent_dataset_type.name}." 
- ) - component = components[0] + resolved_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType) dataId = DataCoordinate.standardize( dataId, - dimensions=parent_dataset_type.dimensions, + dimensions=resolved_dataset_type.dimensions, universe=self.dimensions, defaults=self.defaults.dataId, **kwargs, ) governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.dimensions.governors} (filtered_collections,) = backend.filter_dataset_collections( - [parent_dataset_type], + [resolved_dataset_type], matched_collections, governor_constraints=governor_constraints, ).values() @@ -937,17 +931,17 @@ def findDataset( requested_columns = {"dataset_id", "run", "collection"} with backend.context() as context: predicate = context.make_data_coordinate_predicate( - dataId.subset(parent_dataset_type.dimensions), full=False + dataId.subset(resolved_dataset_type.dimensions), full=False ) if timespan is not None: requested_columns.add("timespan") predicate = predicate.logical_and( context.make_timespan_overlap_predicate( - DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan + DatasetColumnTag(resolved_dataset_type.name, "timespan"), timespan ) ) relation = backend.make_dataset_query_relation( - parent_dataset_type, filtered_collections, requested_columns, context + resolved_dataset_type, filtered_collections, requested_columns, context ).with_rows_satisfying(predicate) rows = list(context.fetch_iterable(relation)) else: @@ -958,7 +952,7 @@ def findDataset( best_row = rows[0] else: rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} - collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") + collection_tag = DatasetColumnTag(resolved_dataset_type.name, "collection") row_iter = iter(rows) best_row = next(row_iter) best_rank = rank_by_collection_key[best_row[collection_tag]] @@ -973,16 +967,14 @@ def findDataset( assert timespan is not None, "Rank ties should be impossible given DB constraints." if have_tie: raise LookupError( - f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections " + f"Ambiguous calibration lookup for {resolved_dataset_type.name} in collections " f"{collection_wildcard.strings} with timespan {timespan}." 
) reader = queries.DatasetRefReader( - parent_dataset_type, + resolved_dataset_type, translate_collection=lambda k: self._managers.collections[k].name, ) ref = reader.read(best_row, data_id=dataId) - if component is not None: - ref = ref.makeComponentRef(component) if datastore_records: ref = self.get_datastore_records(ref) @@ -2123,7 +2115,7 @@ def queryDatasets( ) query = builder.finish() parent_results.append( - queries.ParentDatasetQueryResults(query, datasetType=resolved_dataset_type, components=[None]) + queries.ParentDatasetQueryResults(query, resolved_dataset_type, components=[None]) ) if not parent_results: doomed_by.extend( @@ -2446,7 +2438,7 @@ def queryDatasetAssociations( collections = self.defaults.collections collection_wildcard = CollectionWildcard.from_expression(collections) backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) - parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False) + parent_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType) timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") for parent_collection_record in backend.resolve_collection_wildcard( diff --git a/python/lsst/daf/butler/tests/butler_query.py b/python/lsst/daf/butler/tests/butler_query.py index 9de7342a43..8ac7de9477 100644 --- a/python/lsst/daf/butler/tests/butler_query.py +++ b/python/lsst/daf/butler/tests/butler_query.py @@ -45,7 +45,13 @@ from .._exceptions import EmptyQueryResultError from ..dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension from ..registry._collection_type import CollectionType -from ..registry._exceptions import DataIdValueError, DatasetTypeError, DatasetTypeExpressionError, MissingCollectionError, MissingDatasetTypeError +from ..registry._exceptions import ( + DataIdValueError, + DatasetTypeError, + DatasetTypeExpressionError, + MissingCollectionError, + MissingDatasetTypeError, +) from ..transfers import YamlRepoImportBackend from .utils import TestCaseMixin From af41679799ac76086e153e4df233a3144541008c Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 5 Jan 2024 14:41:57 -0700 Subject: [PATCH 08/18] Fix API call now that component not returned --- python/lsst/daf/butler/registry/queries/_results.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/lsst/daf/butler/registry/queries/_results.py b/python/lsst/daf/butler/registry/queries/_results.py index d18a3b64a3..fb7f22df54 100644 --- a/python/lsst/daf/butler/registry/queries/_results.py +++ b/python/lsst/daf/butler/registry/queries/_results.py @@ -339,8 +339,8 @@ def findRelatedDatasets( dimensions = self.dimensions else: dimensions = self.universe.conform(dimensions) - parent_dataset_type, _ = self._query.backend.resolve_single_dataset_type_wildcard( - datasetType, components=False, explicit_only=True + parent_dataset_type = self._query.backend.resolve_single_dataset_type_wildcard( + datasetType, explicit_only=True ) query = self._query.find_datasets(parent_dataset_type, collections, find_first=findFirst, defer=True) return query.iter_data_ids_and_dataset_refs(parent_dataset_type, dimensions) From ead414ae4bce4f54476a69a900248f2fe72fdc64 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 5 Jan 2024 14:42:10 -0700 Subject: [PATCH 09/18] Fix tests now that components not allowed --- python/lsst/daf/butler/registry/tests/_registry.py | 11 +++++------ 1 file changed, 5 
insertions(+), 6 deletions(-)

diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py
index 6b149b3449..eebfef27f3 100644
--- a/python/lsst/daf/butler/registry/tests/_registry.py
+++ b/python/lsst/daf/butler/registry/tests/_registry.py
@@ -524,7 +524,7 @@ def testRemoveDatasetTypeFailure(self):
         self.loadData(registry, "datasets.yaml")
         with self.assertRaises(OrphanedRecordError):
             registry.removeDatasetType("flat")
-        with self.assertRaises(ValueError):
+        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

     def testImportDatasetsUUID(self):
@@ -599,9 +599,8 @@ def testImportDatasetsUUID(self):
     def testComponentLookups(self):
         """Test searching for component datasets via their parents.

-        All of the behavior here is deprecated, so many of these tests are
-        currently wrapped in a context to check that we get a warning whenever
-        a component dataset is actually returned.
+        Component dataset types can no longer be looked up through the
+        registry; this test checks that such lookups now raise.
         """
         registry = self.makeRegistry()
         self.loadData(registry, "base.yaml")
@@ -618,8 +617,8 @@ def testComponentLookups(self):
         self.assertIsInstance(parentRefResolved, DatasetRef)
         self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
         # Search for a single dataset with findDataset.
-        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
-        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
+        with self.assertRaises(DatasetTypeError):
+            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

     def testCollections(self):
         """Tests for registry methods that manage collections."""

From 0e8a20514ea3bee3ff0a104ae50762ceed40dadf Mon Sep 17 00:00:00 2001
From: Tim Jenness
Date: Fri, 5 Jan 2024 14:25:33 -0700
Subject: [PATCH 10/18] Allow Butler.find_dataset to return a component
 DatasetRef

---
 python/lsst/daf/butler/direct_butler.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/python/lsst/daf/butler/direct_butler.py b/python/lsst/daf/butler/direct_butler.py
index 18eb838bf5..1f24b41c34 100644
--- a/python/lsst/daf/butler/direct_butler.py
+++ b/python/lsst/daf/butler/direct_butler.py
@@ -1217,10 +1217,18 @@ def find_dataset(
             actual_type = self.get_dataset_type(dataset_type)
         else:
             actual_type = dataset_type
-        data_id, kwargs = self._rewrite_data_id(data_id, actual_type, **kwargs)
+
+        # Store the component for later.
+ component_name = actual_type.component() + if actual_type.isComponent(): + parent_type = actual_type.makeCompositeDatasetType() + else: + parent_type = actual_type + + data_id, kwargs = self._rewrite_data_id(data_id, parent_type, **kwargs) ref = self._registry.findDataset( - dataset_type, + parent_type, data_id, collections=collections, timespan=timespan, @@ -1229,8 +1237,11 @@ def find_dataset( ) if ref is not None and dimension_records: ref = ref.expanded(self._registry.expandDataId(ref.dataId, dimensions=ref.datasetType.dimensions)) + if ref is not None and component_name: + ref = ref.makeComponentRef(component_name) if ref is not None and storage_class is not None: ref = ref.overrideStorageClass(storage_class) + return ref def retrieveArtifacts( From fa4caa75b1ff114559956ab2258a9c22e1698c8f Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 5 Jan 2024 14:48:49 -0700 Subject: [PATCH 11/18] Fix return type of base class --- python/lsst/daf/butler/registry/queries/_sql_query_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lsst/daf/butler/registry/queries/_sql_query_backend.py b/python/lsst/daf/butler/registry/queries/_sql_query_backend.py index ca7413ede1..cf83b56ed9 100644 --- a/python/lsst/daf/butler/registry/queries/_sql_query_backend.py +++ b/python/lsst/daf/butler/registry/queries/_sql_query_backend.py @@ -112,7 +112,7 @@ def resolve_dataset_type_wildcard( expression: Any, missing: list[str] | None = None, explicit_only: bool = False, - ) -> dict[DatasetType, list[str | None]]: + ) -> list[DatasetType]: # Docstring inherited. return self._managers.datasets.resolve_wildcard( expression, From ebfe7a4b2646c4da12ca4c1f5d49c564c814d1f1 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 5 Jan 2024 14:49:00 -0700 Subject: [PATCH 12/18] Drop components option in query-dataset-types command line Do not forward. --- python/lsst/daf/butler/cli/cmd/commands.py | 2 ++ python/lsst/daf/butler/script/queryDatasetTypes.py | 10 ++-------- tests/test_cliCmdQueryDatasetTypes.py | 2 +- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/python/lsst/daf/butler/cli/cmd/commands.py b/python/lsst/daf/butler/cli/cmd/commands.py index cec5364dc0..5acb4cfe73 100644 --- a/python/lsst/daf/butler/cli/cmd/commands.py +++ b/python/lsst/daf/butler/cli/cmd/commands.py @@ -442,6 +442,8 @@ def query_collections(*args: Any, **kwargs: Any) -> None: @options_file_option() def query_dataset_types(*args: Any, **kwargs: Any) -> None: """Get the dataset types in a repository.""" + # Drop the components option. + kwargs.pop("components") table = script.queryDatasetTypes(*args, **kwargs) if table: table.pprint_all() diff --git a/python/lsst/daf/butler/script/queryDatasetTypes.py b/python/lsst/daf/butler/script/queryDatasetTypes.py index efe9aeaeb0..58cf68b693 100644 --- a/python/lsst/daf/butler/script/queryDatasetTypes.py +++ b/python/lsst/daf/butler/script/queryDatasetTypes.py @@ -34,7 +34,7 @@ from .._butler import Butler -def queryDatasetTypes(repo: str, verbose: bool, glob: Iterable[str], components: bool | None) -> Table: +def queryDatasetTypes(repo: str, verbose: bool, glob: Iterable[str]) -> Table: """Get the dataset types in a repository. Parameters @@ -48,12 +48,6 @@ def queryDatasetTypes(repo: str, verbose: bool, glob: Iterable[str], components: glob : iterable [`str`] A list of glob-style search string that fully or partially identify the dataset type names to search for. 
- components : `bool` or `None` - If `True`, apply all glob patterns to component dataset type - names as well. If `False`, never apply patterns to components. If - `None` (default), apply patterns to components only if their parent - datasets were not matched by the expression. Fully-specified component - datasets (`str` or `DatasetType` instances) are always included. Returns ------- @@ -63,7 +57,7 @@ def queryDatasetTypes(repo: str, verbose: bool, glob: Iterable[str], components: """ butler = Butler.from_config(repo, without_datastore=True) expression = glob or ... - datasetTypes = butler.registry.queryDatasetTypes(components=components, expression=expression) + datasetTypes = butler.registry.queryDatasetTypes(expression=expression) if verbose: table = Table( array( diff --git a/tests/test_cliCmdQueryDatasetTypes.py b/tests/test_cliCmdQueryDatasetTypes.py index 0a69b61e3b..33aaf566cd 100644 --- a/tests/test_cliCmdQueryDatasetTypes.py +++ b/tests/test_cliCmdQueryDatasetTypes.py @@ -46,7 +46,7 @@ class QueryDatasetTypesCmdTest(CliCmdTestBase, unittest.TestCase): @staticmethod def defaultExpected(): - return dict(repo=None, verbose=False, glob=(), components=False) + return dict(repo=None, verbose=False, glob=()) @staticmethod def command(): From 0e4372ea77383ae9007b37ba30f73526bb3215ca Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 5 Jan 2024 15:40:10 -0700 Subject: [PATCH 13/18] Issue warning from butler query-dataset-types if --[no-]components used --- python/lsst/daf/butler/cli/cmd/commands.py | 5 ++++- python/lsst/daf/butler/cli/opt/options.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/python/lsst/daf/butler/cli/cmd/commands.py b/python/lsst/daf/butler/cli/cmd/commands.py index 5acb4cfe73..0ebaeb56ce 100644 --- a/python/lsst/daf/butler/cli/cmd/commands.py +++ b/python/lsst/daf/butler/cli/cmd/commands.py @@ -443,7 +443,10 @@ def query_collections(*args: Any, **kwargs: Any) -> None: def query_dataset_types(*args: Any, **kwargs: Any) -> None: """Get the dataset types in a repository.""" # Drop the components option. - kwargs.pop("components") + components = kwargs.pop("components") + if components is not None: + comp_opt_str = "" if components else "no-" + click.echo(f"WARNING: --{comp_opt_str}components option is deprecated and will be removed after v27.") table = script.queryDatasetTypes(*args, **kwargs) if table: table.pprint_all() diff --git a/python/lsst/daf/butler/cli/opt/options.py b/python/lsst/daf/butler/cli/opt/options.py index fb8258bb98..d4abe3e713 100644 --- a/python/lsst/daf/butler/cli/opt/options.py +++ b/python/lsst/daf/butler/cli/opt/options.py @@ -111,12 +111,12 @@ def makeCollectionTypes( components_option = MWOptionDecorator( "--components/--no-components", - default=False, + default=None, help=unwrap( """For --components, apply all expression patterns to component dataset type names as well. For --no-components, never apply patterns to components. Only --no-components - is now supported.""" + is now supported. Option will be removed after v27.""" ), ) From b5d07c0d6d3b530badfbc3cc643d6ada3bd2cca8 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Tue, 9 Jan 2024 14:11:14 -0500 Subject: [PATCH 14/18] Deprecate ParentDatasetQueryResults.withComponents. Should have been deprecated earlier, but better late than never. 
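As a migration sketch for callers (dataset type and collection names are
borrowed from the test repositories used elsewhere in this series; not
part of this commit):

    # Instead of results.withComponents(("wcs",)), derive component refs
    # directly from the parent refs the query returns.
    refs = registry.queryDatasets("bias", collections="imported_g", findFirst=False)
    wcs_refs = [ref.makeComponentRef("wcs") for ref in refs]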
--- python/lsst/daf/butler/registry/queries/_results.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/lsst/daf/butler/registry/queries/_results.py b/python/lsst/daf/butler/registry/queries/_results.py index fb7f22df54..53fe7b1512 100644 --- a/python/lsst/daf/butler/registry/queries/_results.py +++ b/python/lsst/daf/butler/registry/queries/_results.py @@ -698,6 +698,7 @@ def dataIds(self) -> DataCoordinateQueryResults: """ return DataCoordinateQueryResults(self._query.projected(defer=True)) + @deprecated("Deprecated, will be removed after v27.", version="v27", category=FutureWarning) def withComponents(self, components: Sequence[str | None]) -> ParentDatasetQueryResults: """Return a new query results object for the same parent datasets but different components. From 4f677dd47856dc82d20372e06f65c45df8f30bdc Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Tue, 9 Jan 2024 14:12:31 -0500 Subject: [PATCH 15/18] Avoid warnings in DatasetQueryResults._iter_by_dataset_type. --- python/lsst/daf/butler/registry/queries/_results.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/python/lsst/daf/butler/registry/queries/_results.py b/python/lsst/daf/butler/registry/queries/_results.py index 53fe7b1512..9172364d8a 100644 --- a/python/lsst/daf/butler/registry/queries/_results.py +++ b/python/lsst/daf/butler/registry/queries/_results.py @@ -615,11 +615,17 @@ def _iter_by_dataset_type(self) -> Iterator[tuple[DatasetType, Iterable[DatasetR directly from queries. """ for parent_results in self.byParentDatasetType(): - for component in parent_results.components: + for component in parent_results._components: dataset_type = parent_results.parentDatasetType if component is not None: dataset_type = dataset_type.makeComponentDatasetType(component) - yield (dataset_type, parent_results.withComponents((component,))) + if tuple(parent_results._components) == (component,): + # Usual case, and in the future (after component support + # has been fully removed) the only case. + yield dataset_type, parent_results + else: + # General case that emits a deprecation warning. + yield (dataset_type, parent_results.withComponents((component,))) class ParentDatasetQueryResults(DatasetQueryResults): From 43d1db0a6f4517c79325c7949d99e210bd9883f0 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Mon, 11 Dec 2023 09:56:27 -0500 Subject: [PATCH 16/18] Drop component support and nomenclature from new query interface. Since getting a component DatasetRef from a parent one is just a method call, there's no need for this complexity in the query system. 
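For orientation, iteration under the renamed interface looks roughly like
this (a sketch; ``results`` stands in for a ``DatasetQueryResults``
obtained from the new query system):

    for single_type in results.by_dataset_type():
        # Each element is a SingleTypeDatasetQueryResults covering exactly
        # one dataset type.
        print(single_type.dataset_type.name)
        # For a composite dataset type, a component ref is one method call
        # away from each parent ref:
        component_refs = [ref.makeComponentRef("wcs") for ref in single_type]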
--- python/lsst/daf/butler/_query_results.py | 34 ++++++------------- python/lsst/daf/butler/direct_query.py | 4 +-- .../lsst/daf/butler/direct_query_results.py | 30 +++++++--------- python/lsst/daf/butler/tests/butler_query.py | 4 +-- tests/test_query_relations.py | 8 +++-- 5 files changed, 33 insertions(+), 47 deletions(-) diff --git a/python/lsst/daf/butler/_query_results.py b/python/lsst/daf/butler/_query_results.py index 022d4e00e8..16da006c32 100644 --- a/python/lsst/daf/butler/_query_results.py +++ b/python/lsst/daf/butler/_query_results.py @@ -31,11 +31,11 @@ "DataCoordinateQueryResults", "DatasetQueryResults", "DimensionRecordQueryResults", - "ParentDatasetQueryResults", + "SingleTypeDatasetQueryResults", ) from abc import abstractmethod -from collections.abc import Iterable, Iterator, Sequence +from collections.abc import Iterable, Iterator from contextlib import AbstractContextManager from typing import TYPE_CHECKING, Any @@ -422,15 +422,14 @@ class DatasetQueryResults(Iterable[DatasetRef]): """ @abstractmethod - def by_parent_dataset_type(self) -> Iterator[ParentDatasetQueryResults]: - """Group results by parent dataset type. + def by_dataset_type(self) -> Iterator[SingleTypeDatasetQueryResults]: + """Group results by dataset type. Returns ------- - iter : `~collections.abc.Iterator` [ `ParentDatasetQueryResults` ] + iter : `~collections.abc.Iterator` [ `SingleTypeDatasetQueryResults` ] An iterator over `DatasetQueryResults` instances that are each - responsible for a single parent dataset type (either just that - dataset type, one or more of its component dataset types, or both). + responsible for a single dataset type. """ raise NotImplementedError() @@ -546,19 +545,19 @@ def explain_no_results(self, execute: bool = True) -> Iterable[str]: raise NotImplementedError() -class ParentDatasetQueryResults(DatasetQueryResults): +class SingleTypeDatasetQueryResults(DatasetQueryResults): """An object that represents results from a query for datasets with a single parent `DatasetType`. """ @abstractmethod - def materialize(self) -> AbstractContextManager[ParentDatasetQueryResults]: + def materialize(self) -> AbstractContextManager[SingleTypeDatasetQueryResults]: # Docstring inherited from DatasetQueryResults. raise NotImplementedError() @property @abstractmethod - def parent_dataset_type(self) -> DatasetType: + def dataset_type(self) -> DatasetType: """The parent dataset type for all datasets in this iterable (`DatasetType`). """ @@ -576,20 +575,7 @@ def data_ids(self) -> DataCoordinateQueryResults: """ raise NotImplementedError() - @abstractmethod - def with_components(self, components: Sequence[str | None]) -> ParentDatasetQueryResults: - """Return a new query results object for the same parent datasets but - different components. - - Parameters - ---------- - components : `~collections.abc.Sequence` [ `str` or `None` ] - Names of components to include in iteration. `None` may be - included (at most once) to include the parent dataset type. - """ - raise NotImplementedError() - - def expanded(self) -> ParentDatasetQueryResults: + def expanded(self) -> SingleTypeDatasetQueryResults: # Docstring inherited from DatasetQueryResults. 
raise NotImplementedError() diff --git a/python/lsst/daf/butler/direct_query.py b/python/lsst/daf/butler/direct_query.py index c8898cfdf7..5f75ab6766 100644 --- a/python/lsst/daf/butler/direct_query.py +++ b/python/lsst/daf/butler/direct_query.py @@ -37,7 +37,7 @@ DirectDataCoordinateQueryResults, DirectDatasetQueryResults, DirectDimensionRecordQueryResults, - DirectParentDatasetQueryResults, + DirectSingleTypeDatasetQueryResults, ) from .registry import queries as registry_queries from .registry.sql_registry import SqlRegistry @@ -103,7 +103,7 @@ def datasets( **kwargs, ) if isinstance(registry_query_result, registry_queries.ParentDatasetQueryResults): - return DirectParentDatasetQueryResults(registry_query_result) + return DirectSingleTypeDatasetQueryResults(registry_query_result) else: return DirectDatasetQueryResults(registry_query_result) diff --git a/python/lsst/daf/butler/direct_query_results.py b/python/lsst/daf/butler/direct_query_results.py index 7aaae7553c..ca0b297133 100644 --- a/python/lsst/daf/butler/direct_query_results.py +++ b/python/lsst/daf/butler/direct_query_results.py @@ -31,18 +31,18 @@ "DirectDataCoordinateQueryResults", "DirectDatasetQueryResults", "DirectDimensionRecordQueryResults", - "DirectParentDatasetQueryResults", + "DirectSingleTypeDatasetQueryResults", ] import contextlib -from collections.abc import Iterable, Iterator, Sequence +from collections.abc import Iterable, Iterator from typing import TYPE_CHECKING, Any from ._query_results import ( DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults, - ParentDatasetQueryResults, + SingleTypeDatasetQueryResults, ) from .registry import queries as registry_queries @@ -165,10 +165,10 @@ def __init__(self, registry_query_result: registry_queries.DatasetQueryResults): def __iter__(self) -> Iterator[DatasetRef]: return iter(self._registry_query_result) - def by_parent_dataset_type(self) -> Iterator[ParentDatasetQueryResults]: + def by_dataset_type(self) -> Iterator[SingleTypeDatasetQueryResults]: # Docstring inherited. for by_parent in self._registry_query_result.byParentDatasetType(): - yield DirectParentDatasetQueryResults(by_parent) + yield DirectSingleTypeDatasetQueryResults(by_parent) @contextlib.contextmanager def materialize(self) -> Iterator[DatasetQueryResults]: @@ -193,8 +193,8 @@ def explain_no_results(self, execute: bool = True) -> Iterable[str]: return self._registry_query_result.explain_no_results(execute=execute) -class DirectParentDatasetQueryResults(ParentDatasetQueryResults): - """Implementation of `ParentDatasetQueryResults` using query result +class DirectSingleTypeDatasetQueryResults(SingleTypeDatasetQueryResults): + """Implementation of `SingleTypeDatasetQueryResults` using query result obtained from registry. Parameters @@ -210,18 +210,18 @@ def __init__(self, registry_query_result: registry_queries.ParentDatasetQueryRes def __iter__(self) -> Iterator[DatasetRef]: return iter(self._registry_query_result) - def by_parent_dataset_type(self) -> Iterator[ParentDatasetQueryResults]: + def by_dataset_type(self) -> Iterator[SingleTypeDatasetQueryResults]: # Docstring inherited. yield self @contextlib.contextmanager - def materialize(self) -> Iterator[ParentDatasetQueryResults]: + def materialize(self) -> Iterator[SingleTypeDatasetQueryResults]: # Docstring inherited. 
with self._registry_query_result.materialize() as result: - yield DirectParentDatasetQueryResults(result) + yield DirectSingleTypeDatasetQueryResults(result) @property - def parent_dataset_type(self) -> DatasetType: + def dataset_type(self) -> DatasetType: # Docstring inherited. return self._registry_query_result.parentDatasetType @@ -230,13 +230,9 @@ def data_ids(self) -> DataCoordinateQueryResults: # Docstring inherited. return DirectDataCoordinateQueryResults(self._registry_query_result.dataIds) - def with_components(self, components: Sequence[str | None]) -> ParentDatasetQueryResults: + def expanded(self) -> SingleTypeDatasetQueryResults: # Docstring inherited. - return DirectParentDatasetQueryResults(self._registry_query_result.withComponents(components)) - - def expanded(self) -> ParentDatasetQueryResults: - # Docstring inherited. - return DirectParentDatasetQueryResults(self._registry_query_result.expanded()) + return DirectSingleTypeDatasetQueryResults(self._registry_query_result.expanded()) def count(self, *, exact: bool = True, discard: bool = False) -> int: # Docstring inherited. diff --git a/python/lsst/daf/butler/tests/butler_query.py b/python/lsst/daf/butler/tests/butler_query.py index 8ac7de9477..c33a075688 100644 --- a/python/lsst/daf/butler/tests/butler_query.py +++ b/python/lsst/daf/butler/tests/butler_query.py @@ -355,9 +355,9 @@ def _do_query(dataset: Any, **kwargs: Any) -> DatasetQueryResults: self.assertTrue(result.any()) self.assertCountEqual([ref.dataId["detector"] for ref in result], [1, 2, 3, 2, 3, 4]) - by_type = list(result.by_parent_dataset_type()) + by_type = list(result.by_dataset_type()) self.assertEqual(len(by_type), 2) - self.assertEqual(set(item.parent_dataset_type.name for item in by_type), {"bias", "flat"}) + self.assertEqual(set(item.dataset_type.name for item in by_type), {"bias", "flat"}) with result.materialize() as materialized: result = materialized.expanded() diff --git a/tests/test_query_relations.py b/tests/test_query_relations.py index 807d48fc0b..421b33e393 100644 --- a/tests/test_query_relations.py +++ b/tests/test_query_relations.py @@ -31,7 +31,11 @@ import re import unittest -from lsst.daf.butler import DataCoordinateQueryResults, DimensionRecordQueryResults, ParentDatasetQueryResults +from lsst.daf.butler import ( + DataCoordinateQueryResults, + DimensionRecordQueryResults, + SingleTypeDatasetQueryResults, +) from lsst.daf.butler.registry import MissingSpatialOverlapError, RegistryConfig, _RegistryFactory from lsst.daf.butler.transfers import YamlRepoImportBackend @@ -98,7 +102,7 @@ def setUpClass(cls) -> None: def assert_relation_str( self, expected: str, - *results: DataCoordinateQueryResults | DimensionRecordQueryResults | ParentDatasetQueryResults, + *results: DataCoordinateQueryResults | DimensionRecordQueryResults | SingleTypeDatasetQueryResults, ) -> None: """Assert that checks that one or more registry queries have relation trees that match the given string. From 4d10137605f5666bf8b89f9b6aaa373b90edbe4a Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 9 Jan 2024 15:09:26 -0700 Subject: [PATCH 17/18] Add news fragment --- doc/changes/DM-36303.removal.rst | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 doc/changes/DM-36303.removal.rst diff --git a/doc/changes/DM-36303.removal.rst b/doc/changes/DM-36303.removal.rst new file mode 100644 index 0000000000..60288827f0 --- /dev/null +++ b/doc/changes/DM-36303.removal.rst @@ -0,0 +1,5 @@ +* Removed dataset type component query support from all Registry methods. 
+ The main ``Registry.query*`` methods now warn if a ``components`` parameter is given and raise if it has a value other than `False`. + The components parameters will be removed completely after v27. +* Removed ``CollectionSearch`` class. + A simple `tuple` is now used for this. From 8ee1e2943c0fe27c97fc3cbda8b026e814bae457 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 9 Jan 2024 16:31:27 -0700 Subject: [PATCH 18/18] Add default value to query classes to allow usage to be detected The components parameter is deprecated. Warn if False is specified explicitly in addition to raising if any other value is used. --- python/lsst/daf/butler/_registry_shim.py | 9 ++- .../lsst/daf/butler/registry/sql_registry.py | 79 +++++++++++++------ python/lsst/daf/butler/utils.py | 12 +++ tests/test_testRepo.py | 3 +- 4 files changed, 73 insertions(+), 30 deletions(-) diff --git a/python/lsst/daf/butler/_registry_shim.py b/python/lsst/daf/butler/_registry_shim.py index 61fd3787b4..956985be1a 100644 --- a/python/lsst/daf/butler/_registry_shim.py +++ b/python/lsst/daf/butler/_registry_shim.py @@ -53,6 +53,7 @@ from .registry._collection_type import CollectionType from .registry._defaults import RegistryDefaults from .registry.queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults +from .utils import _DefaultMarker, _Marker if TYPE_CHECKING: from .direct_butler import DirectButler @@ -281,7 +282,7 @@ def queryDatasetTypes( self, expression: Any = ..., *, - components: bool = False, + components: bool | _Marker = _DefaultMarker, missing: list[str] | None = None, ) -> Iterable[DatasetType]: # Docstring inherited from a base class. @@ -309,7 +310,7 @@ def queryDatasets( dataId: DataId | None = None, where: str = "", findFirst: bool = False, - components: bool = False, + components: bool | _Marker = _DefaultMarker, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, @@ -337,7 +338,7 @@ def queryDataIds( datasets: Any = None, collections: CollectionArgType | None = None, where: str = "", - components: bool = False, + components: bool | _Marker = _DefaultMarker, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, @@ -363,7 +364,7 @@ def queryDimensionRecords( datasets: Any = None, collections: CollectionArgType | None = None, where: str = "", - components: bool = False, + components: bool | _Marker = _DefaultMarker, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, diff --git a/python/lsst/daf/butler/registry/sql_registry.py b/python/lsst/daf/butler/registry/sql_registry.py index d12c3f432c..23803814c1 100644 --- a/python/lsst/daf/butler/registry/sql_registry.py +++ b/python/lsst/daf/butler/registry/sql_registry.py @@ -40,6 +40,7 @@ import sqlalchemy from lsst.daf.relation import LeafRelation, Relation from lsst.resources import ResourcePathExpression +from lsst.utils.introspection import find_outside_stacklevel from lsst.utils.iteration import ensure_iterable from .._column_tags import DatasetColumnTag @@ -85,7 +86,7 @@ from ..registry.interfaces import ChainedCollectionRecord, ReadOnlyDatabaseError, RunRecord from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard -from ..utils import transactional +from ..utils import _DefaultMarker, _Marker, transactional if TYPE_CHECKING: from .._butler_config import ButlerConfig @@ -1703,7 +1704,7 @@ def queryDatasetTypes( self, expression: Any = ..., *, - components: bool 
= False, + components: bool | _Marker = _DefaultMarker, missing: list[str] | None = None, ) -> Iterable[DatasetType]: """Iterate over the dataset types whose names match an expression. @@ -1735,11 +1736,18 @@ def queryDatasetTypes( lsst.daf.butler.registry.DatasetTypeExpressionError Raised when ``expression`` is invalid. """ - if components is not False: - raise DatasetTypeError( - "Dataset component queries are no longer supported by Registry. Use " - "DatasetType methods to obtain components from parent dataset types instead." - ) + if components is not _DefaultMarker: + if components is not False: + raise DatasetTypeError( + "Dataset component queries are no longer supported by Registry. Use " + "DatasetType methods to obtain components from parent dataset types instead." + ) + else: + warnings.warn( + "The components parameter is ignored. It will be removed after v27.", + category=FutureWarning, + stacklevel=find_outside_stacklevel("lsst.daf.butler"), + ) wildcard = DatasetTypeWildcard.from_expression(expression) return self._managers.datasets.resolve_wildcard(wildcard, missing=missing) @@ -1966,7 +1974,7 @@ def queryDatasets( dataId: DataId | None = None, where: str = "", findFirst: bool = False, - components: bool = False, + components: bool | _Marker = _DefaultMarker, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, @@ -2068,11 +2076,18 @@ def queryDatasets( query), and then use multiple (generally much simpler) calls to `queryDatasets` with the returned data IDs passed as constraints. """ - if components is not False: - raise DatasetTypeError( - "Dataset component queries are no longer supported by Registry. Use " - "DatasetType methods to obtain components from parent dataset types instead." - ) + if components is not _DefaultMarker: + if components is not False: + raise DatasetTypeError( + "Dataset component queries are no longer supported by Registry. Use " + "DatasetType methods to obtain components from parent dataset types instead." + ) + else: + warnings.warn( + "The components parameter is ignored. It will be removed after v27.", + category=FutureWarning, + stacklevel=find_outside_stacklevel("lsst.daf.butler"), + ) doomed_by: list[str] = [] data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( @@ -2138,7 +2153,7 @@ def queryDataIds( datasets: Any = None, collections: CollectionArgType | None = None, where: str = "", - components: bool = False, + components: bool | _Marker = _DefaultMarker, bind: Mapping[str, Any] | None = None, check: bool = True, **kwargs: Any, @@ -2231,11 +2246,18 @@ def queryDataIds( lsst.daf.butler.registry.UserExpressionError Raised when ``where`` expression is invalid. """ - if components is not False: - raise DatasetTypeError( - "Dataset component queries are no longer supported by Registry. Use " - "DatasetType methods to obtain components from parent dataset types instead." - ) + if components is not _DefaultMarker: + if components is not False: + raise DatasetTypeError( + "Dataset component queries are no longer supported by Registry. Use " + "DatasetType methods to obtain components from parent dataset types instead." + ) + else: + warnings.warn( + "The components parameter is ignored. 
It will be removed after v27.",
+                    category=FutureWarning,
+                    stacklevel=find_outside_stacklevel("lsst.daf.butler"),
+                )
         if not isinstance(element, DimensionElement):
             try:
                 element = self.dimensions[element]
diff --git a/python/lsst/daf/butler/utils.py b/python/lsst/daf/butler/utils.py
index 7a5b96619a..145edd1ea3 100644
--- a/python/lsst/daf/butler/utils.py
+++ b/python/lsst/daf/butler/utils.py
@@ -132,3 +132,15 @@ def globToRegex(expressions: str | EllipsisType | None | list[str]) -> list[str]
             res = e
         results.append(res)
     return results
+
+
+class _Marker:
+    """Private class to use as a default value when you want to know that
+    a default parameter has been overridden.
+    """
+
+
+_DefaultMarker = _Marker()
+"""Default value to give to a parameter when you want to know if the value
+has been overridden.
+"""
diff --git a/tests/test_testRepo.py b/tests/test_testRepo.py
index 6d0dfd7231..ccf2c9dca3 100644
--- a/tests/test_testRepo.py
+++ b/tests/test_testRepo.py
@@ -171,7 +171,8 @@ def testAddDataIdValue(self):
     def testAddDatasetType(self):
         # 1 for StructuredDataNoComponents, 1 for StructuredData (components
         # not included).
-        self.assertEqual(len(list(self.butler.registry.queryDatasetTypes(components=False))), 2)
+        with self.assertWarns(FutureWarning):
+            self.assertEqual(len(list(self.butler.registry.queryDatasetTypes(components=False))), 2)

         # Testing the DatasetType objects is not practical, because all tests
         # need a DimensionUniverse. So just check that we have the dataset