Skip to content

Commit

Permalink
Merge pull request #968 from lsst/tickets/DM-43027
Browse files Browse the repository at this point in the history
DM-43027: Make RemoteButler.find_dataset consistent with DirectButler
  • Loading branch information
dhirving authored Feb 27, 2024
2 parents 87c5805 + b7a0883 commit b6936e0
Show file tree
Hide file tree
Showing 8 changed files with 215 additions and 83 deletions.
17 changes: 13 additions & 4 deletions python/lsst/daf/butler/_timespan.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,20 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("Timespan",)
__all__ = (
"SerializedTimespan",
"Timespan",
)

import enum
import warnings
from collections.abc import Generator
from typing import TYPE_CHECKING, Any, ClassVar, Union
from typing import TYPE_CHECKING, Annotated, Any, ClassVar, Union

import astropy.time
import astropy.utils.exceptions
import yaml
from pydantic import Field

# As of astropy 4.2, the erfa interface is shipped independently and
# ErfaWarning is no longer an AstropyWarning
Expand Down Expand Up @@ -69,6 +73,11 @@ class _SpecialTimespanBound(enum.Enum):

TimespanBound = Union[astropy.time.Time, _SpecialTimespanBound, None]

SerializedTimespan = Annotated[list[int], Field(min_length=2, max_length=2)]
"""JSON-serializable representation of the Timespan class, as a list of two
integers ``[begin, end]`` in nanoseconds since the epoch.
"""


class Timespan:
"""A half-open time interval with nanosecond precision.
Expand Down Expand Up @@ -483,7 +492,7 @@ def difference(self, other: Timespan) -> Generator[Timespan, None, None]:
if intersection._nsec[1] < self._nsec[1]:
yield Timespan(None, None, _nsec=(intersection._nsec[1], self._nsec[1]))

def to_simple(self, minimal: bool = False) -> list[int]:
def to_simple(self, minimal: bool = False) -> SerializedTimespan:
"""Return simple python type form suitable for serialization.
Parameters
Expand All @@ -502,7 +511,7 @@ def to_simple(self, minimal: bool = False) -> list[int]:
@classmethod
def from_simple(
cls,
simple: list[int] | None,
simple: SerializedTimespan | None,
universe: DimensionUniverse | None = None,
registry: Registry | None = None,
) -> Timespan | None:
Expand Down
18 changes: 1 addition & 17 deletions python/lsst/daf/butler/datastores/fileDatastoreClient.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Any, Literal

import pydantic
from lsst.daf.butler import DatasetRef, Location, StorageClass
from lsst.daf.butler import DatasetRef, Location
from lsst.daf.butler.datastore.cache_manager import DatastoreDisabledCacheManager
from lsst.daf.butler.datastore.stored_file_info import SerializedStoredFileInfo, StoredFileInfo
from lsst.daf.butler.datastores.file_datastore.get import (
Expand Down Expand Up @@ -47,8 +47,6 @@ def get_dataset_as_python_object(
payload: FileDatastoreGetPayload,
*,
parameters: Mapping[str, Any] | None,
storageClass: StorageClass | str | None,
component: str | None,
) -> Any:
"""Retrieve an artifact from storage and return it as a Python object.
Expand All @@ -62,12 +60,6 @@ def get_dataset_as_python_object(
parameters : `Mapping`[`str`, `typing.Any`]
`StorageClass` and `Formatter` parameters to be used when converting
the artifact to a Python object.
storageClass : `StorageClass` | `str` | `None`
Overrides the `StorageClass` to be used when converting the artifact to
a Python object. If `None`, uses the `StorageClass` specified by
``payload``.
component : `str` | `None`
Selects which component of the artifact to retrieve.
Returns
-------
Expand All @@ -79,14 +71,6 @@ def get_dataset_as_python_object(
for file_info in payload.file_info
]

# If we have both a component override and a storage class override, the
# component override has to be applied first. DatasetRef cares because it
# is checking compatibility of the storage class with its DatasetType.
if component is not None:
ref = ref.makeComponentRef(component)
if storageClass is not None:
ref = ref.overrideStorageClass(storageClass)

datastore_file_info = generate_datastore_get_information(
fileLocations,
ref=ref,
Expand Down
76 changes: 76 additions & 0 deletions python/lsst/daf/butler/remote_butler/_collection_args.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import re
from typing import cast

from ..registry import CollectionArgType
from ..registry.wildcards import CollectionWildcard
from .server_models import CollectionList


def convert_collection_arg_to_glob_string_list(arg: CollectionArgType) -> CollectionList:
    """Convert the collections argument used by many Butler/registry methods to
    a format suitable for sending to Butler server.

    Parameters
    ----------
    arg : `CollectionArgType`
        Collection search pattern in many possible formats.

    Returns
    -------
    glob_list : `CollectionList`
        Collection search patterns normalized to a list of glob strings.

    Raises
    ------
    TypeError
        If the search pattern provided by the user cannot be converted to a
        glob string.
    """
    # ``...`` is the registry-wide convention for "search all collections";
    # it maps to a single match-everything glob.
    if arg is ...:
        return CollectionList(["*"])
    if isinstance(arg, str):
        return CollectionList([arg])
    if isinstance(arg, re.Pattern):
        raise TypeError("RemoteButler does not support regular expressions as search patterns")
    if isinstance(arg, CollectionWildcard):
        # In theory this could work, but CollectionWildcard eagerly converts
        # any glob strings to regexes.  So it would need some rework to
        # preserve the original string inputs.  CollectionWildcard has likely
        # never been used in the wild by an end-user so this probably isn't
        # worth figuring out.
        raise TypeError("RemoteButler does not accept CollectionWildcard instances as search patterns")
    # Anything else must be an iterable containing only plain strings.
    patterns = list(arg)
    if not all(isinstance(pattern, str) for pattern in patterns):
        raise TypeError("RemoteButler only accepts strings and lists of strings as search patterns")
    return CollectionList(cast(list[str], patterns))
48 changes: 43 additions & 5 deletions python/lsst/daf/butler/remote_butler/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,17 @@
DimensionRecord,
DimensionUniverse,
)
from ..registry import CollectionArgType, CollectionSummary, CollectionType, Registry, RegistryDefaults
from ..registry import (
CollectionArgType,
CollectionSummary,
CollectionType,
DatasetTypeError,
Registry,
RegistryDefaults,
)
from ..registry.queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
from ..remote_butler import RemoteButler
from ._collection_args import convert_collection_arg_to_glob_string_list


class RemoteButlerRegistry(Registry):
Expand Down Expand Up @@ -146,10 +154,29 @@ def findDataset(
datastore_records: bool = False,
**kwargs: Any,
) -> DatasetRef | None:
# There is an implementation of find_dataset on RemoteButler, but the
# definition of the collections parameter is incompatible and timespans
# aren't supported yet.
raise NotImplementedError()
# Components are supported by Butler.find_dataset, but are required to
# raise an exception in registry.findDataset (see
# RegistryTests.testComponentLookups). Apparently the registry version
# used to support components, but at some point a decision was made to
# draw a hard architectural boundary between registry and datastore so
# that registry is no longer allowed to know about components.
if _is_component_dataset_type(datasetType):
raise DatasetTypeError(
"Component dataset types are not supported;"
" use DatasetRef or DatasetType methods to obtain components from parents instead."
)

if collections is not None:
collections = convert_collection_arg_to_glob_string_list(collections)

return self._butler.find_dataset(
datasetType,
dataId,
collections=collections,
timespan=timespan,
datastore_records=datastore_records,
**kwargs,
)

def insertDatasets(
self,
Expand Down Expand Up @@ -310,3 +337,14 @@ def storageClasses(self) -> StorageClassFactory:
@storageClasses.setter
def storageClasses(self, value: StorageClassFactory) -> None:
raise NotImplementedError()


def _is_component_dataset_type(dataset_type: DatasetType | str) -> bool:
    """Return true if the given dataset type refers to a component."""
    if isinstance(dataset_type, str):
        # Split the name into (parent, component); a non-None component
        # portion means this names a component dataset type.
        _, component_name = DatasetType.splitDatasetTypeName(dataset_type)
        return component_name is not None
    if isinstance(dataset_type, DatasetType):
        return dataset_type.component() is not None
    raise TypeError(f"Expected DatasetType or str, got {dataset_type!r}")
Loading

0 comments on commit b6936e0

Please sign in to comment.