diff --git a/doc/changes/DM-42660.feature.md b/doc/changes/DM-42660.feature.md new file mode 100644 index 0000000000..6ea5dee2dd --- /dev/null +++ b/doc/changes/DM-42660.feature.md @@ -0,0 +1 @@ +The Butler repository index can now be configured by a new environment variable `DAF_BUTLER_REPOSITORIES`, which contains the configuration directly instead of requiring lookup via a URI. diff --git a/python/lsst/daf/butler/_butler_repo_index.py b/python/lsst/daf/butler/_butler_repo_index.py index 7eda8905e1..0bff390a8c 100644 --- a/python/lsst/daf/butler/_butler_repo_index.py +++ b/python/lsst/daf/butler/_butler_repo_index.py @@ -30,9 +30,11 @@ __all__ = ("ButlerRepoIndex",) import os -from typing import ClassVar +from typing import Any, ClassVar +import yaml from lsst.resources import ResourcePath +from pydantic import TypeAdapter, ValidationError from ._config import Config from ._utilities.thread_safe_cache import ThreadSafeCache @@ -41,10 +43,14 @@ class ButlerRepoIndex: """Index of all known butler repositories. - The index of butler repositories is found by looking for a - configuration file at the URI pointed at by the environment - variable ``$DAF_BUTLER_REPOSITORY_INDEX``. The configuration file - is a simple dictionary lookup of the form: + The index of butler repositories can be configured in two ways: + 1. By setting the environment variable ``DAF_BUTLER_REPOSITORY_INDEX`` to + the URI of a configuration file. + 2. By setting the environment variable ``DAF_BUTLER_REPOSITORIES`` to the + contents of the configuration file as a string. + + In either case, the configuration is a simple dictionary lookup of the + form: .. code-block:: yaml @@ -56,9 +62,15 @@ class ButlerRepoIndex: """ index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX" - """The name of the environment variable to read to locate the index.""" + """The name of the environment variable containing the URI of the index + configuration file. + """ + repositories_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORIES" + """The name of the environment variable containing the configuration + directly as a string. + """ - _cache: ClassVar[ThreadSafeCache[ResourcePath, Config]] = ThreadSafeCache() + _cache: ClassVar[ThreadSafeCache[str, Config]] = ThreadSafeCache() """Cache of indexes. In most scenarios only one index will be found and the environment will not change. In tests this may not be true.""" @@ -67,7 +79,7 @@ class ButlerRepoIndex: every read.""" @classmethod - def _read_repository_index(cls, index_uri: ResourcePath) -> Config: + def _read_repository_index(cls, index_uri: str) -> Config: """Read the repository index from the supplied URI. Parameters @@ -107,26 +119,7 @@ def _read_repository_index(cls, index_uri: ResourcePath) -> Config: return repo_index @classmethod - def _get_index_uri(cls) -> ResourcePath: - """Find the URI to the repository index. - - Returns - ------- - index_uri : `lsst.resources.ResourcePath` - URI to the repository index. - - Raises - ------ - KeyError - Raised if the location of the index could not be determined. - """ - index_uri = os.environ.get(cls.index_env_var) - if index_uri is None: - raise KeyError(f"No repository index defined in environment variable {cls.index_env_var}") - return ResourcePath(index_uri) - - @classmethod - def _read_repository_index_from_environment(cls) -> Config: + def _read_repository_index_from_environment(cls) -> dict[str, str]: """Look in environment for index location and read it. Returns @@ -136,16 +129,29 @@ def _read_repository_index_from_environment(cls) -> Config: """ cls._most_recent_failure = "" try: - index_uri = cls._get_index_uri() - except KeyError as e: - cls._most_recent_failure = str(e) - raise - try: - repo_index = cls._read_repository_index(index_uri) + index_uri = os.getenv(cls.index_env_var) + direct_configuration = os.getenv(cls.repositories_env_var) + + if index_uri and direct_configuration: + raise RuntimeError( + f"Only one of the environment variables {cls.repositories_env_var} and" + f" {cls.index_env_var} should be set." + ) + + if direct_configuration: + return cls._validate_configuration(yaml.safe_load(direct_configuration)) + + if index_uri: + repo_index = cls._read_repository_index(index_uri) + return cls._validate_configuration(repo_index) + + raise RuntimeError( + "No repository index defined. Neither of the environment variables" + f" {cls.repositories_env_var} or {cls.index_env_var} was set." + ) except Exception as e: cls._most_recent_failure = str(e) raise - return repo_index @classmethod def get_known_repos(cls) -> set[str]: @@ -222,10 +228,12 @@ def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: if repo_uri is None: if return_label: return ResourcePath(label, forceAbsolute=False) - # This should not raise since it worked earlier. - try: - index_uri = str(cls._get_index_uri()) - except KeyError: - index_uri = "" - raise KeyError(f"Label '{label}' not known to repository index at {index_uri}") + raise KeyError(f"Label '{label}' not known to repository index") return ResourcePath(repo_uri) + + @classmethod + def _validate_configuration(cls, obj: Any) -> dict[str, str]: + try: + return TypeAdapter(dict[str, str]).validate_python(obj) + except ValidationError as e: + raise ValueError("Repository index not in expected format") from e diff --git a/tests/test_butler.py b/tests/test_butler.py index 59a5f7b2c6..aab2a68889 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -667,7 +667,7 @@ def testConstructor(self) -> None: # Check that we can create Butler when the alias file is not found. butler = Butler.from_config(self.tmpConfigFile, writeable=False) self.assertIsInstance(butler, Butler) - with self.assertRaises(KeyError) as cm: + with self.assertRaises(RuntimeError) as cm: # No environment variable set. Butler.get_repo_uri("label") self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False)) @@ -677,6 +677,27 @@ def testConstructor(self) -> None: Butler.from_config("not_there") self.assertEqual(Butler.get_known_repos(), set()) + def testDafButlerRepositories(self): + with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORIES": "label: https://someuri.com"}): + self.assertEqual(str(Butler.get_repo_uri("label")), "https://someuri.com") + + with unittest.mock.patch.dict( + os.environ, + { + "DAF_BUTLER_REPOSITORIES": "label: https://someuri.com", + "DAF_BUTLER_REPOSITORY_INDEX": "https://someuri.com", + }, + ): + with self.assertRaisesRegex(RuntimeError, "Only one of the environment variables"): + Butler.get_repo_uri("label") + + with unittest.mock.patch.dict( + os.environ, + {"DAF_BUTLER_REPOSITORIES": "invalid"}, + ): + with self.assertRaisesRegex(ValueError, "Repository index not in expected format"): + Butler.get_repo_uri("label") + def testBasicPutGet(self) -> None: storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") self.runPutGetTest(storageClass, "test_metric")