Skip to content

Commit

Permalink
Allow direct configuration of repository index
Browse files Browse the repository at this point in the history
Instead of requiring the repository index to be loaded from a file, allow direct configuration as a string in a new environment variable DAF_BUTLER_REPOSITORIES.

The file was problematic for services for a few reasons:
1. We don't have anywhere to host it -- currently it is stored in S3, but services using Butler server will not have the credentials to access it.
2. There are cache invalidation issues.  If it's configured directly, Phalanx can automatically restart services using it when the configuration changes.
3. Manually editing live configuration files is error-prone.  If this configuration lives in Phalanx, we are able to validate the configuration before deploying it.
  • Loading branch information
dhirving committed Jan 26, 2024
1 parent 6af44e7 commit df0fb97
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 42 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-42660.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The Butler repository index can now be configured by a new environment variable `DAF_BUTLER_REPOSITORIES`, which contains the configuration directly instead of requiring lookup via a URI.
90 changes: 49 additions & 41 deletions python/lsst/daf/butler/_butler_repo_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@
__all__ = ("ButlerRepoIndex",)

import os
from typing import ClassVar
from typing import Any, ClassVar

import yaml
from lsst.resources import ResourcePath
from pydantic import TypeAdapter, ValidationError

from ._config import Config
from ._utilities.thread_safe_cache import ThreadSafeCache
Expand All @@ -41,10 +43,14 @@
class ButlerRepoIndex:
"""Index of all known butler repositories.
The index of butler repositories is found by looking for a
configuration file at the URI pointed at by the environment
variable ``$DAF_BUTLER_REPOSITORY_INDEX``. The configuration file
is a simple dictionary lookup of the form:
The index of butler repositories can be configured in two ways:
1. By setting the environment variable ``DAF_BUTLER_REPOSITORY_INDEX`` to
the URI of a configuration file.
2. By setting the environment variable ``DAF_BUTLER_REPOSITORIES`` to the
contents of the configuration file as a string.
In either case, the configuration is a simple dictionary lookup of the
form:
.. code-block:: yaml
Expand All @@ -56,9 +62,15 @@ class ButlerRepoIndex:
"""

index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX"
"""The name of the environment variable to read to locate the index."""
"""The name of the environment variable containing the URI of the index
configuration file.
"""
repositories_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORIES"
"""The name of the environment variable containing the configuration
directly as a string.
"""

_cache: ClassVar[ThreadSafeCache[ResourcePath, Config]] = ThreadSafeCache()
_cache: ClassVar[ThreadSafeCache[str, Config]] = ThreadSafeCache()
"""Cache of indexes. In most scenarios only one index will be found
and the environment will not change. In tests this may not be true."""

Expand All @@ -67,7 +79,7 @@ class ButlerRepoIndex:
every read."""

@classmethod
def _read_repository_index(cls, index_uri: ResourcePath) -> Config:
def _read_repository_index(cls, index_uri: str) -> Config:
"""Read the repository index from the supplied URI.
Parameters
Expand Down Expand Up @@ -107,26 +119,7 @@ def _read_repository_index(cls, index_uri: ResourcePath) -> Config:
return repo_index

@classmethod
def _get_index_uri(cls) -> ResourcePath:
"""Find the URI to the repository index.
Returns
-------
index_uri : `lsst.resources.ResourcePath`
URI to the repository index.
Raises
------
KeyError
Raised if the location of the index could not be determined.
"""
index_uri = os.environ.get(cls.index_env_var)
if index_uri is None:
raise KeyError(f"No repository index defined in environment variable {cls.index_env_var}")
return ResourcePath(index_uri)

@classmethod
def _read_repository_index_from_environment(cls) -> Config:
def _read_repository_index_from_environment(cls) -> dict[str, str]:
"""Look in environment for index location and read it.
Returns
Expand All @@ -136,16 +129,29 @@ def _read_repository_index_from_environment(cls) -> Config:
"""
cls._most_recent_failure = ""
try:
index_uri = cls._get_index_uri()
except KeyError as e:
cls._most_recent_failure = str(e)
raise
try:
repo_index = cls._read_repository_index(index_uri)
index_uri = os.getenv(cls.index_env_var)
direct_configuration = os.getenv(cls.repositories_env_var)

if index_uri and direct_configuration:
raise RuntimeError(
f"Only one of the environment variables {cls.repositories_env_var} and"
f" {cls.index_env_var} should be set."
)

if direct_configuration:
return cls._validate_configuration(yaml.safe_load(direct_configuration))

if index_uri:
repo_index = cls._read_repository_index(index_uri)
return cls._validate_configuration(repo_index)

raise RuntimeError(
"No repository index defined. Neither of the environment variables"
f" {cls.repositories_env_var} or {cls.index_env_var} was set."
)
except Exception as e:
cls._most_recent_failure = str(e)
raise
return repo_index

@classmethod
def get_known_repos(cls) -> set[str]:
Expand Down Expand Up @@ -222,10 +228,12 @@ def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
if repo_uri is None:
if return_label:
return ResourcePath(label, forceAbsolute=False)
# This should not raise since it worked earlier.
try:
index_uri = str(cls._get_index_uri())
except KeyError:
index_uri = "<environment variable not defined>"
raise KeyError(f"Label '{label}' not known to repository index at {index_uri}")
raise KeyError(f"Label '{label}' not known to repository index")
return ResourcePath(repo_uri)

@classmethod
def _validate_configuration(cls, obj: Any) -> dict[str, str]:
try:
return TypeAdapter(dict[str, str]).validate_python(obj)
except ValidationError as e:
raise ValueError("Repository index not in expected format") from e
23 changes: 22 additions & 1 deletion tests/test_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,7 @@ def testConstructor(self) -> None:
# Check that we can create Butler when the alias file is not found.
butler = Butler.from_config(self.tmpConfigFile, writeable=False)
self.assertIsInstance(butler, Butler)
with self.assertRaises(KeyError) as cm:
with self.assertRaises(RuntimeError) as cm:
# No environment variable set.
Butler.get_repo_uri("label")
self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
Expand All @@ -677,6 +677,27 @@ def testConstructor(self) -> None:
Butler.from_config("not_there")
self.assertEqual(Butler.get_known_repos(), set())

def testDafButlerRepositories(self):
with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORIES": "label: https://someuri.com"}):
self.assertEqual(str(Butler.get_repo_uri("label")), "https://someuri.com")

with unittest.mock.patch.dict(
os.environ,
{
"DAF_BUTLER_REPOSITORIES": "label: https://someuri.com",
"DAF_BUTLER_REPOSITORY_INDEX": "https://someuri.com",
},
):
with self.assertRaisesRegex(RuntimeError, "Only one of the environment variables"):
Butler.get_repo_uri("label")

with unittest.mock.patch.dict(
os.environ,
{"DAF_BUTLER_REPOSITORIES": "invalid"},
):
with self.assertRaisesRegex(ValueError, "Repository index not in expected format"):
Butler.get_repo_uri("label")

def testBasicPutGet(self) -> None:
storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
self.runPutGetTest(storageClass, "test_metric")
Expand Down

0 comments on commit df0fb97

Please sign in to comment.