Skip to content

Commit

Permalink
Allow hosting multiple repos from one server
Browse files Browse the repository at this point in the history
In most environments, users will access multiple Butler repositories (multiple data releases, prompt processing, etc.).  To avoid the need to instantiate a separate service for every repository, allow multiple repositories to be hosted from a single server.

This uses the same DAF_BUTLER_REPOSITORY_INDEX configuration system as DirectButler.
  • Loading branch information
dhirving committed Jan 3, 2024
1 parent 2d24a30 commit d22addd
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 97 deletions.
3 changes: 2 additions & 1 deletion compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ services:
ports:
- "8080:8080"
environment:
BUTLER_SERVER_CONFIG_URI: "/butler_root"
DAF_BUTLER_REPOSITORY_INDEX: "/butler_config/repository_index.yaml"
volumes:
- ../ci_hsc_gen3/DATA:/butler_root
- ./docker/compose_files:/butler_config
1 change: 1 addition & 0 deletions docker/compose_files/repository_index.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hsc_gen3: /butler_root
50 changes: 0 additions & 50 deletions python/lsst/daf/butler/remote_butler/server/_config.py

This file was deleted.

38 changes: 24 additions & 14 deletions python/lsst/daf/butler/remote_butler/server/_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,34 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from functools import cache
from typing import Annotated

from lsst.daf.butler import Butler
from lsst.daf.butler.direct_butler import DirectButler
from fastapi import Depends
from lsst.daf.butler import LabeledButlerFactory

from ._config import get_config_from_env
from ._factory import Factory

_butler_factory = LabeledButlerFactory()

@cache
def _make_global_butler() -> DirectButler:
config = get_config_from_env()
butler = Butler.from_config(config.config_uri)
if not isinstance(butler, DirectButler):
raise TypeError("Server can only use a DirectButler")
return butler

async def butler_factory_dependency() -> LabeledButlerFactory:
"""Return a global LabeledButlerFactory instance. This will be used to
construct internal DirectButler instances for interacting with the Butler
repositories we are serving.
"""
return _butler_factory

Check warning on line 43 in python/lsst/daf/butler/remote_butler/server/_dependencies.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/server/_dependencies.py#L43

Added line #L43 was not covered by tests

def factory_dependency() -> Factory:
"""Return factory dependency for injection into FastAPI."""
return Factory(butler=_make_global_butler())

async def factory_dependency(
repository: str, butler_factory: Annotated[LabeledButlerFactory, Depends(butler_factory_dependency)]
) -> Factory:
"""Return Factory object for injection into FastAPI.
Parameters
----------
repository : `str`
Label of the repository for lookup from the repository index.
butler_factory : `LabeledButlerFactory`
Factory for instantiating DirectButlers.
"""
return Factory(butler_factory=butler_factory, repository=repository)
20 changes: 14 additions & 6 deletions python/lsst/daf/butler/remote_butler/server/_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,30 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from lsst.daf.butler import LabeledButlerFactory
from lsst.daf.butler.direct_butler import DirectButler

__all__ = ("Factory",)


class Factory:
"""Class to provide a cached Butler instance.
"""Class for instantiating per-request dependencies, following the pattern
in `SQR-072 <https://sqr-072.lsst.io/>`_.
Parameters
----------
butler : `DirectButler`
Butler to use.
repository : `str`
The label of the Butler repository requested by the user.
butler_factory : `LabeledButlerFactory`
Factory used to instantiate Butler instances.
"""

def __init__(self, *, butler: DirectButler):
self._butler = butler
def __init__(self, *, repository: str, butler_factory: LabeledButlerFactory):
self._repository = repository
self._butler_factory = butler_factory

def create_butler(self) -> DirectButler:
return self._butler
butler = self._butler_factory.create_butler(label=self._repository, access_token=None)
if not isinstance(butler, DirectButler):
raise TypeError("Server can only use a DirectButler")

Check warning on line 53 in python/lsst/daf/butler/remote_butler/server/_factory.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/server/_factory.py#L53

Added line #L53 was not covered by tests
return butler
7 changes: 6 additions & 1 deletion python/lsst/daf/butler/remote_butler/server/_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,12 @@

app = FastAPI()
app.add_middleware(GZipMiddleware, minimum_size=1000)
app.include_router(external_router, prefix=_DEFAULT_API_PATH)

# A single instance of the server can serve data from multiple Butler
# repositories. This 'repository' path placeholder is consumed by
# factory_dependency().
repository_placeholder = "{repository}"
app.include_router(external_router, prefix=f"{_DEFAULT_API_PATH}/repo/{repository_placeholder}")


@app.exception_handler(MissingDatasetTypeError)
Expand Down
41 changes: 18 additions & 23 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
from fastapi.testclient import TestClient
from lsst.daf.butler.remote_butler import RemoteButler, RemoteButlerFactory
from lsst.daf.butler.remote_butler._authentication import _EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY
from lsst.daf.butler.remote_butler.server import Factory, app
from lsst.daf.butler.remote_butler.server._dependencies import factory_dependency
from lsst.daf.butler.remote_butler.server import app
from lsst.daf.butler.remote_butler.server._dependencies import butler_factory_dependency
from lsst.resources.s3utils import clean_test_environment_for_s3, getS3Client
from moto import mock_s3
except ImportError:
Expand Down Expand Up @@ -70,6 +70,8 @@

TESTDIR = os.path.abspath(os.path.dirname(__file__))

TEST_REPOSITORY_NAME = "testrepo"


def _make_test_client(app, raise_server_exceptions=True):
client = TestClient(app, raise_server_exceptions=raise_server_exceptions)
Expand All @@ -80,7 +82,7 @@ def _make_remote_butler(http_client, *, collections: str | None = None):
options = None
if collections is not None:
options = ButlerInstanceOptions(collections=collections)
factory = RemoteButlerFactory("https://test.example/api/butler", http_client)
factory = RemoteButlerFactory(f"https://test.example/api/butler/repo/{TEST_REPOSITORY_NAME}", http_client)
return factory.create_butler_for_access_token("fake-access-token", butler_options=options)


Expand Down Expand Up @@ -115,12 +117,9 @@ def setUpClass(cls):
cls.simple_dataset_ref = _create_simple_dataset(cls.repo.butler)

# Override the server's Butler initialization to point at our test repo
server_butler = Butler.from_config(cls.root, writeable=True)

def create_factory_dependency():
return Factory(butler=server_butler)
server_butler_factory = LabeledButlerFactory({TEST_REPOSITORY_NAME: cls.root})

app.dependency_overrides[factory_dependency] = create_factory_dependency
app.dependency_overrides[butler_factory_dependency] = lambda: server_butler_factory

# Set up the RemoteButler that will connect to the server
cls.client = _make_test_client(app)
Expand All @@ -140,29 +139,23 @@ def create_factory_dependency():
# Populate the test server.
# The DatastoreMock is required because the datasets referenced in
# these imports do not point at real files.
DatastoreMock.apply(server_butler)
server_butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
server_butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
DatastoreMock.apply(cls.repo.butler)
cls.repo.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
cls.repo.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))

@classmethod
def tearDownClass(cls):
del app.dependency_overrides[factory_dependency]
del app.dependency_overrides[butler_factory_dependency]
removeTestTempDir(cls.root)

def test_health_check(self):
response = self.client.get("/")
self.assertEqual(response.status_code, 200)
self.assertEqual(response.json()["name"], "butler")

def test_simple(self):
response = self.client.get("/api/butler/v1/universe")
self.assertEqual(response.status_code, 200)
self.assertIn("namespace", response.json())

def test_remote_butler(self):
def test_dimension_universe(self):
universe = self.butler.dimensions
self.assertEqual(universe.namespace, "daf_butler")
self.assertFalse(self.butler.isWriteable())

def test_get_dataset_type(self):
bias_type = self.butler.get_dataset_type("bias")
Expand Down Expand Up @@ -243,22 +236,24 @@ def test_instantiate_via_butler_http_search(self):
def override_read(http_resource_path):
return self.client.get(http_resource_path.geturl()).content

server_url = f"https://test.example/api/butler/repo/{TEST_REPOSITORY_NAME}/"

with patch.object(HttpResourcePath, "read", override_read):
# Add access key to environment variables. RemoteButler
# instantiation will throw an error if access key is not
# available.
with mock_env({_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY: "fake-access-token"}):
butler = Butler(
"https://test.example/api/butler",
server_url,
collections=["collection1", "collection2"],
run="collection2",
)
butler_factory = LabeledButlerFactory({"server": "https://test.example/api/butler"})
butler_factory = LabeledButlerFactory({"server": server_url})
factory_created_butler = butler_factory.create_butler(label="server", access_token="token")
self.assertIsInstance(butler, RemoteButler)
self.assertIsInstance(factory_created_butler, RemoteButler)
self.assertEqual(butler._server_url, "https://test.example/api/butler/")
self.assertEqual(factory_created_butler._server_url, "https://test.example/api/butler/")
self.assertEqual(butler._server_url, server_url)
self.assertEqual(factory_created_butler._server_url, server_url)

self.assertEqual(butler.collections, ("collection1", "collection2"))
self.assertEqual(butler.run, "collection2")
Expand Down
11 changes: 9 additions & 2 deletions tests_integration/test_docker_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,28 @@ def _run_server_docker():

port = 8080
butler_root = "/butler_root"

# Set up a repository index file to be read in by the server
index_filename = "repo_index.yaml"
repo_name = "testserver"
with open(os.path.join(temp_dir, index_filename), "wb") as fh:
fh.write(f"{repo_name}: {butler_root}\n".encode())

docker_image = os.getenv("BUTLER_SERVER_DOCKER_IMAGE")
if not docker_image:
raise Exception("BUTLER_SERVER_DOCKER_IMAGE must be set")
container = (
DockerContainer(docker_image)
.with_exposed_ports(port)
.with_env("BUTLER_SERVER_CONFIG_URI", butler_root)
.with_env("DAF_BUTLER_REPOSITORY_INDEX", f"{butler_root}/{index_filename}")
.with_volume_mapping(temp_dir, butler_root, "rw")
)

with container:
server_host = container.get_container_host_ip()
server_port = container.get_exposed_port(port)
server_url = f"http://{server_host}:{server_port}"
full_server_url = f"{server_url}/api/butler"
full_server_url = f"{server_url}/api/butler/repo/{repo_name}"
try:
_wait_for_startup(server_url)
yield full_server_url
Expand Down

0 comments on commit d22addd

Please sign in to comment.