From cb64a78eea5d0bc4de54059be5f6a82de06769c8 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 22 Nov 2024 16:53:33 +0100 Subject: [PATCH 01/21] Move telemetry code to own subpackage --- baybe/telemetry/__init__.py | 22 ++++++++++++++++++++++ baybe/{ => telemetry}/telemetry.py | 5 +---- 2 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 baybe/telemetry/__init__.py rename baybe/{ => telemetry}/telemetry.py (98%) diff --git a/baybe/telemetry/__init__.py b/baybe/telemetry/__init__.py new file mode 100644 index 000000000..2df5e21b6 --- /dev/null +++ b/baybe/telemetry/__init__.py @@ -0,0 +1,22 @@ +"""Telemetry functionality for BayBE. + +For more details, see https://emdgroup.github.io/baybe/stable/userguide/envvars.html#telemetry +""" + +from baybe.telemetry.telemetry import ( + TELEM_LABELS, + VARNAME_TELEMETRY_ENABLED, + VARNAME_TELEMETRY_HOSTNAME, + VARNAME_TELEMETRY_USERNAME, + telemetry_record_recommended_measurement_percentage, + telemetry_record_value, +) + +__all__ = [ + "TELEM_LABELS", + "VARNAME_TELEMETRY_ENABLED", + "VARNAME_TELEMETRY_HOSTNAME", + "VARNAME_TELEMETRY_USERNAME", + "telemetry_record_recommended_measurement_percentage", + "telemetry_record_value", +] diff --git a/baybe/telemetry.py b/baybe/telemetry/telemetry.py similarity index 98% rename from baybe/telemetry.py rename to baybe/telemetry/telemetry.py index f3879cef4..6ba12fea1 100644 --- a/baybe/telemetry.py +++ b/baybe/telemetry/telemetry.py @@ -1,7 +1,4 @@ -"""Telemetry functionality for BayBE. - -For more details, see https://emdgroup.github.io/baybe/stable/userguide/envvars.html#telemetry -""" +"""Telemetry functionality for BayBE.""" import getpass import hashlib From cc1a6e2ab09a7da46c6c154b7c7f1aec720290bb Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 22 Nov 2024 15:40:00 +0100 Subject: [PATCH 02/21] Use existing is_enable utility --- baybe/telemetry/telemetry.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/baybe/telemetry/telemetry.py b/baybe/telemetry/telemetry.py index 6ba12fea1..3c95fc7fe 100644 --- a/baybe/telemetry/telemetry.py +++ b/baybe/telemetry/telemetry.py @@ -43,6 +43,17 @@ "NAKED_INITIAL_MEASUREMENTS": "count_naked-initial-measurements-added", } + +def is_enabled() -> bool: + """Tell whether telemetry currently is enabled. + + Telemetry can be disabled by setting the respective environment variable. + """ + return strtobool( + os.environ.get(VARNAME_TELEMETRY_ENABLED, DEFAULT_TELEMETRY_ENABLED) + ) + + # Attempt telemetry import try: from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( @@ -55,7 +66,7 @@ except ImportError: # Failed telemetry install/import should not fail baybe, so telemetry is being # disabled in that case - if strtobool(os.environ.get(VARNAME_TELEMETRY_ENABLED, DEFAULT_TELEMETRY_ENABLED)): + if is_enabled(): warnings.warn( "Opentelemetry could not be imported, potentially it is not installed. " "Disabling baybe telemetry.", @@ -64,16 +75,6 @@ os.environ[VARNAME_TELEMETRY_ENABLED] = "false" -def is_enabled() -> bool: - """Tell whether telemetry currently is enabled. - - Telemetry can be disabled by setting the respective environment variable. - """ - return strtobool( - os.environ.get(VARNAME_TELEMETRY_ENABLED, DEFAULT_TELEMETRY_ENABLED) - ) - - # Attempt telemetry initialization if is_enabled(): # Assign default user and machine name From e2cf05f76a8b6117674f9b1f92656973e42d2c6b Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 22 Nov 2024 16:45:20 +0100 Subject: [PATCH 03/21] Remove dead code --- baybe/telemetry/telemetry.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/baybe/telemetry/telemetry.py b/baybe/telemetry/telemetry.py index 3c95fc7fe..2be327b4f 100644 --- a/baybe/telemetry/telemetry.py +++ b/baybe/telemetry/telemetry.py @@ -6,7 +6,6 @@ import socket import warnings from collections.abc import Sequence -from urllib.parse import urlparse import pandas as pd @@ -98,26 +97,6 @@ def is_enabled() -> bool: # Test endpoint URL try: - # Parse endpoint URL - _endpoint_url_parsed = urlparse(_endpoint_url) - _endpoint_hostname = _endpoint_url_parsed.hostname - _endpoint_port = _endpoint_url_parsed.port if _endpoint_url_parsed.port else 80 - try: - _TIMEOUT_S = float( - os.environ.get( - VARNAME_TELEMETRY_VPN_CHECK_TIMEOUT, - DEFAULT_TELEMETRY_VPN_CHECK_TIMEOUT, - ) - ) - except (ValueError, TypeError): - warnings.warn( - f"WARNING: Value passed for environment variable " - f"{VARNAME_TELEMETRY_VPN_CHECK_TIMEOUT} is not a valid floating point " - f"number. Using default of {DEFAULT_TELEMETRY_VPN_CHECK_TIMEOUT}.", - UserWarning, - ) - _TIMEOUT_S = float(DEFAULT_TELEMETRY_VPN_CHECK_TIMEOUT) - # Send a test request. If there is no internet connection or a firewall is # present this will throw an error and telemetry will be deactivated. if strtobool( From d62b087bbf16e6a8fc04e1672747f6b0a1475037 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Fri, 22 Nov 2024 17:17:17 +0100 Subject: [PATCH 04/21] Split code into public and private parts --- baybe/telemetry/__init__.py | 2 +- .../telemetry/{telemetry.py => _telemetry.py} | 108 ++--------------- baybe/telemetry/api.py | 112 ++++++++++++++++++ 3 files changed, 120 insertions(+), 102 deletions(-) rename baybe/telemetry/{telemetry.py => _telemetry.py} (55%) create mode 100644 baybe/telemetry/api.py diff --git a/baybe/telemetry/__init__.py b/baybe/telemetry/__init__.py index 2df5e21b6..d95e01030 100644 --- a/baybe/telemetry/__init__.py +++ b/baybe/telemetry/__init__.py @@ -3,7 +3,7 @@ For more details, see https://emdgroup.github.io/baybe/stable/userguide/envvars.html#telemetry """ -from baybe.telemetry.telemetry import ( +from baybe.telemetry.api import ( TELEM_LABELS, VARNAME_TELEMETRY_ENABLED, VARNAME_TELEMETRY_HOSTNAME, diff --git a/baybe/telemetry/telemetry.py b/baybe/telemetry/_telemetry.py similarity index 55% rename from baybe/telemetry/telemetry.py rename to baybe/telemetry/_telemetry.py index 2be327b4f..9911697f2 100644 --- a/baybe/telemetry/telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -1,28 +1,25 @@ -"""Telemetry functionality for BayBE.""" +"""Internal telemetry logic.""" import getpass import hashlib import os import socket import warnings -from collections.abc import Sequence -import pandas as pd - -from baybe.parameters.base import Parameter +from baybe.telemetry.api import ( + VARNAME_TELEMETRY_ENABLED, + VARNAME_TELEMETRY_HOSTNAME, + VARNAME_TELEMETRY_USERNAME, + is_enabled, +) from baybe.utils.boolean import strtobool -from baybe.utils.dataframe import fuzzy_row_match # Telemetry environment variable names -VARNAME_TELEMETRY_ENABLED = "BAYBE_TELEMETRY_ENABLED" VARNAME_TELEMETRY_ENDPOINT = "BAYBE_TELEMETRY_ENDPOINT" VARNAME_TELEMETRY_VPN_CHECK = "BAYBE_TELEMETRY_VPN_CHECK" VARNAME_TELEMETRY_VPN_CHECK_TIMEOUT = "BAYBE_TELEMETRY_VPN_CHECK_TIMEOUT" -VARNAME_TELEMETRY_USERNAME = "BAYBE_TELEMETRY_USERNAME" -VARNAME_TELEMETRY_HOSTNAME = "BAYBE_TELEMETRY_HOSTNAME" # Telemetry settings defaults -DEFAULT_TELEMETRY_ENABLED = "true" DEFAULT_TELEMETRY_ENDPOINT = ( "https://public.telemetry.baybe.p.uptimize.merckgroup.com:4317" ) @@ -30,29 +27,6 @@ DEFAULT_TELEMETRY_VPN_CHECK_TIMEOUT = "0.5" -# Telemetry labels for metrics -TELEM_LABELS = { - "RECOMMENDED_MEASUREMENTS_PERCENTAGE": "value_recommended-measurements-percentage", - "BATCH_SIZE": "value_batch-size", - "COUNT_ADD_RESULTS": "count_add-results", - "COUNT_RECOMMEND": "count_recommend", - "NUM_PARAMETERS": "value_num-parameters", - "NUM_CONSTRAINTS": "value_num-constraints", - "COUNT_SEARCHSPACE_CREATION": "count_searchspace-created", - "NAKED_INITIAL_MEASUREMENTS": "count_naked-initial-measurements-added", -} - - -def is_enabled() -> bool: - """Tell whether telemetry currently is enabled. - - Telemetry can be disabled by setting the respective environment variable. - """ - return strtobool( - os.environ.get(VARNAME_TELEMETRY_ENABLED, DEFAULT_TELEMETRY_ENABLED) - ) - - # Attempt telemetry import try: from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( @@ -158,22 +132,6 @@ def get_user_details() -> dict[str, str]: return {"host": hostname_hash, "user": username_hash, "version": __version__} -def telemetry_record_value(instrument_name: str, value: int | float) -> None: - """Transmit a given value under a given label to the telemetry backend. - - The values are recorded as histograms, i.e. the info about record time and sample - size is also available. This can be used to count function calls (record the - value 1) or statistics about any variable (record its value). Due to serialization - limitations only certain data types of value are allowed. - - Args: - instrument_name: The label under which this statistic is logged. - value: The value of the statistic to be logged. - """ - if is_enabled(): - _submit_scalar_value(instrument_name, value) - - def _submit_scalar_value(instrument_name: str, value: int | float) -> None: """See :func:`baybe.telemetry.telemetry_record_value`.""" if instrument_name in _instruments: @@ -185,55 +143,3 @@ def _submit_scalar_value(instrument_name: str, value: int | float) -> None: ) _instruments[instrument_name] = histogram histogram.record(value, get_user_details()) - - -def telemetry_record_recommended_measurement_percentage( - cached_recommendation: pd.DataFrame, - measurements: pd.DataFrame, - parameters: Sequence[Parameter], - numerical_measurements_must_be_within_tolerance: bool, -) -> None: - """Submit the percentage of added measurements. - - More precisely, submit the percentage of added measurements that correspond to - previously recommended ones (called cached recommendations). - - The matching is performed via fuzzy row matching, using the utils function - :func:`baybe.utils.dataframe.fuzzy_row_match`. The calculation is only performed - if telemetry is enabled. If no cached recommendation exists the percentage is not - calculated and instead a different event ('naked initial measurement added') is - recorded. - - Args: - cached_recommendation: The cached recommendations. - measurements: The measurements which are supposed to be checked against cached - recommendations. - parameters: The list of parameters spanning the entire search space. - numerical_measurements_must_be_within_tolerance: If ``True``, numerical - parameter entries are matched with the reference elements only if there is - a match within the parameter tolerance. If ``False``, the closest match - is considered, irrespective of the distance. - """ - if is_enabled(): - if len(cached_recommendation) > 0: - recommended_measurements_percentage = ( - len( - fuzzy_row_match( - cached_recommendation, - measurements, - parameters, - numerical_measurements_must_be_within_tolerance, - ) - ) - / len(cached_recommendation) - * 100.0 - ) - _submit_scalar_value( - TELEM_LABELS["RECOMMENDED_MEASUREMENTS_PERCENTAGE"], - recommended_measurements_percentage, - ) - else: - _submit_scalar_value( - TELEM_LABELS["NAKED_INITIAL_MEASUREMENTS"], - 1, - ) diff --git a/baybe/telemetry/api.py b/baybe/telemetry/api.py new file mode 100644 index 000000000..8c09ab423 --- /dev/null +++ b/baybe/telemetry/api.py @@ -0,0 +1,112 @@ +"""The telemetry API accessible from within BayBE code.""" + +import os +from collections.abc import Sequence + +import pandas as pd + +from baybe.parameters.base import Parameter +from baybe.utils.boolean import strtobool +from baybe.utils.dataframe import fuzzy_row_match + +# Telemetry labels for metrics +TELEM_LABELS = { + "RECOMMENDED_MEASUREMENTS_PERCENTAGE": "value_recommended-measurements-percentage", + "BATCH_SIZE": "value_batch-size", + "COUNT_ADD_RESULTS": "count_add-results", + "COUNT_RECOMMEND": "count_recommend", + "NUM_PARAMETERS": "value_num-parameters", + "NUM_CONSTRAINTS": "value_num-constraints", + "COUNT_SEARCHSPACE_CREATION": "count_searchspace-created", + "NAKED_INITIAL_MEASUREMENTS": "count_naked-initial-measurements-added", +} + +# Telemetry environment variable names +VARNAME_TELEMETRY_ENABLED = "BAYBE_TELEMETRY_ENABLED" +VARNAME_TELEMETRY_HOSTNAME = "BAYBE_TELEMETRY_HOSTNAME" +VARNAME_TELEMETRY_USERNAME = "BAYBE_TELEMETRY_USERNAME" + +# Telemetry settings defaults +DEFAULT_TELEMETRY_ENABLED = "true" + + +def is_enabled() -> bool: + """Tell whether telemetry currently is enabled. + + Telemetry can be disabled by setting the respective environment variable. + """ + return strtobool( + os.environ.get(VARNAME_TELEMETRY_ENABLED, DEFAULT_TELEMETRY_ENABLED) + ) + + +def telemetry_record_value(instrument_name: str, value: int | float) -> None: + """Transmit a given value under a given label to the telemetry backend. + + The values are recorded as histograms, i.e. the info about record time and sample + size is also available. This can be used to count function calls (record the + value 1) or statistics about any variable (record its value). Due to serialization + limitations only certain data types of value are allowed. + + Args: + instrument_name: The label under which this statistic is logged. + value: The value of the statistic to be logged. + """ + from baybe.telemetry._telemetry import _submit_scalar_value + + if is_enabled(): + _submit_scalar_value(instrument_name, value) + + +def telemetry_record_recommended_measurement_percentage( + cached_recommendation: pd.DataFrame, + measurements: pd.DataFrame, + parameters: Sequence[Parameter], + numerical_measurements_must_be_within_tolerance: bool, +) -> None: + """Submit the percentage of added measurements. + + More precisely, submit the percentage of added measurements that correspond to + previously recommended ones (called cached recommendations). + + The matching is performed via fuzzy row matching, using the utils function + :func:`baybe.utils.dataframe.fuzzy_row_match`. The calculation is only performed + if telemetry is enabled. If no cached recommendation exists the percentage is not + calculated and instead a different event ('naked initial measurement added') is + recorded. + + Args: + cached_recommendation: The cached recommendations. + measurements: The measurements which are supposed to be checked against cached + recommendations. + parameters: The list of parameters spanning the entire search space. + numerical_measurements_must_be_within_tolerance: If ``True``, numerical + parameter entries are matched with the reference elements only if there is + a match within the parameter tolerance. If ``False``, the closest match + is considered, irrespective of the distance. + """ + from baybe.telemetry._telemetry import _submit_scalar_value + + if is_enabled(): + if len(cached_recommendation) > 0: + recommended_measurements_percentage = ( + len( + fuzzy_row_match( + cached_recommendation, + measurements, + parameters, + numerical_measurements_must_be_within_tolerance, + ) + ) + / len(cached_recommendation) + * 100.0 + ) + _submit_scalar_value( + TELEM_LABELS["RECOMMENDED_MEASUREMENTS_PERCENTAGE"], + recommended_measurements_percentage, + ) + else: + _submit_scalar_value( + TELEM_LABELS["NAKED_INITIAL_MEASUREMENTS"], + 1, + ) From 9a1af1d284086855f96305ee045af1b0ca0a06d4 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 26 Nov 2024 14:48:19 +0100 Subject: [PATCH 05/21] Add class for lazy loading of telemetry objects --- baybe/telemetry/_telemetry.py | 117 +++++++++++++++++++++++----------- 1 file changed, 81 insertions(+), 36 deletions(-) diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index 9911697f2..2970bf164 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -1,10 +1,16 @@ """Internal telemetry logic.""" +from __future__ import annotations + import getpass import hashlib import os import socket import warnings +from typing import TYPE_CHECKING, Any + +from attrs import define, field, fields +from typing_extensions import override from baybe.telemetry.api import ( VARNAME_TELEMETRY_ENABLED, @@ -14,6 +20,13 @@ ) from baybe.utils.boolean import strtobool +if TYPE_CHECKING: + from opentelemetry.metrics import Histogram, Meter + from opentelemetry.sdk.metrics import MeterProvider + from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader + from opentelemetry.sdk.resources import Resource + + # Telemetry environment variable names VARNAME_TELEMETRY_ENDPOINT = "BAYBE_TELEMETRY_ENDPOINT" VARNAME_TELEMETRY_VPN_CHECK = "BAYBE_TELEMETRY_VPN_CHECK" @@ -27,26 +40,72 @@ DEFAULT_TELEMETRY_VPN_CHECK_TIMEOUT = "0.5" -# Attempt telemetry import -try: - from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( - OTLPMetricExporter, - ) - from opentelemetry.metrics import Histogram, get_meter, set_meter_provider - from opentelemetry.sdk.metrics import MeterProvider - from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader - from opentelemetry.sdk.resources import Resource -except ImportError: - # Failed telemetry install/import should not fail baybe, so telemetry is being - # disabled in that case - if is_enabled(): - warnings.warn( - "Opentelemetry could not be imported, potentially it is not installed. " - "Disabling baybe telemetry.", - UserWarning, +@define +class TelemetryTools: + _is_initialized: bool = False + """Boolean flag for lazy initialization.""" + + # Telemetry objects + instruments: dict[str, Histogram] = field(factory=dict) + resource: Resource | None = None + reader: PeriodicExportingMetricReader | None = None + provider: MeterProvider | None = None + meter: Meter | None = None + + @override + def __getattribute__(self, name: str, /) -> Any: + if name not in [ + (fields(TelemetryTools)).instruments.name, + self._lazy_initialize.__name__, + ]: + try: + self._lazy_initialize() + except Exception: + if is_enabled(): + warnings.warn( + "Opentelemetry could not be imported, potentially it is " + "not installed. Disabling BayBE telemetry.", + UserWarning, + ) + os.environ[VARNAME_TELEMETRY_ENABLED] = "false" + + return super().__getattribute__(name) + + def _lazy_initialize(self) -> None: + """Lazily initialize the telemetry objects upon first access.""" + if self._is_initialized: + return + + # Lazy imports + from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( + OTLPMetricExporter, ) - os.environ[VARNAME_TELEMETRY_ENABLED] = "false" + from opentelemetry.metrics import get_meter, set_meter_provider + from opentelemetry.sdk.metrics import MeterProvider + from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader + from opentelemetry.sdk.resources import Resource + # Initialize instruments + self.resource = Resource.create( + {"service.namespace": "BayBE", "service.name": "SDK"} + ) + self.reader = PeriodicExportingMetricReader( + exporter=OTLPMetricExporter( + endpoint=_endpoint_url, + insecure=True, + ) + ) + self.provider = MeterProvider( + resource=self.resource, metric_readers=[self.reader] + ) + set_meter_provider(self.provider) + self.meter = get_meter("aws-otel", "1.0") + + # Mark initialization as completed + self._is_initialized = True + + +tools = TelemetryTools() # Attempt telemetry initialization if is_enabled(): @@ -78,20 +137,6 @@ ): socket.gethostbyname("verkehrsnachrichten.merck.de") - # User has connectivity to the telemetry endpoint, so we initialize - _instruments: dict[str, Histogram] = {} - _resource = Resource.create( - {"service.namespace": "BayBE", "service.name": "SDK"} - ) - _reader = PeriodicExportingMetricReader( - exporter=OTLPMetricExporter( - endpoint=_endpoint_url, - insecure=True, - ) - ) - _provider = MeterProvider(resource=_resource, metric_readers=[_reader]) - set_meter_provider(_provider) - _meter = get_meter("aws-otel", "1.0") except Exception as ex: # Catching broad exception here and disabling telemetry in that case to avoid # any telemetry timeouts or interference for the user in case of unexpected @@ -134,12 +179,12 @@ def get_user_details() -> dict[str, str]: def _submit_scalar_value(instrument_name: str, value: int | float) -> None: """See :func:`baybe.telemetry.telemetry_record_value`.""" - if instrument_name in _instruments: - histogram = _instruments[instrument_name] + if instrument_name in tools.instruments: + histogram = tools.instruments[instrument_name] else: - histogram = _meter.create_histogram( + histogram = tools.meter.create_histogram( instrument_name, description=f"Histogram for instrument {instrument_name}", ) - _instruments[instrument_name] = histogram + tools.instruments[instrument_name] = histogram histogram.record(value, get_user_details()) From 077189709c97956cc37826b065d15bb0c9457a9f Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 26 Nov 2024 14:59:14 +0100 Subject: [PATCH 06/21] Move constants to top level --- baybe/telemetry/_telemetry.py | 43 ++++++++++++++++------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index 2970bf164..450ec6124 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -40,6 +40,23 @@ DEFAULT_TELEMETRY_VPN_CHECK_TIMEOUT = "0.5" +try: + DEFAULT_TELEMETRY_USERNAME = ( + hashlib.sha256(getpass.getuser().upper().encode()).hexdigest().upper()[:10] + ) +except ModuleNotFoundError: + # `getpass.getuser()`` does not work on Windows if all the environment variables + # it checks are empty. Since there is no way of inferring the username in this case, + # we use a fallback. + DEFAULT_TELEMETRY_USERNAME = "UNKNOWN" + +DEFAULT_TELEMETRY_HOSTNAME = ( + hashlib.sha256(socket.gethostname().encode()).hexdigest().upper()[:10] +) + +ENDPOINT_URL = os.environ.get(VARNAME_TELEMETRY_ENDPOINT, DEFAULT_TELEMETRY_ENDPOINT) + + @define class TelemetryTools: _is_initialized: bool = False @@ -91,7 +108,7 @@ def _lazy_initialize(self) -> None: ) self.reader = PeriodicExportingMetricReader( exporter=OTLPMetricExporter( - endpoint=_endpoint_url, + endpoint=ENDPOINT_URL, insecure=True, ) ) @@ -109,25 +126,6 @@ def _lazy_initialize(self) -> None: # Attempt telemetry initialization if is_enabled(): - # Assign default user and machine name - try: - DEFAULT_TELEMETRY_USERNAME = ( - hashlib.sha256(getpass.getuser().upper().encode()).hexdigest().upper()[:10] - ) - except ModuleNotFoundError: - # getpass.getuser() does not work on Windows if all the environment variables - # it checks are empty. Since then there is no way of inferring the username, we - # use UNKNOWN as fallback. - DEFAULT_TELEMETRY_USERNAME = "UNKNOWN" - - DEFAULT_TELEMETRY_HOSTNAME = ( - hashlib.sha256(socket.gethostname().encode()).hexdigest().upper()[:10] - ) - - _endpoint_url = os.environ.get( - VARNAME_TELEMETRY_ENDPOINT, DEFAULT_TELEMETRY_ENDPOINT - ) - # Test endpoint URL try: # Send a test request. If there is no internet connection or a firewall is @@ -146,15 +144,14 @@ def _lazy_initialize(self) -> None: # This warning is only printed for developers to make them aware of # potential issues warnings.warn( - f"WARNING: BayBE Telemetry endpoint {_endpoint_url} cannot be reached. " + f"WARNING: BayBE Telemetry endpoint {ENDPOINT_URL} cannot be reached. " "Disabling telemetry. The exception encountered was: " f"{type(ex).__name__}, {ex}", UserWarning, ) os.environ[VARNAME_TELEMETRY_ENABLED] = "false" else: - DEFAULT_TELEMETRY_USERNAME = "UNKNOWN" - DEFAULT_TELEMETRY_HOSTNAME = "UNKNOWN" + pass def get_user_details() -> dict[str, str]: From 417e0e4a6cfd46f2baf4cc7cc34ad67bbc5e1a0f Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 26 Nov 2024 15:36:02 +0100 Subject: [PATCH 07/21] Add transmission queue --- baybe/telemetry/_telemetry.py | 24 +++++++++++++++++++++++- baybe/telemetry/api.py | 23 +++++++++++------------ 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index 450ec6124..76c505cd2 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -7,6 +7,7 @@ import os import socket import warnings +from queue import Queue from typing import TYPE_CHECKING, Any from attrs import define, field, fields @@ -122,7 +123,28 @@ def _lazy_initialize(self) -> None: self._is_initialized = True +class CloseableQueue(Queue): + """A queue that can be shut down, ignoring incoming items thereafter.""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._closed = False + + def close(self): + """Remove all queue elements and prevent new ones from being added.""" + with self.mutex: + self._closed = True + self.queue.clear() + + @override + def put(self, item, block=True, timeout=None): + if self._closed: + return + super().put(item, block, timeout) + + tools = TelemetryTools() +transmission_queue = CloseableQueue() # Attempt telemetry initialization if is_enabled(): @@ -149,7 +171,7 @@ def _lazy_initialize(self) -> None: f"{type(ex).__name__}, {ex}", UserWarning, ) - os.environ[VARNAME_TELEMETRY_ENABLED] = "false" + transmission_queue.close() else: pass diff --git a/baybe/telemetry/api.py b/baybe/telemetry/api.py index 8c09ab423..586bd891e 100644 --- a/baybe/telemetry/api.py +++ b/baybe/telemetry/api.py @@ -52,10 +52,10 @@ def telemetry_record_value(instrument_name: str, value: int | float) -> None: instrument_name: The label under which this statistic is logged. value: The value of the statistic to be logged. """ - from baybe.telemetry._telemetry import _submit_scalar_value - if is_enabled(): - _submit_scalar_value(instrument_name, value) + from baybe.telemetry._telemetry import transmission_queue + + transmission_queue.put((instrument_name, value)) def telemetry_record_recommended_measurement_percentage( @@ -85,9 +85,9 @@ def telemetry_record_recommended_measurement_percentage( a match within the parameter tolerance. If ``False``, the closest match is considered, irrespective of the distance. """ - from baybe.telemetry._telemetry import _submit_scalar_value - if is_enabled(): + from baybe.telemetry._telemetry import transmission_queue + if len(cached_recommendation) > 0: recommended_measurements_percentage = ( len( @@ -101,12 +101,11 @@ def telemetry_record_recommended_measurement_percentage( / len(cached_recommendation) * 100.0 ) - _submit_scalar_value( - TELEM_LABELS["RECOMMENDED_MEASUREMENTS_PERCENTAGE"], - recommended_measurements_percentage, + transmission_queue.put( + ( + TELEM_LABELS["RECOMMENDED_MEASUREMENTS_PERCENTAGE"], + recommended_measurements_percentage, + ) ) else: - _submit_scalar_value( - TELEM_LABELS["NAKED_INITIAL_MEASUREMENTS"], - 1, - ) + transmission_queue.put((TELEM_LABELS["NAKED_INITIAL_MEASUREMENTS"], 1)) From 7b3b0a7b24a8f065d662ea2d450801c9554e4df5 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 26 Nov 2024 16:02:27 +0100 Subject: [PATCH 08/21] Extract connection test into function --- baybe/telemetry/_telemetry.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index 76c505cd2..a0a916744 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -39,8 +39,6 @@ ) DEFAULT_TELEMETRY_VPN_CHECK = "true" DEFAULT_TELEMETRY_VPN_CHECK_TIMEOUT = "0.5" - - try: DEFAULT_TELEMETRY_USERNAME = ( hashlib.sha256(getpass.getuser().upper().encode()).hexdigest().upper()[:10] @@ -50,12 +48,15 @@ # it checks are empty. Since there is no way of inferring the username in this case, # we use a fallback. DEFAULT_TELEMETRY_USERNAME = "UNKNOWN" - DEFAULT_TELEMETRY_HOSTNAME = ( hashlib.sha256(socket.gethostname().encode()).hexdigest().upper()[:10] ) +# Derived constants ENDPOINT_URL = os.environ.get(VARNAME_TELEMETRY_ENDPOINT, DEFAULT_TELEMETRY_ENDPOINT) +TELEMETRY_VPN_CHECK = strtobool( + os.environ.get(VARNAME_TELEMETRY_VPN_CHECK, DEFAULT_TELEMETRY_VPN_CHECK) +) @define @@ -143,19 +144,12 @@ def put(self, item, block=True, timeout=None): super().put(item, block, timeout) -tools = TelemetryTools() -transmission_queue = CloseableQueue() - -# Attempt telemetry initialization -if is_enabled(): - # Test endpoint URL +def test_connection() -> None: + """Close the transmission queue if the telemetry endpoint is unreachable.""" try: # Send a test request. If there is no internet connection or a firewall is # present this will throw an error and telemetry will be deactivated. - if strtobool( - os.environ.get(VARNAME_TELEMETRY_VPN_CHECK, DEFAULT_TELEMETRY_VPN_CHECK) - ): - socket.gethostbyname("verkehrsnachrichten.merck.de") + socket.gethostbyname("verkehrsnachrichten.merck.de") except Exception as ex: # Catching broad exception here and disabling telemetry in that case to avoid @@ -172,8 +166,6 @@ def put(self, item, block=True, timeout=None): UserWarning, ) transmission_queue.close() -else: - pass def get_user_details() -> dict[str, str]: @@ -207,3 +199,10 @@ def _submit_scalar_value(instrument_name: str, value: int | float) -> None: ) tools.instruments[instrument_name] = histogram histogram.record(value, get_user_details()) + + +tools = TelemetryTools() +transmission_queue = CloseableQueue() + +if is_enabled() and TELEMETRY_VPN_CHECK: + test_connection() From 895dd2c372a1d15dadbbeccea801dffc231b919a Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 26 Nov 2024 16:20:13 +0100 Subject: [PATCH 09/21] Transmit telemetry events from daeman thread --- baybe/telemetry/_telemetry.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index a0a916744..af32f440c 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -8,6 +8,7 @@ import socket import warnings from queue import Queue +from threading import Thread from typing import TYPE_CHECKING, Any from attrs import define, field, fields @@ -144,6 +145,14 @@ def put(self, item, block=True, timeout=None): super().put(item, block, timeout) +def transmit_events(queue: Queue) -> None: + """Transmit the telemetry events waiting in the given queue.""" + while True: + event = queue.get() + submit_scalar_value(*event) + queue.task_done() + + def test_connection() -> None: """Close the transmission queue if the telemetry endpoint is unreachable.""" try: @@ -188,7 +197,7 @@ def get_user_details() -> dict[str, str]: return {"host": hostname_hash, "user": username_hash, "version": __version__} -def _submit_scalar_value(instrument_name: str, value: int | float) -> None: +def submit_scalar_value(instrument_name: str, value: int | float) -> None: """See :func:`baybe.telemetry.telemetry_record_value`.""" if instrument_name in tools.instruments: histogram = tools.instruments[instrument_name] @@ -203,6 +212,7 @@ def _submit_scalar_value(instrument_name: str, value: int | float) -> None: tools = TelemetryTools() transmission_queue = CloseableQueue() +Thread(target=transmit_events, args=(transmission_queue,), daemon=True).start() if is_enabled() and TELEMETRY_VPN_CHECK: test_connection() From d7d30aff8b6cea0edc04695851478a28c8868485 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Tue, 26 Nov 2024 17:03:09 +0100 Subject: [PATCH 10/21] Start the daemon only when connection test is successful --- baybe/telemetry/_telemetry.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index af32f440c..2e9d5a1a7 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -132,6 +132,11 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._closed = False + @property + def is_closed(self) -> bool: + """Boolean value indicating if the queue is closed.""" + return self._closed + def close(self): """Remove all queue elements and prevent new ones from being added.""" with self.mutex: @@ -212,7 +217,9 @@ def submit_scalar_value(instrument_name: str, value: int | float) -> None: tools = TelemetryTools() transmission_queue = CloseableQueue() -Thread(target=transmit_events, args=(transmission_queue,), daemon=True).start() if is_enabled() and TELEMETRY_VPN_CHECK: test_connection() + +if not transmission_queue.is_closed: + Thread(target=transmit_events, args=(transmission_queue,), daemon=True).start() From f5c3cadc5ce76b0a2bf59112f959d9cce68a2f50 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Wed, 27 Nov 2024 10:16:49 +0100 Subject: [PATCH 11/21] Run connection test from daemon thread --- baybe/telemetry/_telemetry.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index 2e9d5a1a7..4f49e0ca3 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -158,28 +158,42 @@ def transmit_events(queue: Queue) -> None: queue.task_done() -def test_connection() -> None: - """Close the transmission queue if the telemetry endpoint is unreachable.""" +def test_connection() -> Exception | None: + """Check if the telemetry endpoint is reachable.""" try: # Send a test request. If there is no internet connection or a firewall is # present this will throw an error and telemetry will be deactivated. socket.gethostbyname("verkehrsnachrichten.merck.de") + return None except Exception as ex: # Catching broad exception here and disabling telemetry in that case to avoid # any telemetry timeouts or interference for the user in case of unexpected # errors. Possible ones are for instance ``socket.gaierror`` in case the user # has no internet connection. + return ex + + +def daemon_task() -> None: + # Telemetry inactive + if not is_enabled(): + return + + # Telemetry inactive but endpoint not reachable + if TELEMETRY_VPN_CHECK and (ex := test_connection()) is not None: if os.environ.get(VARNAME_TELEMETRY_USERNAME, "").startswith("DEV_"): - # This warning is only printed for developers to make them aware of - # potential issues + # Only printed for developers to make them aware of potential issues warnings.warn( f"WARNING: BayBE Telemetry endpoint {ENDPOINT_URL} cannot be reached. " - "Disabling telemetry. The exception encountered was: " + f"Disabling telemetry. The exception encountered was: " f"{type(ex).__name__}, {ex}", UserWarning, ) transmission_queue.close() + return + + # If everything is ready for transmission, process the incoming events + transmit_events(transmission_queue) def get_user_details() -> dict[str, str]: @@ -217,9 +231,4 @@ def submit_scalar_value(instrument_name: str, value: int | float) -> None: tools = TelemetryTools() transmission_queue = CloseableQueue() - -if is_enabled() and TELEMETRY_VPN_CHECK: - test_connection() - -if not transmission_queue.is_closed: - Thread(target=transmit_events, args=(transmission_queue,), daemon=True).start() +Thread(target=daemon_task).start() From e4f35129965428a26dba69c1dcd71f2abd806029 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Wed, 27 Nov 2024 10:20:56 +0100 Subject: [PATCH 12/21] Suppress mypy warning for lazy attribute --- baybe/telemetry/_telemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index 4f49e0ca3..4d0d16d3e 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -221,7 +221,7 @@ def submit_scalar_value(instrument_name: str, value: int | float) -> None: if instrument_name in tools.instruments: histogram = tools.instruments[instrument_name] else: - histogram = tools.meter.create_histogram( + histogram = tools.meter.create_histogram( # type: ignore[union-attr] instrument_name, description=f"Histogram for instrument {instrument_name}", ) From 671832dcde86c1f5fdbc8236cc8931c8cbeb2adb Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Wed, 27 Nov 2024 10:23:08 +0100 Subject: [PATCH 13/21] Drop unused constants --- baybe/telemetry/_telemetry.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index 4d0d16d3e..5bf98bb8d 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -32,14 +32,12 @@ # Telemetry environment variable names VARNAME_TELEMETRY_ENDPOINT = "BAYBE_TELEMETRY_ENDPOINT" VARNAME_TELEMETRY_VPN_CHECK = "BAYBE_TELEMETRY_VPN_CHECK" -VARNAME_TELEMETRY_VPN_CHECK_TIMEOUT = "BAYBE_TELEMETRY_VPN_CHECK_TIMEOUT" # Telemetry settings defaults DEFAULT_TELEMETRY_ENDPOINT = ( "https://public.telemetry.baybe.p.uptimize.merckgroup.com:4317" ) DEFAULT_TELEMETRY_VPN_CHECK = "true" -DEFAULT_TELEMETRY_VPN_CHECK_TIMEOUT = "0.5" try: DEFAULT_TELEMETRY_USERNAME = ( hashlib.sha256(getpass.getuser().upper().encode()).hexdigest().upper()[:10] From 2440d73d4fed1ab997a17ddee33fbaf40db55cc2 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Wed, 27 Nov 2024 10:36:03 +0100 Subject: [PATCH 14/21] Polish docstrings --- baybe/telemetry/_telemetry.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index 5bf98bb8d..e75c7767e 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -60,8 +60,10 @@ @define class TelemetryTools: + """Class for lazy-initialization of telemetry objects.""" + _is_initialized: bool = False - """Boolean flag for lazy initialization.""" + """Boolean flag indicating if initialization is completed.""" # Telemetry objects instruments: dict[str, Histogram] = field(factory=dict) @@ -72,16 +74,17 @@ class TelemetryTools: @override def __getattribute__(self, name: str, /) -> Any: + """Lazily initialize telemetry objects upon first access.""" if name not in [ (fields(TelemetryTools)).instruments.name, - self._lazy_initialize.__name__, + self._initialize.__name__, ]: try: - self._lazy_initialize() + self._initialize() except Exception: if is_enabled(): warnings.warn( - "Opentelemetry could not be imported, potentially it is " + "Opentelemetry could not be imported. Potentially it is " "not installed. Disabling BayBE telemetry.", UserWarning, ) @@ -89,8 +92,8 @@ def __getattribute__(self, name: str, /) -> Any: return super().__getattribute__(name) - def _lazy_initialize(self) -> None: - """Lazily initialize the telemetry objects upon first access.""" + def _initialize(self) -> None: + """Initialize the telemetry objects.""" if self._is_initialized: return @@ -159,31 +162,31 @@ def transmit_events(queue: Queue) -> None: def test_connection() -> Exception | None: """Check if the telemetry endpoint is reachable.""" try: - # Send a test request. If there is no internet connection or a firewall is - # present this will throw an error and telemetry will be deactivated. + # Send a test request. If the request fails (e.g. no connection, outside + # VPN, or firewall) this will throw an error. socket.gethostbyname("verkehrsnachrichten.merck.de") return None except Exception as ex: - # Catching broad exception here and disabling telemetry in that case to avoid - # any telemetry timeouts or interference for the user in case of unexpected - # errors. Possible ones are for instance ``socket.gaierror`` in case the user + # Catching broad exception here to avoid interference for the user. + # Possible errors are for instance ``socket.gaierror`` in case the user # has no internet connection. return ex def daemon_task() -> None: - # Telemetry inactive + """The telemetry logic to be executed in the daemon thread.""" # noqa + # Telemetry is inactive if not is_enabled(): return - # Telemetry inactive but endpoint not reachable + # Telemetry is active but the endpoint is not reachable if TELEMETRY_VPN_CHECK and (ex := test_connection()) is not None: if os.environ.get(VARNAME_TELEMETRY_USERNAME, "").startswith("DEV_"): # Only printed for developers to make them aware of potential issues warnings.warn( - f"WARNING: BayBE Telemetry endpoint {ENDPOINT_URL} cannot be reached. " - f"Disabling telemetry. The exception encountered was: " + f"WARNING: BayBE Telemetry endpoint '{ENDPOINT_URL}' cannot be " + f"reached. Disabling telemetry. The exception encountered was: " f"{type(ex).__name__}, {ex}", UserWarning, ) @@ -215,7 +218,7 @@ def get_user_details() -> dict[str, str]: def submit_scalar_value(instrument_name: str, value: int | float) -> None: - """See :func:`baybe.telemetry.telemetry_record_value`.""" + """See :func:`baybe.telemetry.api.telemetry_record_value`.""" if instrument_name in tools.instruments: histogram = tools.instruments[instrument_name] else: From 4e4fdb9e292d8111dbf96e2b67f929e6311323a8 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Wed, 27 Nov 2024 10:36:39 +0100 Subject: [PATCH 15/21] Close queue immediately when telemetry is deactivated --- baybe/telemetry/_telemetry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index e75c7767e..779f1e095 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -178,6 +178,7 @@ def daemon_task() -> None: """The telemetry logic to be executed in the daemon thread.""" # noqa # Telemetry is inactive if not is_enabled(): + transmission_queue.close() return # Telemetry is active but the endpoint is not reachable From 8a5e1ef30a758440e5d459a0973af609d24f4ba3 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Wed, 27 Nov 2024 10:39:06 +0100 Subject: [PATCH 16/21] Print telemetry warning only for developers --- baybe/telemetry/_telemetry.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index 779f1e095..806b344ea 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -82,7 +82,7 @@ def __getattribute__(self, name: str, /) -> Any: try: self._initialize() except Exception: - if is_enabled(): + if is_enabled() and user_is_developer(): warnings.warn( "Opentelemetry could not be imported. Potentially it is " "not installed. Disabling BayBE telemetry.", @@ -183,7 +183,7 @@ def daemon_task() -> None: # Telemetry is active but the endpoint is not reachable if TELEMETRY_VPN_CHECK and (ex := test_connection()) is not None: - if os.environ.get(VARNAME_TELEMETRY_USERNAME, "").startswith("DEV_"): + if user_is_developer(): # Only printed for developers to make them aware of potential issues warnings.warn( f"WARNING: BayBE Telemetry endpoint '{ENDPOINT_URL}' cannot be " @@ -218,6 +218,11 @@ def get_user_details() -> dict[str, str]: return {"host": hostname_hash, "user": username_hash, "version": __version__} +def user_is_developer() -> bool: + """Determine if the user is a developer.""" + return os.environ.get(VARNAME_TELEMETRY_USERNAME, "").startswith("DEV_") + + def submit_scalar_value(instrument_name: str, value: int | float) -> None: """See :func:`baybe.telemetry.api.telemetry_record_value`.""" if instrument_name in tools.instruments: From 201b52c10317c69900197e226d962c66fd14add7 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Wed, 27 Nov 2024 10:40:24 +0100 Subject: [PATCH 17/21] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 850814871..86d4f426e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `CustomDiscreteParameter` does not allow duplicated rows in `data` anymore - De-/activating Polars via `BAYBE_DEACTIVATE_POLARS` now requires passing values compatible with `strtobool` +- Telemetry now runs in a daemon thread ### Fixed - Rare bug arising from degenerate `SubstanceParameter.comp_df` rows that caused From 7917af83222859239ae5a9664ffcbba756402189 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Wed, 19 Feb 2025 11:05:06 +0100 Subject: [PATCH 18/21] Rename TELEM_LABELS to TELEMETRY_LABELS for consistency --- baybe/campaign.py | 8 ++++---- baybe/searchspace/core.py | 8 ++++---- baybe/telemetry/__init__.py | 4 ++-- baybe/telemetry/api.py | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/baybe/campaign.py b/baybe/campaign.py index a76c1ad96..ffb26d14b 100644 --- a/baybe/campaign.py +++ b/baybe/campaign.py @@ -36,7 +36,7 @@ from baybe.surrogates.base import SurrogateProtocol from baybe.targets.base import Target from baybe.telemetry import ( - TELEM_LABELS, + TELEMETRY_LABELS, telemetry_record_recommended_measurement_percentage, telemetry_record_value, ) @@ -303,7 +303,7 @@ def add_measurements( self._searchspace_metadata.loc[idxs_matched, _MEASURED] = True # Telemetry - telemetry_record_value(TELEM_LABELS["COUNT_ADD_RESULTS"], 1) + telemetry_record_value(TELEMETRY_LABELS["COUNT_ADD_RESULTS"], 1) telemetry_record_recommended_measurement_percentage( self._cached_recommendation, data, self.parameters ) @@ -501,8 +501,8 @@ def recommend( self._searchspace_metadata.loc[rec.index, _RECOMMENDED] = True # Telemetry - telemetry_record_value(TELEM_LABELS["COUNT_RECOMMEND"], 1) - telemetry_record_value(TELEM_LABELS["BATCH_SIZE"], batch_size) + telemetry_record_value(TELEMETRY_LABELS["COUNT_RECOMMEND"], 1) + telemetry_record_value(TELEMETRY_LABELS["BATCH_SIZE"], batch_size) return rec diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index 56bab3243..51afba4d1 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -26,7 +26,7 @@ ) from baybe.searchspace.validation import validate_parameters from baybe.serialization import SerialMixin, converter, select_constructor_hook -from baybe.telemetry import TELEM_LABELS, telemetry_record_value +from baybe.telemetry import TELEMETRY_LABELS, telemetry_record_value from baybe.utils.plotting import to_string @@ -85,10 +85,10 @@ def __attrs_post_init__(self): validate_constraints(self.constraints, self.parameters) # Telemetry - telemetry_record_value(TELEM_LABELS["COUNT_SEARCHSPACE_CREATION"], 1) - telemetry_record_value(TELEM_LABELS["NUM_PARAMETERS"], len(self.parameters)) + telemetry_record_value(TELEMETRY_LABELS["COUNT_SEARCHSPACE_CREATION"], 1) + telemetry_record_value(TELEMETRY_LABELS["NUM_PARAMETERS"], len(self.parameters)) telemetry_record_value( - TELEM_LABELS["NUM_CONSTRAINTS"], + TELEMETRY_LABELS["NUM_CONSTRAINTS"], len(self.constraints) if self.constraints else 0, ) diff --git a/baybe/telemetry/__init__.py b/baybe/telemetry/__init__.py index d95e01030..ddecc76cc 100644 --- a/baybe/telemetry/__init__.py +++ b/baybe/telemetry/__init__.py @@ -4,7 +4,7 @@ """ from baybe.telemetry.api import ( - TELEM_LABELS, + TELEMETRY_LABELS, VARNAME_TELEMETRY_ENABLED, VARNAME_TELEMETRY_HOSTNAME, VARNAME_TELEMETRY_USERNAME, @@ -13,7 +13,7 @@ ) __all__ = [ - "TELEM_LABELS", + "TELEMETRY_LABELS", "VARNAME_TELEMETRY_ENABLED", "VARNAME_TELEMETRY_HOSTNAME", "VARNAME_TELEMETRY_USERNAME", diff --git a/baybe/telemetry/api.py b/baybe/telemetry/api.py index 7d6edad69..9bc3108d7 100644 --- a/baybe/telemetry/api.py +++ b/baybe/telemetry/api.py @@ -10,7 +10,7 @@ from baybe.utils.dataframe import fuzzy_row_match # Telemetry labels for metrics -TELEM_LABELS = { +TELEMETRY_LABELS = { "RECOMMENDED_MEASUREMENTS_PERCENTAGE": "value_recommended-measurements-percentage", "BATCH_SIZE": "value_batch-size", "COUNT_ADD_RESULTS": "count_add-results", @@ -84,7 +84,7 @@ def telemetry_record_recommended_measurement_percentage( from baybe.telemetry._telemetry import transmission_queue if cached_recommendation.empty: - transmission_queue.put((TELEM_LABELS["NAKED_INITIAL_MEASUREMENTS"], 1)) + transmission_queue.put((TELEMETRY_LABELS["NAKED_INITIAL_MEASUREMENTS"], 1)) else: recommended_measurements_percentage = ( len(fuzzy_row_match(cached_recommendation, measurements, parameters)) @@ -93,7 +93,7 @@ def telemetry_record_recommended_measurement_percentage( ) transmission_queue.put( ( - TELEM_LABELS["RECOMMENDED_MEASUREMENTS_PERCENTAGE"], + TELEMETRY_LABELS["RECOMMENDED_MEASUREMENTS_PERCENTAGE"], recommended_measurements_percentage, ) ) From 2186ecf43450579a0f133158d641b695211beca9 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Wed, 19 Feb 2025 11:06:23 +0100 Subject: [PATCH 19/21] Break infinite recursion --- baybe/telemetry/_telemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index 806b344ea..68bf87803 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -77,7 +77,7 @@ def __getattribute__(self, name: str, /) -> Any: """Lazily initialize telemetry objects upon first access.""" if name not in [ (fields(TelemetryTools)).instruments.name, - self._initialize.__name__, + "_initialize", ]: try: self._initialize() From 19520e486127461d0f0eb19c3f18dfcf2e60f532 Mon Sep 17 00:00:00 2001 From: AdrianSosic Date: Wed, 19 Feb 2025 11:12:46 +0100 Subject: [PATCH 20/21] Avoid the term daemon since we use a regular background thread --- CHANGELOG.md | 2 +- baybe/telemetry/_telemetry.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1536a8f9..87e8402fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Acquisition function indicator `is_mc` has been removed in favor of new indicators `supports_batching` and `supports_pending_experiments` -- Telemetry now runs in a daemon thread +- Telemetry now runs in a separate thread ### Fixed - Incorrect optimization direction with `PSTD` with a single minimization target diff --git a/baybe/telemetry/_telemetry.py b/baybe/telemetry/_telemetry.py index 68bf87803..42beba3b7 100644 --- a/baybe/telemetry/_telemetry.py +++ b/baybe/telemetry/_telemetry.py @@ -174,8 +174,8 @@ def test_connection() -> Exception | None: return ex -def daemon_task() -> None: - """The telemetry logic to be executed in the daemon thread.""" # noqa +def telemetry_task() -> None: + """The telemetry logic to be executed in a background thread.""" # noqa # Telemetry is inactive if not is_enabled(): transmission_queue.close() @@ -238,4 +238,4 @@ def submit_scalar_value(instrument_name: str, value: int | float) -> None: tools = TelemetryTools() transmission_queue = CloseableQueue() -Thread(target=daemon_task).start() +Thread(target=telemetry_task).start() From fbfe21d384d9fd89b85e4c6f4ebd82d7d6d5fada Mon Sep 17 00:00:00 2001 From: Martin Fitzner Date: Wed, 19 Feb 2025 17:12:05 +0100 Subject: [PATCH 21/21] Update telemetry test script --- tests/simulate_telemetry.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/simulate_telemetry.py b/tests/simulate_telemetry.py index 39f7793b4..3e2612330 100644 --- a/tests/simulate_telemetry.py +++ b/tests/simulate_telemetry.py @@ -7,7 +7,6 @@ from random import randint from baybe.campaign import Campaign -from baybe.objective import Objective from baybe.parameters import NumericalDiscreteParameter, SubstanceParameter from baybe.recommenders import ( BotorchRecommender, @@ -16,11 +15,8 @@ ) from baybe.searchspace import SearchSpace from baybe.targets import NumericalTarget -from baybe.telemetry import ( - VARNAME_TELEMETRY_ENABLED, - VARNAME_TELEMETRY_USERNAME, - get_user_details, -) +from baybe.telemetry import VARNAME_TELEMETRY_ENABLED, VARNAME_TELEMETRY_USERNAME +from baybe.telemetry._telemetry import get_user_details from baybe.utils.dataframe import add_fake_measurements dict_solvent = { @@ -68,9 +64,7 @@ parameters=parameters, constraints=None, ), - "objective": Objective( - mode="SINGLE", targets=[NumericalTarget(name="Yield", mode="MAX")] - ), + "objective": NumericalTarget(name="Yield", mode="MAX").to_objective(), "recommender": TwoPhaseMetaRecommender( recommender=BotorchRecommender(), initial_recommender=RandomRecommender(),