From 0a0de06e00d2d83bcce408227aaed42d42866729 Mon Sep 17 00:00:00 2001 From: Daniel Szoke Date: Tue, 11 Feb 2025 14:10:24 +0100 Subject: [PATCH] feat(tracing): Backfill missing `sample_rand` on `PropagationContext` Whenever the `PropagationContext` continues an incoming trace (i.e. whenever the `trace_id` is set, rather than being randomly generated as for a new trace), check if the `sample_rand` is present and valid in the incoming DSC. If the `sample_rand` is missing, generate it deterministically based on the `trace_id` and backfill it into the DSC on the `PropagationContext`. When generating the backfilled `sample_rand`, we ensure the generated value is consistent with the incoming trace's sampling decision and sample rate, if both of these are present. Otherwise, we generate a new value in the range [0, 1). Future PRs will address propagating the `sample_rand` to transactions generated with `continue_trace` (allowing the `sample_rand` to be propagated on outgoing traces), and will also allow `sample_rand` to be used for making sampling decisions. Ref #3998 --- sentry_sdk/tracing_utils.py | 77 ++++++++++++++++++++++++++++++++ tests/test_api.py | 7 +-- tests/test_propagationcontext.py | 53 ++++++++++++++++++++++ 3 files changed, 134 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index 9ea2d9859a..094d5984a8 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -6,6 +6,7 @@ from collections.abc import Mapping from datetime import timedelta from functools import wraps +from random import Random from urllib.parse import quote, unquote import uuid @@ -397,6 +398,8 @@ def __init__( self.dynamic_sampling_context = dynamic_sampling_context """Data that is used for dynamic sampling decisions.""" + self._fill_sample_rand() + @classmethod def from_incoming_data(cls, incoming_data): # type: (Dict[str, Any]) -> Optional[PropagationContext] @@ -418,6 +421,9 @@ def from_incoming_data(cls, incoming_data): propagation_context = PropagationContext() propagation_context.update(sentrytrace_data) + if propagation_context is not None: + propagation_context._fill_sample_rand() + return propagation_context @property @@ -425,6 +431,7 @@ def trace_id(self): # type: () -> str """The trace id of the Sentry trace.""" if not self._trace_id: + # New trace, don't fill in sample_rand self._trace_id = uuid.uuid4().hex return self._trace_id @@ -433,6 +440,7 @@ def trace_id(self): def trace_id(self, value): # type: (str) -> None self._trace_id = value + self._fill_sample_rand() @property def span_id(self): @@ -469,6 +477,46 @@ def __repr__(self): self.dynamic_sampling_context, ) + def _fill_sample_rand(self): + # type: () -> None + """ + If the sample_rand is missing from the Dynamic Sampling Context (or invalid), + we generate it here. + + We only generate a sample_rand if the trace_id is set. + + If we have a parent_sampled value and a sample_rate in the DSC, we compute + a sample_rand value randomly in the range [0, sample_rate) if parent_sampled is True, + or in the range [sample_rate, 1) if parent_sampled is False. If either parent_sampled + or sample_rate is missing, we generate a random value in the range [0, 1). + + The sample_rand is deterministically generated from the trace_id. + """ + if self._trace_id is None: + # We only want to generate a sample_rand if the _trace_id is set. + return + + # Ensure that the dynamic_sampling_context is a dict + self.dynamic_sampling_context = self.dynamic_sampling_context or {} + + sample_rand = _try_float(self.dynamic_sampling_context.get("sample_rand")) + if sample_rand is not None and 0 <= sample_rand < 1: + # sample_rand is present and valid, so don't overwrite it + return + + # Get a random value in [0, 1) + random_value = Random(self.trace_id).random() + + # Get the sample rate and compute the transformation that will map the random value + # to the desired range: [0, 1), [0, sample_rate), or [sample_rate, 1). + sample_rate = _try_float(self.dynamic_sampling_context.get("sample_rate")) + factor, offset = _sample_rand_transormation(self.parent_sampled, sample_rate) + + # Transform the random value to the desired range + self.dynamic_sampling_context["sample_rand"] = str( + random_value * factor + offset + ) + class Baggage: """ @@ -744,6 +792,35 @@ def get_current_span(scope=None): return current_span +def _try_float(value): + # type: (Any) -> Optional[float] + """Small utility to convert a value to a float, if possible.""" + try: + return float(value) + except (ValueError, TypeError): + return None + + +def _sample_rand_transormation(parent_sampled, sample_rate): + # type: (Optional[bool], Optional[float]) -> tuple[float, float] + """ + Compute the factor and offset to scale and translate a random number in [0, 1) to + a range consistent with the parent_sampled and sample_rate values. + + The return value is a tuple (factor, offset) such that, given random_value in [0, 1), + and new_value = random_value * factor + offset: + - new_value will be unchanged if either parent_sampled or sample_rate is None + - if parent_sampled and sample_rate are both set, new_value will be in [0, sample_rate) + if parent_sampled is True, or in [sample_rate, 1) if parent_sampled is False + """ + if parent_sampled is None or sample_rate is None: + return 1.0, 0.0 + elif parent_sampled is True: + return sample_rate, 0.0 + else: # parent_sampled is False + return 1.0 - sample_rate, sample_rate + + # Circular imports from sentry_sdk.tracing import ( BAGGAGE_HEADER_NAME, diff --git a/tests/test_api.py b/tests/test_api.py index 3b2a9c8fb7..edfca2268f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -79,7 +79,7 @@ def test_traceparent_with_tracing_disabled(sentry_init): assert get_traceparent() == expected_traceparent -@pytest.mark.forked +# @pytest.mark.forked def test_baggage_with_tracing_disabled(sentry_init): sentry_init(release="1.0.0", environment="dev") propagation_context = get_isolation_scope()._propagation_context @@ -111,7 +111,7 @@ def test_continue_trace(sentry_init): transaction = continue_trace( { "sentry-trace": "{}-{}-{}".format(trace_id, parent_span_id, parent_sampled), - "baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19", + "baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19,sentry-sample_rand=0.1234567890", }, name="some name", ) @@ -123,7 +123,8 @@ def test_continue_trace(sentry_init): assert propagation_context.parent_span_id == parent_span_id assert propagation_context.parent_sampled == parent_sampled assert propagation_context.dynamic_sampling_context == { - "trace_id": "566e3688a61d4bc888951642d6f14a19" + "trace_id": "566e3688a61d4bc888951642d6f14a19", + "sample_rand": "0.1234567890", } diff --git a/tests/test_propagationcontext.py b/tests/test_propagationcontext.py index c650071511..5a9e1814f1 100644 --- a/tests/test_propagationcontext.py +++ b/tests/test_propagationcontext.py @@ -1,3 +1,5 @@ +import pytest + from sentry_sdk.tracing_utils import PropagationContext @@ -12,6 +14,8 @@ def test_empty_context(): assert ctx.parent_span_id is None assert ctx.parent_sampled is None + + # Don't fill in sample_rand on empty context assert ctx.dynamic_sampling_context is None @@ -32,6 +36,8 @@ def test_context_with_values(): assert ctx.parent_sampled assert ctx.dynamic_sampling_context == { "foo": "bar", + # sample_rand deterministically generated from trace_id + "sample_rand": "0.20286121767364262", } @@ -51,6 +57,8 @@ def test_lacy_uuids(): def test_property_setters(): ctx = PropagationContext() + + # this setter also generates the sample_rand ctx.trace_id = "X234567890abcdef1234567890abcdef" ctx.span_id = "X234567890abcdef" @@ -59,6 +67,9 @@ def test_property_setters(): assert ctx._span_id == "X234567890abcdef" assert ctx.span_id == "X234567890abcdef" + assert ctx.dynamic_sampling_context is not None + assert ctx.dynamic_sampling_context["sample_rand"] == "0.3053911827014587" + def test_update(): ctx = PropagationContext() @@ -81,3 +92,45 @@ def test_update(): assert ctx.dynamic_sampling_context is None assert not hasattr(ctx, "foo") + + +def test_existing_sample_rand_kept(): + ctx = PropagationContext( + trace_id="00000000000000000000000000000000", + dynamic_sampling_context={"sample_rand": "0.5"}, + ) + + # If sample_rand was regenerated, the value would be 0.8766381713144122 based on the trace_id + assert ctx.dynamic_sampling_context["sample_rand"] == "0.5" + + +@pytest.mark.parametrize( + ("parent_sampled", "sample_rate", "expected_sample_rand"), + ( + (None, None, "0.8766381713144122"), + (None, "0.5", "0.8766381713144122"), + (False, None, "0.8766381713144122"), + (True, None, "0.8766381713144122"), + (False, "0.0", "0.8766381713144122"), + (False, "0.01", "0.8778717896012681"), + (True, "0.01", "0.008766381713144122"), + (False, "0.1", "0.888974354182971"), + (True, "0.1", "0.08766381713144122"), + (False, "0.5", "0.9383190856572061"), + (True, "0.5", "0.4383190856572061"), + (True, "1.0", "0.8766381713144122"), + ), +) +def test_sample_rand_filled(parent_sampled, sample_rate, expected_sample_rand): + """When continuing a trace, we want to fill in the sample_rand value if it's missing.""" + dsc = {} + if sample_rate is not None: + dsc["sample_rate"] = sample_rate + + ctx = PropagationContext( + trace_id="00000000000000000000000000000000", + parent_sampled=parent_sampled, + dynamic_sampling_context=dsc, + ) + + assert ctx.dynamic_sampling_context["sample_rand"] == expected_sample_rand