From 0a0de06e00d2d83bcce408227aaed42d42866729 Mon Sep 17 00:00:00 2001
From: Daniel Szoke <daniel.szoke@sentry.io>
Date: Tue, 11 Feb 2025 14:10:24 +0100
Subject: [PATCH] feat(tracing): Backfill missing `sample_rand` on
 `PropagationContext`

Whenever the `PropagationContext` continues an incoming trace (i.e. whenever the `trace_id` is set, rather than being randomly generated as for a new trace), check if the `sample_rand` is present and valid in the incoming DSC. If the `sample_rand` is missing, generate it deterministically based on the `trace_id` and backfill it into the DSC on the `PropagationContext`.

When generating the backfilled `sample_rand`, we ensure the generated value is consistent with the incoming trace's sampling decision and sample rate, if both of these are present. Otherwise, we generate a new value in the range [0, 1).

Future PRs will address propagating the `sample_rand` to transactions generated with `continue_trace` (allowing the `sample_rand` to be propagated on outgoing traces), and will also allow `sample_rand` to be used for making sampling decisions.

Ref #3998
---
 sentry_sdk/tracing_utils.py      | 77 ++++++++++++++++++++++++++++++++
 tests/test_api.py                |  7 +--
 tests/test_propagationcontext.py | 53 ++++++++++++++++++++++
 3 files changed, 134 insertions(+), 3 deletions(-)

diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py
index 9ea2d9859a..094d5984a8 100644
--- a/sentry_sdk/tracing_utils.py
+++ b/sentry_sdk/tracing_utils.py
@@ -6,6 +6,7 @@
 from collections.abc import Mapping
 from datetime import timedelta
 from functools import wraps
+from random import Random
 from urllib.parse import quote, unquote
 import uuid
 
@@ -397,6 +398,8 @@ def __init__(
         self.dynamic_sampling_context = dynamic_sampling_context
         """Data that is used for dynamic sampling decisions."""
 
+        self._fill_sample_rand()
+
     @classmethod
     def from_incoming_data(cls, incoming_data):
         # type: (Dict[str, Any]) -> Optional[PropagationContext]
@@ -418,6 +421,9 @@ def from_incoming_data(cls, incoming_data):
                     propagation_context = PropagationContext()
                 propagation_context.update(sentrytrace_data)
 
+        if propagation_context is not None:
+            propagation_context._fill_sample_rand()
+
         return propagation_context
 
     @property
@@ -425,6 +431,7 @@ def trace_id(self):
         # type: () -> str
         """The trace id of the Sentry trace."""
         if not self._trace_id:
+            # New trace, don't fill in sample_rand
             self._trace_id = uuid.uuid4().hex
 
         return self._trace_id
@@ -433,6 +440,7 @@ def trace_id(self):
     def trace_id(self, value):
         # type: (str) -> None
         self._trace_id = value
+        self._fill_sample_rand()
 
     @property
     def span_id(self):
@@ -469,6 +477,46 @@ def __repr__(self):
             self.dynamic_sampling_context,
         )
 
+    def _fill_sample_rand(self):
+        # type: () -> None
+        """
+        If the sample_rand is missing from the Dynamic Sampling Context (or invalid),
+        we generate it here.
+
+        We only generate a sample_rand if the trace_id is set.
+
+        If we have a parent_sampled value and a sample_rate in the DSC, we compute
+        a sample_rand value randomly in the range [0, sample_rate) if parent_sampled is True,
+        or in the range [sample_rate, 1) if parent_sampled is False. If either parent_sampled
+        or sample_rate is missing, we generate a random value in the range [0, 1).
+
+        The sample_rand is deterministically generated from the trace_id.
+        """
+        if self._trace_id is None:
+            # We only want to generate a sample_rand if the _trace_id is set.
+            return
+
+        # Ensure that the dynamic_sampling_context is a dict
+        self.dynamic_sampling_context = self.dynamic_sampling_context or {}
+
+        sample_rand = _try_float(self.dynamic_sampling_context.get("sample_rand"))
+        if sample_rand is not None and 0 <= sample_rand < 1:
+            # sample_rand is present and valid, so don't overwrite it
+            return
+
+        # Get a random value in [0, 1)
+        random_value = Random(self.trace_id).random()
+
+        # Get the sample rate and compute the transformation that will map the random value
+        # to the desired range: [0, 1), [0, sample_rate), or [sample_rate, 1).
+        sample_rate = _try_float(self.dynamic_sampling_context.get("sample_rate"))
+        factor, offset = _sample_rand_transormation(self.parent_sampled, sample_rate)
+
+        # Transform the random value to the desired range
+        self.dynamic_sampling_context["sample_rand"] = str(
+            random_value * factor + offset
+        )
+
 
 class Baggage:
     """
@@ -744,6 +792,35 @@ def get_current_span(scope=None):
     return current_span
 
 
+def _try_float(value):
+    # type: (Any) -> Optional[float]
+    """Small utility to convert a value to a float, if possible."""
+    try:
+        return float(value)
+    except (ValueError, TypeError):
+        return None
+
+
+def _sample_rand_transormation(parent_sampled, sample_rate):
+    # type: (Optional[bool], Optional[float]) -> tuple[float, float]
+    """
+    Compute the factor and offset to scale and translate a random number in [0, 1) to
+    a range consistent with the parent_sampled and sample_rate values.
+
+    The return value is a tuple (factor, offset) such that, given random_value in [0, 1),
+    and new_value = random_value * factor + offset:
+      - new_value will be unchanged if either parent_sampled or sample_rate is None
+      - if parent_sampled and sample_rate are both set, new_value will be in [0, sample_rate)
+        if parent_sampled is True, or in [sample_rate, 1) if parent_sampled is False
+    """
+    if parent_sampled is None or sample_rate is None:
+        return 1.0, 0.0
+    elif parent_sampled is True:
+        return sample_rate, 0.0
+    else:  # parent_sampled is False
+        return 1.0 - sample_rate, sample_rate
+
+
 # Circular imports
 from sentry_sdk.tracing import (
     BAGGAGE_HEADER_NAME,
diff --git a/tests/test_api.py b/tests/test_api.py
index 3b2a9c8fb7..edfca2268f 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -79,7 +79,7 @@ def test_traceparent_with_tracing_disabled(sentry_init):
     assert get_traceparent() == expected_traceparent
 
 
-@pytest.mark.forked
+# @pytest.mark.forked
 def test_baggage_with_tracing_disabled(sentry_init):
     sentry_init(release="1.0.0", environment="dev")
     propagation_context = get_isolation_scope()._propagation_context
@@ -111,7 +111,7 @@ def test_continue_trace(sentry_init):
     transaction = continue_trace(
         {
             "sentry-trace": "{}-{}-{}".format(trace_id, parent_span_id, parent_sampled),
-            "baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19",
+            "baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19,sentry-sample_rand=0.1234567890",
         },
         name="some name",
     )
@@ -123,7 +123,8 @@ def test_continue_trace(sentry_init):
         assert propagation_context.parent_span_id == parent_span_id
         assert propagation_context.parent_sampled == parent_sampled
         assert propagation_context.dynamic_sampling_context == {
-            "trace_id": "566e3688a61d4bc888951642d6f14a19"
+            "trace_id": "566e3688a61d4bc888951642d6f14a19",
+            "sample_rand": "0.1234567890",
         }
 
 
diff --git a/tests/test_propagationcontext.py b/tests/test_propagationcontext.py
index c650071511..5a9e1814f1 100644
--- a/tests/test_propagationcontext.py
+++ b/tests/test_propagationcontext.py
@@ -1,3 +1,5 @@
+import pytest
+
 from sentry_sdk.tracing_utils import PropagationContext
 
 
@@ -12,6 +14,8 @@ def test_empty_context():
 
     assert ctx.parent_span_id is None
     assert ctx.parent_sampled is None
+
+    # Don't fill in sample_rand on empty context
     assert ctx.dynamic_sampling_context is None
 
 
@@ -32,6 +36,8 @@ def test_context_with_values():
     assert ctx.parent_sampled
     assert ctx.dynamic_sampling_context == {
         "foo": "bar",
+        # sample_rand deterministically generated from trace_id
+        "sample_rand": "0.20286121767364262",
     }
 
 
@@ -51,6 +57,8 @@ def test_lacy_uuids():
 
 def test_property_setters():
     ctx = PropagationContext()
+
+    # this setter also generates the sample_rand
     ctx.trace_id = "X234567890abcdef1234567890abcdef"
     ctx.span_id = "X234567890abcdef"
 
@@ -59,6 +67,9 @@ def test_property_setters():
     assert ctx._span_id == "X234567890abcdef"
     assert ctx.span_id == "X234567890abcdef"
 
+    assert ctx.dynamic_sampling_context is not None
+    assert ctx.dynamic_sampling_context["sample_rand"] == "0.3053911827014587"
+
 
 def test_update():
     ctx = PropagationContext()
@@ -81,3 +92,45 @@ def test_update():
     assert ctx.dynamic_sampling_context is None
 
     assert not hasattr(ctx, "foo")
+
+
+def test_existing_sample_rand_kept():
+    ctx = PropagationContext(
+        trace_id="00000000000000000000000000000000",
+        dynamic_sampling_context={"sample_rand": "0.5"},
+    )
+
+    # If sample_rand was regenerated, the value would be 0.8766381713144122 based on the trace_id
+    assert ctx.dynamic_sampling_context["sample_rand"] == "0.5"
+
+
+@pytest.mark.parametrize(
+    ("parent_sampled", "sample_rate", "expected_sample_rand"),
+    (
+        (None, None, "0.8766381713144122"),
+        (None, "0.5", "0.8766381713144122"),
+        (False, None, "0.8766381713144122"),
+        (True, None, "0.8766381713144122"),
+        (False, "0.0", "0.8766381713144122"),
+        (False, "0.01", "0.8778717896012681"),
+        (True, "0.01", "0.008766381713144122"),
+        (False, "0.1", "0.888974354182971"),
+        (True, "0.1", "0.08766381713144122"),
+        (False, "0.5", "0.9383190856572061"),
+        (True, "0.5", "0.4383190856572061"),
+        (True, "1.0", "0.8766381713144122"),
+    ),
+)
+def test_sample_rand_filled(parent_sampled, sample_rate, expected_sample_rand):
+    """When continuing a trace, we want to fill in the sample_rand value if it's missing."""
+    dsc = {}
+    if sample_rate is not None:
+        dsc["sample_rate"] = sample_rate
+
+    ctx = PropagationContext(
+        trace_id="00000000000000000000000000000000",
+        parent_sampled=parent_sampled,
+        dynamic_sampling_context=dsc,
+    )
+
+    assert ctx.dynamic_sampling_context["sample_rand"] == expected_sample_rand