Skip to content

Commit

Permalink
feat(tracing): Backfill missing sample_rand on PropagationContext
Browse files Browse the repository at this point in the history
Whenever the `PropagationContext` continues an incoming trace (i.e. whenever the `trace_id` is set, rather than being randomly generated as for a new trace), check if the `sample_rand` is present and valid in the incoming DSC (dynamic sampling context). If the `sample_rand` is missing or invalid, generate it deterministically based on the `trace_id` and backfill it into the DSC on the `PropagationContext`.

When generating the backfilled `sample_rand`, we ensure the generated value is consistent with the incoming trace's sampling decision and sample rate, if both of these are present. Otherwise, we generate a new value in the range [0, 1).

Future PRs will address propagating the `sample_rand` to transactions generated with `continue_trace` (allowing the `sample_rand` to be propagated on outgoing traces), and will also allow `sample_rand` to be used for making sampling decisions.

Ref #3998
  • Loading branch information
szokeasaurusrex committed Feb 18, 2025
1 parent c6b5994 commit 382ef64
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 2 deletions.
70 changes: 70 additions & 0 deletions sentry_sdk/tracing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from collections.abc import Mapping
from datetime import timedelta
from functools import wraps
from random import Random
from urllib.parse import quote, unquote
import uuid

Expand All @@ -19,6 +20,7 @@
match_regex_list,
qualname_from_function,
to_string,
try_float,
is_sentry_url,
_is_external_source,
_is_in_project_root,
Expand Down Expand Up @@ -418,13 +420,17 @@ def from_incoming_data(cls, incoming_data):
propagation_context = PropagationContext()
propagation_context.update(sentrytrace_data)

if propagation_context is not None:
propagation_context._fill_sample_rand()

return propagation_context

@property
def trace_id(self):
    # type: () -> str
    """Return the trace id of the Sentry trace, creating one on first access."""
    if self._trace_id:
        return self._trace_id

    # Nothing stored yet, so this is a new trace: generate a random id.
    # sample_rand is deliberately not filled in for new traces.
    self._trace_id = uuid.uuid4().hex
    return self._trace_id
Expand Down Expand Up @@ -469,6 +475,55 @@ def __repr__(self):
self.dynamic_sampling_context,
)

def _fill_sample_rand(self):
    # type: () -> None
    """
    Backfill ``sample_rand`` in the dynamic_sampling_context when it is absent
    or not a float in [0, 1).

    An existing valid value is left untouched. Otherwise a value is drawn from
    a ``Random`` instance seeded with the trace_id, so the same trace_id always
    yields the same result. The interval it is drawn from comes from
    ``_sample_rand_range``:

    - [0, sample_rate) when parent_sampled is True and a sample_rate is known,
    - [sample_rate, 1) when parent_sampled is False and a sample_rate is known,
    - [0, 1) when either parent_sampled or sample_rate is missing.

    Nothing is written when there is no dynamic_sampling_context, or when the
    interval is empty (lower bound >= upper bound).
    """
    if self.dynamic_sampling_context is None:
        return

    existing = try_float(self.dynamic_sampling_context.get("sample_rand"))
    has_valid_value = existing is not None and 0 <= existing < 1
    if has_valid_value:
        # Never overwrite a usable incoming value.
        return

    # The interval is one of [0, 1), [0, sample_rate) or [sample_rate, 1).
    incoming_rate = try_float(self.dynamic_sampling_context.get("sample_rate"))
    low, high = _sample_rand_range(self.parent_sampled, incoming_rate)

    if low >= high:
        # The sampled flag and sample_rate contradict each other, e.g.
        # sample_rate=0.0 together with sampled=True. No sensible value can
        # be generated, so leave the context as it is.
        return

    rng = Random(self.trace_id)
    while True:
        # uniform() can, in some cases, return the upper bound itself; the
        # interval is half-open, so draw again until the value differs.
        candidate = rng.uniform(low, high)
        if candidate != high:
            break

    self.dynamic_sampling_context["sample_rand"] = str(candidate)


class Baggage:
"""
Expand Down Expand Up @@ -748,6 +803,21 @@ def get_current_span(scope=None):
return current_span


def _sample_rand_range(parent_sampled, sample_rate):
# type: (Optional[bool], Optional[float]) -> tuple[float, float]
"""
Compute the lower (inclusive) and upper (exclusive) bounds of the range of values
that a generated sample_rand value must fall into, given the parent_sampled and
sample_rate values.
"""
if parent_sampled is None or sample_rate is None:
return 0.0, 1.0
elif parent_sampled is True:
return 0.0, sample_rate
else: # parent_sampled is False
return sample_rate, 1.0


# Circular imports
from sentry_sdk.tracing import (
BAGGAGE_HEADER_NAME,
Expand Down
9 changes: 9 additions & 0 deletions sentry_sdk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1888,3 +1888,12 @@ def should_be_treated_as_error(ty, value):
return False

return True


def try_float(value):
    # type: (Any) -> Optional[float]
    """
    Small utility to convert a value to a float, if possible.

    :param value: Any object; typically a string or number.
    :returns: ``float(value)``, or None when the value cannot be converted.
    """
    try:
        return float(value)
    except (ValueError, TypeError, OverflowError):
        # ValueError: unparsable string; TypeError: unsupported type (e.g. None);
        # OverflowError: an int too large to be represented as a float.
        return None
5 changes: 3 additions & 2 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def test_continue_trace(sentry_init):
transaction = continue_trace(
{
"sentry-trace": "{}-{}-{}".format(trace_id, parent_span_id, parent_sampled),
"baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19",
"baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19,sentry-sample_rand=0.1234567890",
},
name="some name",
)
Expand All @@ -123,7 +123,8 @@ def test_continue_trace(sentry_init):
assert propagation_context.parent_span_id == parent_span_id
assert propagation_context.parent_sampled == parent_sampled
assert propagation_context.dynamic_sampling_context == {
"trace_id": "566e3688a61d4bc888951642d6f14a19"
"trace_id": "566e3688a61d4bc888951642d6f14a19",
"sample_rand": "0.1234567890",
}


Expand Down
55 changes: 55 additions & 0 deletions tests/test_propagationcontext.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
import pytest

from sentry_sdk.tracing_utils import PropagationContext


# Lookup table for building a sentry-trace header value in the tests of this
# file: the key is the `sampled` value (None, False or True) and the value is
# the suffix appended to the header; None maps to no suffix at all.
SAMPLED_FLAG = {
None: "",
False: "-0",
True: "-1",
}
"""Maps the `sampled` value to the flag appended to the sentry-trace header."""


def test_empty_context():
ctx = PropagationContext()

Expand Down Expand Up @@ -51,13 +61,15 @@ def test_lazy_uuids():

def test_property_setters():
    """Ids assigned through the properties are stored and read back unchanged."""
    trace_id = "X234567890abcdef1234567890abcdef"
    span_id = "X234567890abcdef"

    ctx = PropagationContext()
    ctx.trace_id = trace_id
    ctx.span_id = span_id

    # Check both the backing attribute and the public property for each id.
    assert ctx._trace_id == trace_id
    assert ctx.trace_id == trace_id
    assert ctx._span_id == span_id
    assert ctx.span_id == span_id

    # Assigning ids must not create a dynamic sampling context.
    assert ctx.dynamic_sampling_context is None


def test_update():
Expand All @@ -81,3 +93,46 @@ def test_update():
assert ctx.dynamic_sampling_context is None

assert not hasattr(ctx, "foo")


def test_existing_sample_rand_kept():
    """A valid sample_rand already in the DSC must survive the backfill step."""
    ctx = PropagationContext(
        trace_id="00000000000000000000000000000000",
        dynamic_sampling_context={"sample_rand": "0.5"},
    )

    # Run the backfill explicitly so the assertion does not depend on whether
    # constructing the context triggers it.
    ctx._fill_sample_rand()

    # If sample_rand was regenerated, the value would be 0.8766381713144122 based on the trace_id
    assert ctx.dynamic_sampling_context["sample_rand"] == "0.5"


@pytest.mark.parametrize(
    ("parent_sampled", "sample_rate", "expected_sample_rand"),
    (
        (None, None, "0.8766381713144122"),
        (None, "0.5", "0.8766381713144122"),
        (False, None, "0.8766381713144122"),
        (True, None, "0.8766381713144122"),
        (False, "0.0", "0.8766381713144122"),
        (False, "0.01", "0.8778717896012681"),
        (True, "0.01", "0.008766381713144122"),
        (False, "0.1", "0.888974354182971"),
        (True, "0.1", "0.08766381713144122"),
        (False, "0.5", "0.9383190856572061"),
        (True, "0.5", "0.4383190856572061"),
        (True, "1.0", "0.8766381713144122"),
    ),
)
def test_sample_rand_filled(parent_sampled, sample_rate, expected_sample_rand):
    """When continuing a trace, we want to fill in the sample_rand value if it's missing."""
    # The baggage header is always sent. When sample_rate is None it carries the
    # literal text "None", which does not parse as a float, so those cases
    # expect the same value as the ones drawn from the full [0, 1) range.
    ctx = PropagationContext().from_incoming_data(
        {
            "sentry-trace": f"00000000000000000000000000000000-0000000000000000{SAMPLED_FLAG[parent_sampled]}",
            "baggage": f"sentry-sample_rate={sample_rate}",
        }
    )

    assert ctx.dynamic_sampling_context["sample_rand"] == expected_sample_rand

0 comments on commit 382ef64

Please sign in to comment.