Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(tracing): Backfill missing sample_rand on PropagationContext #4038

Merged
merged 3 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions sentry_sdk/scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
logger,
)

import typing
from typing import TYPE_CHECKING

if TYPE_CHECKING:
Expand Down Expand Up @@ -1146,8 +1147,20 @@ def continue_trace(
"""
self.generate_propagation_context(environ_or_headers)

# When we generate the propagation context, the sample_rand value is set
# if missing or invalid (we use the original value if it's valid).
# We want the transaction to use the same sample_rand value. Due to duplicated
# propagation logic in the transaction, we pass it in to avoid recomputing it
# in the transaction.
# TYPE SAFETY: self.generate_propagation_context() ensures that self._propagation_context
# is not None.
sample_rand = typing.cast(
PropagationContext, self._propagation_context
)._sample_rand()

transaction = Transaction.continue_from_headers(
normalize_incoming_data(environ_or_headers),
_sample_rand=sample_rand,
op=op,
origin=origin,
name=name,
Expand Down
23 changes: 19 additions & 4 deletions sentry_sdk/tracing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import uuid
import random
import warnings
from datetime import datetime, timedelta, timezone
from enum import Enum
Expand Down Expand Up @@ -477,6 +476,8 @@ def continue_from_environ(
def continue_from_headers(
cls,
headers, # type: Mapping[str, str]
*,
_sample_rand=None, # type: Optional[str]
**kwargs, # type: Any
):
# type: (...) -> Transaction
Expand All @@ -485,6 +486,8 @@ def continue_from_headers(
the ``sentry-trace`` and ``baggage`` headers).

:param headers: The dictionary with the HTTP headers to pull information from.
:param _sample_rand: If provided, we override the sample_rand value from the
incoming headers with this value. (internal use only)
"""
# TODO move this to the Transaction class
if cls is Span:
Expand All @@ -495,7 +498,9 @@ def continue_from_headers(

# TODO-neel move away from this kwargs stuff, it's confusing and opaque
# make more explicit
baggage = Baggage.from_incoming_header(headers.get(BAGGAGE_HEADER_NAME))
baggage = Baggage.from_incoming_header(
headers.get(BAGGAGE_HEADER_NAME), _sample_rand=_sample_rand
)
kwargs.update({BAGGAGE_HEADER_NAME: baggage})

sentrytrace_kwargs = extract_sentrytrace_data(
Expand Down Expand Up @@ -779,6 +784,7 @@ class Transaction(Span):
"_profile",
"_continuous_profile",
"_baggage",
"_sample_rand",
)

def __init__( # type: ignore[misc]
Expand All @@ -803,6 +809,14 @@ def __init__( # type: ignore[misc]
self._continuous_profile = None # type: Optional[ContinuousProfile]
self._baggage = baggage

baggage_sample_rand = (
None if self._baggage is None else self._baggage._sample_rand()
)
if baggage_sample_rand is not None:
self._sample_rand = baggage_sample_rand
else:
self._sample_rand = _generate_sample_rand(self.trace_id)

def __repr__(self):
# type: () -> str
return (
Expand Down Expand Up @@ -1173,10 +1187,10 @@ def _set_initial_sampling_decision(self, sampling_context):
self.sampled = False
return

# Now we roll the dice. random.random is inclusive of 0, but not of 1,
# Now we roll the dice. self._sample_rand is inclusive of 0, but not of 1,
# so strict < is safe here. In case sample_rate is a boolean, cast it
# to a float (True becomes 1.0 and False becomes 0.0)
self.sampled = random.random() < self.sample_rate
self.sampled = self._sample_rand < self.sample_rate

if self.sampled:
logger.debug(
Expand Down Expand Up @@ -1333,6 +1347,7 @@ async def my_async_function():
Baggage,
EnvironHeaders,
extract_sentrytrace_data,
_generate_sample_rand,
has_tracing_enabled,
maybe_create_breadcrumbs_from_span,
)
Expand Down
141 changes: 139 additions & 2 deletions sentry_sdk/tracing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import sys
from collections.abc import Mapping
from datetime import timedelta
from decimal import ROUND_DOWN, Decimal
from functools import wraps
from random import Random
from urllib.parse import quote, unquote
import uuid

Expand All @@ -19,6 +21,7 @@
match_regex_list,
qualname_from_function,
to_string,
try_convert,
is_sentry_url,
_is_external_source,
_is_in_project_root,
Expand All @@ -45,6 +48,7 @@
"[ \t]*$" # whitespace
)


# This is a normal base64 regex, modified to reflect that fact that we strip the
# trailing = or == off
base64_stripped = (
Expand Down Expand Up @@ -418,13 +422,17 @@ def from_incoming_data(cls, incoming_data):
propagation_context = PropagationContext()
propagation_context.update(sentrytrace_data)

if propagation_context is not None:
propagation_context._fill_sample_rand()

return propagation_context

@property
def trace_id(self):
# type: () -> str
"""The trace id of the Sentry trace."""
if not self._trace_id:
# New trace, don't fill in sample_rand
self._trace_id = uuid.uuid4().hex

return self._trace_id
Expand Down Expand Up @@ -469,6 +477,68 @@ def __repr__(self):
self.dynamic_sampling_context,
)

def _fill_sample_rand(self):
# type: () -> None
"""
Ensure that there is a valid sample_rand value in the dynamic_sampling_context.

If there is a valid sample_rand value in the dynamic_sampling_context, we keep it.
Otherwise, we generate a sample_rand value according to the following:

- If we have a parent_sampled value and a sample_rate in the DSC, we compute
a sample_rand value randomly in the range:
- [0, sample_rate) if parent_sampled is True,
- or, in the range [sample_rate, 1) if parent_sampled is False.

- If either parent_sampled or sample_rate is missing, we generate a random
value in the range [0, 1).

The sample_rand is deterministically generated from the trace_id, if present.

This function does nothing if there is no dynamic_sampling_context.
"""
if self.dynamic_sampling_context is None:
return

sample_rand = try_convert(
Decimal, self.dynamic_sampling_context.get("sample_rand")
)
if sample_rand is not None and 0 <= sample_rand < 1:
# sample_rand is present and valid, so don't overwrite it
return

# Get the sample rate and compute the transformation that will map the random value
# to the desired range: [0, 1), [0, sample_rate), or [sample_rate, 1).
sample_rate = try_convert(
float, self.dynamic_sampling_context.get("sample_rate")
)
lower, upper = _sample_rand_range(self.parent_sampled, sample_rate)

try:
sample_rand = _generate_sample_rand(self.trace_id, interval=(lower, upper))
except ValueError:
# ValueError is raised if the interval is invalid, i.e. lower >= upper.
# lower >= upper might happen if the incoming trace's sampled flag
# and sample_rate are inconsistent, e.g. sample_rate=0.0 but sampled=True.
# We cannot generate a sensible sample_rand value in this case.
logger.debug(
f"Could not backfill sample_rand, since parent_sampled={self.parent_sampled} "
f"and sample_rate={sample_rate}."
)
return

self.dynamic_sampling_context["sample_rand"] = (
f"{sample_rand:.6f}" # noqa: E231
)

def _sample_rand(self):
# type: () -> Optional[str]
"""Convenience method to get the sample_rand value from the dynamic_sampling_context."""
if self.dynamic_sampling_context is None:
return None

return self.dynamic_sampling_context.get("sample_rand")


class Baggage:
"""
Expand All @@ -491,8 +561,13 @@ def __init__(
self.mutable = mutable

@classmethod
def from_incoming_header(cls, header):
# type: (Optional[str]) -> Baggage
def from_incoming_header(
cls,
header, # type: Optional[str]
*,
_sample_rand=None, # type: Optional[str]
):
# type: (...) -> Baggage
"""
freeze if incoming header already has sentry baggage
"""
Expand All @@ -515,6 +590,10 @@ def from_incoming_header(cls, header):
else:
third_party_items += ("," if third_party_items else "") + item

if _sample_rand is not None:
sentry_items["sample_rand"] = str(_sample_rand)
mutable = False

return Baggage(sentry_items, third_party_items, mutable)

@classmethod
Expand Down Expand Up @@ -566,6 +645,7 @@ def populate_from_transaction(cls, transaction):
options = client.options or {}

sentry_items["trace_id"] = transaction.trace_id
sentry_items["sample_rand"] = str(transaction._sample_rand)

if options.get("environment"):
sentry_items["environment"] = options["environment"]
Expand Down Expand Up @@ -638,6 +718,20 @@ def strip_sentry_baggage(header):
)
)

def _sample_rand(self):
# type: () -> Optional[Decimal]
"""Convenience method to get the sample_rand value from the sentry_items.

We validate the value and parse it as a Decimal before returning it. The value is considered
valid if it is a Decimal in the range [0, 1).
"""
sample_rand = try_convert(Decimal, self.sentry_items.get("sample_rand"))

if sample_rand is not None and Decimal(0) <= sample_rand < Decimal(1):
return sample_rand

return None

def __repr__(self):
# type: () -> str
return f'<Baggage "{self.serialize(include_third_party=True)}", mutable={self.mutable}>'
Expand Down Expand Up @@ -748,6 +842,49 @@ def get_current_span(scope=None):
return current_span


def _generate_sample_rand(
trace_id, # type: Optional[str]
*,
interval=(0.0, 1.0), # type: tuple[float, float]
):
# type: (...) -> Decimal
"""Generate a sample_rand value from a trace ID.

The generated value will be pseudorandomly chosen from the provided
interval. Specifically, given (lower, upper) = interval, the generated
value will be in the range [lower, upper). The value has 6-digit precision,
so when printing with .6f, the value will never be rounded up.

The pseudorandom number generator is seeded with the trace ID.
"""
lower, upper = interval
if not lower < upper: # using `if lower >= upper` would handle NaNs incorrectly
raise ValueError("Invalid interval: lower must be less than upper")

rng = Random(trace_id)
sample_rand = upper
while sample_rand >= upper:
sample_rand = rng.uniform(lower, upper)

# Round down to exactly six decimal-digit precision.
return Decimal(sample_rand).quantize(Decimal("0.000001"), rounding=ROUND_DOWN)


def _sample_rand_range(parent_sampled, sample_rate):
# type: (Optional[bool], Optional[float]) -> tuple[float, float]
"""
Compute the lower (inclusive) and upper (exclusive) bounds of the range of values
that a generated sample_rand value must fall into, given the parent_sampled and
sample_rate values.
"""
if parent_sampled is None or sample_rate is None:
return 0.0, 1.0
elif parent_sampled is True:
return 0.0, sample_rate
else: # parent_sampled is False
return sample_rate, 1.0


# Circular imports
from sentry_sdk.tracing import (
BAGGAGE_HEADER_NAME,
Expand Down
17 changes: 17 additions & 0 deletions sentry_sdk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1888,3 +1888,20 @@ def should_be_treated_as_error(ty, value):
return False

return True


if TYPE_CHECKING:
T = TypeVar("T")


def try_convert(convert_func, value):
# type: (Callable[[Any], T], Any) -> Optional[T]
"""
Attempt to convert from an unknown type to a specific type, using the
given function. Return None if the conversion fails, i.e. if the function
raises an exception.
"""
try:
return convert_func(value)
except Exception:
return None
25 changes: 13 additions & 12 deletions tests/integrations/aiohttp/test_aiohttp.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,18 +626,19 @@ async def handler(request):

raw_server = await aiohttp_raw_server(handler)

with start_transaction(
name="/interactions/other-dogs/new-dog",
op="greeting.sniff",
trace_id="0123456789012345678901234567890",
):
client = await aiohttp_client(raw_server)
resp = await client.get("/", headers={"bagGage": "custom=value"})

assert (
resp.request_info.headers["baggage"]
== "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
)
with mock.patch("sentry_sdk.tracing_utils.Random.uniform", return_value=0.5):
with start_transaction(
name="/interactions/other-dogs/new-dog",
op="greeting.sniff",
trace_id="0123456789012345678901234567890",
):
client = await aiohttp_client(raw_server)
resp = await client.get("/", headers={"bagGage": "custom=value"})

assert (
resp.request_info.headers["baggage"]
== "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-sample_rand=0.500000,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
)


@pytest.mark.asyncio
Expand Down
Loading
Loading