From 0d23b726b6b47b81acc2a1d2ba359d845467c71d Mon Sep 17 00:00:00 2001 From: Daniel Szoke <7881302+szokeasaurusrex@users.noreply.github.com> Date: Wed, 26 Feb 2025 16:00:06 +0100 Subject: [PATCH] feat(tracing): Backfill missing `sample_rand` on `PropagationContext` (#4038) Whenever the `PropagationContext` continues an incoming trace (i.e. whenever the `trace_id` is set, rather than being randomly generated as for a new trace), check if the `sample_rand` is present and valid in the incoming DSC. If the `sample_rand` is missing, generate it deterministically based on the `trace_id` and backfill it into the DSC on the `PropagationContext`. When generating the backfilled `sample_rand`, we ensure the generated value is consistent with the incoming trace's sampling decision and sample rate, if both of these are present. Otherwise, we generate a new value in the range [0, 1). Additionally, we propagate the `sample_rand` to transactions generated with `continue_trace` (allowing the `sample_rand` to be propagated on outgoing traces), and also allow `sample_rand` to be used for making sampling decisions. Ref #3998 --------- Co-authored-by: Ivana Kellyer --- sentry_sdk/scope.py | 13 ++ sentry_sdk/tracing.py | 23 ++- sentry_sdk/tracing_utils.py | 141 +++++++++++++++++- sentry_sdk/utils.py | 17 +++ tests/integrations/aiohttp/test_aiohttp.py | 25 ++-- tests/integrations/celery/test_celery.py | 35 +++-- tests/integrations/httpx/test_httpx.py | 48 +++--- tests/integrations/stdlib/test_httplib.py | 13 +- tests/test_api.py | 11 +- tests/test_dsc.py | 3 +- tests/test_monitor.py | 12 +- tests/test_propagationcontext.py | 99 ++++++++++++ tests/tracing/test_integration_tests.py | 10 +- tests/tracing/test_sample_rand.py | 55 +++++++ tests/tracing/test_sample_rand_propagation.py | 43 ++++++ tests/tracing/test_sampling.py | 13 +- 16 files changed, 474 insertions(+), 87 deletions(-) create mode 100644 tests/tracing/test_sample_rand.py create mode 100644 tests/tracing/test_sample_rand_propagation.py diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py index fbe97ddf44..6a5e70a6eb 100644 --- a/sentry_sdk/scope.py +++ b/sentry_sdk/scope.py @@ -43,6 +43,7 @@ logger, ) +import typing from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -1146,8 +1147,20 @@ def continue_trace( """ self.generate_propagation_context(environ_or_headers) + # When we generate the propagation context, the sample_rand value is set + # if missing or invalid (we use the original value if it's valid). + # We want the transaction to use the same sample_rand value. Due to duplicated + # propagation logic in the transaction, we pass it in to avoid recomputing it + # in the transaction. + # TYPE SAFETY: self.generate_propagation_context() ensures that self._propagation_context + # is not None. + sample_rand = typing.cast( + PropagationContext, self._propagation_context + )._sample_rand() + transaction = Transaction.continue_from_headers( normalize_incoming_data(environ_or_headers), + _sample_rand=sample_rand, op=op, origin=origin, name=name, diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index cf708b839e..866609a66e 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -1,5 +1,4 @@ import uuid -import random import warnings from datetime import datetime, timedelta, timezone from enum import Enum @@ -477,6 +476,8 @@ def continue_from_environ( def continue_from_headers( cls, headers, # type: Mapping[str, str] + *, + _sample_rand=None, # type: Optional[str] **kwargs, # type: Any ): # type: (...) -> Transaction @@ -485,6 +486,8 @@ def continue_from_headers( the ``sentry-trace`` and ``baggage`` headers). :param headers: The dictionary with the HTTP headers to pull information from. + :param _sample_rand: If provided, we override the sample_rand value from the + incoming headers with this value. (internal use only) """ # TODO move this to the Transaction class if cls is Span: @@ -495,7 +498,9 @@ def continue_from_headers( # TODO-neel move away from this kwargs stuff, it's confusing and opaque # make more explicit - baggage = Baggage.from_incoming_header(headers.get(BAGGAGE_HEADER_NAME)) + baggage = Baggage.from_incoming_header( + headers.get(BAGGAGE_HEADER_NAME), _sample_rand=_sample_rand + ) kwargs.update({BAGGAGE_HEADER_NAME: baggage}) sentrytrace_kwargs = extract_sentrytrace_data( @@ -779,6 +784,7 @@ class Transaction(Span): "_profile", "_continuous_profile", "_baggage", + "_sample_rand", ) def __init__( # type: ignore[misc] @@ -803,6 +809,14 @@ def __init__( # type: ignore[misc] self._continuous_profile = None # type: Optional[ContinuousProfile] self._baggage = baggage + baggage_sample_rand = ( + None if self._baggage is None else self._baggage._sample_rand() + ) + if baggage_sample_rand is not None: + self._sample_rand = baggage_sample_rand + else: + self._sample_rand = _generate_sample_rand(self.trace_id) + def __repr__(self): # type: () -> str return ( @@ -1173,10 +1187,10 @@ def _set_initial_sampling_decision(self, sampling_context): self.sampled = False return - # Now we roll the dice. random.random is inclusive of 0, but not of 1, + # Now we roll the dice. self._sample_rand is inclusive of 0, but not of 1, # so strict < is safe here. In case sample_rate is a boolean, cast it # to a float (True becomes 1.0 and False becomes 0.0) - self.sampled = random.random() < self.sample_rate + self.sampled = self._sample_rand < self.sample_rate if self.sampled: logger.debug( @@ -1333,6 +1347,7 @@ async def my_async_function(): Baggage, EnvironHeaders, extract_sentrytrace_data, + _generate_sample_rand, has_tracing_enabled, maybe_create_breadcrumbs_from_span, ) diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index ae72b8cce9..b1e2050708 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -5,7 +5,9 @@ import sys from collections.abc import Mapping from datetime import timedelta +from decimal import ROUND_DOWN, Decimal from functools import wraps +from random import Random from urllib.parse import quote, unquote import uuid @@ -19,6 +21,7 @@ match_regex_list, qualname_from_function, to_string, + try_convert, is_sentry_url, _is_external_source, _is_in_project_root, @@ -45,6 +48,7 @@ "[ \t]*$" # whitespace ) + # This is a normal base64 regex, modified to reflect that fact that we strip the # trailing = or == off base64_stripped = ( @@ -418,6 +422,9 @@ def from_incoming_data(cls, incoming_data): propagation_context = PropagationContext() propagation_context.update(sentrytrace_data) + if propagation_context is not None: + propagation_context._fill_sample_rand() + return propagation_context @property @@ -425,6 +432,7 @@ def trace_id(self): # type: () -> str """The trace id of the Sentry trace.""" if not self._trace_id: + # New trace, don't fill in sample_rand self._trace_id = uuid.uuid4().hex return self._trace_id @@ -469,6 +477,68 @@ def __repr__(self): self.dynamic_sampling_context, ) + def _fill_sample_rand(self): + # type: () -> None + """ + Ensure that there is a valid sample_rand value in the dynamic_sampling_context. + + If there is a valid sample_rand value in the dynamic_sampling_context, we keep it. + Otherwise, we generate a sample_rand value according to the following: + + - If we have a parent_sampled value and a sample_rate in the DSC, we compute + a sample_rand value randomly in the range: + - [0, sample_rate) if parent_sampled is True, + - or, in the range [sample_rate, 1) if parent_sampled is False. + + - If either parent_sampled or sample_rate is missing, we generate a random + value in the range [0, 1). + + The sample_rand is deterministically generated from the trace_id, if present. + + This function does nothing if there is no dynamic_sampling_context. + """ + if self.dynamic_sampling_context is None: + return + + sample_rand = try_convert( + Decimal, self.dynamic_sampling_context.get("sample_rand") + ) + if sample_rand is not None and 0 <= sample_rand < 1: + # sample_rand is present and valid, so don't overwrite it + return + + # Get the sample rate and compute the transformation that will map the random value + # to the desired range: [0, 1), [0, sample_rate), or [sample_rate, 1). + sample_rate = try_convert( + float, self.dynamic_sampling_context.get("sample_rate") + ) + lower, upper = _sample_rand_range(self.parent_sampled, sample_rate) + + try: + sample_rand = _generate_sample_rand(self.trace_id, interval=(lower, upper)) + except ValueError: + # ValueError is raised if the interval is invalid, i.e. lower >= upper. + # lower >= upper might happen if the incoming trace's sampled flag + # and sample_rate are inconsistent, e.g. sample_rate=0.0 but sampled=True. + # We cannot generate a sensible sample_rand value in this case. + logger.debug( + f"Could not backfill sample_rand, since parent_sampled={self.parent_sampled} " + f"and sample_rate={sample_rate}." + ) + return + + self.dynamic_sampling_context["sample_rand"] = ( + f"{sample_rand:.6f}" # noqa: E231 + ) + + def _sample_rand(self): + # type: () -> Optional[str] + """Convenience method to get the sample_rand value from the dynamic_sampling_context.""" + if self.dynamic_sampling_context is None: + return None + + return self.dynamic_sampling_context.get("sample_rand") + class Baggage: """ @@ -491,8 +561,13 @@ def __init__( self.mutable = mutable @classmethod - def from_incoming_header(cls, header): - # type: (Optional[str]) -> Baggage + def from_incoming_header( + cls, + header, # type: Optional[str] + *, + _sample_rand=None, # type: Optional[str] + ): + # type: (...) -> Baggage """ freeze if incoming header already has sentry baggage """ @@ -515,6 +590,10 @@ def from_incoming_header(cls, header): else: third_party_items += ("," if third_party_items else "") + item + if _sample_rand is not None: + sentry_items["sample_rand"] = str(_sample_rand) + mutable = False + return Baggage(sentry_items, third_party_items, mutable) @classmethod @@ -566,6 +645,7 @@ def populate_from_transaction(cls, transaction): options = client.options or {} sentry_items["trace_id"] = transaction.trace_id + sentry_items["sample_rand"] = str(transaction._sample_rand) if options.get("environment"): sentry_items["environment"] = options["environment"] @@ -638,6 +718,20 @@ def strip_sentry_baggage(header): ) ) + def _sample_rand(self): + # type: () -> Optional[Decimal] + """Convenience method to get the sample_rand value from the sentry_items. + + We validate the value and parse it as a Decimal before returning it. The value is considered + valid if it is a Decimal in the range [0, 1). + """ + sample_rand = try_convert(Decimal, self.sentry_items.get("sample_rand")) + + if sample_rand is not None and Decimal(0) <= sample_rand < Decimal(1): + return sample_rand + + return None + def __repr__(self): # type: () -> str return f'' @@ -748,6 +842,49 @@ def get_current_span(scope=None): return current_span +def _generate_sample_rand( + trace_id, # type: Optional[str] + *, + interval=(0.0, 1.0), # type: tuple[float, float] +): + # type: (...) -> Decimal + """Generate a sample_rand value from a trace ID. + + The generated value will be pseudorandomly chosen from the provided + interval. Specifically, given (lower, upper) = interval, the generated + value will be in the range [lower, upper). The value has 6-digit precision, + so when printing with .6f, the value will never be rounded up. + + The pseudorandom number generator is seeded with the trace ID. + """ + lower, upper = interval + if not lower < upper: # using `if lower >= upper` would handle NaNs incorrectly + raise ValueError("Invalid interval: lower must be less than upper") + + rng = Random(trace_id) + sample_rand = upper + while sample_rand >= upper: + sample_rand = rng.uniform(lower, upper) + + # Round down to exactly six decimal-digit precision. + return Decimal(sample_rand).quantize(Decimal("0.000001"), rounding=ROUND_DOWN) + + +def _sample_rand_range(parent_sampled, sample_rate): + # type: (Optional[bool], Optional[float]) -> tuple[float, float] + """ + Compute the lower (inclusive) and upper (exclusive) bounds of the range of values + that a generated sample_rand value must fall into, given the parent_sampled and + sample_rate values. + """ + if parent_sampled is None or sample_rate is None: + return 0.0, 1.0 + elif parent_sampled is True: + return 0.0, sample_rate + else: # parent_sampled is False + return sample_rate, 1.0 + + # Circular imports from sentry_sdk.tracing import ( BAGGAGE_HEADER_NAME, diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index b2a39b7af1..89b2354c52 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -1888,3 +1888,20 @@ def should_be_treated_as_error(ty, value): return False return True + + +if TYPE_CHECKING: + T = TypeVar("T") + + +def try_convert(convert_func, value): + # type: (Callable[[Any], T], Any) -> Optional[T] + """ + Attempt to convert from an unknown type to a specific type, using the + given function. Return None if the conversion fails, i.e. if the function + raises an exception. + """ + try: + return convert_func(value) + except Exception: + return None diff --git a/tests/integrations/aiohttp/test_aiohttp.py b/tests/integrations/aiohttp/test_aiohttp.py index 83dc021844..ef7c04e90a 100644 --- a/tests/integrations/aiohttp/test_aiohttp.py +++ b/tests/integrations/aiohttp/test_aiohttp.py @@ -626,18 +626,19 @@ async def handler(request): raw_server = await aiohttp_raw_server(handler) - with start_transaction( - name="/interactions/other-dogs/new-dog", - op="greeting.sniff", - trace_id="0123456789012345678901234567890", - ): - client = await aiohttp_client(raw_server) - resp = await client.get("/", headers={"bagGage": "custom=value"}) - - assert ( - resp.request_info.headers["baggage"] - == "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true" - ) + with mock.patch("sentry_sdk.tracing_utils.Random.uniform", return_value=0.5): + with start_transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + trace_id="0123456789012345678901234567890", + ): + client = await aiohttp_client(raw_server) + resp = await client.get("/", headers={"bagGage": "custom=value"}) + + assert ( + resp.request_info.headers["baggage"] + == "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-sample_rand=0.500000,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true" + ) @pytest.mark.asyncio diff --git a/tests/integrations/celery/test_celery.py b/tests/integrations/celery/test_celery.py index e51341599f..8c794bd5ff 100644 --- a/tests/integrations/celery/test_celery.py +++ b/tests/integrations/celery/test_celery.py @@ -509,22 +509,25 @@ def test_baggage_propagation(init_celery): def dummy_task(self, x, y): return _get_headers(self) - with start_transaction() as transaction: - result = dummy_task.apply_async( - args=(1, 0), - headers={"baggage": "custom=value"}, - ).get() - - assert sorted(result["baggage"].split(",")) == sorted( - [ - "sentry-release=abcdef", - "sentry-trace_id={}".format(transaction.trace_id), - "sentry-environment=production", - "sentry-sample_rate=1.0", - "sentry-sampled=true", - "custom=value", - ] - ) + # patch random.uniform to return a predictable sample_rand value + with mock.patch("sentry_sdk.tracing_utils.Random.uniform", return_value=0.5): + with start_transaction() as transaction: + result = dummy_task.apply_async( + args=(1, 0), + headers={"baggage": "custom=value"}, + ).get() + + assert sorted(result["baggage"].split(",")) == sorted( + [ + "sentry-release=abcdef", + "sentry-trace_id={}".format(transaction.trace_id), + "sentry-environment=production", + "sentry-sample_rand=0.500000", + "sentry-sample_rate=1.0", + "sentry-sampled=true", + "custom=value", + ] + ) def test_sentry_propagate_traces_override(init_celery): diff --git a/tests/integrations/httpx/test_httpx.py b/tests/integrations/httpx/test_httpx.py index d37e1fddf2..5a35b68076 100644 --- a/tests/integrations/httpx/test_httpx.py +++ b/tests/integrations/httpx/test_httpx.py @@ -170,30 +170,32 @@ def test_outgoing_trace_headers_append_to_baggage( url = "http://example.com/" - with start_transaction( - name="/interactions/other-dogs/new-dog", - op="greeting.sniff", - trace_id="01234567890123456789012345678901", - ) as transaction: - if asyncio.iscoroutinefunction(httpx_client.get): - response = asyncio.get_event_loop().run_until_complete( - httpx_client.get(url, headers={"baGGage": "custom=data"}) + # patch random.uniform to return a predictable sample_rand value + with mock.patch("sentry_sdk.tracing_utils.Random.uniform", return_value=0.5): + with start_transaction( + name="/interactions/other-dogs/new-dog", + op="greeting.sniff", + trace_id="01234567890123456789012345678901", + ) as transaction: + if asyncio.iscoroutinefunction(httpx_client.get): + response = asyncio.get_event_loop().run_until_complete( + httpx_client.get(url, headers={"baGGage": "custom=data"}) + ) + else: + response = httpx_client.get(url, headers={"baGGage": "custom=data"}) + + request_span = transaction._span_recorder.spans[-1] + assert response.request.headers[ + "sentry-trace" + ] == "{trace_id}-{parent_span_id}-{sampled}".format( + trace_id=transaction.trace_id, + parent_span_id=request_span.span_id, + sampled=1, + ) + assert ( + response.request.headers["baggage"] + == "custom=data,sentry-trace_id=01234567890123456789012345678901,sentry-sample_rand=0.500000,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true" ) - else: - response = httpx_client.get(url, headers={"baGGage": "custom=data"}) - - request_span = transaction._span_recorder.spans[-1] - assert response.request.headers[ - "sentry-trace" - ] == "{trace_id}-{parent_span_id}-{sampled}".format( - trace_id=transaction.trace_id, - parent_span_id=request_span.span_id, - sampled=1, - ) - assert ( - response.request.headers["baggage"] - == "custom=data,sentry-trace_id=01234567890123456789012345678901,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true" - ) @pytest.mark.parametrize( diff --git a/tests/integrations/stdlib/test_httplib.py b/tests/integrations/stdlib/test_httplib.py index 227a24336c..892e07980b 100644 --- a/tests/integrations/stdlib/test_httplib.py +++ b/tests/integrations/stdlib/test_httplib.py @@ -1,4 +1,3 @@ -import random from http.client import HTTPConnection, HTTPSConnection from socket import SocketIO from urllib.error import HTTPError @@ -189,7 +188,7 @@ def test_outgoing_trace_headers(sentry_init, monkeypatch): "baggage": ( "other-vendor-value-1=foo;bar;baz, sentry-trace_id=771a43a4192642f0b136d5159a501700, " "sentry-public_key=49d0f7386ad645858ae85020e393bef3, sentry-sample_rate=0.01337, " - "sentry-user_id=Am%C3%A9lie, other-vendor-value-2=foo;bar;" + "sentry-user_id=Am%C3%A9lie, sentry-sample_rand=0.132521102938283, other-vendor-value-2=foo;bar;" ), } @@ -222,7 +221,8 @@ def test_outgoing_trace_headers(sentry_init, monkeypatch): "sentry-trace_id=771a43a4192642f0b136d5159a501700," "sentry-public_key=49d0f7386ad645858ae85020e393bef3," "sentry-sample_rate=1.0," - "sentry-user_id=Am%C3%A9lie" + "sentry-user_id=Am%C3%A9lie," + "sentry-sample_rand=0.132521102938283" ) assert request_headers["baggage"] == expected_outgoing_baggage @@ -235,11 +235,9 @@ def test_outgoing_trace_headers_head_sdk(sentry_init, monkeypatch): mock_send = mock.Mock() monkeypatch.setattr(HTTPSConnection, "send", mock_send) - # make sure transaction is always sampled - monkeypatch.setattr(random, "random", lambda: 0.1) - sentry_init(traces_sample_rate=0.5, release="foo") - transaction = Transaction.continue_from_headers({}) + with mock.patch("sentry_sdk.tracing_utils.Random.uniform", return_value=0.25): + transaction = Transaction.continue_from_headers({}) with start_transaction(transaction=transaction, name="Head SDK tx") as transaction: HTTPSConnection("www.squirrelchasers.com").request("GET", "/top-chasers") @@ -261,6 +259,7 @@ def test_outgoing_trace_headers_head_sdk(sentry_init, monkeypatch): expected_outgoing_baggage = ( "sentry-trace_id=%s," + "sentry-sample_rand=0.250000," "sentry-environment=production," "sentry-release=foo," "sentry-sample_rate=0.5," diff --git a/tests/test_api.py b/tests/test_api.py index 3b2a9c8fb7..08c295a5c4 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,4 +1,6 @@ import pytest + +import re from unittest import mock import sentry_sdk @@ -95,10 +97,10 @@ def test_baggage_with_tracing_disabled(sentry_init): def test_baggage_with_tracing_enabled(sentry_init): sentry_init(traces_sample_rate=1.0, release="1.0.0", environment="dev") with start_transaction() as transaction: - expected_baggage = "sentry-trace_id={},sentry-environment=dev,sentry-release=1.0.0,sentry-sample_rate=1.0,sentry-sampled={}".format( + expected_baggage_re = r"^sentry-trace_id={},sentry-sample_rand=0\.\d{{6}},sentry-environment=dev,sentry-release=1\.0\.0,sentry-sample_rate=1\.0,sentry-sampled={}$".format( transaction.trace_id, "true" if transaction.sampled else "false" ) - assert get_baggage() == expected_baggage + assert re.match(expected_baggage_re, get_baggage()) @pytest.mark.forked @@ -111,7 +113,7 @@ def test_continue_trace(sentry_init): transaction = continue_trace( { "sentry-trace": "{}-{}-{}".format(trace_id, parent_span_id, parent_sampled), - "baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19", + "baggage": "sentry-trace_id=566e3688a61d4bc888951642d6f14a19,sentry-sample_rand=0.123456", }, name="some name", ) @@ -123,7 +125,8 @@ def test_continue_trace(sentry_init): assert propagation_context.parent_span_id == parent_span_id assert propagation_context.parent_sampled == parent_sampled assert propagation_context.dynamic_sampling_context == { - "trace_id": "566e3688a61d4bc888951642d6f14a19" + "trace_id": "566e3688a61d4bc888951642d6f14a19", + "sample_rand": "0.123456", } diff --git a/tests/test_dsc.py b/tests/test_dsc.py index 4837384a8e..8e549d0cf8 100644 --- a/tests/test_dsc.py +++ b/tests/test_dsc.py @@ -8,7 +8,6 @@ This is not tested in this file. """ -import random from unittest import mock import pytest @@ -176,7 +175,7 @@ def my_traces_sampler(sampling_context): } # We continue the incoming trace and start a new transaction - with mock.patch.object(random, "random", return_value=0.2): + with mock.patch("sentry_sdk.tracing_utils.Random.uniform", return_value=0.125): transaction = sentry_sdk.continue_trace(incoming_http_headers) with sentry_sdk.start_transaction(transaction, name="foo"): pass diff --git a/tests/test_monitor.py b/tests/test_monitor.py index 03e415b5cc..b48d9f6282 100644 --- a/tests/test_monitor.py +++ b/tests/test_monitor.py @@ -1,4 +1,3 @@ -import random from collections import Counter from unittest import mock @@ -68,17 +67,16 @@ def test_transaction_uses_downsampled_rate( monitor = sentry_sdk.get_client().monitor monitor.interval = 0.1 - # make sure rng doesn't sample - monkeypatch.setattr(random, "random", lambda: 0.9) - assert monitor.is_healthy() is True monitor.run() assert monitor.is_healthy() is False assert monitor.downsample_factor == 1 - with sentry_sdk.start_transaction(name="foobar") as transaction: - assert transaction.sampled is False - assert transaction.sample_rate == 0.5 + # make sure we don't sample the transaction + with mock.patch("sentry_sdk.tracing_utils.Random.uniform", return_value=0.75): + with sentry_sdk.start_transaction(name="foobar") as transaction: + assert transaction.sampled is False + assert transaction.sample_rate == 0.5 assert Counter(record_lost_event_calls) == Counter( [ diff --git a/tests/test_propagationcontext.py b/tests/test_propagationcontext.py index 85f82913f8..a0ce1094fa 100644 --- a/tests/test_propagationcontext.py +++ b/tests/test_propagationcontext.py @@ -1,6 +1,19 @@ +from unittest import mock +from unittest.mock import Mock + +import pytest + from sentry_sdk.tracing_utils import PropagationContext +SAMPLED_FLAG = { + None: "", + False: "-0", + True: "-1", +} +"""Maps the `sampled` value to the flag appended to the sentry-trace header.""" + + def test_empty_context(): ctx = PropagationContext() @@ -51,6 +64,7 @@ def test_lazy_uuids(): def test_property_setters(): ctx = PropagationContext() + ctx.trace_id = "X234567890abcdef1234567890abcdef" ctx.span_id = "X234567890abcdef" @@ -58,6 +72,7 @@ def test_property_setters(): assert ctx.trace_id == "X234567890abcdef1234567890abcdef" assert ctx._span_id == "X234567890abcdef" assert ctx.span_id == "X234567890abcdef" + assert ctx.dynamic_sampling_context is None def test_update(): @@ -81,3 +96,87 @@ def test_update(): assert ctx.dynamic_sampling_context is None assert not hasattr(ctx, "foo") + + +def test_existing_sample_rand_kept(): + ctx = PropagationContext( + trace_id="00000000000000000000000000000000", + dynamic_sampling_context={"sample_rand": "0.5"}, + ) + + # If sample_rand was regenerated, the value would be 0.919221 based on the trace_id + assert ctx.dynamic_sampling_context["sample_rand"] == "0.5" + + +@pytest.mark.parametrize( + ("parent_sampled", "sample_rate", "expected_interval"), + ( + # Note that parent_sampled and sample_rate do not scale the + # sample_rand value, only determine the range of the value. + # Expected values are determined by parent_sampled, sample_rate, + # and the trace_id. + (None, None, (0.0, 1.0)), + (None, "0.5", (0.0, 1.0)), + (False, None, (0.0, 1.0)), + (True, None, (0.0, 1.0)), + (False, "0.0", (0.0, 1.0)), + (False, "0.01", (0.01, 1.0)), + (True, "0.01", (0.0, 0.01)), + (False, "0.1", (0.1, 1.0)), + (True, "0.1", (0.0, 0.1)), + (False, "0.5", (0.5, 1.0)), + (True, "0.5", (0.0, 0.5)), + (True, "1.0", (0.0, 1.0)), + ), +) +def test_sample_rand_filled(parent_sampled, sample_rate, expected_interval): + """When continuing a trace, we want to fill in the sample_rand value if it's missing.""" + if sample_rate is not None: + sample_rate_str = f",sentry-sample_rate={sample_rate}" # noqa: E231 + else: + sample_rate_str = "" + + # for convenience, we'll just return the lower bound of the interval + mock_uniform = mock.Mock(return_value=expected_interval[0]) + + def mock_random_class(seed): + assert seed == "00000000000000000000000000000000", "seed should be the trace_id" + rv = Mock() + rv.uniform = mock_uniform + return rv + + with mock.patch("sentry_sdk.tracing_utils.Random", mock_random_class): + ctx = PropagationContext().from_incoming_data( + { + "sentry-trace": f"00000000000000000000000000000000-0000000000000000{SAMPLED_FLAG[parent_sampled]}", + # Placeholder is needed, since we only add sample_rand if sentry items are present in baggage + "baggage": f"sentry-placeholder=asdf{sample_rate_str}", + } + ) + + assert ( + ctx.dynamic_sampling_context["sample_rand"] + == f"{expected_interval[0]:.6f}" # noqa: E231 + ) + assert mock_uniform.call_count == 1 + assert mock_uniform.call_args[0] == expected_interval + + +def test_sample_rand_rounds_down(): + # Mock value that should round down to 0.999_999 + mock_uniform = mock.Mock(return_value=0.999_999_9) + + def mock_random_class(_): + rv = Mock() + rv.uniform = mock_uniform + return rv + + with mock.patch("sentry_sdk.tracing_utils.Random", mock_random_class): + ctx = PropagationContext().from_incoming_data( + { + "sentry-trace": "00000000000000000000000000000000-0000000000000000", + "baggage": "sentry-placeholder=asdf", + } + ) + + assert ctx.dynamic_sampling_context["sample_rand"] == "0.999999" diff --git a/tests/tracing/test_integration_tests.py b/tests/tracing/test_integration_tests.py index 13d1a7a77b..61ef14b7d0 100644 --- a/tests/tracing/test_integration_tests.py +++ b/tests/tracing/test_integration_tests.py @@ -1,8 +1,8 @@ import gc -import random import re import sys import weakref +from unittest import mock import pytest @@ -169,9 +169,8 @@ def test_dynamic_sampling_head_sdk_creates_dsc( envelopes = capture_envelopes() # make sure transaction is sampled for both cases - monkeypatch.setattr(random, "random", lambda: 0.1) - - transaction = Transaction.continue_from_headers({}, name="Head SDK tx") + with mock.patch("sentry_sdk.tracing_utils.Random.uniform", return_value=0.25): + transaction = Transaction.continue_from_headers({}, name="Head SDK tx") # will create empty mutable baggage baggage = transaction._baggage @@ -196,12 +195,14 @@ def test_dynamic_sampling_head_sdk_creates_dsc( "release": "foo", "sample_rate": str(sample_rate), "sampled": "true" if transaction.sampled else "false", + "sample_rand": "0.250000", "transaction": "Head SDK tx", "trace_id": trace_id, } expected_baggage = ( "sentry-trace_id=%s," + "sentry-sample_rand=0.250000," "sentry-environment=production," "sentry-release=foo," "sentry-transaction=Head%%20SDK%%20tx," @@ -217,6 +218,7 @@ def test_dynamic_sampling_head_sdk_creates_dsc( "environment": "production", "release": "foo", "sample_rate": str(sample_rate), + "sample_rand": "0.250000", "sampled": "true" if transaction.sampled else "false", "transaction": "Head SDK tx", "trace_id": trace_id, diff --git a/tests/tracing/test_sample_rand.py b/tests/tracing/test_sample_rand.py new file mode 100644 index 0000000000..b8f5c042ed --- /dev/null +++ b/tests/tracing/test_sample_rand.py @@ -0,0 +1,55 @@ +from unittest import mock + +import pytest + +import sentry_sdk +from sentry_sdk.tracing_utils import Baggage + + +@pytest.mark.parametrize("sample_rand", (0.0, 0.25, 0.5, 0.75)) +@pytest.mark.parametrize("sample_rate", (0.0, 0.25, 0.5, 0.75, 1.0)) +def test_deterministic_sampled(sentry_init, capture_events, sample_rate, sample_rand): + """ + Test that sample_rand is generated on new traces, that it is used to + make the sampling decision, and that it is included in the transaction's + baggage. + """ + sentry_init(traces_sample_rate=sample_rate) + events = capture_events() + + with mock.patch( + "sentry_sdk.tracing_utils.Random.uniform", return_value=sample_rand + ): + with sentry_sdk.start_transaction() as transaction: + assert ( + transaction.get_baggage().sentry_items["sample_rand"] + == f"{sample_rand:.6f}" # noqa: E231 + ) + + # Transaction event captured if sample_rand < sample_rate, indicating that + # sample_rand is used to make the sampling decision. + assert len(events) == int(sample_rand < sample_rate) + + +@pytest.mark.parametrize("sample_rand", (0.0, 0.25, 0.5, 0.75)) +@pytest.mark.parametrize("sample_rate", (0.0, 0.25, 0.5, 0.75, 1.0)) +def test_transaction_uses_incoming_sample_rand( + sentry_init, capture_events, sample_rate, sample_rand +): + """ + Test that the transaction uses the sample_rand value from the incoming baggage. + """ + baggage = Baggage(sentry_items={"sample_rand": f"{sample_rand:.6f}"}) # noqa: E231 + + sentry_init(traces_sample_rate=sample_rate) + events = capture_events() + + with sentry_sdk.start_transaction(baggage=baggage) as transaction: + assert ( + transaction.get_baggage().sentry_items["sample_rand"] + == f"{sample_rand:.6f}" # noqa: E231 + ) + + # Transaction event captured if sample_rand < sample_rate, indicating that + # sample_rand is used to make the sampling decision. + assert len(events) == int(sample_rand < sample_rate) diff --git a/tests/tracing/test_sample_rand_propagation.py b/tests/tracing/test_sample_rand_propagation.py new file mode 100644 index 0000000000..ea3ea548ff --- /dev/null +++ b/tests/tracing/test_sample_rand_propagation.py @@ -0,0 +1,43 @@ +""" +These tests exist to verify that Scope.continue_trace() correctly propagates the +sample_rand value onto the transaction's baggage. + +We check both the case where there is an incoming sample_rand, as well as the case +where we need to compute it because it is missing. +""" + +from unittest import mock +from unittest.mock import Mock + +import sentry_sdk + + +def test_continue_trace_with_sample_rand(): + """ + Test that an incoming sample_rand is propagated onto the transaction's baggage. + """ + headers = { + "sentry-trace": "00000000000000000000000000000000-0000000000000000-0", + "baggage": "sentry-sample_rand=0.1,sentry-sample_rate=0.5", + } + + transaction = sentry_sdk.continue_trace(headers) + assert transaction.get_baggage().sentry_items["sample_rand"] == "0.1" + + +def test_continue_trace_missing_sample_rand(): + """ + Test that a missing sample_rand is filled in onto the transaction's baggage. + """ + + headers = { + "sentry-trace": "00000000000000000000000000000000-0000000000000000", + "baggage": "sentry-placeholder=asdf", + } + + mock_uniform = Mock(return_value=0.5) + + with mock.patch("sentry_sdk.tracing_utils.Random.uniform", mock_uniform): + transaction = sentry_sdk.continue_trace(headers) + + assert transaction.get_baggage().sentry_items["sample_rand"] == "0.500000" diff --git a/tests/tracing/test_sampling.py b/tests/tracing/test_sampling.py index 1ad08ecec2..1761a3dbac 100644 --- a/tests/tracing/test_sampling.py +++ b/tests/tracing/test_sampling.py @@ -7,6 +7,7 @@ import sentry_sdk from sentry_sdk import start_span, start_transaction, capture_exception from sentry_sdk.tracing import Transaction +from sentry_sdk.tracing_utils import Baggage from sentry_sdk.utils import logger @@ -73,9 +74,9 @@ def test_uses_traces_sample_rate_correctly( ): sentry_init(traces_sample_rate=traces_sample_rate) - with mock.patch.object(random, "random", return_value=0.5): - transaction = start_transaction(name="dogpark") - assert transaction.sampled is expected_decision + baggage = Baggage(sentry_items={"sample_rand": "0.500000"}) + transaction = start_transaction(name="dogpark", baggage=baggage) + assert transaction.sampled is expected_decision @pytest.mark.parametrize( @@ -89,9 +90,9 @@ def test_uses_traces_sampler_return_value_correctly( ): sentry_init(traces_sampler=mock.Mock(return_value=traces_sampler_return_value)) - with mock.patch.object(random, "random", return_value=0.5): - transaction = start_transaction(name="dogpark") - assert transaction.sampled is expected_decision + baggage = Baggage(sentry_items={"sample_rand": "0.500000"}) + transaction = start_transaction(name="dogpark", baggage=baggage) + assert transaction.sampled is expected_decision @pytest.mark.parametrize("traces_sampler_return_value", [True, False])