diff --git a/src/sentry/search/eap/columns.py b/src/sentry/search/eap/columns.py index 5a9501e05d3c50..fb046a71463487 100644 --- a/src/sentry/search/eap/columns.py +++ b/src/sentry/search/eap/columns.py @@ -4,6 +4,9 @@ from typing import Any from dateutil.tz import tz +from sentry_protos.snuba.v1.attribute_conditional_aggregation_pb2 import ( + AttributeConditionalAggregation, +) from sentry_protos.snuba.v1.endpoint_trace_item_table_pb2 import Column from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType from sentry_protos.snuba.v1.trace_item_attribute_pb2 import ( @@ -102,25 +105,46 @@ class VirtualColumnDefinition: @dataclass(frozen=True, kw_only=True) -class ResolvedFormula(ResolvedAttribute): - formula: Column.BinaryFormula +class ResolvedFunction(ResolvedAttribute): + """ + A Function should be used as a non-attribute column; this means an aggregate or formula is a type of function. + The function is considered resolved when it can be passed in directly to the RPC (typically meaning arguments are resolved). + """ @property - def proto_definition(self) -> Column.BinaryFormula: - """The definition of this function as needed by the RPC""" - return self.formula + def proto_definition( + self, + ) -> Column.BinaryFormula | AttributeAggregation | AttributeConditionalAggregation: + raise NotImplementedError() @property def proto_type(self) -> AttributeKey.Type.ValueType: - """The rpc always returns functions as floats, especially count() even though it should be an integer - - see: https://www.notion.so/sentry/Should-count-return-an-int-in-the-v1-RPC-API-1348b10e4b5d80498bfdead194cc304e - """ return constants.DOUBLE @dataclass(frozen=True, kw_only=True) -class ResolvedFunction(ResolvedAttribute): +class ResolvedFormula(ResolvedFunction): + """ + A formula is a type of function that may accept a parameter; it divides an attribute, aggregate, or formula by another. + The FormulaDefinition contains a method `resolve`, which takes in the argument passed into the function and returns the resolved formula. + For example, if the user queries for `http_response_rate(5)`, the FormulaDefinition calls `resolve` with the argument `5` and returns the `ResolvedFormula`. + """ + + formula: Column.BinaryFormula + + @property + def proto_definition(self) -> Column.BinaryFormula: + """The definition of this function as needed by the RPC""" + return self.formula + + +@dataclass(frozen=True, kw_only=True) +class ResolvedAggregate(ResolvedFunction): + """ + An aggregate is the most primitive type of function; these are the ones that are available via the RPC directly and contain no logic. + Examples are `sum()` and `avg()`. + """ + # The internal rpc alias for this column internal_name: Function.ValueType # Whether to enable extrapolation @@ -140,18 +164,13 @@ def proto_definition(self) -> AttributeAggregation: ), ) - @property - def proto_type(self) -> AttributeKey.Type.ValueType: - """The rpc always returns functions as floats, especially count() even though it should be an integer - - see: https://www.notion.so/sentry/Should-count-return-an-int-in-the-v1-RPC-API-1348b10e4b5d80498bfdead194cc304e - """ - return constants.DOUBLE - -@dataclass +@dataclass(kw_only=True) class FunctionDefinition: - internal_function: Function.ValueType + """ + The FunctionDefinition is a base class for defining a function; a function is a non-attribute column.
+ """ + # The list of arguments for this function arguments: list[ArgumentDefinition] # The search_type the argument should be the default type for this column @@ -160,19 +179,32 @@ class FunctionDefinition: infer_search_type_from_arguments: bool = True # The internal rpc type for this function, optional as it can mostly be inferred from search_type internal_type: AttributeKey.Type.ValueType | None = None - # Processor is the function run in the post process step to transform a row into the final result - processor: Callable[[Any], Any] | None = None # Whether to request extrapolation or not, should be true for all functions except for _sample functions for debugging extrapolation: bool = True + # Processor is the function run in the post process step to transform a row into the final result + processor: Callable[[Any], Any] | None = None @property def required_arguments(self) -> list[ArgumentDefinition]: return [arg for arg in self.arguments if arg.default_arg is None and not arg.ignored] + def resolve( + self, + alias: str, + search_type: constants.SearchType, + resolved_argument: AttributeKey | Any | None, + ) -> ResolvedFormula | ResolvedAggregate: + raise NotImplementedError() + + +@dataclass(kw_only=True) +class AggregateDefinition(FunctionDefinition): + internal_function: Function.ValueType + def resolve( self, alias: str, search_type: constants.SearchType, resolved_argument: AttributeKey | None - ) -> ResolvedFunction: - return ResolvedFunction( + ) -> ResolvedAggregate: + return ResolvedAggregate( public_alias=alias, internal_name=self.internal_function, search_type=search_type, @@ -183,22 +215,10 @@ def resolve( ) -@dataclass -class FormulaDefinition: - # The list of arguments for this function - arguments: list[ArgumentDefinition] +@dataclass(kw_only=True) +class FormulaDefinition(FunctionDefinition): # A function that takes in the resolved argument and returns a Column.BinaryFormula - formula_resolver: Callable[[Any], Any] - # The search_type the argument should be the default type for this column - default_search_type: constants.SearchType - # Try to infer the search type from the function arguments - infer_search_type_from_arguments: bool = True - # The internal rpc type for this function, optional as it can mostly be inferred from search_type - internal_type: AttributeKey.Type.ValueType | None = None - # Processor is the function run in the post process step to transform a row into the final result - processor: Callable[[Any], Column.BinaryFormula] | None = None - # Whether to request extrapolation or not, should be true for all functions except for _sample functions for debugging - extrapolation: bool = True + formula_resolver: Callable[[Any], Column.BinaryFormula] @property def required_arguments(self) -> list[ArgumentDefinition]: @@ -270,7 +290,7 @@ def project_term_resolver( @dataclass(frozen=True) class ColumnDefinitions: - functions: dict[str, FunctionDefinition] + aggregates: dict[str, AggregateDefinition] formulas: dict[str, FormulaDefinition] columns: dict[str, ResolvedColumn] contexts: dict[str, VirtualColumnDefinition] diff --git a/src/sentry/search/eap/ourlog_columns.py b/src/sentry/search/eap/ourlog_columns.py index be3a492d3a2c44..c00cde047a60d8 100644 --- a/src/sentry/search/eap/ourlog_columns.py +++ b/src/sentry/search/eap/ourlog_columns.py @@ -73,7 +73,7 @@ OURLOG_DEFINITIONS = ColumnDefinitions( - functions={}, + aggregates={}, formulas={}, columns=OURLOG_ATTRIBUTE_DEFINITIONS, contexts=OURLOG_VIRTUAL_CONTEXTS, diff --git a/src/sentry/search/eap/resolver.py 
b/src/sentry/search/eap/resolver.py index 9c6e6536aa48f7..f4d4233c299482 100644 --- a/src/sentry/search/eap/resolver.py +++ b/src/sentry/search/eap/resolver.py @@ -36,12 +36,12 @@ from sentry.exceptions import InvalidSearchQuery from sentry.search.eap import constants from sentry.search.eap.columns import ( + AggregateDefinition, ColumnDefinitions, FormulaDefinition, - FunctionDefinition, + ResolvedAggregate, ResolvedColumn, ResolvedFormula, - ResolvedFunction, VirtualColumnDefinition, ) from sentry.search.eap.types import SearchResolverConfig @@ -65,7 +65,7 @@ class SearchResolver: field(default_factory=dict) ) _resolved_function_cache: dict[ - str, tuple[ResolvedFunction | ResolvedFormula, VirtualColumnDefinition | None] + str, tuple[ResolvedFormula | ResolvedAggregate, VirtualColumnDefinition | None] ] = field(default_factory=dict) @sentry_sdk.trace @@ -553,7 +553,7 @@ def resolve_contexts( @sentry_sdk.trace def resolve_columns(self, selected_columns: list[str]) -> tuple[ - list[ResolvedColumn | ResolvedFunction | ResolvedFormula], + list[ResolvedColumn | ResolvedAggregate | ResolvedFormula], list[VirtualColumnDefinition | None], ]: """Given a list of columns resolve them and get their context if applicable @@ -590,12 +590,14 @@ def resolve_columns(self, selected_columns: list[str]) -> tuple[ def resolve_column( self, column: str, match: Match | None = None - ) -> tuple[ResolvedColumn | ResolvedFunction | ResolvedFormula, VirtualColumnDefinition | None]: + ) -> tuple[ + ResolvedColumn | ResolvedAggregate | ResolvedFormula, VirtualColumnDefinition | None + ]: """Column is either an attribute or an aggregate, this function will determine which it is and call the relevant resolve function""" match = fields.is_function(column) if match: - return self.resolve_aggregate(column, match) + return self.resolve_function(column, match) else: return self.resolve_attribute(column) @@ -670,27 +672,27 @@ def resolve_attribute( raise InvalidSearchQuery(f"Could not parse {column}") @sentry_sdk.trace - def resolve_aggregates( + def resolve_functions( self, columns: list[str] - ) -> tuple[list[ResolvedFunction | ResolvedFormula], list[VirtualColumnDefinition | None]]: - """Helper function to resolve a list of aggregates instead of 1 attribute at a time""" - resolved_aggregates, resolved_contexts = [], [] + ) -> tuple[list[ResolvedFormula | ResolvedAggregate], list[VirtualColumnDefinition | None]]: + """Helper function to resolve a list of functions instead of one function at a time""" + resolved_functions, resolved_contexts = [], [] for column in columns: - aggregate, context = self.resolve_aggregate(column) - resolved_aggregates.append(aggregate) + function, context = self.resolve_function(column) + resolved_functions.append(function) resolved_contexts.append(context) - return resolved_aggregates, resolved_contexts + return resolved_functions, resolved_contexts - def resolve_aggregate( + def resolve_function( self, column: str, match: Match | None = None - ) -> tuple[ResolvedFunction | ResolvedFormula, VirtualColumnDefinition | None]: + ) -> tuple[ResolvedFormula | ResolvedAggregate, VirtualColumnDefinition | None]: if column in self._resolved_function_cache: return self._resolved_function_cache[column] # Check if the column looks like a function (matches a pattern), parse the function name and args out if match is None: match = fields.is_function(column) if match is None: - raise InvalidSearchQuery(f"{column} is not an aggregate") + raise InvalidSearchQuery(f"{column} is not a function") function =
match.group("function") columns = match.group("columns") @@ -698,9 +700,9 @@ def resolve_aggregate( alias = match.group("alias") or column # Get the function definition - function_definition: FunctionDefinition | FormulaDefinition - if function in self.definitions.functions: - function_definition = self.definitions.functions[function] + function_definition: AggregateDefinition | FormulaDefinition + if function in self.definitions.aggregates: + function_definition = self.definitions.aggregates[function] elif function in self.definitions.formulas: function_definition = self.definitions.formulas[function] else: diff --git a/src/sentry/search/eap/span_columns.py b/src/sentry/search/eap/span_columns.py index fa4f46b7a27138..181c889715e567 100644 --- a/src/sentry/search/eap/span_columns.py +++ b/src/sentry/search/eap/span_columns.py @@ -19,10 +19,10 @@ from sentry.exceptions import InvalidSearchQuery from sentry.search.eap import constants from sentry.search.eap.columns import ( + AggregateDefinition, ArgumentDefinition, ColumnDefinitions, FormulaDefinition, - FunctionDefinition, ResolvedColumn, VirtualColumnDefinition, datetime_processor, @@ -471,8 +471,8 @@ def _validator(input: str) -> bool: "http_response_rate": http_response_rate } -SPAN_FUNCTION_DEFINITIONS = { - "sum": FunctionDefinition( +SPAN_AGGREGATE_DEFINITIONS = { + "sum": AggregateDefinition( internal_function=Function.FUNCTION_SUM, default_search_type="duration", arguments=[ @@ -487,7 +487,7 @@ def _validator(input: str) -> bool: ) ], ), - "avg": FunctionDefinition( + "avg": AggregateDefinition( internal_function=Function.FUNCTION_AVG, default_search_type="duration", arguments=[ @@ -503,7 +503,7 @@ def _validator(input: str) -> bool: ) ], ), - "avg_sample": FunctionDefinition( + "avg_sample": AggregateDefinition( internal_function=Function.FUNCTION_AVG, default_search_type="duration", arguments=[ @@ -520,7 +520,7 @@ def _validator(input: str) -> bool: ], extrapolation=False, ), - "count": FunctionDefinition( + "count": AggregateDefinition( internal_function=Function.FUNCTION_COUNT, infer_search_type_from_arguments=False, default_search_type="integer", @@ -537,7 +537,7 @@ def _validator(input: str) -> bool: ) ], ), - "count_sample": FunctionDefinition( + "count_sample": AggregateDefinition( internal_function=Function.FUNCTION_COUNT, infer_search_type_from_arguments=False, default_search_type="integer", @@ -555,7 +555,7 @@ def _validator(input: str) -> bool: ], extrapolation=False, ), - "p50": FunctionDefinition( + "p50": AggregateDefinition( internal_function=Function.FUNCTION_P50, default_search_type="duration", arguments=[ @@ -570,7 +570,7 @@ def _validator(input: str) -> bool: ) ], ), - "p50_sample": FunctionDefinition( + "p50_sample": AggregateDefinition( internal_function=Function.FUNCTION_P50, default_search_type="duration", arguments=[ @@ -586,7 +586,7 @@ def _validator(input: str) -> bool: ], extrapolation=False, ), - "p75": FunctionDefinition( + "p75": AggregateDefinition( internal_function=Function.FUNCTION_P75, default_search_type="duration", arguments=[ @@ -601,7 +601,7 @@ def _validator(input: str) -> bool: ) ], ), - "p90": FunctionDefinition( + "p90": AggregateDefinition( internal_function=Function.FUNCTION_P90, default_search_type="duration", arguments=[ @@ -616,7 +616,7 @@ def _validator(input: str) -> bool: ) ], ), - "p95": FunctionDefinition( + "p95": AggregateDefinition( internal_function=Function.FUNCTION_P95, default_search_type="duration", arguments=[ @@ -631,7 +631,7 @@ def _validator(input: str) -> bool: ) 
], ), - "p99": FunctionDefinition( + "p99": AggregateDefinition( internal_function=Function.FUNCTION_P99, default_search_type="duration", arguments=[ @@ -646,7 +646,7 @@ def _validator(input: str) -> bool: ) ], ), - "p100": FunctionDefinition( + "p100": AggregateDefinition( internal_function=Function.FUNCTION_MAX, default_search_type="duration", arguments=[ @@ -661,7 +661,7 @@ def _validator(input: str) -> bool: ) ], ), - "max": FunctionDefinition( + "max": AggregateDefinition( internal_function=Function.FUNCTION_MAX, default_search_type="duration", arguments=[ @@ -677,7 +677,7 @@ def _validator(input: str) -> bool: ) ], ), - "min": FunctionDefinition( + "min": AggregateDefinition( internal_function=Function.FUNCTION_MIN, default_search_type="duration", arguments=[ @@ -693,7 +693,7 @@ def _validator(input: str) -> bool: ) ], ), - "count_unique": FunctionDefinition( + "count_unique": AggregateDefinition( internal_function=Function.FUNCTION_UNIQ, default_search_type="integer", infer_search_type_from_arguments=False, @@ -721,7 +721,7 @@ def _validator(input: str) -> bool: } SPAN_DEFINITIONS = ColumnDefinitions( - functions=SPAN_FUNCTION_DEFINITIONS, + aggregates=SPAN_AGGREGATE_DEFINITIONS, formulas=SPAN_FORMULA_DEFINITIONS, columns=SPAN_ATTRIBUTE_DEFINITIONS, contexts=SPAN_VIRTUAL_CONTEXTS, diff --git a/src/sentry/search/eap/uptime_check_columns.py b/src/sentry/search/eap/uptime_check_columns.py index 6e91643cdf87a0..2c6f5360a01f56 100644 --- a/src/sentry/search/eap/uptime_check_columns.py +++ b/src/sentry/search/eap/uptime_check_columns.py @@ -86,7 +86,7 @@ UPTIME_CHECK_DEFINITIONS = ColumnDefinitions( - functions={}, + aggregates={}, formulas={}, columns=UPTIME_CHECK_ATTRIBUTE_DEFINITIONS, contexts=UPTIME_CHECK_VIRTUAL_CONTEXTS, diff --git a/src/sentry/snuba/rpc_dataset_common.py b/src/sentry/snuba/rpc_dataset_common.py index 47cb167b2e7e73..47e3b852e645d5 100644 --- a/src/sentry/snuba/rpc_dataset_common.py +++ b/src/sentry/snuba/rpc_dataset_common.py @@ -5,7 +5,7 @@ from sentry_protos.snuba.v1.request_common_pb2 import PageToken from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeAggregation, AttributeKey -from sentry.search.eap.columns import ResolvedColumn, ResolvedFormula, ResolvedFunction +from sentry.search.eap.columns import ResolvedAggregate, ResolvedColumn, ResolvedFormula from sentry.search.eap.resolver import SearchResolver from sentry.search.eap.types import CONFIDENCES, ConfidenceData, EAPResponse from sentry.search.events.fields import get_function_alias @@ -16,10 +16,10 @@ logger = logging.getLogger("sentry.snuba.spans_rpc") -def categorize_column(column: ResolvedColumn | ResolvedFunction | ResolvedFormula) -> Column: +def categorize_column(column: ResolvedColumn | ResolvedAggregate | ResolvedFormula) -> Column: if isinstance(column, ResolvedFormula): return Column(formula=column.proto_definition, label=column.public_alias) - if isinstance(column, ResolvedFunction): + if isinstance(column, ResolvedAggregate): return Column(aggregation=column.proto_definition, label=column.public_alias) else: return Column(key=column.proto_definition, label=column.public_alias) diff --git a/src/sentry/snuba/spans_rpc.py b/src/sentry/snuba/spans_rpc.py index 3cda748c1901bd..9f76bcc446f973 100644 --- a/src/sentry/snuba/spans_rpc.py +++ b/src/sentry/snuba/spans_rpc.py @@ -13,7 +13,7 @@ from sentry.api.event_search import SearchFilter, SearchKey, SearchValue from sentry.exceptions import InvalidSearchQuery -from sentry.search.eap.columns import ResolvedColumn, ResolvedFormula, 
ResolvedFunction +from sentry.search.eap.columns import ResolvedAggregate, ResolvedColumn, ResolvedFormula from sentry.search.eap.constants import DOUBLE, INT, MAX_ROLLUP_POINTS, STRING, VALID_GRANULARITIES from sentry.search.eap.resolver import SearchResolver from sentry.search.eap.span_columns import SPAN_DEFINITIONS @@ -76,11 +76,11 @@ def get_timeseries_query( config: SearchResolverConfig, granularity_secs: int, extra_conditions: TraceItemFilter | None = None, -) -> tuple[TimeSeriesRequest, list[ResolvedFunction | ResolvedFormula], list[ResolvedColumn]]: +) -> tuple[TimeSeriesRequest, list[ResolvedFormula | ResolvedAggregate], list[ResolvedColumn]]: resolver = get_resolver(params=params, config=config) meta = resolver.resolve_meta(referrer=referrer) query, _, query_contexts = resolver.resolve_query(query_string) - (aggregations, _) = resolver.resolve_aggregates(y_axes) + (aggregations, _) = resolver.resolve_functions(y_axes) (groupbys, _) = resolver.resolve_attributes(groupby) if extra_conditions is not None: if query is not None:
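Taken together, the rename splits the old catch-all `FunctionDefinition`/`ResolvedFunction` pair into an aggregate path and a formula path that share a common base. Below is a minimal usage sketch, not part of this diff, showing how the renamed pieces are expected to compose; `snuba_params` is a hypothetical placeholder, and the keyword arguments to the `SearchResolver` constructor are assumptions based on how the resolver is built elsewhere in the codebase.

```python
# Hedged sketch of the post-rename flow; identifiers taken from this diff,
# constructor arguments and `snuba_params` are assumptions for illustration.
from sentry.search.eap.resolver import SearchResolver
from sentry.search.eap.span_columns import SPAN_DEFINITIONS
from sentry.search.eap.types import SearchResolverConfig
from sentry.snuba.rpc_dataset_common import categorize_column

resolver = SearchResolver(
    params=snuba_params,  # placeholder: a populated SnubaParams instance
    config=SearchResolverConfig(),
    definitions=SPAN_DEFINITIONS,
)

# resolve_functions() (formerly resolve_aggregates) handles both kinds of functions:
# "avg(span.duration)" matches SPAN_AGGREGATE_DEFINITIONS and yields a ResolvedAggregate,
# while "http_response_rate(5)" matches SPAN_FORMULA_DEFINITIONS and yields a ResolvedFormula.
functions, _contexts = resolver.resolve_functions(
    ["avg(span.duration)", "http_response_rate(5)"]
)

# categorize_column() dispatches on the resolved type:
#   ResolvedFormula   -> Column(formula=..., label=...)
#   ResolvedAggregate -> Column(aggregation=..., label=...)
#   anything else     -> Column(key=..., label=...)
rpc_columns = [categorize_column(f) for f in functions]
```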