Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ref(eap): cleanup from custom formulas #86330

Merged
merged 9 commits into from
Mar 5, 2025
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 41 additions & 40 deletions src/sentry/search/eap/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
from typing import Any

from dateutil.tz import tz
from sentry_protos.snuba.v1.attribute_conditional_aggregation_pb2 import (
AttributeConditionalAggregation,
)
from sentry_protos.snuba.v1.endpoint_trace_item_table_pb2 import Column
from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import (
Expand Down Expand Up @@ -102,25 +105,31 @@ class VirtualColumnDefinition:


@dataclass(frozen=True, kw_only=True)
class ResolvedFormula(ResolvedAttribute):
formula: Column.BinaryFormula
class ResolvedFunction(ResolvedAttribute):

@property
def proto_definition(self) -> Column.BinaryFormula:
"""The definition of this function as needed by the RPC"""
return self.formula
def proto_definition(
self,
) -> Column.BinaryFormula | AttributeAggregation | AttributeConditionalAggregation:
raise NotImplementedError()

@property
def proto_type(self) -> AttributeKey.Type.ValueType:
"""The rpc always returns functions as floats, especially count() even though it should be an integer

see: https://www.notion.so/sentry/Should-count-return-an-int-in-the-v1-RPC-API-1348b10e4b5d80498bfdead194cc304e
"""
return constants.DOUBLE


@dataclass(frozen=True, kw_only=True)
class ResolvedFunction(ResolvedAttribute):
class ResolvedFormula(ResolvedFunction):
formula: Column.BinaryFormula

@property
def proto_definition(self) -> Column.BinaryFormula:
"""The definition of this function as needed by the RPC"""
return self.formula


@dataclass(frozen=True, kw_only=True)
class ResolvedAggregate(ResolvedFunction):
# The internal rpc alias for this column
internal_name: Function.ValueType
# Whether to enable extrapolation
Expand All @@ -140,18 +149,9 @@ def proto_definition(self) -> AttributeAggregation:
),
)

@property
def proto_type(self) -> AttributeKey.Type.ValueType:
"""The rpc always returns functions as floats, especially count() even though it should be an integer

see: https://www.notion.so/sentry/Should-count-return-an-int-in-the-v1-RPC-API-1348b10e4b5d80498bfdead194cc304e
"""
return constants.DOUBLE


@dataclass
@dataclass(kw_only=True)
class FunctionDefinition:
internal_function: Function.ValueType
# The list of arguments for this function
arguments: list[ArgumentDefinition]
# The search_type the argument should be the default type for this column
Expand All @@ -160,19 +160,32 @@ class FunctionDefinition:
infer_search_type_from_arguments: bool = True
# The internal rpc type for this function, optional as it can mostly be inferred from search_type
internal_type: AttributeKey.Type.ValueType | None = None
# Processor is the function run in the post process step to transform a row into the final result
processor: Callable[[Any], Any] | None = None
# Whether to request extrapolation or not, should be true for all functions except for _sample functions for debugging
extrapolation: bool = True
# Processor is the function run in the post process step to transform a row into the final result
processor: Callable[[Any], Any] | None = None

@property
def required_arguments(self) -> list[ArgumentDefinition]:
return [arg for arg in self.arguments if arg.default_arg is None and not arg.ignored]

def resolve(
self,
alias: str,
search_type: constants.SearchType,
resolved_argument: AttributeKey | Any | None,
) -> ResolvedFormula | ResolvedAggregate:
raise NotImplementedError()


@dataclass(kw_only=True)
class AggregateDefinition(FunctionDefinition):
internal_function: Function.ValueType

def resolve(
self, alias: str, search_type: constants.SearchType, resolved_argument: AttributeKey | None
) -> ResolvedFunction:
return ResolvedFunction(
) -> ResolvedAggregate:
return ResolvedAggregate(
public_alias=alias,
internal_name=self.internal_function,
search_type=search_type,
Expand All @@ -183,22 +196,10 @@ def resolve(
)


@dataclass
class FormulaDefinition:
# The list of arguments for this function
arguments: list[ArgumentDefinition]
@dataclass(kw_only=True)
class FormulaDefinition(FunctionDefinition):
# A function that takes in the resolved argument and returns a Column.BinaryFormula
formula_resolver: Callable[[Any], Any]
# The search_type the argument should be the default type for this column
default_search_type: constants.SearchType
# Try to infer the search type from the function arguments
infer_search_type_from_arguments: bool = True
# The internal rpc type for this function, optional as it can mostly be inferred from search_type
internal_type: AttributeKey.Type.ValueType | None = None
# Processor is the function run in the post process step to transform a row into the final result
processor: Callable[[Any], Column.BinaryFormula] | None = None
# Whether to request extrapolation or not, should be true for all functions except for _sample functions for debugging
extrapolation: bool = True
formula_resolver: Callable[[Any], Column.BinaryFormula]

@property
def required_arguments(self) -> list[ArgumentDefinition]:
Expand Down Expand Up @@ -270,7 +271,7 @@ def project_term_resolver(

@dataclass(frozen=True)
class ColumnDefinitions:
functions: dict[str, FunctionDefinition]
aggregates: dict[str, AggregateDefinition]
formulas: dict[str, FormulaDefinition]
columns: dict[str, ResolvedColumn]
contexts: dict[str, VirtualColumnDefinition]
Expand Down
2 changes: 1 addition & 1 deletion src/sentry/search/eap/ourlog_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@


OURLOG_DEFINITIONS = ColumnDefinitions(
functions={},
aggregates={},
formulas={},
columns=OURLOG_ATTRIBUTE_DEFINITIONS,
contexts=OURLOG_VIRTUAL_CONTEXTS,
Expand Down
40 changes: 21 additions & 19 deletions src/sentry/search/eap/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@
from sentry.exceptions import InvalidSearchQuery
from sentry.search.eap import constants
from sentry.search.eap.columns import (
AggregateDefinition,
ColumnDefinitions,
FormulaDefinition,
FunctionDefinition,
ResolvedAggregate,
ResolvedColumn,
ResolvedFormula,
ResolvedFunction,
VirtualColumnDefinition,
)
from sentry.search.eap.types import SearchResolverConfig
Expand All @@ -65,7 +65,7 @@ class SearchResolver:
field(default_factory=dict)
)
_resolved_function_cache: dict[
str, tuple[ResolvedFunction | ResolvedFormula, VirtualColumnDefinition | None]
str, tuple[ResolvedFormula | ResolvedAggregate, VirtualColumnDefinition | None]
] = field(default_factory=dict)

@sentry_sdk.trace
Expand Down Expand Up @@ -553,7 +553,7 @@ def resolve_contexts(

@sentry_sdk.trace
def resolve_columns(self, selected_columns: list[str]) -> tuple[
list[ResolvedColumn | ResolvedFunction | ResolvedFormula],
list[ResolvedColumn | ResolvedAggregate | ResolvedFormula],
list[VirtualColumnDefinition | None],
]:
"""Given a list of columns resolve them and get their context if applicable
Expand Down Expand Up @@ -590,12 +590,14 @@ def resolve_columns(self, selected_columns: list[str]) -> tuple[

def resolve_column(
self, column: str, match: Match | None = None
) -> tuple[ResolvedColumn | ResolvedFunction | ResolvedFormula, VirtualColumnDefinition | None]:
) -> tuple[
ResolvedColumn | ResolvedAggregate | ResolvedFormula, VirtualColumnDefinition | None
]:
"""Column is either an attribute or an aggregate, this function will determine which it is and call the relevant
resolve function"""
match = fields.is_function(column)
if match:
return self.resolve_aggregate(column, match)
return self.resolve_function(column, match)
else:
return self.resolve_attribute(column)

Expand Down Expand Up @@ -670,37 +672,37 @@ def resolve_attribute(
raise InvalidSearchQuery(f"Could not parse {column}")

@sentry_sdk.trace
def resolve_aggregates(
def resolve_functions(
self, columns: list[str]
) -> tuple[list[ResolvedFunction | ResolvedFormula], list[VirtualColumnDefinition | None]]:
"""Helper function to resolve a list of aggregates instead of 1 attribute at a time"""
resolved_aggregates, resolved_contexts = [], []
) -> tuple[list[ResolvedFormula | ResolvedAggregate], list[VirtualColumnDefinition | None]]:
"""Helper function to resolve a list of functions instead of 1 attribute at a time"""
resolved_functions, resolved_contexts = [], []
for column in columns:
aggregate, context = self.resolve_aggregate(column)
resolved_aggregates.append(aggregate)
function, context = self.resolve_function(column)
resolved_functions.append(function)
resolved_contexts.append(context)
return resolved_aggregates, resolved_contexts
return resolved_functions, resolved_contexts

def resolve_aggregate(
def resolve_function(
self, column: str, match: Match | None = None
) -> tuple[ResolvedFunction | ResolvedFormula, VirtualColumnDefinition | None]:
) -> tuple[ResolvedFormula | ResolvedAggregate, VirtualColumnDefinition | None]:
if column in self._resolved_function_cache:
return self._resolved_function_cache[column]
# Check if the column looks like a function (matches a pattern), parse the function name and args out
if match is None:
match = fields.is_function(column)
if match is None:
raise InvalidSearchQuery(f"{column} is not an aggregate")
raise InvalidSearchQuery(f"{column} is not a function")

function = match.group("function")
columns = match.group("columns")
# Alias defaults to the name of the function
alias = match.group("alias") or column

# Get the function definition
function_definition: FunctionDefinition | FormulaDefinition
if function in self.definitions.functions:
function_definition = self.definitions.functions[function]
function_definition: AggregateDefinition | FormulaDefinition
if function in self.definitions.aggregates:
function_definition = self.definitions.aggregates[function]
elif function in self.definitions.formulas:
function_definition = self.definitions.formulas[function]
else:
Expand Down
Loading
Loading