Skip to content

Commit

Permalink
ref(grouping): Clean up fingerprinting helpers (#85483)
Browse files Browse the repository at this point in the history
This is some refactoring I did back when I was digging around in the grouping code, which I just rediscovered on an old branch. Mostly just things I renamed for clarity as I was trying to understand the code, but also a bit of simplification/clarification of code (things like using an explicit ternary rather than relying on the precedence order of `and` and `or`, stripping a specific character from a string rather than slicing it, using `any` rather than testing a boolean condition in a for loop, etc.). No behavior changes.
  • Loading branch information
lobsterkatie authored Mar 3, 2025
1 parent 0dc6dfd commit 19a602c
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 104 deletions.
100 changes: 49 additions & 51 deletions src/sentry/grouping/fingerprinting/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ def __init__(self, event: Mapping[str, Any]) -> None:
self._family: list[_FamilyInfo] | None = None
self._release: list[_ReleaseInfo] | None = None

def get_values(self, match_type: str) -> list[dict[str, Any]]:
"""
Pull values from all the spots in the event appropriate to the given match type.
"""
return getattr(self, "_get_" + match_type)()

def _get_messages(self) -> list[_MessageInfo]:
if self._messages is None:
self._messages = []
Expand Down Expand Up @@ -243,12 +249,6 @@ def _get_release(self) -> list[_ReleaseInfo]:
self._release = self._release or [{"release": self.event.get("release")}]
return self._release

def get_values(self, match_type: str) -> list[dict[str, Any]]:
"""
Pull values from all the spots in the event appropriate to the given match type.
"""
return getattr(self, "_get_" + match_type)()


class FingerprintingRules:
def __init__(
Expand Down Expand Up @@ -416,11 +416,9 @@ def match_type(self) -> str:
return "release"
return "frames"

def matches(self, values: dict[str, Any]) -> bool:
rv = self._positive_match(values)
if self.negated:
rv = not rv
return rv
def matches(self, event_values: dict[str, Any]) -> bool:
match_found = self._positive_match(event_values)
return not match_found if self.negated else match_found

def _positive_path_match(self, value: str | None) -> bool:
if value is None:
Expand All @@ -433,43 +431,42 @@ def _positive_path_match(self, value: str | None) -> bool:
return True
return False

def _positive_match(self, values: dict[str, Any]) -> bool:
# `path` is special in that it tests against two values (`abs_path` and `filename`)
def _positive_match(self, event_values: dict[str, Any]) -> bool:
# Handle cases where `self.key` isn't 1-to-1 with the corresponding key in `event_values`
if self.key == "path":
value = values.get("abs_path")
if self._positive_path_match(value):
return True
alt_value = values.get("filename")
if alt_value != value:
if self._positive_path_match(alt_value):
return True
return False
return any(
self._positive_path_match(value)
# Use a set so that if the values match, we don't needlessly check both
for value in {event_values.get("abs_path"), event_values.get("filename")}
)

# message tests against exception value also, as this is what users expect
if self.key == "message":
for key in ("message", "value"):
value = values.get(key)
if value is not None and glob_match(value, self.pattern, ignorecase=True):
return True
return False
return any(
value is not None and glob_match(value, self.pattern, ignorecase=True)
# message tests against exception value also, as this is what users expect
for value in [event_values.get("message"), event_values.get("value")]
)

# For the rest, `self.key` matches the key in `event_values`
value = event_values.get(self.key)

value = values.get(self.key)
if value is None:
return False
elif self.key in ["package", "release"]:
if self._positive_path_match(value):
return True
elif self.key in ["family", "sdk"]:

if self.key in ["package", "release"]:
return self._positive_path_match(value)

if self.key in ["family", "sdk"]:
flags = self.pattern.split(",")
if "all" in flags or value in flags:
return True
elif self.key == "app":
ref_val = bool_from_string(self.pattern)
if ref_val is not None and ref_val == value:
return True
elif glob_match(value, self.pattern, ignorecase=self.key in ("level", "value")):
return True
return False
return "all" in flags or value in flags

if self.key == "app":
return value == bool_from_string(self.pattern)

if self.key in ["level", "value"]:
return glob_match(value, self.pattern, ignorecase=True)

return glob_match(value, self.pattern, ignorecase=False)

def _to_config_structure(self) -> list[str]:
key = self.key
Expand All @@ -479,18 +476,17 @@ def _to_config_structure(self) -> list[str]:

@classmethod
def _from_config_structure(cls, matcher: list[str]) -> Self:
key = matcher[0]
if key.startswith("!"):
key = key[1:]
negated = True
else:
negated = False
return cls(key, matcher[1], negated)
key, pattern = matcher

negated = key.startswith("!")
key = key.lstrip("!")

return cls(key, pattern, negated)

@property
def text(self) -> str:
return '{}{}:"{}"'.format(
self.negated and "!" or "",
"!" if self.negated else "",
self.key,
self.pattern,
)
Expand All @@ -517,8 +513,8 @@ def test_for_match_with_event(
matchers_by_match_type.setdefault(matcher.match_type, []).append(matcher)

for match_type, matchers in matchers_by_match_type.items():
for values in event_datastore.get_values(match_type):
if all(x.matches(values) for x in matchers):
for event_values in event_datastore.get_values(match_type):
if all(matcher.matches(event_values) for matcher in matchers):
break
else:
return None
Expand Down Expand Up @@ -586,6 +582,7 @@ def visit_fingerprinting_rules(
changelog = []
rules = []
in_header = True

for child in children:
if isinstance(child, str):
if in_header and child.startswith("##"):
Expand All @@ -595,6 +592,7 @@ def visit_fingerprinting_rules(
elif child is not None:
rules.append(child)
in_header = False

return FingerprintingRules(
rules=rules,
changelog=inspect.cleandoc("\n".join(changelog)).rstrip() or None,
Expand Down
126 changes: 73 additions & 53 deletions src/sentry/grouping/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,20 @@
DEFAULT_FINGERPRINT_VARIABLE = "{{ default }}"


def parse_fingerprint_var(value: str) -> str | None:
match = _fingerprint_var_re.match(value)
if match is not None and match.end() == len(value):
def parse_fingerprint_entry_as_variable(entry: str) -> str | None:
"""
Determine if the given fingerprint entry is a variable, and if it is, return its key (that is,
extract the variable name from a variable string of the form "{{ var_name }}"). If the given
entry isn't the correct form to be a variable, return None.
"""
match = _fingerprint_var_re.match(entry)
if match is not None and match.end() == len(entry):
return match.group(1)
return None


def is_default_fingerprint_var(value: str) -> bool:
return parse_fingerprint_var(value) == "default"
return parse_fingerprint_entry_as_variable(value) == "default"


def hash_from_values(values: Iterable[str | int | UUID | ExceptionGroupingComponent]) -> str:
Expand Down Expand Up @@ -82,80 +87,95 @@ def bool_from_string(value: str) -> bool | None:
return None


def get_fingerprint_value(var: str, data: NodeData | Mapping[str, Any]) -> str | None:
if var == "transaction":
return data.get("transaction") or "<no-transaction>"
elif var == "message":
def resolve_fingerprint_variable(
variable_key: str, event_data: NodeData | Mapping[str, Any]
) -> str | None:
if variable_key == "transaction":
return event_data.get("transaction") or "<no-transaction>"

elif variable_key == "message":
message = (
get_path(data, "logentry", "formatted")
or get_path(data, "logentry", "message")
or get_path(data, "exception", "values", -1, "value")
get_path(event_data, "logentry", "formatted")
or get_path(event_data, "logentry", "message")
or get_path(event_data, "exception", "values", -1, "value")
)
return message or "<no-message>"
elif var in ("type", "error.type"):
ty = get_path(data, "exception", "values", -1, "type")
return ty or "<no-type>"
elif var in ("value", "error.value"):
value = get_path(data, "exception", "values", -1, "value")

elif variable_key in ("type", "error.type"):
exception_type = get_path(event_data, "exception", "values", -1, "type")
return exception_type or "<no-type>"

elif variable_key in ("value", "error.value"):
value = get_path(event_data, "exception", "values", -1, "value")
return value or "<no-value>"
elif var in ("function", "stack.function"):
frame = get_crash_frame_from_event_data(data)

elif variable_key in ("function", "stack.function"):
frame = get_crash_frame_from_event_data(event_data)
func = frame.get("function") if frame else None
return func or "<no-function>"
elif var in ("path", "stack.abs_path"):
frame = get_crash_frame_from_event_data(data)

elif variable_key in ("path", "stack.abs_path"):
frame = get_crash_frame_from_event_data(event_data)
abs_path = frame.get("abs_path") or frame.get("filename") if frame else None
return abs_path or "<no-abs-path>"
elif var == "stack.filename":
frame = get_crash_frame_from_event_data(data)

elif variable_key == "stack.filename":
frame = get_crash_frame_from_event_data(event_data)
filename = frame.get("filename") or frame.get("abs_path") if frame else None
return filename or "<no-filename>"
elif var in ("module", "stack.module"):
frame = get_crash_frame_from_event_data(data)
mod = frame.get("module") if frame else None
return mod or "<no-module>"
elif var in ("package", "stack.package"):
frame = get_crash_frame_from_event_data(data)

elif variable_key in ("module", "stack.module"):
frame = get_crash_frame_from_event_data(event_data)
module = frame.get("module") if frame else None
return module or "<no-module>"

elif variable_key in ("package", "stack.package"):
frame = get_crash_frame_from_event_data(event_data)
pkg = frame.get("package") if frame else None
if pkg:
# If the package is formatted as either a POSIX or Windows path, grab the last segment
pkg = pkg.rsplit("/", 1)[-1].rsplit("\\", 1)[-1]
return pkg or "<no-package>"
elif var == "level":
return data.get("level") or "<no-level>"
elif var == "logger":
return data.get("logger") or "<no-logger>"
elif var.startswith("tags."):

elif variable_key == "level":
return event_data.get("level") or "<no-level>"

elif variable_key == "logger":
return event_data.get("logger") or "<no-logger>"

elif variable_key.startswith("tags."):
# Turn "tags.some_tag" into just "some_tag"
tag = var[5:]
for t, value in data.get("tags") or ():
if t == tag and value is not None:
return value
return "<no-value-for-tag-%s>" % tag
requested_tag = variable_key[5:]
for tag_name, tag_value in event_data.get("tags") or ():
if tag_name == requested_tag and tag_value is not None:
return tag_value
return "<no-value-for-tag-%s>" % requested_tag
else:
return None


def resolve_fingerprint_values(values: list[str], event_data: NodeData) -> list[str]:
def _get_fingerprint_value(value: str) -> str:
var = parse_fingerprint_var(value)
if var == "default":
def resolve_fingerprint_values(fingerprint: list[str], event_data: NodeData) -> list[str]:
def _resolve_single_entry(entry: str) -> str:
variable_key = parse_fingerprint_entry_as_variable(entry)
if variable_key == "default": # entry is some variation of `{{ default }}`
return DEFAULT_FINGERPRINT_VARIABLE
if var is None:
return value
rv = get_fingerprint_value(var, event_data)
if rv is None:
return value
return rv
if variable_key is None: # entry isn't a variable
return entry
resolved_value = resolve_fingerprint_variable(variable_key, event_data)
if resolved_value is None: # variable wasn't recognized
return entry
return resolved_value

return [_get_fingerprint_value(x) for x in values]
return [_resolve_single_entry(entry) for entry in fingerprint]


def expand_title_template(template: str, event_data: Mapping[str, Any]) -> str:
def _handle_match(match: Match[str]) -> str:
var = match.group(1)
rv = get_fingerprint_value(var, event_data)
if rv is not None:
return rv
variable_key = match.group(1)
resolved_value = resolve_fingerprint_variable(variable_key, event_data)
if resolved_value is not None:
return resolved_value
# If the variable can't be resolved, return it as is
return match.group(0)

return _fingerprint_var_re.sub(_handle_match, template)

0 comments on commit 19a602c

Please sign in to comment.