Skip to content

Commit

Permalink
fix(rpc): Handle wildcard free text search (#86050)
Browse files Browse the repository at this point in the history
- Discover was automatically treating any search on its free_text_key as
a wildcard, but that doesn't really make sense to me and I expect it's
not the intended behaviour
- This will treat all free_text_key searches as wildcard searches
  • Loading branch information
wmak authored Mar 3, 2025
1 parent 840136f commit cd24681
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 2 deletions.
27 changes: 25 additions & 2 deletions src/sentry/api/event_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,17 @@ def translate_wildcard_as_clickhouse_pattern(pattern: str) -> str:
return "".join(chars)


def wrap_free_text(string: str, autowrap: bool) -> str:
    """
    Optionally turn a free text value into a wildcard pattern.

    When ``autowrap`` is false the value is returned untouched. When it is
    true, a value that already starts or ends with an asterisk is returned
    as is, and every other value is returned surrounded by asterisks.
    """
    if not autowrap:
        return string
    # A leading or trailing asterisk means the caller already wildcarded it.
    has_edge_wildcard = string.startswith("*") or string.endswith("*")
    return string if has_edge_wildcard else f"*{string}*"


def translate_escape_sequences(string: str) -> str:
"""
A non-wildcard pattern can contain escape sequences that we need to handle.
Expand Down Expand Up @@ -604,6 +615,9 @@ class SearchConfig:
# Which key we should return any free text under
free_text_key = "message"

# Whether to wrap free_text_keys in asterisks
wildcard_free_text: bool = False

@classmethod
def create_from(cls, search_config: SearchConfig, **overrides):
config = cls(**asdict(search_config))
Expand Down Expand Up @@ -707,13 +721,22 @@ def visit_free_text_unquoted(self, node, children):
def visit_free_text(self, node, children):
# Builds a SearchFilter on the configured free_text_key from the first child.
# An empty first child yields no filter at all (None is returned).
if not children[0]:
return None
# NOTE(review): this page is a rendered diff with the +/- markers stripped. The
# single-line return below appears to be the removed (pre-change) statement and
# the multi-line return after it the added one -- confirm against the repository
# before treating both as live code.
return SearchFilter(SearchKey(self.config.free_text_key), "=", SearchValue(children[0]))
# Free text searches need to be treated like they were wildcards
return SearchFilter(
SearchKey(self.config.free_text_key),
"=",
# wrap_free_text returns the value unchanged unless config.wildcard_free_text is set.
SearchValue(wrap_free_text(children[0], self.config.wildcard_free_text)),
)

def visit_paren_group(self, node, children):
if not self.config.allow_boolean:
# It's possible to have a valid search that includes parens, so we
# can't just error out when we find a paren expression.
return SearchFilter(SearchKey(self.config.free_text_key), "=", SearchValue(node.text))
return SearchFilter(
SearchKey(self.config.free_text_key),
"=",
SearchValue(wrap_free_text(node.text, self.config.wildcard_free_text)),
)

children = remove_space(remove_optional_nodes(flatten(children)))
children = flatten(children[1])
Expand Down
2 changes: 2 additions & 0 deletions src/sentry/search/eap/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
)

from sentry.api import event_search
from sentry.api.event_search import SearchConfig
from sentry.exceptions import InvalidSearchQuery
from sentry.search.eap import constants
from sentry.search.eap.columns import (
Expand Down Expand Up @@ -154,6 +155,7 @@ def __resolve_query(
try:
parsed_terms = event_search.parse_search_query(
querystring,
config=SearchConfig(wildcard_free_text=True),
params=self.params.filter_params,
get_field_type=self.get_field_type,
get_function_result_type=self.get_field_type,
Expand Down
30 changes: 30 additions & 0 deletions tests/snuba/api/endpoints/test_organization_events_ourlogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,33 @@ def test_timestamp_order(self):
assert ts.timestamp() == pytest.approx(timestamp_from_nanos, abs=5), "timestamp"

assert meta["dataset"] == self.dataset

def test_free_text_wildcard_filter(self):
    """Store logs with bodies "bar" and "foo"; the free text query "foo" must return only the latter."""
    bar_log = self.create_ourlog(
        {"body": "bar"},
        timestamp=self.ten_mins_ago,
    )
    foo_log = self.create_ourlog(
        {"body": "foo"},
        timestamp=self.nine_mins_ago,
    )
    self.store_ourlogs([bar_log, foo_log])

    request_params = {
        "field": ["log.body"],
        "query": "foo",
        "orderby": "log.body",
        "project": self.project.id,
        "dataset": self.dataset,
    }
    response = self.do_request(request_params)

    assert response.status_code == 200, response.content
    rows, meta_info = response.data["data"], response.data["meta"]
    # Only the "foo" log may come back; "bar" must be filtered out.
    assert len(rows) == 1
    assert rows[0]["log.body"] == "foo"

    assert meta_info["dataset"] == self.dataset
34 changes: 34 additions & 0 deletions tests/snuba/api/endpoints/test_organization_events_span_indexed.py
Original file line number Diff line number Diff line change
Expand Up @@ -1282,6 +1282,40 @@ def test_wildcard_queries_with_asterisk_literals(self):
assert len(response.data["data"]) == 1
assert response.data["data"][0]["span.description"] == "select * from database"

def test_free_text_wildcard_filter(self):
    """Query the free text "oof" and expect only the span described as "foofoofoo"."""
    span_specs = (
        {"description": "barbarbar", "sentry_tags": {"status": "invalid_argument"}},
        {"description": "foofoofoo", "sentry_tags": {"status": "success"}},
    )
    stored = [self.create_span(spec, start_ts=self.ten_mins_ago) for spec in span_specs]
    self.store_spans(stored, is_eap=self.is_eap)

    request_params = {
        "field": ["count()", "description"],
        "query": "oof",
        "orderby": "-count()",
        "project": self.project.id,
        "dataset": self.dataset,
    }
    response = self.do_request(request_params)

    assert response.status_code == 200, response.content
    rows, meta_info = response.data["data"], response.data["meta"]
    # "oof" only occurs in the middle of "foofoofoo", never in "barbarbar".
    assert len(rows) == 1
    expected_rows = [
        {
            "count()": 1,
            "description": "foofoofoo",
        },
    ]
    assert rows == expected_rows
    assert meta_info["dataset"] == self.dataset


class OrganizationEventsEAPSpanEndpointTest(OrganizationEventsSpanIndexedEndpointTest):
is_eap = True
Expand Down

0 comments on commit cd24681

Please sign in to comment.