Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

APP-5657: Add support for MatchPhrase query to the SDKs #548

Merged
merged 6 commits into from
Mar 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions pyatlan/model/fields/atlan_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pyatlan.model.search import (
Exists,
Match,
MatchPhrase,
Prefix,
Query,
Range,
Expand Down Expand Up @@ -327,6 +328,22 @@ def match(self, value: StrictStr) -> Query:
query=value,
)

def match_phrase(self, value: StrictStr) -> Query:
    """
    Build a phrase query against this field's text index.

    The resulting query textually matches the given phrase within the field and
    requires the words to appear in exactly the order provided. The supplied value
    is analyzed in the same way as the field itself (for example, tokenization,
    stemming, and so on).

    :param value: the phrase to match against
    :returns: a query that will only match assets whose analyzed value for the field matches the
              (also analyzed) phrase provided
    """
    phrase_query = MatchPhrase(field=self.text_field_name, query=value)
    return phrase_query


class InternalKeywordField(KeywordField):
"""
Expand Down
23 changes: 23 additions & 0 deletions pyatlan/model/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -1843,6 +1843,29 @@ def to_dict(self):
return {self.type_name: {self.field: parameters}}


@dataclass(config=ConfigDict(smart_union=True, extra="forbid"))  # type: ignore
class MatchPhrase(Query):
    """
    Query model that serializes to a ``match_phrase`` clause.

    Only ``field`` and ``query`` are required. Every optional setting that is
    left as ``None`` is omitted from the serialized dictionary.
    """

    # Name of the field the phrase is matched against.
    field: str
    # The phrase to search for.
    query: StrictStr
    # Optional settings; each is emitted only when it is not None.
    analyzer: Optional[str] = None
    slop: Optional[int] = None
    zero_terms_query: Optional[Literal["none", "all"]] = None
    boost: Optional[float] = None
    # Key under which the clause is nested in the serialized form.
    type_name: Literal["match_phrase"] = "match_phrase"

    def to_dict(self):
        """
        Serialize this query into a nested dictionary.

        :returns: ``{type_name: {field: parameters}}`` where ``parameters`` always
                  contains ``query`` plus any optional setting that is not ``None``
        """
        # Listed in emission order so the resulting dict keeps a stable key order.
        optional_settings = (
            ("analyzer", self.analyzer),
            ("slop", self.slop),
            ("zero_terms_query", self.zero_terms_query),
            ("boost", self.boost),
        )
        clause = {"query": self.query}
        # Compare against None explicitly: falsy values such as slop=0 must be kept.
        clause.update(
            {
                name: setting
                for name, setting in optional_settings
                if setting is not None
            }
        )
        return {self.type_name: {self.field: clause}}


@dataclass(config=ConfigDict(smart_union=True, extra="forbid")) # type: ignore
class SortItem:
field: StrictStr
Expand Down
127 changes: 127 additions & 0 deletions tests/unit/test_search_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
import pytest
from pydantic.v1 import StrictBool, StrictStr, ValidationError

from pyatlan.model.assets import Asset
from pyatlan.model.audit import AuditSearchRequest
from pyatlan.model.enums import AtlanConnectorType, CertificateStatus
from pyatlan.model.fluent_search import FluentSearch
from pyatlan.model.search import (
DSL,
Bool,
Expand All @@ -18,6 +20,7 @@
Match,
MatchAll,
MatchNone,
MatchPhrase,
Prefix,
Range,
Regexp,
Expand Down Expand Up @@ -1659,3 +1662,127 @@ def test_dsl_serialization_and_deserialization():
) == dsl_through_model.json(exclude_unset=True, by_alias=True)

assert dsl_through_raw.json() == dsl_through_model.json()


@pytest.mark.parametrize(
    "field, query, analyzer, slop, zero_terms_query, boost, expected",
    [
        # Only the required arguments.
        (
            "name",
            "test",
            None,
            None,
            None,
            None,
            {"match_phrase": {"name": {"query": "test"}}},
        ),
        # Analyzer only.
        (
            "name",
            "test",
            "an analyzer",
            None,
            None,
            None,
            {"match_phrase": {"name": {"query": "test", "analyzer": "an analyzer"}}},
        ),
        # Analyzer and slop.
        (
            "name",
            "test",
            "an analyzer",
            2,
            None,
            None,
            {
                "match_phrase": {
                    "name": {
                        "query": "test",
                        "analyzer": "an analyzer",
                        "slop": 2,
                    }
                }
            },
        ),
        # Every optional setting supplied.
        (
            "name",
            "test",
            "an analyzer",
            2,
            "none",
            1.0,
            {
                "match_phrase": {
                    "name": {
                        "query": "test",
                        "analyzer": "an analyzer",
                        "slop": 2,
                        "zero_terms_query": "none",
                        "boost": 1.0,
                    }
                }
            },
        ),
        # No analyzer; a slop of 0 must still be serialized.
        (
            "name",
            "test",
            None,
            0,
            "all",
            2.0,
            {
                "match_phrase": {
                    "name": {
                        "query": "test",
                        "slop": 0,
                        "zero_terms_query": "all",
                        "boost": 2.0,
                    }
                }
            },
        ),
        # Different field and multi-word phrase, without boost.
        (
            "description",
            "another test",
            "standard",
            1,
            "none",
            None,
            {
                "match_phrase": {
                    "description": {
                        "query": "another test",
                        "analyzer": "standard",
                        "slop": 1,
                        "zero_terms_query": "none",
                    }
                }
            },
        ),
    ],
)
def test_match_phrase_to_dict(
    field,
    query,
    analyzer,
    slop,
    zero_terms_query,
    boost,
    expected,
):
    """Check that MatchPhrase.to_dict() emits exactly the settings that are not None."""
    phrase_query = MatchPhrase(
        field=field,
        query=query,
        analyzer=analyzer,
        slop=slop,
        zero_terms_query=zero_terms_query,
        boost=boost,
    )
    serialized = phrase_query.to_dict()
    assert serialized == expected


def test_match_phrase_textfield():
    """Check that Asset.NAME.match_phrase() ends up as a MatchPhrase filter on "name"."""
    fluent = FluentSearch()
    request = fluent.where(Asset.NAME.match_phrase("tmp")).to_request()
    expected_query = Bool(filter=[MatchPhrase(field="name", query="tmp")])
    assert request.dsl.query == expected_query
Loading