Skip to content

Commit

Permalink
Merge pull request #548 from atlanhq/APP-5657
Browse files Browse the repository at this point in the history
APP-5657: Add support for `MatchPhrase` query
  • Loading branch information
Aryamanz29 authored Mar 3, 2025
2 parents 831ab2d + 395c9ed commit 98891a3
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 0 deletions.
17 changes: 17 additions & 0 deletions pyatlan/model/fields/atlan_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pyatlan.model.search import (
Exists,
Match,
MatchPhrase,
Prefix,
Query,
Range,
Expand Down Expand Up @@ -327,6 +328,22 @@ def match(self, value: StrictStr) -> Query:
query=value,
)

def match_phrase(self, value: StrictStr) -> Query:
    """
    Build a phrase-match query against this field's text index.

    The phrase must occur in the field with its words in the exact order given.
    The supplied value is analyzed in the same way as the field itself
    (for example, tokenization, stemming, and so on) before matching.

    :param value: the phrase to look for in the field
    :returns: a query matching only assets whose analyzed field value contains the (analyzed) phrase
    """
    # Phrase matching targets the analyzed text variant of the field.
    phrase_query = MatchPhrase(field=self.text_field_name, query=value)
    return phrase_query


class InternalKeywordField(KeywordField):
"""
Expand Down
23 changes: 23 additions & 0 deletions pyatlan/model/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -1843,6 +1843,29 @@ def to_dict(self):
return {self.type_name: {self.field: parameters}}


@dataclass(config=ConfigDict(smart_union=True, extra="forbid"))  # type: ignore
class MatchPhrase(Query):
    # Query that serializes to ``{"match_phrase": {<field>: {...}}}``.
    # NOTE(review): the option names presumably mirror the Elasticsearch
    # match_phrase query DSL -- confirm against the search backend docs.

    # Name of the field the phrase is matched against.
    field: str
    # The phrase to search for; always emitted under the "query" key.
    query: StrictStr
    # Optional settings below are only serialized when they are not None.
    analyzer: Optional[str] = None
    slop: Optional[int] = None
    zero_terms_query: Optional[Literal["none", "all"]] = None
    boost: Optional[float] = None
    # Outermost key of the serialized query.
    type_name: Literal["match_phrase"] = "match_phrase"

    def to_dict(self):
        """
        Serialize this query into its nested dictionary form.

        :returns: ``{type_name: {field: parameters}}`` where ``parameters`` always
            holds ``query`` and, in this order, any of ``analyzer``, ``slop``,
            ``zero_terms_query`` and ``boost`` that are not ``None``
        """
        optional_settings = {
            "analyzer": self.analyzer,
            "slop": self.slop,
            "zero_terms_query": self.zero_terms_query,
            "boost": self.boost,
        }
        body = {"query": self.query}
        # Compare against None (not truthiness) so values such as slop=0 are kept.
        body.update(
            {
                name: setting
                for name, setting in optional_settings.items()
                if setting is not None
            }
        )
        return {self.type_name: {self.field: body}}


@dataclass(config=ConfigDict(smart_union=True, extra="forbid")) # type: ignore
class SortItem:
field: StrictStr
Expand Down
127 changes: 127 additions & 0 deletions tests/unit/test_search_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
import pytest
from pydantic.v1 import StrictBool, StrictStr, ValidationError

from pyatlan.model.assets import Asset
from pyatlan.model.audit import AuditSearchRequest
from pyatlan.model.enums import AtlanConnectorType, CertificateStatus
from pyatlan.model.fluent_search import FluentSearch
from pyatlan.model.search import (
DSL,
Bool,
Expand All @@ -18,6 +20,7 @@
Match,
MatchAll,
MatchNone,
MatchPhrase,
Prefix,
Range,
Regexp,
Expand Down Expand Up @@ -1659,3 +1662,127 @@ def test_dsl_serialization_and_deserialization():
) == dsl_through_model.json(exclude_unset=True, by_alias=True)

assert dsl_through_raw.json() == dsl_through_model.json()


@pytest.mark.parametrize(
    "field, query, analyzer, slop, zero_terms_query, boost, expected",
    [
        # Only the mandatory query text.
        (
            "name", "test", None, None, None, None,
            {"match_phrase": {"name": {"query": "test"}}},
        ),
        # Query text plus an analyzer.
        (
            "name", "test", "an analyzer", None, None, None,
            {"match_phrase": {"name": {"query": "test", "analyzer": "an analyzer"}}},
        ),
        # Analyzer and slop.
        (
            "name", "test", "an analyzer", 2, None, None,
            {
                "match_phrase": {
                    "name": {"query": "test", "analyzer": "an analyzer", "slop": 2}
                }
            },
        ),
        # Every optional setting supplied.
        (
            "name", "test", "an analyzer", 2, "none", 1.0,
            {
                "match_phrase": {
                    "name": {
                        "query": "test",
                        "analyzer": "an analyzer",
                        "slop": 2,
                        "zero_terms_query": "none",
                        "boost": 1.0,
                    }
                }
            },
        ),
        # No analyzer; slop=0 is falsy but must still appear in the output.
        (
            "name", "test", None, 0, "all", 2.0,
            {
                "match_phrase": {
                    "name": {
                        "query": "test",
                        "slop": 0,
                        "zero_terms_query": "all",
                        "boost": 2.0,
                    }
                }
            },
        ),
        # Different field and phrase, without a boost.
        (
            "description", "another test", "standard", 1, "none", None,
            {
                "match_phrase": {
                    "description": {
                        "query": "another test",
                        "analyzer": "standard",
                        "slop": 1,
                        "zero_terms_query": "none",
                    }
                }
            },
        ),
    ],
)
def test_match_phrase_to_dict(
    field, query, analyzer, slop, zero_terms_query, boost, expected
):
    """Check that MatchPhrase.to_dict() emits exactly the settings that are not None."""
    phrase_query = MatchPhrase(
        field=field,
        query=query,
        analyzer=analyzer,
        slop=slop,
        zero_terms_query=zero_terms_query,
        boost=boost,
    )
    serialized = phrase_query.to_dict()
    assert serialized == expected


def test_match_phrase_textfield():
    """Check that Asset.NAME.match_phrase() lands in the request as a MatchPhrase filter."""
    request = FluentSearch().where(Asset.NAME.match_phrase("tmp")).to_request()
    # The single where() clause is expected to show up as the only Bool filter.
    expected_query = Bool(filter=[MatchPhrase(field="name", query="tmp")])
    assert request.dsl.query == expected_query

0 comments on commit 98891a3

Please sign in to comment.