From a23c87d2c348678442f72931cd0770345655f531 Mon Sep 17 00:00:00 2001 From: vaibhavatlan Date: Thu, 27 Feb 2025 15:33:50 +0530 Subject: [PATCH 1/4] added support for MatchPhrase query --- pyatlan/model/search.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pyatlan/model/search.py b/pyatlan/model/search.py index 1808d3c9a..33bd823db 100644 --- a/pyatlan/model/search.py +++ b/pyatlan/model/search.py @@ -1843,6 +1843,29 @@ def to_dict(self): return {self.type_name: {self.field: parameters}} +@dataclass(config=ConfigDict(smart_union=True, extra="forbid")) # type: ignore +class MatchPhrase(Query): + field: str + query: StrictStr + analyzer: Optional[str] = None + slop: Optional[int] = None + zero_terms_query: Optional[Literal["none", "all"]] = None + boost: Optional[float] = None + type_name: Literal["match_phrase"] = "match_phrase" + + def to_dict(self): + parameters = {"query": self.query} + if self.analyzer is not None: + parameters["analyzer"] = self.analyzer + if self.slop is not None: + parameters["slop"] = self.slop + if self.zero_terms_query is not None: + parameters["zero_terms_query"] = self.zero_terms_query + if self.boost is not None: + parameters["boost"] = self.boost + return {self.type_name: {self.field: parameters}} + + @dataclass(config=ConfigDict(smart_union=True, extra="forbid")) # type: ignore class SortItem: field: StrictStr From c1935d40ec632efbd8f96f215054db9434445ec3 Mon Sep 17 00:00:00 2001 From: vaibhavatlan Date: Thu, 27 Feb 2025 16:43:51 +0530 Subject: [PATCH 2/4] Added the unit tests --- tests/unit/test_search_model.py | 118 ++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/tests/unit/test_search_model.py b/tests/unit/test_search_model.py index b463c0387..f56a9e0d3 100644 --- a/tests/unit/test_search_model.py +++ b/tests/unit/test_search_model.py @@ -18,6 +18,7 @@ Match, MatchAll, MatchNone, + MatchPhrase, Prefix, Range, Regexp, @@ -1659,3 +1660,120 @@ def test_dsl_serialization_and_deserialization(): ) == dsl_through_model.json(exclude_unset=True, by_alias=True) assert dsl_through_raw.json() == dsl_through_model.json() + + +@pytest.mark.parametrize( + "field, query, analyzer, slop, zero_terms_query, boost, expected", + [ + ( + "name", + "test", + None, + None, + None, + None, + {"match_phrase": {"name": {"query": "test"}}}, + ), + ( + "name", + "test", + "an analyzer", + None, + None, + None, + {"match_phrase": {"name": {"query": "test", "analyzer": "an analyzer"}}}, + ), + ( + "name", + "test", + "an analyzer", + 2, + None, + None, + { + "match_phrase": { + "name": { + "query": "test", + "analyzer": "an analyzer", + "slop": 2, + } + } + }, + ), + ( + "name", + "test", + "an analyzer", + 2, + "none", + 1.0, + { + "match_phrase": { + "name": { + "query": "test", + "analyzer": "an analyzer", + "slop": 2, + "zero_terms_query": "none", + "boost": 1.0, + } + } + }, + ), + ( + "name", + "test", + None, + 0, + "all", + 2.0, + { + "match_phrase": { + "name": { + "query": "test", + "slop": 0, + "zero_terms_query": "all", + "boost": 2.0, + } + } + }, + ), + ( + "description", + "another test", + "standard", + 1, + "none", + None, + { + "match_phrase": { + "description": { + "query": "another test", + "analyzer": "standard", + "slop": 1, + "zero_terms_query": "none", + } + } + }, + ), + ], +) +def test_match_phrase_to_dict( + field, + query, + analyzer, + slop, + zero_terms_query, + boost, + expected, +): + assert ( + MatchPhrase( + field=field, + query=query, + analyzer=analyzer, + slop=slop, + zero_terms_query=zero_terms_query, + boost=boost, + ).to_dict() + == expected + ) From 0a9039a261fc838f2468957234bbb87f1f0bfe8d Mon Sep 17 00:00:00 2001 From: vaibhavatlan Date: Fri, 28 Feb 2025 13:00:36 +0530 Subject: [PATCH 3/4] added the method for match_phrase --- pyatlan/model/fields/atlan_fields.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pyatlan/model/fields/atlan_fields.py b/pyatlan/model/fields/atlan_fields.py index 1d3a99b31..f4162ff59 100644 --- a/pyatlan/model/fields/atlan_fields.py +++ b/pyatlan/model/fields/atlan_fields.py @@ -14,6 +14,7 @@ from pyatlan.model.search import ( Exists, Match, + MatchPhrase, Prefix, Query, Range, @@ -327,6 +328,22 @@ def match(self, value: StrictStr) -> Query: query=value, ) + def match_phrase(self, value: StrictStr) -> Query: + """ + Returns a query that will textually match the specified phrase within the field, + ensuring that the words appear in the exact order provided. This analyzes the + provided value according to the same analysis carried out on the field + (for example, tokenization, stemming, and so on). + + :param value: the string value to match against + :returns: a query that will only match assets whose analyzed value for the field matches the value provided + (which will also be analyzed) + """ + return MatchPhrase( + field=self.text_field_name, + query=value, + ) + class InternalKeywordField(KeywordField): """ From b4f348756e7ef041fc5d9fde71d11e76410583e0 Mon Sep 17 00:00:00 2001 From: vaibhavatlan Date: Fri, 28 Feb 2025 23:52:09 +0530 Subject: [PATCH 4/4] added the unit test case for Textfield.match_phrase() --- tests/unit/test_search_model.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/unit/test_search_model.py b/tests/unit/test_search_model.py index f56a9e0d3..82d27ef8e 100644 --- a/tests/unit/test_search_model.py +++ b/tests/unit/test_search_model.py @@ -7,8 +7,10 @@ import pytest from pydantic.v1 import StrictBool, StrictStr, ValidationError +from pyatlan.model.assets import Asset from pyatlan.model.audit import AuditSearchRequest from pyatlan.model.enums import AtlanConnectorType, CertificateStatus +from pyatlan.model.fluent_search import FluentSearch from pyatlan.model.search import ( DSL, Bool, @@ -1777,3 +1779,10 @@ def test_match_phrase_to_dict( ).to_dict() == expected ) + + +def test_match_phrase_textfield(): + search_request = FluentSearch().where(Asset.NAME.match_phrase("tmp")).to_request() + assert search_request.dsl.query == Bool( + filter=[MatchPhrase(field="name", query="tmp")] + )