Merge pull request #548 from atlanhq/APP-5657

APP-5657 : Add support for `MatchPhrase` query
atlanhq · Mar 3, 2025 · 98891a3 · 98891a3
2 parents 831ab2d + 395c9ed
commit 98891a3
Show file tree

Hide file tree

Showing 3 changed files with 167 additions and 0 deletions.
diff --git a/pyatlan/model/fields/atlan_fields.py b/pyatlan/model/fields/atlan_fields.py
@@ -14,6 +14,7 @@
 from pyatlan.model.search import (
     Exists,
     Match,
+    MatchPhrase,
     Prefix,
     Query,
     Range,
@@ -327,6 +328,22 @@ def match(self, value: StrictStr) -> Query:
             query=value,
         )
 
+    def match_phrase(self, value: StrictStr) -> Query:
+        """
+        Builds a phrase query against this field: the words of the given value must
+        appear in the field in the same order in which they were provided. The value
+        is analyzed the same way the field itself is analyzed (for example,
+        tokenization, stemming, and so on) before being compared.
+
+        :param value: the phrase to look for within the field
+        :returns: a `MatchPhrase` query on `text_field_name` that only matches assets whose
+                  analyzed value for the field matches the (also analyzed) phrase
+        """
+        # Only the field and the phrase are passed; nothing else is set on the query.
+        target_field = self.text_field_name
+        phrase_query = MatchPhrase(field=target_field, query=value)
+        return phrase_query
+
 
 class InternalKeywordField(KeywordField):
     """

diff --git a/pyatlan/model/search.py b/pyatlan/model/search.py
@@ -1843,6 +1843,29 @@ def to_dict(self):
         return {self.type_name: {self.field: parameters}}
 
 
+@dataclass(config=ConfigDict(smart_union=True, extra="forbid"))  # type: ignore
+class MatchPhrase(Query):
+    """Query that serializes to a `match_phrase` clause for a single field."""
+
+    field: str
+    query: StrictStr
+    analyzer: Optional[str] = None
+    slop: Optional[int] = None
+    zero_terms_query: Optional[Literal["none", "all"]] = None
+    boost: Optional[float] = None
+    type_name: Literal["match_phrase"] = "match_phrase"
+
+    def to_dict(self):
+        """Serialize to `{type_name: {field: parameters}}`, skipping unset options."""
+        parameters = {"query": self.query}
+        # Optional settings are visited in declaration order and kept only when set.
+        for option in ("analyzer", "slop", "zero_terms_query", "boost"):
+            setting = getattr(self, option)
+            if setting is not None:
+                parameters[option] = setting
+        return {self.type_name: {self.field: parameters}}
+
+
 @dataclass(config=ConfigDict(smart_union=True, extra="forbid"))  # type: ignore
 class SortItem:
     field: StrictStr

diff --git a/tests/unit/test_search_model.py b/tests/unit/test_search_model.py
@@ -7,8 +7,10 @@
 import pytest
 from pydantic.v1 import StrictBool, StrictStr, ValidationError
 
+from pyatlan.model.assets import Asset
 from pyatlan.model.audit import AuditSearchRequest
 from pyatlan.model.enums import AtlanConnectorType, CertificateStatus
+from pyatlan.model.fluent_search import FluentSearch
 from pyatlan.model.search import (
     DSL,
     Bool,
@@ -18,6 +20,7 @@
     Match,
     MatchAll,
     MatchNone,
+    MatchPhrase,
     Prefix,
     Range,
     Regexp,
@@ -1659,3 +1662,127 @@ def test_dsl_serialization_and_deserialization():
     ) == dsl_through_model.json(exclude_unset=True, by_alias=True)
 
     assert dsl_through_raw.json() == dsl_through_model.json()
+
+
+@pytest.mark.parametrize(
+    "field, query, analyzer, slop, zero_terms_query, boost, expected",
+    [
+        (  # query only: no optional setting appears in the output
+            "name",
+            "test",
+            None,
+            None,
+            None,
+            None,
+            {"match_phrase": {"name": {"query": "test"}}},
+        ),
+        (  # analyzer is the only optional setting provided
+            "name",
+            "test",
+            "an analyzer",
+            None,
+            None,
+            None,
+            {"match_phrase": {"name": {"query": "test", "analyzer": "an analyzer"}}},
+        ),
+        (  # analyzer and slop provided
+            "name",
+            "test",
+            "an analyzer",
+            2,
+            None,
+            None,
+            {
+                "match_phrase": {
+                    "name": {
+                        "query": "test",
+                        "analyzer": "an analyzer",
+                        "slop": 2,
+                    }
+                }
+            },
+        ),
+        (  # every optional setting provided
+            "name",
+            "test",
+            "an analyzer",
+            2,
+            "none",
+            1.0,
+            {
+                "match_phrase": {
+                    "name": {
+                        "query": "test",
+                        "analyzer": "an analyzer",
+                        "slop": 2,
+                        "zero_terms_query": "none",
+                        "boost": 1.0,
+                    }
+                }
+            },
+        ),
+        (  # no analyzer; a slop of 0 is not None, so it must still be emitted
+            "name",
+            "test",
+            None,
+            0,
+            "all",
+            2.0,
+            {
+                "match_phrase": {
+                    "name": {
+                        "query": "test",
+                        "slop": 0,
+                        "zero_terms_query": "all",
+                        "boost": 2.0,
+                    }
+                }
+            },
+        ),
+        (  # different field and phrase; boost left out
+            "description",
+            "another test",
+            "standard",
+            1,
+            "none",
+            None,
+            {
+                "match_phrase": {
+                    "description": {
+                        "query": "another test",
+                        "analyzer": "standard",
+                        "slop": 1,
+                        "zero_terms_query": "none",
+                    }
+                }
+            },
+        ),
+    ],
+)
+def test_match_phrase_to_dict(
+    field,
+    query,
+    analyzer,
+    slop,
+    zero_terms_query,
+    boost,
+    expected,
+):
+    """to_dict() must emit the query plus only the options that are not None."""
+
+    phrase = MatchPhrase(
+        field=field,
+        query=query,
+        analyzer=analyzer,
+        slop=slop,
+        zero_terms_query=zero_terms_query,
+        boost=boost,
+    )
+    assert phrase.to_dict() == expected
+
+
+def test_match_phrase_textfield():
+    """Asset.NAME.match_phrase(...) must become a MatchPhrase filter on "name"."""
+    request = FluentSearch().where(Asset.NAME.match_phrase("tmp")).to_request()
+    wanted = Bool(filter=[MatchPhrase(field="name", query="tmp")])
+    assert request.dsl.query == wanted