From e9198c9e8e0a08abd28109b9cd8bae36bacbb6ca Mon Sep 17 00:00:00 2001 From: Aryamanz29 Date: Mon, 24 Feb 2025 15:48:53 +0530 Subject: [PATCH] APP-5632: Handled search edge cases - when `entityGuidsAll` is empty `[]` or `"undefined"` in the request DSL --- pyatlan/model/search_log.py | 40 +++++++++++++++++++++++++++++++------ pyatlan/utils.py | 13 ++++++++++++ 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/pyatlan/model/search_log.py b/pyatlan/model/search_log.py index 97c73f045..1ca789582 100644 --- a/pyatlan/model/search_log.py +++ b/pyatlan/model/search_log.py @@ -23,6 +23,7 @@ Term, Terms, ) +from pyatlan.utils import deep_get BY_TIMESTAMP = [SortItem("timestamp", order=SortOrder.ASCENDING)] @@ -405,15 +406,40 @@ def current_page(self) -> List[SearchLogEntry]: """ return self._log_entries - def _get_sl_unique_key(self, entity: SearchLogEntry) -> str: + def _get_sl_unique_key(self, entity: SearchLogEntry) -> Optional[str]: """ - Returns a unique key for a `SearchLogEntry` - by combining the `entity_guids_all[0]` with the timestamp. + Returns a unique key for a `SearchLogEntry` by + combining `entity_guid` with the timestamp. NOTE: This is necessary because the search log API - does not provide a unique identifier for logs + does not provide a unique identifier for logs. 
+ + :param entity: search log entry + :returns: unique key or None if no valid key is found """ - return f"{entity.entity_guids_all[0]}:{entity.timestamp}" + entity_guid = entity.entity_guids_all[0] if entity.entity_guids_all else None + + # If entity_guid is not present, try to extract it from request_dsl; otherwise, return None + if not entity_guid: + terms = deep_get( + entity.request_dsl, "query.function_score.query.bool.filter.bool.must" + ) + if not terms: + return None + + if isinstance(terms, list): + for term in terms: + if isinstance(term, dict) and term.get("term", {}).get("__guid"): + entity_guid = term["term"]["__guid"] + break + elif isinstance(terms, dict): + entity_guid = terms.get("term", {}).get("__guid") + + return ( + f"{entity_guid}:{entity.timestamp}" + if entity_guid and entity_guid != "undefined" + else None + ) def next_page(self, start=None, size=None) -> bool: """ @@ -435,7 +461,9 @@ def next_page(self, start=None, size=None) -> bool: # If it has, then exclude it from the current results; # otherwise, we may encounter duplicate search log records. self._processed_log_entries.update( - self._get_sl_unique_key(entity) for entity in self._log_entries + key + for entity in self._log_entries + if (key := self._get_sl_unique_key(entity)) ) return self._get_next_page() if self._log_entries else False diff --git a/pyatlan/utils.py b/pyatlan/utils.py index 2c5df5853..f67097817 100644 --- a/pyatlan/utils.py +++ b/pyatlan/utils.py @@ -145,6 +145,19 @@ def validate_required_fields(field_names: List[str], values: List[Any]): raise ValueError(f"{field_name} cannot be an empty list") +def deep_get(dictionary, keys, default=None): + """ + Returns dict key value using dict and its dot_key string, + + i.e.: key1.key2_nested.key3_nested, if found, otherwise returns default (`None`). + """ + return reduce( + lambda d, key: d.get(key, default) if isinstance(d, dict) else default, + keys.split("."), + dictionary, + ) + + @dataclass class EndpointMixin: prefix: str