From 362d7bbd2e23ad01661557d2707822c44ff4863c Mon Sep 17 00:00:00 2001 From: treff7es Date: Mon, 2 Dec 2024 14:32:46 +0100 Subject: [PATCH] Gracefully handle missing model group --- .../aws/sagemaker_processors/feature_groups.py | 11 +++++++---- .../source/aws/sagemaker_processors/models.py | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py index b8b96c6306a3bb..fb8b539ca8519e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py @@ -1,3 +1,5 @@ +import logging + from dataclasses import dataclass from typing import TYPE_CHECKING, Iterable, List @@ -28,6 +30,8 @@ FeatureGroupSummaryTypeDef, ) +logger = logging.getLogger(__name__) + @dataclass class FeatureGroupProcessor: @@ -197,11 +201,10 @@ def get_feature_wu( full_table_name = f"{glue_database}.{glue_table}" - self.report.report_warning( - full_table_name, - f"""Note: table {full_table_name} is an AWS Glue object. + logger.info( + f"""Note: table {full_table_name} is an AWS Glue object. This source does not ingest all metadata for Glue tables. 
To view full table metadata, run Glue ingestion - (see https://datahubproject.io/docs/metadata-ingestion/#aws-glue-glue)""", + (see https://datahubproject.io/docs/generated/ingestion/sources/glue)""", ) feature_sources.append( diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py index eef2b26ee08f2e..4c65c755770ce6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py @@ -1,3 +1,4 @@ +import logging from collections import defaultdict from dataclasses import dataclass, field from datetime import datetime @@ -65,6 +66,8 @@ "Unknown": DeploymentStatusClass.UNKNOWN, } +logger = logging.getLogger(__name__) + @dataclass class ModelProcessor: @@ -424,9 +427,19 @@ def get_model_wu( model_group_arns = model_uri_groups | model_image_groups + # Filter, sort the model group names, and log missing keys in one shot model_group_names = sorted( - [self.group_arn_to_name[x] for x in model_group_arns] + [ + self.group_arn_to_name[arn] + if arn in self.group_arn_to_name + else logger.warning( + f"Model is associated with a group ARN {arn} which was not listed in the model groups" + ) + or arn + for arn in model_group_arns + ] ) + model_group_urns = [ builder.make_ml_model_group_urn("sagemaker", x, self.env) for x in model_group_names