From b5592861ba2991f5cd2d2ab1cf47596473092c87 Mon Sep 17 00:00:00 2001 From: Ryota Egashira Date: Mon, 3 Mar 2025 14:15:36 -0800 Subject: [PATCH] Added Enum and refactoring --- .../src/datahub/ingestion/source/vertexai.py | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/vertexai.py b/metadata-ingestion/src/datahub/ingestion/source/vertexai.py index ef2c04a49dc8a..3ff2c22c41339 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/vertexai.py +++ b/metadata-ingestion/src/datahub/ingestion/source/vertexai.py @@ -50,6 +50,7 @@ TimeStampClass, VersionTagClass, ) +from datahub.utilities.str_enum import StrEnum T = TypeVar("T") @@ -86,6 +87,15 @@ def __init__(self, **data: Any): ) +class MLTypes(StrEnum): + # Generic SubTypes + TRAINING_JOB = "Training Job" + MODEL = "ML Model" + MODEL_GROUP = "ML Model Group" + ENDPOINT = "Endpoint" + DATASET = "Dataset" + + @platform_name("Vertex AI", id="vertexai") @config_class(VertexAIConfig) @support_status(SupportStatus.TESTING) @@ -137,12 +147,8 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: # TODO Fetch Experiments and Experiment Runs def _gen_project_workunits(self) -> Iterable[MetadataWorkUnit]: - container_key = ProjectIdKey( - project_id=self.config.project_id, platform=self.platform - ) - yield from gen_containers( - container_key=container_key, + container_key=self._get_project_container(), name=self.config.project_id, sub_types=["Project"], ) @@ -225,7 +231,7 @@ def _gen_ml_group_workunits( ) # TODO add following when metadata model for mlgroup is updated (these aspects not supported currently) - # aspects.append(SubTypesClass(typeNames=["Training Job"])) + # aspects.append(SubTypesClass(typeNames=[MLTypes.MODEL_GROUP])) # aspects.append(ContainerClass(container=self._get_project_container().as_urn())) yield from auto_workunit( @@ -280,7 +286,7 @@ def _gen_data_process_workunits( externalUrl=self._make_job_external_url(job), id=job.name ) ) - aspects.append(SubTypesClass(typeNames=["Training Job"])) + aspects.append(SubTypesClass(typeNames=[MLTypes.TRAINING_JOB])) aspects.append(ContainerClass(container=self._get_project_container().as_urn())) @@ -395,7 +401,7 @@ def _get_dataset_workunits( ) ) - aspects.append(SubTypesClass(typeNames=["Dataset"])) + aspects.append(SubTypesClass(typeNames=[MLTypes.DATASET])) # Create a container for Project as parent of the dataset aspects.append(ContainerClass(container=self._get_project_container().as_urn())) @@ -491,7 +497,7 @@ def _gen_endpoint_workunits( ) ) - aspects.append(SubTypesClass(typeNames=["Endpoint"])) + aspects.append(SubTypesClass(typeNames=[MLTypes.ENDPOINT])) yield from auto_workunit( MetadataChangeProposalWrapper.construct_many(endpoint_urn, aspects=aspects)