diff --git a/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py b/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py index 95490147d4820..1bad28e50945c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py @@ -1,7 +1,8 @@ import logging -from typing import Dict, Iterable, List, Optional +from typing import Any, Dict, Iterable, List, Optional from urllib.parse import unquote +from pandas import DataFrame from pydantic import Field, SecretStr, validator from datahub.configuration.common import ConfigModel @@ -20,9 +21,14 @@ from datahub.ingestion.source.data_lake_common.config import PathSpecsConfigMixin from datahub.ingestion.source.data_lake_common.data_lake_utils import PLATFORM_GCS from datahub.ingestion.source.data_lake_common.path_spec import PathSpec, is_gcs_uri +from datahub.ingestion.source.gcs.gcs_utils import ( + get_gcs_bucket_name, + get_gcs_bucket_relative_path, +) from datahub.ingestion.source.s3.config import DataLakeSourceConfig +from datahub.ingestion.source.s3.datalake_profiler_config import DataLakeProfilerConfig from datahub.ingestion.source.s3.report import DataLakeSourceReport -from datahub.ingestion.source.s3.source import S3Source +from datahub.ingestion.source.s3.source import S3Source, TableData from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalHandler, StatefulStaleMetadataRemovalConfig, @@ -57,6 +63,19 @@ class GCSSourceConfig( description="Number of files to list to sample for schema inference. This will be ignored if sample_files is set to False in the pathspec.", ) + profiling: Optional[DataLakeProfilerConfig] = Field( + default=DataLakeProfilerConfig(), description="Data profiling configuration" + ) + + spark_driver_memory: str = Field( + default="4g", description="Max amount of memory to grant Spark." + ) + + spark_config: Dict[str, Any] = Field( + description="Spark configuration properties", + default={}, + ) + stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None @validator("path_specs", always=True) @@ -72,6 +91,9 @@ def check_path_specs_and_infer_platform( return path_specs + def is_profiling_enabled(self) -> bool: + return self.profiling is not None and self.profiling.enabled + class GCSSourceReport(DataLakeSourceReport): pass @@ -82,7 +104,7 @@ class GCSSourceReport(DataLakeSourceReport): @support_status(SupportStatus.INCUBATING) @capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") -@capability(SourceCapability.DATA_PROFILING, "Not supported", supported=False) +@capability(SourceCapability.DATA_PROFILING, "Enabled via configuration") class GCSSource(StatefulIngestionSourceBase): def __init__(self, config: GCSSourceConfig, ctx: PipelineContext): super().__init__(config, ctx) @@ -110,6 +132,11 @@ def create_equivalent_s3_config(self): env=self.config.env, max_rows=self.config.max_rows, number_of_files_to_sample=self.config.number_of_files_to_sample, + profiling=self.config.profiling, + spark_driver_memory=self.config.spark_driver_memory, + spark_config=self.config.spark_config, + use_s3_bucket_tags=False, + use_s3_object_tags=False, ) return s3_config @@ -145,6 +172,28 @@ def s3_source_overrides(self, source: S3Source) -> S3Source: source.create_s3_path = lambda bucket_name, key: unquote( # type: ignore f"s3://{bucket_name}/{key}" ) + + if self.config.is_profiling_enabled(): + original_read_file_spark = source.read_file_spark + + from types import MethodType + + def read_file_spark_with_gcs( + self_source: S3Source, file: str, ext: str + ) -> Optional[DataFrame]: + # Convert s3:// path back to gs:// for Spark + if file.startswith("s3://"): + file = f"gs://{file[5:]}" + return original_read_file_spark(file, ext) + + source.read_file_spark = MethodType(read_file_spark_with_gcs, source) # type: ignore + + def get_external_url_override(table_data: TableData) -> Optional[str]: + bucket_name = get_gcs_bucket_name(table_data.table_path) + key_prefix = get_gcs_bucket_relative_path(table_data.table_path) + return f"https://console.cloud.google.com/storage/browser/{bucket_name}/{key_prefix}" + + source.get_external_url = get_external_url_override # type: ignore return source def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_utils.py b/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_utils.py index eaf7030045765..9766c2cd71fc5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_utils.py @@ -22,10 +22,18 @@ def strip_gcs_prefix(gcs_uri: str) -> str: return gcs_uri[len(GCS_PREFIX) :] -def get_gcs_bucket_name(path): - if not is_gcs_uri(path): - raise ValueError(f"Not a GCS URI. Must start with prefixe: {GCS_PREFIX}") - return strip_gcs_prefix(path).split("/")[0] +def get_gcs_bucket_name(path: str) -> str: + """Get the bucket name from either a GCS (gs://) or S3-style (s3://) URI.""" + # Handle both gs:// and s3:// prefixes since we use S3-style URIs internally + if is_gcs_uri(path): + return strip_gcs_prefix(path).split("/")[0] + elif path.startswith("s3://"): + # For internal S3-style paths used by the source + return path[5:].split("/")[0] + else: + raise ValueError( + f"Not a valid GCS or S3 URI. Must start with prefixes: {GCS_PREFIX} or s3://" + ) def get_gcs_bucket_relative_path(gcs_uri: str) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index 02206ae15ab1c..71ba287f0985f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -605,6 +605,16 @@ def __create_partition_summary_aspect( maxPartition=max_partition_summary, minPartition=min_partition_summary ) + def get_external_url(self, table_data: TableData) -> Optional[str]: + if self.is_s3_platform() and self.source_config.aws_config: + # Get region from AWS config, default to us-east-1 if not specified + region = self.source_config.aws_config.aws_region or "us-east-1" + bucket_name = get_bucket_name(table_data.table_path) + key_prefix = get_bucket_relative_path(table_data.table_path) + external_url = f"https://{region}.console.aws.amazon.com/s3/buckets/{bucket_name}?prefix={key_prefix}" + return external_url + return None + def ingest_table( self, table_data: TableData, path_spec: PathSpec ) -> Iterable[MetadataWorkUnit]: @@ -674,6 +684,7 @@ def ingest_table( if max_partition else None ), + externalUrl=self.get_external_url(table_data), ) aspects.append(dataset_properties) if table_data.size_in_bytes > 0: diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json index 63888d6bc4351..c44a4950241d3 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json @@ -12,7 +12,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -28,6 +28,7 @@ "number_of_files": "1", "size_in_bytes": "172" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/no_extension/small", "name": "small", "description": "", "tags": [] @@ -35,7 +36,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -129,7 +130,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -146,12 +147,12 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586848010000 + "lastUpdatedTimestamp": 1586847850000 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -174,7 +175,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -190,7 +191,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -207,7 +208,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -225,7 +226,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -246,7 +247,23 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -269,7 +286,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -285,7 +302,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -302,7 +319,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -320,7 +337,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -328,40 +345,40 @@ "entityType": "container", "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, + { + "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + } + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - }, - { - "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", - "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" - } - ] + "container": "urn:li:container:c8d940d2010edd365619411b385b11e4" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -384,7 +401,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -400,7 +417,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -417,7 +434,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -435,23 +452,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:c8d940d2010edd365619411b385b11e4" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -480,7 +481,23 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -503,7 +520,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -519,7 +536,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -536,7 +553,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -554,23 +571,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -603,7 +604,23 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:9b4624d58669059c9e62afb3d7341944", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -626,7 +643,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -642,7 +659,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -659,7 +676,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -677,23 +694,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:9b4624d58669059c9e62afb3d7341944", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -730,7 +731,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -746,7 +747,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -762,7 +763,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -803,7 +804,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_without_extension.json", + "runId": "file_inference_without_extension.json", "lastRunId": "no-run-id-provided" } }, diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json index 8087ea591beef..9a5664214f956 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json @@ -28,6 +28,7 @@ "number_of_files": "1", "size_in_bytes": "172" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/no_extension/small", "name": "small", "description": "", "tags": [] @@ -134,20 +135,19 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "operation", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "instance": "test-platform-instance", - "env": "DEV", - "folder_abs_path": "my-test-bucket/folder_a" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, - "name": "folder_a", - "env": "DEV" + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847850000 } }, "systemMetadata": { @@ -158,12 +158,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + "customProperties": { + "platform": "s3", + "instance": "test-platform-instance", + "env": "DEV", + "bucket_name": "my-test-bucket" + }, + "name": "my-test-bucket", + "env": "DEV" } }, "systemMetadata": { @@ -174,7 +181,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -189,19 +196,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847850000 + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" } }, "systemMetadata": { @@ -212,13 +214,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Folder" + "S3 bucket" ] } }, @@ -232,17 +234,15 @@ "entityType": "container", "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "instance": "test-platform-instance", - "env": "DEV", - "bucket_name": "my-test-bucket" - }, - "name": "my-test-bucket", - "env": "DEV" + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + } + ] } }, "systemMetadata": { @@ -253,12 +253,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" } }, "systemMetadata": { @@ -269,13 +269,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "containerProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "customProperties": { + "platform": "s3", + "instance": "test-platform-instance", + "env": "DEV", + "folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a", + "env": "DEV" } }, "systemMetadata": { @@ -288,11 +294,10 @@ "entityType": "container", "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "removed": false } }, "systemMetadata": { @@ -303,17 +308,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - } - ] + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" } }, "systemMetadata": { @@ -324,13 +325,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "S3 bucket" + "Folder" ] } }, @@ -367,7 +368,23 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:c8d940d2010edd365619411b385b11e4" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -376,9 +393,9 @@ "platform": "s3", "instance": "test-platform-instance", "env": "DEV", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" }, - "name": "folder_aaa", + "name": "folder_aa", "env": "DEV" } }, @@ -390,12 +407,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" + "removed": false } }, "systemMetadata": { @@ -406,12 +423,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" } }, "systemMetadata": { @@ -422,7 +440,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -442,17 +460,23 @@ "entityType": "container", "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "instance": "test-platform-instance", - "env": "DEV", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa", - "env": "DEV" + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, + { + "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + }, + { + "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" + } + ] } }, "systemMetadata": { @@ -463,12 +487,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" } }, "systemMetadata": { @@ -479,13 +503,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "containerProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "customProperties": { + "platform": "s3", + "instance": "test-platform-instance", + "env": "DEV", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa", + "env": "DEV" } }, "systemMetadata": { @@ -498,11 +528,10 @@ "entityType": "container", "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "removed": false } }, "systemMetadata": { @@ -513,14 +542,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" } }, "systemMetadata": { @@ -531,24 +559,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - }, - { - "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", - "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" - }, - { - "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" - } + "typeNames": [ + "Folder" ] } }, @@ -593,12 +610,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:9b4624d58669059c9e62afb3d7341944", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:c8d940d2010edd365619411b385b11e4" + "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" } }, "systemMetadata": { @@ -634,10 +651,10 @@ "entityType": "container", "entityUrn": "urn:li:container:9b4624d58669059c9e62afb3d7341944", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + "removed": false } }, "systemMetadata": { @@ -650,10 +667,11 @@ "entityType": "container", "entityUrn": "urn:li:container:9b4624d58669059c9e62afb3d7341944", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" } }, "systemMetadata": { @@ -680,23 +698,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:9b4624d58669059c9e62afb3d7341944", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "file_without_extension.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:9b4624d58669059c9e62afb3d7341944", diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json index 64c1505414ff8..568af72545ee4 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json @@ -9,6 +9,7 @@ "customProperties": { "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv/part3.csv" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/food_csv", "name": "food_csv", "description": "", "tags": [] @@ -46,7 +47,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -58,7 +59,7 @@ "isPartOfKey": false }, { - "fieldPath": "height", + "fieldPath": "weight", "nullable": false, "type": { "type": { @@ -70,26 +71,26 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "height", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "color", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -102,6 +103,28 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847820000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", @@ -141,19 +164,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847820000 + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -184,10 +201,10 @@ "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "browsePathsV2", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "path": [] } }, "systemMetadata": { @@ -198,12 +215,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [] + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" } }, "systemMetadata": { @@ -266,28 +283,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa", - "env": "UAT" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", @@ -347,10 +342,16 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa", + "env": "UAT" } }, "systemMetadata": { @@ -361,12 +362,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "removed": false } }, "systemMetadata": { @@ -379,12 +380,10 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -397,10 +396,12 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -438,16 +439,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa", - "env": "UAT" + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { @@ -460,10 +455,16 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa", + "env": "UAT" } }, "systemMetadata": { @@ -492,12 +493,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -510,10 +509,12 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -571,10 +572,27 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] } }, "systemMetadata": { @@ -593,6 +611,7 @@ "customProperties": { "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/food_parquet", "name": "food_parquet", "description": "", "tags": [] @@ -630,7 +649,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -642,14 +661,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -666,7 +685,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -678,14 +697,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -698,39 +717,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - }, - { - "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", - "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" - }, - { - "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" - }, - { - "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", - "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", @@ -769,22 +755,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", @@ -818,6 +788,38 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json index f86c652462fd4..6cd38c846c249 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json @@ -9,6 +9,7 @@ "customProperties": { "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/food_parquet", "name": "food_parquet", "description": "", "tags": [] @@ -46,7 +47,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -58,14 +59,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -82,7 +83,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -94,14 +95,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -114,6 +115,28 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847840000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", @@ -153,19 +176,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847840000 + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -196,10 +213,10 @@ "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "browsePathsV2", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "path": [] } }, "systemMetadata": { @@ -210,12 +227,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [] + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" } }, "systemMetadata": { @@ -226,7 +243,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -234,9 +251,9 @@ "customProperties": { "platform": "s3", "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + "folder_abs_path": "my-test-bucket/folder_a" }, - "name": "folder_aa", + "name": "folder_a", "env": "UAT" } }, @@ -248,12 +265,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + "removed": false } }, "systemMetadata": { @@ -264,12 +281,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -282,16 +299,12 @@ "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a" - }, - "name": "folder_a", - "env": "UAT" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -304,10 +317,15 @@ "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + ] } }, "systemMetadata": { @@ -318,12 +336,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" } }, "systemMetadata": { @@ -336,12 +354,16 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "containerProperties", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa", + "env": "UAT" } }, "systemMetadata": { @@ -352,14 +374,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "removed": false } }, "systemMetadata": { @@ -370,17 +390,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - } - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -393,10 +408,12 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -432,12 +449,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { @@ -472,10 +489,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "removed": false } }, "systemMetadata": { @@ -488,10 +505,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -518,22 +535,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition_exclude.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", @@ -567,10 +568,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" } }, "systemMetadata": { @@ -583,10 +584,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + "removed": false } }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json index 2575db41ca8b7..61800e6e2a4dd 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json @@ -9,6 +9,7 @@ "customProperties": { "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv/part3.csv" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/food_csv", "name": "folder_aaa.food_csv", "description": "", "tags": [] @@ -46,7 +47,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -58,7 +59,7 @@ "isPartOfKey": false }, { - "fieldPath": "height", + "fieldPath": "weight", "nullable": false, "type": { "type": { @@ -70,26 +71,26 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "height", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "color", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -102,6 +103,28 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847820000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", @@ -141,19 +164,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847820000 + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -184,10 +201,10 @@ "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "browsePathsV2", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "path": [] } }, "systemMetadata": { @@ -198,12 +215,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [] + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" } }, "systemMetadata": { @@ -266,28 +283,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa", - "env": "UAT" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", @@ -347,10 +342,16 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa", + "env": "UAT" } }, "systemMetadata": { @@ -361,12 +362,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "removed": false } }, "systemMetadata": { @@ -379,12 +380,10 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -397,10 +396,12 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -438,16 +439,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa", - "env": "UAT" + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { @@ -460,10 +455,16 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa", + "env": "UAT" } }, "systemMetadata": { @@ -492,12 +493,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -510,10 +509,12 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -571,10 +572,27 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] } }, "systemMetadata": { @@ -593,6 +611,7 @@ "customProperties": { "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/food_parquet", "name": "folder_aaa.food_parquet", "description": "", "tags": [] @@ -630,7 +649,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -642,14 +661,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -666,7 +685,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -678,14 +697,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -698,39 +717,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - }, - { - "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", - "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" - }, - { - "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" - }, - { - "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", - "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", @@ -769,22 +755,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition_filename.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", @@ -818,6 +788,38 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json index 272beb57e85e1..b1a63ea313a38 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json @@ -9,6 +9,7 @@ "customProperties": { "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/food_parquet", "name": "food_parquet", "description": "", "tags": [] @@ -46,7 +47,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -58,14 +59,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -82,7 +83,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -94,14 +95,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -114,6 +115,28 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847840000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", @@ -153,19 +176,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847840000 + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -196,10 +213,10 @@ "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "browsePathsV2", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "path": [] } }, "systemMetadata": { @@ -210,12 +227,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [] + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" } }, "systemMetadata": { @@ -226,7 +243,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -234,9 +251,9 @@ "customProperties": { "platform": "s3", "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + "folder_abs_path": "my-test-bucket/folder_a" }, - "name": "folder_aa", + "name": "folder_a", "env": "UAT" } }, @@ -248,12 +265,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + "removed": false } }, "systemMetadata": { @@ -264,12 +281,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -282,16 +299,12 @@ "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a" - }, - "name": "folder_a", - "env": "UAT" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -304,10 +317,15 @@ "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + } + ] } }, "systemMetadata": { @@ -318,12 +336,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "container": "urn:li:container:86297df39321e4948dbe8b8e941de98b" } }, "systemMetadata": { @@ -336,12 +354,16 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "containerProperties", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa", + "env": "UAT" } }, "systemMetadata": { @@ -352,14 +374,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "removed": false } }, "systemMetadata": { @@ -370,17 +390,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - } - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -393,10 +408,12 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -432,12 +449,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { @@ -472,10 +489,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "removed": false } }, "systemMetadata": { @@ -488,10 +505,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -518,22 +535,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_no_partition_glob.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", @@ -567,10 +568,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" } }, "systemMetadata": { @@ -583,10 +584,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + "removed": false } }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_basic.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_basic.json index ca598d06076e3..61156e6322f0f 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_basic.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_basic.json @@ -10,6 +10,7 @@ "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", "number_of_partitions": "1" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/pokemon_abilities_json", "name": "folder_aaa.pokemon_abilities_json", "description": "", "created": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_keyval.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_keyval.json index 0dc1c4016d895..4dd287de5fd1a 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_keyval.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_keyval.json @@ -10,6 +10,7 @@ "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", "number_of_partitions": "1" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/pokemon_abilities_json", "name": "folder_aaa.pokemon_abilities_json", "description": "", "created": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema.json index 756036e8c704d..2c75a959b3d53 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema.json @@ -10,6 +10,7 @@ "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", "number_of_partitions": "1" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/pokemon_abilities_json", "name": "folder_aaa.pokemon_abilities_json", "description": "", "created": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json index 21623e2216565..a7add44b55fd2 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json @@ -9,6 +9,7 @@ "customProperties": { "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv/part3.csv" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/food_csv", "name": "folder_aaa.food_csv", "description": "", "tags": [] @@ -46,7 +47,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -58,7 +59,7 @@ "isPartOfKey": false }, { - "fieldPath": "height", + "fieldPath": "weight", "nullable": false, "type": { "type": { @@ -70,26 +71,26 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "height", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "color", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -102,6 +103,28 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847820000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", @@ -141,19 +164,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847820000 + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -184,10 +201,10 @@ "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "browsePathsV2", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "path": [] } }, "systemMetadata": { @@ -198,12 +215,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [] + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" } }, "systemMetadata": { @@ -266,28 +283,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa", - "env": "UAT" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", @@ -347,10 +342,16 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa", + "env": "UAT" } }, "systemMetadata": { @@ -361,12 +362,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "removed": false } }, "systemMetadata": { @@ -379,12 +380,10 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -397,10 +396,12 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -438,16 +439,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa", - "env": "UAT" + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { @@ -460,10 +455,16 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa", + "env": "UAT" } }, "systemMetadata": { @@ -492,12 +493,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -510,10 +509,12 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -571,10 +572,27 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] } }, "systemMetadata": { @@ -593,6 +611,7 @@ "customProperties": { "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/food_parquet", "name": "folder_aaa.food_parquet", "description": "", "tags": [] @@ -630,7 +649,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -642,14 +661,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -666,7 +685,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -678,14 +697,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -700,29 +719,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "operation", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - }, - { - "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", - "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" - }, - { - "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" - }, - { - "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", - "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" - } - ] + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847840000 } }, "systemMetadata": { @@ -735,16 +743,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "container", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847840000 + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" } }, "systemMetadata": { @@ -757,10 +759,27 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] } }, "systemMetadata": { @@ -780,6 +799,7 @@ "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", "number_of_partitions": "1" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/pokemon_abilities_json", "name": "folder_aaa.pokemon_abilities_json", "description": "", "created": { @@ -823,31 +843,43 @@ }, "fields": [ { - "fieldPath": "effect_changes", + "fieldPath": "effect_changes.effect_entries.effect", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries", + "fieldPath": "effect_changes.effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.effect", + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", "nullable": false, "type": { "type": { @@ -871,19 +903,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.is_native", + "fieldPath": "effect_changes.effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.name", + "fieldPath": "effect_changes.version_group.name", "nullable": false, "type": { "type": { @@ -895,7 +927,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.url", + "fieldPath": "effect_changes.version_group.url", "nullable": false, "type": { "type": { @@ -919,19 +951,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.name", + "fieldPath": "effect_changes", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.url", + "fieldPath": "effect_entries.effect", "nullable": false, "type": { "type": { @@ -943,19 +975,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries", + "fieldPath": "effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.effect", + "fieldPath": "effect_entries.language.url", "nullable": false, "type": { "type": { @@ -979,7 +1011,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.name", + "fieldPath": "effect_entries.short_effect", "nullable": false, "type": { "type": { @@ -991,19 +1023,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.url", + "fieldPath": "effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.short_effect", + "fieldPath": "flavor_text_entries.flavor_text", "nullable": false, "type": { "type": { @@ -1015,19 +1047,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries", + "fieldPath": "flavor_text_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.flavor_text", + "fieldPath": "flavor_text_entries.language.url", "nullable": false, "type": { "type": { @@ -1051,7 +1083,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.name", + "fieldPath": "flavor_text_entries.version_group.name", "nullable": false, "type": { "type": { @@ -1063,7 +1095,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.url", + "fieldPath": "flavor_text_entries.version_group.url", "nullable": false, "type": { "type": { @@ -1087,19 +1119,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.name", + "fieldPath": "flavor_text_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.url", + "fieldPath": "generation.name", "nullable": false, "type": { "type": { @@ -1111,19 +1143,7 @@ "isPartOfKey": false }, { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", + "fieldPath": "generation.url", "nullable": false, "type": { "type": { @@ -1135,14 +1155,14 @@ "isPartOfKey": false }, { - "fieldPath": "generation.url", + "fieldPath": "generation", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -1183,31 +1203,19 @@ "isPartOfKey": false }, { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", + "fieldPath": "names.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "names.language.name", + "fieldPath": "names.language.url", "nullable": false, "type": { "type": { @@ -1219,14 +1227,14 @@ "isPartOfKey": false }, { - "fieldPath": "names.language.url", + "fieldPath": "names.language", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -1243,7 +1251,7 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon", + "fieldPath": "names", "nullable": false, "type": { "type": { @@ -1267,19 +1275,19 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon", + "fieldPath": "pokemon.pokemon.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.name", + "fieldPath": "pokemon.pokemon.url", "nullable": false, "type": { "type": { @@ -1291,14 +1299,14 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.url", + "fieldPath": "pokemon.pokemon", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -1313,6 +1321,18 @@ "nativeDataType": "int", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false } ] } @@ -1325,12 +1345,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "operation", "aspect": { "json": { - "removed": false + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847980000 } }, "systemMetadata": { @@ -1366,7 +1392,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1399,34 +1441,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847990000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + "removed": false } }, "systemMetadata": { @@ -1437,29 +1457,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - }, - { - "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", - "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" - }, - { - "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" - }, - { - "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", - "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" - } - ] + "removed": false } }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json index 154bce421e18a..408281b9d4b11 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json @@ -9,6 +9,7 @@ "customProperties": { "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv/part3.csv" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/food_csv", "name": "folder_aaa.food_csv", "description": "", "tags": [] @@ -46,7 +47,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -58,7 +59,7 @@ "isPartOfKey": false }, { - "fieldPath": "height", + "fieldPath": "weight", "nullable": false, "type": { "type": { @@ -70,26 +71,26 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "height", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "color", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -102,6 +103,28 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847820000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", @@ -141,19 +164,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847820000 + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -184,10 +201,10 @@ "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "browsePathsV2", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "path": [] } }, "systemMetadata": { @@ -198,12 +215,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [] + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" } }, "systemMetadata": { @@ -266,28 +283,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa", - "env": "UAT" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", @@ -347,10 +342,16 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa", + "env": "UAT" } }, "systemMetadata": { @@ -361,12 +362,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "removed": false } }, "systemMetadata": { @@ -379,12 +380,10 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -397,10 +396,12 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -438,16 +439,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa", - "env": "UAT" + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { @@ -460,10 +455,16 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa", + "env": "UAT" } }, "systemMetadata": { @@ -492,12 +493,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -510,10 +509,12 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -571,10 +572,27 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] } }, "systemMetadata": { @@ -593,6 +611,7 @@ "customProperties": { "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/food_parquet", "name": "folder_aaa.food_parquet", "description": "", "tags": [] @@ -630,7 +649,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -642,14 +661,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -666,7 +685,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -678,14 +697,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -700,29 +719,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "operation", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - }, - { - "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", - "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" - }, - { - "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" - }, - { - "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", - "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" - } - ] + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847840000 } }, "systemMetadata": { @@ -735,16 +743,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "container", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847840000 + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" } }, "systemMetadata": { @@ -757,10 +759,27 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] } }, "systemMetadata": { @@ -780,6 +799,7 @@ "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", "number_of_partitions": "7" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/pokemon_abilities_json", "name": "folder_aaa.pokemon_abilities_json", "description": "", "created": { @@ -823,31 +843,43 @@ }, "fields": [ { - "fieldPath": "effect_changes", + "fieldPath": "effect_changes.effect_entries.effect", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries", + "fieldPath": "effect_changes.effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.effect", + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", "nullable": false, "type": { "type": { @@ -871,19 +903,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.is_native", + "fieldPath": "effect_changes.effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.name", + "fieldPath": "effect_changes.version_group.name", "nullable": false, "type": { "type": { @@ -895,7 +927,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.url", + "fieldPath": "effect_changes.version_group.url", "nullable": false, "type": { "type": { @@ -919,19 +951,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.name", + "fieldPath": "effect_changes", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.url", + "fieldPath": "effect_entries.effect", "nullable": false, "type": { "type": { @@ -943,19 +975,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries", + "fieldPath": "effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.effect", + "fieldPath": "effect_entries.language.url", "nullable": false, "type": { "type": { @@ -979,7 +1011,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.name", + "fieldPath": "effect_entries.short_effect", "nullable": false, "type": { "type": { @@ -991,19 +1023,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.url", + "fieldPath": "effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.short_effect", + "fieldPath": "flavor_text_entries.flavor_text", "nullable": false, "type": { "type": { @@ -1015,19 +1047,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries", + "fieldPath": "flavor_text_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.flavor_text", + "fieldPath": "flavor_text_entries.language.url", "nullable": false, "type": { "type": { @@ -1051,7 +1083,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.name", + "fieldPath": "flavor_text_entries.version_group.name", "nullable": false, "type": { "type": { @@ -1063,7 +1095,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.url", + "fieldPath": "flavor_text_entries.version_group.url", "nullable": false, "type": { "type": { @@ -1087,19 +1119,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.name", + "fieldPath": "flavor_text_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.url", + "fieldPath": "generation.name", "nullable": false, "type": { "type": { @@ -1111,19 +1143,7 @@ "isPartOfKey": false }, { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", + "fieldPath": "generation.url", "nullable": false, "type": { "type": { @@ -1135,14 +1155,14 @@ "isPartOfKey": false }, { - "fieldPath": "generation.url", + "fieldPath": "generation", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -1183,31 +1203,19 @@ "isPartOfKey": false }, { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", + "fieldPath": "names.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "names.language.name", + "fieldPath": "names.language.url", "nullable": false, "type": { "type": { @@ -1219,14 +1227,14 @@ "isPartOfKey": false }, { - "fieldPath": "names.language.url", + "fieldPath": "names.language", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -1243,7 +1251,7 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon", + "fieldPath": "names", "nullable": false, "type": { "type": { @@ -1267,19 +1275,19 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon", + "fieldPath": "pokemon.pokemon.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.name", + "fieldPath": "pokemon.pokemon.url", "nullable": false, "type": { "type": { @@ -1291,14 +1299,14 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.url", + "fieldPath": "pokemon.pokemon", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -1313,6 +1321,18 @@ "nativeDataType": "int", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false } ] } @@ -1325,12 +1345,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "operation", "aspect": { "json": { - "removed": false + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847980000 } }, "systemMetadata": { @@ -1366,7 +1392,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1399,34 +1441,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847990000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + "removed": false } }, "systemMetadata": { @@ -1437,29 +1457,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - }, - { - "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", - "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" - }, - { - "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" - }, - { - "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", - "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" - } - ] + "removed": false } }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json index f483f806e6193..b1f7bb7fa28eb 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json @@ -9,6 +9,7 @@ "customProperties": { "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv/part3.csv" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/food_csv", "name": "folder_aaa.food_csv", "description": "", "tags": [] @@ -46,7 +47,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -58,7 +59,7 @@ "isPartOfKey": false }, { - "fieldPath": "height", + "fieldPath": "weight", "nullable": false, "type": { "type": { @@ -70,26 +71,26 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "height", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "color", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -102,6 +103,28 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847820000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", @@ -141,19 +164,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "entityType": "container", + "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847820000 + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -184,10 +201,10 @@ "entityType": "container", "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "browsePathsV2", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "path": [] } }, "systemMetadata": { @@ -198,12 +215,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [] + "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" } }, "systemMetadata": { @@ -266,28 +283,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa", - "env": "UAT" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", @@ -347,10 +342,16 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa", + "env": "UAT" } }, "systemMetadata": { @@ -361,12 +362,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + "removed": false } }, "systemMetadata": { @@ -379,12 +380,10 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -397,10 +396,12 @@ "entityType": "container", "entityUrn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -438,16 +439,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "UAT", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa", - "env": "UAT" + "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" } }, "systemMetadata": { @@ -460,10 +455,16 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + "customProperties": { + "platform": "s3", + "env": "UAT", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa", + "env": "UAT" } }, "systemMetadata": { @@ -492,12 +493,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -510,10 +509,12 @@ "entityType": "container", "entityUrn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -571,10 +572,27 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] } }, "systemMetadata": { @@ -593,6 +611,7 @@ "customProperties": { "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/food_parquet", "name": "folder_aaa.food_parquet", "description": "", "tags": [] @@ -630,7 +649,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -642,14 +661,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -666,7 +685,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -678,14 +697,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -700,29 +719,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "operation", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - }, - { - "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", - "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" - }, - { - "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" - }, - { - "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", - "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" - } - ] + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847840000 } }, "systemMetadata": { @@ -735,16 +743,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "container", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847840000 + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" } }, "systemMetadata": { @@ -757,10 +759,27 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + "path": [ + { + "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", + "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" + }, + { + "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", + "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" + }, + { + "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", + "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" + }, + { + "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", + "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + ] } }, "systemMetadata": { @@ -780,6 +799,7 @@ "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", "number_of_partitions": "2" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/pokemon_abilities_json", "name": "folder_aaa.pokemon_abilities_json", "description": "", "created": { @@ -823,31 +843,43 @@ }, "fields": [ { - "fieldPath": "effect_changes", + "fieldPath": "effect_changes.effect_entries.effect", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries", + "fieldPath": "effect_changes.effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.effect", + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", "nullable": false, "type": { "type": { @@ -871,19 +903,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.is_native", + "fieldPath": "effect_changes.effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.name", + "fieldPath": "effect_changes.version_group.name", "nullable": false, "type": { "type": { @@ -895,7 +927,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.url", + "fieldPath": "effect_changes.version_group.url", "nullable": false, "type": { "type": { @@ -919,19 +951,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.name", + "fieldPath": "effect_changes", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.url", + "fieldPath": "effect_entries.effect", "nullable": false, "type": { "type": { @@ -943,19 +975,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries", + "fieldPath": "effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.effect", + "fieldPath": "effect_entries.language.url", "nullable": false, "type": { "type": { @@ -979,7 +1011,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.name", + "fieldPath": "effect_entries.short_effect", "nullable": false, "type": { "type": { @@ -991,19 +1023,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.url", + "fieldPath": "effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.short_effect", + "fieldPath": "flavor_text_entries.flavor_text", "nullable": false, "type": { "type": { @@ -1015,19 +1047,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries", + "fieldPath": "flavor_text_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.flavor_text", + "fieldPath": "flavor_text_entries.language.url", "nullable": false, "type": { "type": { @@ -1051,7 +1083,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.name", + "fieldPath": "flavor_text_entries.version_group.name", "nullable": false, "type": { "type": { @@ -1063,7 +1095,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.url", + "fieldPath": "flavor_text_entries.version_group.url", "nullable": false, "type": { "type": { @@ -1087,19 +1119,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.name", + "fieldPath": "flavor_text_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.url", + "fieldPath": "generation.name", "nullable": false, "type": { "type": { @@ -1111,19 +1143,7 @@ "isPartOfKey": false }, { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", + "fieldPath": "generation.url", "nullable": false, "type": { "type": { @@ -1135,14 +1155,14 @@ "isPartOfKey": false }, { - "fieldPath": "generation.url", + "fieldPath": "generation", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -1183,31 +1203,19 @@ "isPartOfKey": false }, { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", + "fieldPath": "names.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "names.language.name", + "fieldPath": "names.language.url", "nullable": false, "type": { "type": { @@ -1219,14 +1227,14 @@ "isPartOfKey": false }, { - "fieldPath": "names.language.url", + "fieldPath": "names.language", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -1243,7 +1251,7 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon", + "fieldPath": "names", "nullable": false, "type": { "type": { @@ -1267,19 +1275,19 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon", + "fieldPath": "pokemon.pokemon.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.name", + "fieldPath": "pokemon.pokemon.url", "nullable": false, "type": { "type": { @@ -1291,14 +1299,14 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.url", + "fieldPath": "pokemon.pokemon", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -1313,6 +1321,18 @@ "nativeDataType": "int", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false } ] } @@ -1325,12 +1345,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "operation", "aspect": { "json": { - "removed": false + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847980000 } }, "systemMetadata": { @@ -1366,7 +1392,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1399,34 +1441,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847990000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" + "removed": false } }, "systemMetadata": { @@ -1437,29 +1457,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:050fedde7a12cb8c8447db8d298f5577", - "urn": "urn:li:container:050fedde7a12cb8c8447db8d298f5577" - }, - { - "id": "urn:li:container:86297df39321e4948dbe8b8e941de98b", - "urn": "urn:li:container:86297df39321e4948dbe8b8e941de98b" - }, - { - "id": "urn:li:container:273fbeff7bd9ecb74982205aadd77994", - "urn": "urn:li:container:273fbeff7bd9ecb74982205aadd77994" - }, - { - "id": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2", - "urn": "urn:li:container:ec362903c4c7de60197fcc7b7a79e4c2" - } - ] + "removed": false } }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json index 38ce5188e0a8e..434602915d4ee 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json @@ -28,6 +28,7 @@ "number_of_files": "1", "size_in_bytes": "3575" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv", "name": "NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv", "description": "", "tags": [] @@ -65,19 +66,19 @@ }, "fields": [ { - "fieldPath": "2", + "fieldPath": "Sampling Date", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.DateType": {} } }, - "nativeDataType": "string", + "nativeDataType": "date", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "3", + "fieldPath": "Site ID", "nullable": false, "type": { "type": { @@ -89,7 +90,7 @@ "isPartOfKey": false }, { - "fieldPath": "Br \n(mg/L)", + "fieldPath": "Park ID", "nullable": false, "type": { "type": { @@ -101,7 +102,7 @@ "isPartOfKey": false }, { - "fieldPath": "Ca \n(mg/L)", + "fieldPath": "Lat (\u00b0N)", "nullable": false, "type": { "type": { @@ -113,7 +114,7 @@ "isPartOfKey": false }, { - "fieldPath": "Cl \n(mg/L)", + "fieldPath": "Long (\u00b0W)", "nullable": false, "type": { "type": { @@ -125,7 +126,7 @@ "isPartOfKey": false }, { - "fieldPath": "Cond (\u00b5S/cm)", + "fieldPath": "Water Temp (\u00b0C)", "nullable": false, "type": { "type": { @@ -137,31 +138,31 @@ "isPartOfKey": false }, { - "fieldPath": "DO (mg/L)", + "fieldPath": "Cond (\u00b5S/cm)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "DOC [mg/L C]", + "fieldPath": "pH", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "F \n(mg/L)", + "fieldPath": "DO (mg/L)", "nullable": false, "type": { "type": { @@ -173,19 +174,19 @@ "isPartOfKey": false }, { - "fieldPath": "K \n(mg/L)", + "fieldPath": "Secchi Depth (m)", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Lat (\u00b0N)", + "fieldPath": "UV Absorbance, 254nm", "nullable": false, "type": { "type": { @@ -197,7 +198,7 @@ "isPartOfKey": false }, { - "fieldPath": "Long (\u00b0W)", + "fieldPath": "DOC [mg/L C]", "nullable": false, "type": { "type": { @@ -209,7 +210,7 @@ "isPartOfKey": false }, { - "fieldPath": "Mg \n(mg/L)", + "fieldPath": "SUVA, 254nm", "nullable": false, "type": { "type": { @@ -245,31 +246,31 @@ "isPartOfKey": false }, { - "fieldPath": "Na \n(mg/L)", + "fieldPath": "PO4-P \n(mg P/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "PO4-P \n(mg P/L)", + "fieldPath": "TDN \n(mg N/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Park ID", + "fieldPath": "TDP \n(mg P/L)", "nullable": false, "type": { "type": { @@ -281,7 +282,7 @@ "isPartOfKey": false }, { - "fieldPath": "SO4-S \n(mg/L)", + "fieldPath": "Cl \n(mg/L)", "nullable": false, "type": { "type": { @@ -293,7 +294,7 @@ "isPartOfKey": false }, { - "fieldPath": "SUVA, 254nm", + "fieldPath": "SO4-S \n(mg/L)", "nullable": false, "type": { "type": { @@ -305,19 +306,19 @@ "isPartOfKey": false }, { - "fieldPath": "Sampling Date", + "fieldPath": "F \n(mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.DateType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "date", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Secchi Depth (m)", + "fieldPath": "Br \n(mg/L)", "nullable": false, "type": { "type": { @@ -329,19 +330,19 @@ "isPartOfKey": false }, { - "fieldPath": "Site ID", + "fieldPath": "Na \n(mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "TDN \n(mg N/L)", + "fieldPath": "K \n(mg/L)", "nullable": false, "type": { "type": { @@ -353,19 +354,19 @@ "isPartOfKey": false }, { - "fieldPath": "TDP \n(mg P/L)", + "fieldPath": "Ca \n(mg/L)", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "UV Absorbance, 254nm", + "fieldPath": "Mg \n(mg/L)", "nullable": false, "type": { "type": { @@ -377,19 +378,19 @@ "isPartOfKey": false }, { - "fieldPath": "Water Temp (\u00b0C)", + "fieldPath": "d18O", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "number", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "d18O", + "fieldPath": "dD", "nullable": false, "type": { "type": { @@ -401,7 +402,7 @@ "isPartOfKey": false }, { - "fieldPath": "dD", + "fieldPath": "field29", "nullable": false, "type": { "type": { @@ -413,7 +414,7 @@ "isPartOfKey": false }, { - "fieldPath": "field29", + "fieldPath": "2", "nullable": false, "type": { "type": { @@ -425,7 +426,7 @@ "isPartOfKey": false }, { - "fieldPath": "pH", + "fieldPath": "3", "nullable": false, "type": { "type": { @@ -447,7 +448,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "operation", "aspect": { @@ -458,7 +459,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847660000 + "lastUpdatedTimestamp": 1586847610000 } }, "systemMetadata": { @@ -468,13 +469,20 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + "customProperties": { + "platform": "s3", + "instance": "test-platform-instance", + "env": "DEV", + "bucket_name": "my-test-bucket" + }, + "name": "my-test-bucket", + "env": "DEV" } }, "systemMetadata": { @@ -484,8 +492,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -500,19 +508,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847610000 + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" } }, "systemMetadata": { @@ -522,14 +525,15 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "typeNames": [ + "S3 bucket" + ] } }, "systemMetadata": { @@ -542,17 +546,15 @@ "entityType": "container", "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "instance": "test-platform-instance", - "env": "DEV", - "bucket_name": "my-test-bucket" - }, - "name": "my-test-bucket", - "env": "DEV" + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + } + ] } }, "systemMetadata": { @@ -563,12 +565,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" } }, "systemMetadata": { @@ -579,13 +581,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "containerProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "customProperties": { + "platform": "s3", + "instance": "test-platform-instance", + "env": "DEV", + "folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a", + "env": "DEV" } }, "systemMetadata": { @@ -595,20 +603,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv", - "number_of_files": "1", - "size_in_bytes": "172" - }, - "name": "small.csv", - "description": "", - "tags": [] + "removed": false } }, "systemMetadata": { @@ -619,17 +620,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - } - ] + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" } }, "systemMetadata": { @@ -640,13 +637,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "S3 bucket" + "Folder" ] } }, @@ -657,8 +654,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -671,18 +668,6 @@ { "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" - }, - { - "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" - }, - { - "id": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", - "urn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" - }, - { - "id": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", - "urn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" } ] } @@ -694,146 +679,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "changeType": "UPSERT", - "aspectName": "schemaMetadata", + "aspectName": "container", "aspect": { "json": { - "schemaName": "small.csv", - "platform": "urn:li:dataPlatform:s3", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "1st chord", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "2nd chord", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "3rd chord", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "4th chord", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "Progression Quality", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "s3", - "instance": "test-platform-instance", - "env": "DEV", - "folder_abs_path": "my-test-bucket/folder_a" - }, - "name": "folder_a", - "env": "DEV" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false + "container": "urn:li:container:c8d940d2010edd365619411b385b11e4" } }, "systemMetadata": { @@ -898,24 +750,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", @@ -965,13 +799,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "container": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" } }, "systemMetadata": { @@ -982,21 +815,19 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "containerProperties", "aspect": { "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - }, - { - "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", - "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" - } - ] + "customProperties": { + "platform": "s3", + "instance": "test-platform-instance", + "env": "DEV", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa", + "env": "DEV" } }, "systemMetadata": { @@ -1007,12 +838,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:c8d940d2010edd365619411b385b11e4" + "removed": false } }, "systemMetadata": { @@ -1022,19 +853,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847620000 + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" } }, "systemMetadata": { @@ -1044,8 +870,26 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "entityType": "container", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1066,10 +910,6 @@ { "id": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", "urn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" - }, - { - "id": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", - "urn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" } ] } @@ -1080,22 +920,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", @@ -1114,12 +938,33 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, + { + "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + }, + { + "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" + }, + { + "id": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "urn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" + }, + { + "id": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "urn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + } + ] } }, "systemMetadata": { @@ -1157,6 +1002,7 @@ "number_of_files": "1", "size_in_bytes": "619" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro", "name": "chord_progressions_avro.avro", "description": "", "tags": [] @@ -1170,52 +1016,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "schemaMetadata", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv", - "number_of_files": "1", - "size_in_bytes": "604" - }, - "name": "chord_progressions_csv.csv", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "chord_progressions_csv.csv", + "schemaName": "chord_progressions_avro.avro", "platform": "urn:li:dataPlatform:s3", "version": 0, "created": { @@ -1234,62 +1040,62 @@ }, "fields": [ { - "fieldPath": "1st chord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FirstChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "FirstChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "2nd chord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "3rd chord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "4th chord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "Progression Quality", + "fieldPath": "[version=2.0].[type=Record].[type=string].ProgressionQuality", "nullable": false, "type": { "type": { "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "string", + "nativeDataType": "ProgressionQuality", "recursive": false, "isPartOfKey": false } @@ -1302,6 +1108,44 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847620000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", @@ -1341,12 +1185,53 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv", + "number_of_files": "1", + "size_in_bytes": "604" + }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv", + "name": "chord_progressions_csv.csv", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "chord_progressions_avro.avro", + "schemaName": "chord_progressions_csv.csv", "platform": "urn:li:dataPlatform:s3", "version": 0, "created": { @@ -1365,62 +1250,62 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=Record].[type=long].FirstChord", + "fieldPath": "1st chord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FirstChord", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "2nd chord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "3rd chord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "4th chord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=string].ProgressionQuality", + "fieldPath": "Progression Quality", "nullable": false, "type": { "type": { "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "ProgressionQuality", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -1434,20 +1319,72 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "operation", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "instance": "test-platform-instance", - "env": "DEV", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, - "name": "folder_aaa", - "env": "DEV" + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847630000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, + { + "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + }, + { + "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" + }, + { + "id": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "urn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" + }, + { + "id": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "urn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + } + ] } }, "systemMetadata": { @@ -1485,6 +1422,7 @@ "number_of_files": "1", "size_in_bytes": "4646" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/countries_json.json", "name": "countries_json.json", "description": "", "tags": [] @@ -1522,14 +1460,14 @@ }, "fields": [ { - "fieldPath": "countries", + "fieldPath": "countries.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, @@ -1546,14 +1484,14 @@ "isPartOfKey": false }, { - "fieldPath": "countries.name", + "fieldPath": "countries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false } @@ -1568,44 +1506,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" - }, - { - "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", - "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" - }, - { - "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" - }, - { - "id": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", - "urn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" - }, - { - "id": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", - "urn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", "aspectName": "operation", "aspect": { @@ -1616,7 +1517,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847630000 + "lastUpdatedTimestamp": 1586847640000 } }, "systemMetadata": { @@ -1627,7 +1528,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1680,77 +1581,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847640000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -1766,15 +1597,21 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "datasetProperties", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "customProperties": { + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet", + "number_of_files": "1", + "size_in_bytes": "4206" + }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/food_parquet.parquet", + "name": "food_parquet.parquet", + "description": "", + "tags": [] } }, "systemMetadata": { @@ -1784,28 +1621,89 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "schemaMetadata", "aspect": { "json": { - "path": [ + "schemaName": "food_parquet.parquet", + "platform": "urn:li:dataPlatform:s3", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false }, { - "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", - "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + "fieldPath": "weight", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false }, { - "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", - "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" + "fieldPath": "height", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "int64", + "recursive": false, + "isPartOfKey": false }, { - "id": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", - "urn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" + "fieldPath": "color", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "healthy", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "bool", + "recursive": false, + "isPartOfKey": false } ] } @@ -1818,34 +1716,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847670000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "operation", "aspect": { "json": { - "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847650000 } }, "systemMetadata": { @@ -1856,12 +1738,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" } }, "systemMetadata": { @@ -1872,13 +1754,33 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "browsePathsV2", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + }, + { + "id": "urn:li:container:647eefb4dfda8695baf1aa0775d78689", + "urn": "urn:li:container:647eefb4dfda8695baf1aa0775d78689" + }, + { + "id": "urn:li:container:c8d940d2010edd365619411b385b11e4", + "urn": "urn:li:container:c8d940d2010edd365619411b385b11e4" + }, + { + "id": "urn:li:container:b0037296cdd497e3137aa0628b8687bc", + "urn": "urn:li:container:b0037296cdd497e3137aa0628b8687bc" + }, + { + "id": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9", + "urn": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + } + ] } }, "systemMetadata": { @@ -1889,7 +1791,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -1906,17 +1808,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet", + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv", "number_of_files": "1", - "size_in_bytes": "4206" + "size_in_bytes": "172" }, - "name": "food_parquet.parquet", + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/small.csv", + "name": "small.csv", "description": "", "tags": [] } @@ -1929,12 +1832,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "food_parquet.parquet", + "schemaName": "small.csv", "platform": "urn:li:dataPlatform:s3", "version": 0, "created": { @@ -1953,62 +1856,62 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "1st chord", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "2nd chord", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "height", + "fieldPath": "3rd chord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "4th chord", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "Progression Quality", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -2023,19 +1926,18 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "operation", "aspect": { "json": { - "customProperties": { - "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv", - "number_of_files": "1", - "size_in_bytes": "34056" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, - "name": "wa_fn_usec_hr_employee_attrition_csv.csv", - "description": "", - "tags": [] + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847660000 } }, "systemMetadata": { @@ -2046,7 +1948,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -2081,6 +1999,47 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:s3", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema_inferred_from": "s3://my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv", + "number_of_files": "1", + "size_in_bytes": "34056" + }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv", + "name": "wa_fn_usec_hr_employee_attrition_csv.csv", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", @@ -2489,7 +2448,45 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847670000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -2526,18 +2523,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "status", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847650000 + "removed": false } }, "systemMetadata": { @@ -2548,12 +2539,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:de5780654849d6a18b66df2f9cb8e8d9" + "removed": false } }, "systemMetadata": { @@ -2564,7 +2555,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2596,7 +2603,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2612,7 +2619,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2694,7 +2701,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "PATCH", "aspectName": "datasetProperties", "aspect": { @@ -2703,7 +2710,7 @@ "op": "add", "path": "/lastModified", "value": { - "time": 1586847670000 + "time": 1586847640000 } } ] @@ -2716,7 +2723,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "PATCH", "aspectName": "datasetProperties", "aspect": { @@ -2725,7 +2732,7 @@ "op": "add", "path": "/lastModified", "value": { - "time": 1586847660000 + "time": 1586847650000 } } ] @@ -2738,7 +2745,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "PATCH", "aspectName": "datasetProperties", "aspect": { @@ -2747,7 +2754,7 @@ "op": "add", "path": "/lastModified", "value": { - "time": 1586847640000 + "time": 1586847660000 } } ] @@ -2760,7 +2767,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "PATCH", "aspectName": "datasetProperties", "aspect": { @@ -2769,7 +2776,7 @@ "op": "add", "path": "/lastModified", "value": { - "time": 1586847650000 + "time": 1586847670000 } } ] diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json index 7f657cb69180a..506e599521990 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json @@ -11,6 +11,7 @@ "number_of_files": "1", "size_in_bytes": "619" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro", "name": "chord_progressions_avro.avro", "description": "", "tags": [] @@ -60,38 +61,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -117,19 +118,20 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "globalTags", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa", - "env": "PROD" + "tags": [ + { + "tag": "urn:li:tag:baz:bob" + }, + { + "tag": "urn:li:tag:foo:bar" + } + ] } }, "systemMetadata": { @@ -139,13 +141,19 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "operation", "aspect": { "json": { - "container": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847620000 } }, "systemMetadata": { @@ -155,20 +163,19 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "containerProperties", "aspect": { "json": { - "tags": [ - { - "tag": "urn:li:tag:baz:bob" - }, - { - "tag": "urn:li:tag:foo:bar" - } - ] + "customProperties": { + "platform": "s3", + "env": "PROD", + "bucket_name": "my-test-bucket" + }, + "name": "my-test-bucket", + "env": "PROD" } }, "systemMetadata": { @@ -178,19 +185,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "status", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847620000 + "removed": false } }, "systemMetadata": { @@ -201,12 +202,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -217,13 +218,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Folder" + "S3 bucket" ] } }, @@ -237,16 +238,10 @@ "entityType": "container", "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "bucket_name": "my-test-bucket" - }, - "name": "my-test-bucket", - "env": "PROD" + "path": [] } }, "systemMetadata": { @@ -259,16 +254,10 @@ "entityType": "container", "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "folder_abs_path": "my-test-bucket/folder_a" - }, - "name": "folder_a", - "env": "PROD" + "container": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" } }, "systemMetadata": { @@ -281,10 +270,16 @@ "entityType": "container", "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "platform": "s3", + "env": "PROD", + "folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a", + "env": "PROD" } }, "systemMetadata": { @@ -297,10 +292,10 @@ "entityType": "container", "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { @@ -311,12 +306,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -366,12 +361,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "container": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" } }, "systemMetadata": { @@ -384,10 +379,16 @@ "entityType": "container", "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "containerProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "customProperties": { + "platform": "s3", + "env": "PROD", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa", + "env": "PROD" } }, "systemMetadata": { @@ -398,12 +399,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" + "removed": false } }, "systemMetadata": { @@ -414,12 +415,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -430,13 +431,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "S3 bucket" + "Folder" ] } }, @@ -475,16 +476,10 @@ "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa", - "env": "PROD" + "container": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" } }, "systemMetadata": { @@ -497,10 +492,16 @@ "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" + "customProperties": { + "platform": "s3", + "env": "PROD", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa", + "env": "PROD" } }, "systemMetadata": { @@ -529,12 +530,10 @@ "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -547,10 +546,12 @@ "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -608,10 +609,27 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "urn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" + }, + { + "id": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", + "urn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" + }, + { + "id": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "urn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" + }, + { + "id": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", + "urn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0" + } + ] } }, "systemMetadata": { @@ -632,6 +650,7 @@ "number_of_files": "1", "size_in_bytes": "604" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv", "name": "chord_progressions_csv.csv", "description": "", "tags": [] @@ -739,27 +758,17 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "globalTags", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", - "urn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" - }, - { - "id": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", - "urn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" - }, + "tags": [ { - "id": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", - "urn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" + "tag": "urn:li:tag:baz:bob" }, { - "id": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", - "urn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0" + "tag": "urn:li:tag:foo:bar" } ] } @@ -770,37 +779,20 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:baz:bob", - "changeType": "UPSERT", - "aspectName": "tagKey", - "aspect": { - "json": { - "name": "baz:bob" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_spec_for_files.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "operation", "aspect": { "json": { - "tags": [ - { - "tag": "urn:li:tag:baz:bob" - }, - { - "tag": "urn:li:tag:foo:bar" - } - ] + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847630000 } }, "systemMetadata": { @@ -813,16 +805,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "container", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847630000 + "container": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0" } }, "systemMetadata": { @@ -866,12 +852,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0" + "removed": false } }, "systemMetadata": { @@ -896,6 +882,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:baz:bob", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "baz:bob" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:foo:bar", diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json index 6e2e966f1f7b4..eab9f0b5d546b 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json @@ -11,6 +11,7 @@ "number_of_files": "1", "size_in_bytes": "619" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro", "name": "chord_progressions_avro.avro", "description": "", "tags": [] @@ -60,38 +61,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -117,19 +118,20 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "globalTags", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa", - "env": "PROD" + "tags": [ + { + "tag": "urn:li:tag:baz:bob" + }, + { + "tag": "urn:li:tag:foo:bar" + } + ] } }, "systemMetadata": { @@ -139,13 +141,19 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "operation", "aspect": { "json": { - "container": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847620000 } }, "systemMetadata": { @@ -155,20 +163,19 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "containerProperties", "aspect": { "json": { - "tags": [ - { - "tag": "urn:li:tag:baz:bob" - }, - { - "tag": "urn:li:tag:foo:bar" - } - ] + "customProperties": { + "platform": "s3", + "env": "PROD", + "bucket_name": "my-test-bucket" + }, + "name": "my-test-bucket", + "env": "PROD" } }, "systemMetadata": { @@ -178,19 +185,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "status", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847620000 + "removed": false } }, "systemMetadata": { @@ -201,12 +202,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -217,13 +218,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Folder" + "S3 bucket" ] } }, @@ -237,16 +238,10 @@ "entityType": "container", "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "bucket_name": "my-test-bucket" - }, - "name": "my-test-bucket", - "env": "PROD" + "path": [] } }, "systemMetadata": { @@ -259,16 +254,10 @@ "entityType": "container", "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "folder_abs_path": "my-test-bucket/folder_a" - }, - "name": "folder_a", - "env": "PROD" + "container": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" } }, "systemMetadata": { @@ -281,10 +270,16 @@ "entityType": "container", "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "platform": "s3", + "env": "PROD", + "folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a", + "env": "PROD" } }, "systemMetadata": { @@ -297,10 +292,10 @@ "entityType": "container", "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { @@ -311,12 +306,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -366,12 +361,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "container": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" } }, "systemMetadata": { @@ -384,10 +379,16 @@ "entityType": "container", "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "containerProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "customProperties": { + "platform": "s3", + "env": "PROD", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa", + "env": "PROD" } }, "systemMetadata": { @@ -398,12 +399,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" + "removed": false } }, "systemMetadata": { @@ -414,12 +415,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -430,13 +431,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "S3 bucket" + "Folder" ] } }, @@ -475,16 +476,10 @@ "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa", - "env": "PROD" + "container": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" } }, "systemMetadata": { @@ -497,10 +492,16 @@ "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" + "customProperties": { + "platform": "s3", + "env": "PROD", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa", + "env": "PROD" } }, "systemMetadata": { @@ -529,12 +530,10 @@ "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -547,10 +546,12 @@ "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -608,10 +609,27 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "urn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" + }, + { + "id": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", + "urn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" + }, + { + "id": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "urn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" + }, + { + "id": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", + "urn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0" + } + ] } }, "systemMetadata": { @@ -632,6 +650,7 @@ "number_of_files": "1", "size_in_bytes": "604" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket-2?prefix=folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv", "name": "chord_progressions_csv.csv", "description": "", "tags": [] @@ -739,27 +758,17 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket-2/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "globalTags", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", - "urn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" - }, - { - "id": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", - "urn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" - }, + "tags": [ { - "id": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", - "urn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" + "tag": "urn:li:tag:baz:bob" }, { - "id": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", - "urn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0" + "tag": "urn:li:tag:foo:bar" } ] } @@ -770,6 +779,28 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket-2/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847630000 + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", @@ -793,20 +824,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket-2/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "status", "aspect": { "json": { - "tags": [ - { - "tag": "urn:li:tag:baz:bob" - }, - { - "tag": "urn:li:tag:foo:bar" - } - ] + "removed": false } }, "systemMetadata": { @@ -816,19 +840,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket-2/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847630000 + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -841,10 +859,12 @@ "entityType": "container", "entityUrn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "S3 bucket" + ] } }, "systemMetadata": { @@ -857,12 +877,10 @@ "entityType": "container", "entityUrn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "S3 bucket" - ] + "path": [] } }, "systemMetadata": { @@ -873,12 +891,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", + "entityUrn": "urn:li:container:f6d8484efac8152d10620c6c0699d02d", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "container": "urn:li:container:680e54d5e3a7705caa1d99893fab4924" } }, "systemMetadata": { @@ -943,12 +961,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", + "entityUrn": "urn:li:container:f6d8484efac8152d10620c6c0699d02d", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": { - "path": [] + "typeNames": [ + "Folder" + ] } }, "systemMetadata": { @@ -961,11 +981,14 @@ "entityType": "container", "entityUrn": "urn:li:container:f6d8484efac8152d10620c6c0699d02d", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "Folder" + "path": [ + { + "id": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", + "urn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924" + } ] } }, @@ -977,17 +1000,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:f6d8484efac8152d10620c6c0699d02d", + "entityUrn": "urn:li:container:6e8c28494477b4a90cf5fd395217bae0", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:680e54d5e3a7705caa1d99893fab4924", - "urn": "urn:li:container:680e54d5e3a7705caa1d99893fab4924" - } - ] + "container": "urn:li:container:f6d8484efac8152d10620c6c0699d02d" } }, "systemMetadata": { @@ -1050,22 +1068,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:baz:bob", - "changeType": "UPSERT", - "aspectName": "tagKey", - "aspect": { - "json": { - "name": "baz:bob" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:6e8c28494477b4a90cf5fd395217bae0", @@ -1113,32 +1115,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ce2eca2107ef4c0b47a8f4a65eff971c", "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "folder_abs_path": "my-test-bucket-2/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa", - "env": "PROD" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6d8484efac8152d10620c6c0699d02d", - "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:680e54d5e3a7705caa1d99893fab4924" + "container": "urn:li:container:6e8c28494477b4a90cf5fd395217bae0" } }, "systemMetadata": { @@ -1149,12 +1129,18 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:6e8c28494477b4a90cf5fd395217bae0", + "entityUrn": "urn:li:container:ce2eca2107ef4c0b47a8f4a65eff971c", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:f6d8484efac8152d10620c6c0699d02d" + "customProperties": { + "platform": "s3", + "env": "PROD", + "folder_abs_path": "my-test-bucket-2/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa", + "env": "PROD" } }, "systemMetadata": { @@ -1167,10 +1153,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ce2eca2107ef4c0b47a8f4a65eff971c", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:6e8c28494477b4a90cf5fd395217bae0" + "removed": false } }, "systemMetadata": { @@ -1183,10 +1169,10 @@ "entityType": "container", "entityUrn": "urn:li:container:ce2eca2107ef4c0b47a8f4a65eff971c", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -1213,22 +1199,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:ce2eca2107ef4c0b47a8f4a65eff971c", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_specs_of_different_buckets.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:ce2eca2107ef4c0b47a8f4a65eff971c", @@ -1323,6 +1293,38 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:baz:bob", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "baz:bob" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:foo:bar", diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json index be3d2efed088e..e9c826bc1b6b8 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json @@ -11,6 +11,7 @@ "number_of_files": "1", "size_in_bytes": "619" }, + "externalUrl": "https://us-east-1.console.aws.amazon.com/s3/buckets/my-test-bucket?prefix=folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro", "name": "chord_progressions_avro.avro", "description": "", "tags": [] @@ -60,38 +61,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -117,19 +118,20 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "globalTags", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa" - }, - "name": "folder_aa", - "env": "PROD" + "tags": [ + { + "tag": "urn:li:tag:baz:bob" + }, + { + "tag": "urn:li:tag:foo:bar" + } + ] } }, "systemMetadata": { @@ -139,13 +141,19 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "operation", "aspect": { "json": { - "container": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" + "timestampMillis": 1615443388097, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "operationType": "UPDATE", + "lastUpdatedTimestamp": 1586847620000 } }, "systemMetadata": { @@ -155,20 +163,19 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "globalTags", + "aspectName": "containerProperties", "aspect": { "json": { - "tags": [ - { - "tag": "urn:li:tag:baz:bob" - }, - { - "tag": "urn:li:tag:foo:bar" - } - ] + "customProperties": { + "platform": "s3", + "env": "PROD", + "bucket_name": "my-test-bucket" + }, + "name": "my-test-bucket", + "env": "PROD" } }, "systemMetadata": { @@ -178,19 +185,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "status", "aspect": { "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586847620000 + "removed": false } }, "systemMetadata": { @@ -201,12 +202,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -217,13 +218,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", + "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Folder" + "S3 bucket" ] } }, @@ -237,16 +238,10 @@ "entityType": "container", "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "bucket_name": "my-test-bucket" - }, - "name": "my-test-bucket", - "env": "PROD" + "path": [] } }, "systemMetadata": { @@ -259,16 +254,10 @@ "entityType": "container", "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "folder_abs_path": "my-test-bucket/folder_a" - }, - "name": "folder_a", - "env": "PROD" + "container": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" } }, "systemMetadata": { @@ -281,10 +270,16 @@ "entityType": "container", "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "platform": "s3", + "env": "PROD", + "folder_abs_path": "my-test-bucket/folder_a" + }, + "name": "folder_a", + "env": "PROD" } }, "systemMetadata": { @@ -297,10 +292,10 @@ "entityType": "container", "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "removed": false } }, "systemMetadata": { @@ -311,12 +306,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -366,12 +361,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "container", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "container": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c" } }, "systemMetadata": { @@ -384,10 +379,16 @@ "entityType": "container", "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "containerProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "customProperties": { + "platform": "s3", + "env": "PROD", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa" + }, + "name": "folder_aa", + "env": "PROD" } }, "systemMetadata": { @@ -398,12 +399,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a8aa32e8169b2ecc7ab4f3389c79124c", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "status", "aspect": { "json": { - "container": "urn:li:container:2151647ff17bde0f948909d19fa91b9b" + "removed": false } }, "systemMetadata": { @@ -414,12 +415,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "path": [] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -430,13 +431,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:2151647ff17bde0f948909d19fa91b9b", + "entityUrn": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "S3 bucket" + "Folder" ] } }, @@ -475,16 +476,10 @@ "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "container", "aspect": { "json": { - "customProperties": { - "platform": "s3", - "env": "PROD", - "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa", - "env": "PROD" + "container": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" } }, "systemMetadata": { @@ -497,10 +492,16 @@ "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "containerProperties", "aspect": { "json": { - "container": "urn:li:container:4f62b9a3e6794ee2cd4160bc0bbd8e15" + "customProperties": { + "platform": "s3", + "env": "PROD", + "folder_abs_path": "my-test-bucket/folder_a/folder_aa/folder_aaa" + }, + "name": "folder_aaa", + "env": "PROD" } }, "systemMetadata": { @@ -529,12 +530,10 @@ "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Folder" - ] + "platform": "urn:li:dataPlatform:s3" } }, "systemMetadata": { @@ -547,10 +546,12 @@ "entityType": "container", "entityUrn": "urn:li:container:5abb7acbb8783b9e2d266c15bf7cebc0", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "subTypes", "aspect": { "json": { - "platform": "urn:li:dataPlatform:s3" + "typeNames": [ + "Folder" + ] } }, "systemMetadata": {