From cc3782ecfd9f9547afec8ea3555bd54b777c0b58 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Tue, 4 Mar 2025 18:03:03 -0800 Subject: [PATCH 1/5] feat(models): adds subtypes to most entities in the model (#12783) --- .../src/main/resources/entity-registry.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 493b17c7c3d861..ea9939ad6b4078 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -110,6 +110,7 @@ entities: - ownership - status - testResults + - subTypes - name: dataProcessInstance doc: DataProcessInstance represents an instance of a datajob/jobflow run keyAspect: dataProcessInstanceKey @@ -211,6 +212,7 @@ entities: - structuredProperties - forms - testResults + - subTypes - name: corpGroup doc: CorpGroup represents an identity of a group of users in the enterprise. keyAspect: corpGroupKey @@ -225,6 +227,7 @@ entities: - structuredProperties - forms - testResults + - subTypes - name: domain doc: A data domain within an organization. category: core @@ -284,6 +287,7 @@ entities: - structuredProperties - forms - testResults + - subTypes - name: glossaryNode category: core keyAspect: glossaryNodeKey @@ -295,6 +299,7 @@ entities: - structuredProperties - forms - testResults + - subTypes - name: dataHubIngestionSource category: internal keyAspect: dataHubIngestionSourceKey @@ -369,6 +374,7 @@ entities: - forms - testResults - versionProperties + - subTypes - name: mlModelGroup category: core keyAspect: mlModelGroupKey @@ -387,6 +393,7 @@ entities: - structuredProperties - forms - testResults + - subTypes - name: mlModelDeployment category: core keyAspect: mlModelDeploymentKey @@ -417,6 +424,7 @@ entities: - structuredProperties - forms - testResults + - subTypes - name: mlFeature category: core keyAspect: mlFeatureKey @@ -436,6 +444,7 @@ entities: - structuredProperties - forms - testResults + - subTypes - name: mlPrimaryKey category: core keyAspect: mlPrimaryKeyKey @@ -453,6 +462,7 @@ entities: - structuredProperties - forms - testResults + - subTypes - name: telemetry category: internal keyAspect: telemetryKey @@ -493,6 +503,7 @@ entities: - documentation - testResults - deprecation + - subTypes - name: globalSettings doc: Global settings for an the platform category: internal @@ -521,6 +532,7 @@ entities: keyAspect: postKey aspects: - postInfo + - subTypes - name: dataHubStepState category: internal keyAspect: dataHubStepStateKey @@ -566,6 +578,7 @@ entities: - structuredProperties - forms - testResults + - subTypes - name: ownershipType doc: Ownership Type represents a user-created ownership category for a person or group who is responsible for an asset. category: core From aed2433c4cad26d5b5a5ae01c3dd0f1c382dd1d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Wed, 5 Mar 2025 12:41:03 +0100 Subject: [PATCH 2/5] fix: fixes mypy complaints about pkgresources (#12790) --- metadata-ingestion/pyproject.toml | 2 +- metadata-ingestion/setup.py | 1 - metadata-ingestion/tests/unit/test_packaging.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/pyproject.toml b/metadata-ingestion/pyproject.toml index f30060c5ccdbdf..3d44767e176379 100644 --- a/metadata-ingestion/pyproject.toml +++ b/metadata-ingestion/pyproject.toml @@ -1,6 +1,6 @@ [build-system] build-backend = "setuptools.build_meta" -requires = ["setuptools>=63.0.0", "wheel"] +requires = ["setuptools >= 71.1", "wheel"] [tool.ruff.lint.isort] section-order = ["future", "patch", "standard-library", "third-party", "first-party", "local-folder"] diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index d55960aa1750c2..fbeba1e510b645 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -555,7 +555,6 @@ mypy_stubs = { "types-dataclasses", - "types-setuptools", "types-six", "types-python-dateutil", # We need to avoid 2.31.0.5 and 2.31.0.4 due to diff --git a/metadata-ingestion/tests/unit/test_packaging.py b/metadata-ingestion/tests/unit/test_packaging.py index f9a3ae9562d3eb..56e877a03deaee 100644 --- a/metadata-ingestion/tests/unit/test_packaging.py +++ b/metadata-ingestion/tests/unit/test_packaging.py @@ -8,6 +8,6 @@ ) def test_datahub_version(): # Simply importing pkg_resources checks for unsatisfied dependencies. - import pkg_resources + import pkg_resources # type: ignore[import-untyped] assert pkg_resources.get_distribution(datahub_version.__package_name__).version From a0319af7db02d2eb498ad3fafc1ffa13bb125e55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Wed, 5 Mar 2025 14:25:20 +0100 Subject: [PATCH 3/5] fix(ingestion): fixes producing some URNs with reserved characters (#12772) --- metadata-ingestion/scripts/avro_codegen.py | 30 +++++++ .../src/datahub/emitter/mce_builder.py | 41 ++++++--- .../src/datahub/ingestion/source/metabase.py | 6 +- .../src/datahub/ingestion/source/mode.py | 2 +- .../tests/unit/sdk/test_mce_builder.py | 90 +++++++++++++++++-- 5 files changed, 146 insertions(+), 23 deletions(-) diff --git a/metadata-ingestion/scripts/avro_codegen.py b/metadata-ingestion/scripts/avro_codegen.py index c895e5fabfd37b..c7e4318346f1d1 100644 --- a/metadata-ingestion/scripts/avro_codegen.py +++ b/metadata-ingestion/scripts/avro_codegen.py @@ -526,6 +526,36 @@ def get_notebook_id(self) -> str: """ ], "tag": [_create_from_id.format(class_name="TagUrn")], + "chart": [ + """ +@classmethod +def create_from_ids( + cls, + platform: str, + name: str, + platform_instance: Optional[str] = None, +) -> "ChartUrn": + return ChartUrn( + dashboard_tool=platform, + chart_id=f"{platform_instance}.{name}" if platform_instance else name, + ) + """ + ], + "dashboard": [ + """ +@classmethod +def create_from_ids( + cls, + platform: str, + name: str, + platform_instance: Optional[str] = None, +) -> "DashboardUrn": + return DashboardUrn( + dashboard_tool=platform, + dashboard_id=f"{platform_instance}.{name}" if platform_instance else name, + ) + """ + ], } diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index 9fa060266a7ab8..7d6f0bdcd84759 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -52,7 +52,15 @@ UpstreamLineageClass, _Aspect as AspectAbstract, ) -from datahub.metadata.urns import DataFlowUrn, DatasetUrn, TagUrn +from datahub.metadata.urns import ( + ChartUrn, + DashboardUrn, + DataFlowUrn, + DataJobUrn, + DataPlatformUrn, + DatasetUrn, + TagUrn, +) from datahub.utilities.urn_encoder import UrnEncoder logger = logging.getLogger(__name__) @@ -119,7 +127,7 @@ def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]: def make_data_platform_urn(platform: str) -> str: if platform.startswith("urn:li:dataPlatform:"): return platform - return f"urn:li:dataPlatform:{platform}" + return DataPlatformUrn.create_from_id(platform).urn() def make_dataset_urn(platform: str, name: str, env: str = DEFAULT_ENV) -> str: @@ -236,7 +244,7 @@ def make_user_urn(username: str) -> str: Makes a user urn if the input is not a user or group urn already """ return ( - f"urn:li:corpuser:{username}" + f"urn:li:corpuser:{UrnEncoder.encode_string(username)}" if not username.startswith(("urn:li:corpuser:", "urn:li:corpGroup:")) else username ) @@ -249,7 +257,7 @@ def make_group_urn(groupname: str) -> str: if groupname and groupname.startswith(("urn:li:corpGroup:", "urn:li:corpuser:")): return groupname else: - return f"urn:li:corpGroup:{groupname}" + return f"urn:li:corpGroup:{UrnEncoder.encode_string(groupname)}" def make_tag_urn(tag: str) -> str: @@ -301,7 +309,12 @@ def make_data_flow_urn( def make_data_job_urn_with_flow(flow_urn: str, job_id: str) -> str: - return f"urn:li:dataJob:({flow_urn},{job_id})" + data_flow_urn = DataFlowUrn.from_string(flow_urn) + data_job_urn = DataJobUrn.create_from_ids( + data_flow_urn=data_flow_urn.urn(), + job_id=job_id, + ) + return data_job_urn.urn() def make_data_process_instance_urn(dataProcessInstanceId: str) -> str: @@ -324,10 +337,11 @@ def make_dashboard_urn( platform: str, name: str, platform_instance: Optional[str] = None ) -> str: # FIXME: dashboards don't currently include data platform urn prefixes. - if platform_instance: - return f"urn:li:dashboard:({platform},{platform_instance}.{name})" - else: - return f"urn:li:dashboard:({platform},{name})" + return DashboardUrn.create_from_ids( + platform=platform, + name=name, + platform_instance=platform_instance, + ).urn() def dashboard_urn_to_key(dashboard_urn: str) -> Optional[DashboardKeyClass]: @@ -342,10 +356,11 @@ def make_chart_urn( platform: str, name: str, platform_instance: Optional[str] = None ) -> str: # FIXME: charts don't currently include data platform urn prefixes. - if platform_instance: - return f"urn:li:chart:({platform},{platform_instance}.{name})" - else: - return f"urn:li:chart:({platform},{name})" + return ChartUrn.create_from_ids( + platform=platform, + name=name, + platform_instance=platform_instance, + ).urn() def chart_urn_to_key(chart_urn: str) -> Optional[ChartKeyClass]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index 01396c8bba8209..2b0ea4c1f51f62 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -313,7 +313,7 @@ def construct_dashboard_from_api_data( return None dashboard_urn = builder.make_dashboard_urn( - self.platform, dashboard_details.get("id", "") + self.platform, str(dashboard_details.get("id", "")) ) dashboard_snapshot = DashboardSnapshot( urn=dashboard_urn, @@ -337,7 +337,7 @@ def construct_dashboard_from_api_data( card_id = card_info.get("card").get("id", "") if not card_id: continue # most likely a virtual card without an id (text or heading), not relevant. - chart_urn = builder.make_chart_urn(self.platform, card_id) + chart_urn = builder.make_chart_urn(self.platform, str(card_id)) chart_urns.append(chart_urn) dashboard_info_class = DashboardInfoClass( @@ -459,7 +459,7 @@ def construct_card_from_api_data(self, card_data: dict) -> Optional[ChartSnapsho ) return None - chart_urn = builder.make_chart_urn(self.platform, card_id) + chart_urn = builder.make_chart_urn(self.platform, str(card_id)) chart_snapshot = ChartSnapshot( urn=chart_urn, aspects=[], diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 2e7a77eae4e2a6..dd62144a8fa9d4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -377,7 +377,7 @@ def _browse_path_chart( ] def _dashboard_urn(self, report_info: dict) -> str: - return builder.make_dashboard_urn(self.platform, report_info.get("id", "")) + return builder.make_dashboard_urn(self.platform, str(report_info.get("id", ""))) def _parse_last_run_at(self, report_info: dict) -> Optional[int]: # Mode queries are refreshed, and that timestamp is reflected correctly here. diff --git a/metadata-ingestion/tests/unit/sdk/test_mce_builder.py b/metadata-ingestion/tests/unit/sdk/test_mce_builder.py index 3bdbf07bf28b7d..9e1e78d17efac2 100644 --- a/metadata-ingestion/tests/unit/sdk/test_mce_builder.py +++ b/metadata-ingestion/tests/unit/sdk/test_mce_builder.py @@ -8,6 +8,9 @@ MetadataChangeEventClass, OwnershipClass, ) +from datahub.utilities.urn_encoder import RESERVED_CHARS_EXTENDED + +_RESERVED_CHARS_STRING = "".join(sorted(list(RESERVED_CHARS_EXTENDED))) def test_can_add_aspect(): @@ -25,15 +28,90 @@ def test_can_add_aspect(): assert not builder.can_add_aspect(dataset_mce, DataFlowInfoClass) -def test_create_dataset_urn_with_reserved_chars() -> None: +def test_create_urns_with_reserved_chars() -> None: + assert ( + builder.make_dataset_urn( + platform=f"platform){_RESERVED_CHARS_STRING}", + name=f"table{_RESERVED_CHARS_STRING}", + env=builder.DEFAULT_ENV, + ) + == "urn:li:dataset:(urn:li:dataPlatform:platform%29%%28%29%2C%E2%90%9F,table%%28%29%2C%E2%90%9F,PROD)" + ) assert ( builder.make_dataset_urn_with_platform_instance( - "platform)", - "table_(name)", - "platform,instance", - builder.DEFAULT_ENV, + platform=f"platform){_RESERVED_CHARS_STRING}", + name=f"table{_RESERVED_CHARS_STRING}", + platform_instance=f"platform-instance{_RESERVED_CHARS_STRING}", + env=builder.DEFAULT_ENV, + ) + == "urn:li:dataset:(urn:li:dataPlatform:platform%29%%28%29%2C%E2%90%9F,platform-instance%%28%29%2C%E2%90%9F.table%%28%29%2C%E2%90%9F,PROD)" + ) + assert ( + builder.make_data_platform_urn( + f"platform{_RESERVED_CHARS_STRING}", + ) + == "urn:li:dataPlatform:platform%%28%29%2C%E2%90%9F" + ) + assert ( + builder.make_data_flow_urn( + orchestrator=f"orchestrator{_RESERVED_CHARS_STRING}", + flow_id=f"flowid{_RESERVED_CHARS_STRING}", + cluster=f"cluster{_RESERVED_CHARS_STRING}", + platform_instance=f"platform{_RESERVED_CHARS_STRING}", + ) + == "urn:li:dataFlow:(orchestrator%%28%29%2C%E2%90%9F,platform%%28%29%2C%E2%90%9F.flowid%%28%29%2C%E2%90%9F,cluster%%28%29%2C%E2%90%9F)" + ) + assert ( + builder.make_data_job_urn( + orchestrator=f"orchestrator{_RESERVED_CHARS_STRING}", + flow_id=f"flowid{_RESERVED_CHARS_STRING}", + cluster=f"cluster{_RESERVED_CHARS_STRING}", + platform_instance=f"platform{_RESERVED_CHARS_STRING}", + job_id=f"job_name{_RESERVED_CHARS_STRING}", + ) + == "urn:li:dataJob:(urn:li:dataFlow:(orchestrator%%28%29%2C%E2%90%9F,platform%%28%29%2C%E2%90%9F.flowid%%28%29%2C%E2%90%9F,cluster%%28%29%2C%E2%90%9F),job_name%%28%29%2C%E2%90%9F)" + ) + assert ( + builder.make_user_urn( + username=f"user{_RESERVED_CHARS_STRING}", + ) + == "urn:li:corpuser:user%%28%29%2C%E2%90%9F" + ) + assert ( + builder.make_group_urn( + groupname=f"group{_RESERVED_CHARS_STRING}", + ) + == "urn:li:corpGroup:group%%28%29%2C%E2%90%9F" + ) + assert ( + builder.make_dashboard_urn( + platform=f"platform{_RESERVED_CHARS_STRING}", + name=f"dashboard{_RESERVED_CHARS_STRING}", + platform_instance=f"platform-instance{_RESERVED_CHARS_STRING}", + ) + == "urn:li:dashboard:(platform%%28%29%2C%E2%90%9F,platform-instance%%28%29%2C%E2%90%9F.dashboard%%28%29%2C%E2%90%9F)" + ) + assert ( + builder.make_dashboard_urn( + platform=f"platform{_RESERVED_CHARS_STRING}", + name=f"dashboard{_RESERVED_CHARS_STRING}", + ) + == "urn:li:dashboard:(platform%%28%29%2C%E2%90%9F,dashboard%%28%29%2C%E2%90%9F)" + ) + assert ( + builder.make_chart_urn( + platform=f"platform{_RESERVED_CHARS_STRING}", + name=f"dashboard{_RESERVED_CHARS_STRING}", + platform_instance=f"platform-instance{_RESERVED_CHARS_STRING}", + ) + == "urn:li:chart:(platform%%28%29%2C%E2%90%9F,platform-instance%%28%29%2C%E2%90%9F.dashboard%%28%29%2C%E2%90%9F)" + ) + assert ( + builder.make_chart_urn( + platform=f"platform{_RESERVED_CHARS_STRING}", + name=f"dashboard{_RESERVED_CHARS_STRING}", ) - == "urn:li:dataset:(urn:li:dataPlatform:platform%29,platform%2Cinstance.table_%28name%29,PROD)" + == "urn:li:chart:(platform%%28%29%2C%E2%90%9F,dashboard%%28%29%2C%E2%90%9F)" ) From 85d3a9d31d43f08b3bb4f420efbbc290e6dc778f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Wed, 5 Mar 2025 15:53:01 +0100 Subject: [PATCH 4/5] feat(okta): custom properties for okta user (#12773) --- .../datahub/ingestion/source/identity/okta.py | 22 ++ ...ta_mces_golden_custom_user_name_regex.json | 66 ++++-- .../okta/okta_mces_golden_default_config.json | 134 ++++++----- ...include_deprovisioned_suspended_users.json | 220 +++++++++++------- .../okta_mces_golden_ingest_groups_users.json | 16 +- 5 files changed, 301 insertions(+), 157 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py index 42a79b8694883c..d0c84d90d6e523 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py +++ b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py @@ -666,6 +666,27 @@ def _map_okta_user_profile_to_username( self.config.okta_profile_to_username_regex, ) + def _map_okta_user_profile_custom_properties( + self, profile: UserProfile + ) -> Dict[str, str]: + # filter out the common fields that are already mapped to the CorpUserInfo aspect and the private ones + return { + k: str(v) + for k, v in profile.__dict__.items() + if v + and k + not in [ + "displayName", + "firstName", + "lastName", + "email", + "title", + "countryCode", + "department", + ] + and not k.startswith("_") + } + # Converts Okta User Profile into a CorpUserInfo. def _map_okta_user_profile(self, profile: UserProfile) -> CorpUserInfoClass: # TODO: Extract user's manager if provided. @@ -683,6 +704,7 @@ def _map_okta_user_profile(self, profile: UserProfile) -> CorpUserInfoClass: title=profile.title, countryCode=profile.countryCode, departmentName=profile.department, + customProperties=self._map_okta_user_profile_custom_properties(profile), ) def _make_corp_group_urn(self, name: str) -> str: diff --git a/metadata-ingestion/tests/integration/okta/okta_mces_golden_custom_user_name_regex.json b/metadata-ingestion/tests/integration/okta/okta_mces_golden_custom_user_name_regex.json index 784d79ff5b31c9..a731b2957a4391 100644 --- a/metadata-ingestion/tests/integration/okta/okta_mces_golden_custom_user_name_regex.json +++ b/metadata-ingestion/tests/integration/okta/okta_mces_golden_custom_user_name_regex.json @@ -18,7 +18,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -34,7 +35,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -49,7 +51,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -71,7 +74,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -87,7 +91,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -102,7 +107,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -112,12 +118,17 @@ "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "customProperties": { + "login": "john.doe@test.com", + "mobilePhone": "555-415-1337" + }, "active": true, "displayName": "JDoe", "email": "john.doe@test.com", "firstName": "John", "lastName": "Doe", - "fullName": "John Doe" + "fullName": "John Doe", + "system": false } }, { @@ -133,7 +144,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -149,7 +161,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -164,7 +177,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -174,6 +188,10 @@ "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "customProperties": { + "login": "mary.jane@test.com", + "mobilePhone": "666-415-1337" + }, "active": true, "displayName": "Mary Jane", "email": "mary.jane@test.com", @@ -182,7 +200,8 @@ "firstName": "Mary", "lastName": "Jane", "fullName": "Mary Jane", - "countryCode": "us" + "countryCode": "us", + "system": false } }, { @@ -200,7 +219,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -216,7 +236,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -231,7 +252,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -241,6 +263,10 @@ "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "customProperties": { + "login": "good.test@test.com", + "mobilePhone": "666-415-1337" + }, "active": true, "displayName": "Good Test", "email": "good.test@test.com", @@ -249,7 +275,8 @@ "firstName": "Good", "lastName": "Test", "fullName": "Good Test", - "countryCode": "eu" + "countryCode": "eu", + "system": false } }, { @@ -262,7 +289,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -278,7 +306,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -293,7 +322,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/okta/okta_mces_golden_default_config.json b/metadata-ingestion/tests/integration/okta/okta_mces_golden_default_config.json index d871550689c5be..ba161d8a2e7932 100644 --- a/metadata-ingestion/tests/integration/okta/okta_mces_golden_default_config.json +++ b/metadata-ingestion/tests/integration/okta/okta_mces_golden_default_config.json @@ -18,7 +18,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -34,7 +35,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -49,7 +51,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -71,7 +74,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -87,7 +91,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -102,28 +107,39 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { - "urn": "urn:li:corpuser:john.doe", + "urn": "urn:li:corpuser:mary.jane", "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "customProperties": { + "login": "mary.jane@test.com", + "mobilePhone": "666-415-1337" + }, "active": true, - "displayName": "JDoe", - "email": "john.doe@test.com", - "firstName": "John", - "lastName": "Doe", - "fullName": "John Doe" + "displayName": "Mary Jane", + "email": "mary.jane@test.com", + "title": "Software Engineer", + "departmentName": "Engineering", + "firstName": "Mary", + "lastName": "Jane", + "fullName": "Mary Jane", + "countryCode": "us", + "system": false } }, { "com.linkedin.pegasus2avro.identity.GroupMembership": { "groups": [ "urn:li:corpGroup:All%20Employees", + "urn:li:corpGroup:All%20Employees", + "urn:li:corpGroup:Engineering", "urn:li:corpGroup:Engineering" ] } @@ -133,12 +149,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:john.doe", + "entityUrn": "urn:li:corpuser:mary.jane", "changeType": "UPSERT", "aspectName": "origin", "aspect": { @@ -149,12 +166,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:john.doe", + "entityUrn": "urn:li:corpuser:mary.jane", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -164,35 +182,36 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { - "urn": "urn:li:corpuser:mary.jane", + "urn": "urn:li:corpuser:good.test", "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "customProperties": { + "login": "good.test@test.com", + "mobilePhone": "666-415-1337" + }, "active": true, - "displayName": "Mary Jane", - "email": "mary.jane@test.com", - "title": "Software Engineer", - "departmentName": "Engineering", - "firstName": "Mary", - "lastName": "Jane", - "fullName": "Mary Jane", - "countryCode": "us" + "displayName": "Good Test", + "email": "good.test@test.com", + "title": "Manager", + "departmentName": "Marketing", + "firstName": "Good", + "lastName": "Test", + "fullName": "Good Test", + "countryCode": "eu", + "system": false } }, { "com.linkedin.pegasus2avro.identity.GroupMembership": { - "groups": [ - "urn:li:corpGroup:All%20Employees", - "urn:li:corpGroup:All%20Employees", - "urn:li:corpGroup:Engineering", - "urn:li:corpGroup:Engineering" - ] + "groups": [] } } ] @@ -200,12 +219,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:mary.jane", + "entityUrn": "urn:li:corpuser:good.test", "changeType": "UPSERT", "aspectName": "origin", "aspect": { @@ -216,12 +236,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:mary.jane", + "entityUrn": "urn:li:corpuser:good.test", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -231,30 +252,36 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { - "urn": "urn:li:corpuser:good.test", + "urn": "urn:li:corpuser:john.doe", "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "customProperties": { + "login": "john.doe@test.com", + "mobilePhone": "555-415-1337" + }, "active": true, - "displayName": "Good Test", - "email": "good.test@test.com", - "title": "Manager", - "departmentName": "Marketing", - "firstName": "Good", - "lastName": "Test", - "fullName": "Good Test", - "countryCode": "eu" + "displayName": "JDoe", + "email": "john.doe@test.com", + "firstName": "John", + "lastName": "Doe", + "fullName": "John Doe", + "system": false } }, { "com.linkedin.pegasus2avro.identity.GroupMembership": { - "groups": [] + "groups": [ + "urn:li:corpGroup:All%20Employees", + "urn:li:corpGroup:Engineering" + ] } } ] @@ -262,12 +289,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:good.test", + "entityUrn": "urn:li:corpuser:john.doe", "changeType": "UPSERT", "aspectName": "origin", "aspect": { @@ -278,12 +306,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:good.test", + "entityUrn": "urn:li:corpuser:john.doe", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -293,7 +322,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/okta/okta_mces_golden_include_deprovisioned_suspended_users.json b/metadata-ingestion/tests/integration/okta/okta_mces_golden_include_deprovisioned_suspended_users.json index 0107d8c2a152f3..dc77a2d9665670 100644 --- a/metadata-ingestion/tests/integration/okta/okta_mces_golden_include_deprovisioned_suspended_users.json +++ b/metadata-ingestion/tests/integration/okta/okta_mces_golden_include_deprovisioned_suspended_users.json @@ -18,7 +18,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -34,7 +35,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -49,7 +51,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -71,7 +74,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -87,7 +91,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -102,28 +107,39 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { - "urn": "urn:li:corpuser:john.doe", + "urn": "urn:li:corpuser:mary.jane", "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "customProperties": { + "login": "mary.jane@test.com", + "mobilePhone": "666-415-1337" + }, "active": true, - "displayName": "JDoe", - "email": "john.doe@test.com", - "firstName": "John", - "lastName": "Doe", - "fullName": "John Doe" + "displayName": "Mary Jane", + "email": "mary.jane@test.com", + "title": "Software Engineer", + "departmentName": "Engineering", + "firstName": "Mary", + "lastName": "Jane", + "fullName": "Mary Jane", + "countryCode": "us", + "system": false } }, { "com.linkedin.pegasus2avro.identity.GroupMembership": { "groups": [ "urn:li:corpGroup:All%20Employees", + "urn:li:corpGroup:All%20Employees", + "urn:li:corpGroup:Engineering", "urn:li:corpGroup:Engineering" ] } @@ -133,12 +149,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:john.doe", + "entityUrn": "urn:li:corpuser:mary.jane", "changeType": "UPSERT", "aspectName": "origin", "aspect": { @@ -149,12 +166,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:john.doe", + "entityUrn": "urn:li:corpuser:mary.jane", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -164,35 +182,36 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { - "urn": "urn:li:corpuser:mary.jane", + "urn": "urn:li:corpuser:good.test", "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "customProperties": { + "login": "good.test@test.com", + "mobilePhone": "666-415-1337" + }, "active": true, - "displayName": "Mary Jane", - "email": "mary.jane@test.com", - "title": "Software Engineer", - "departmentName": "Engineering", - "firstName": "Mary", - "lastName": "Jane", - "fullName": "Mary Jane", - "countryCode": "us" + "displayName": "Good Test", + "email": "good.test@test.com", + "title": "Manager", + "departmentName": "Marketing", + "firstName": "Good", + "lastName": "Test", + "fullName": "Good Test", + "countryCode": "eu", + "system": false } }, { "com.linkedin.pegasus2avro.identity.GroupMembership": { - "groups": [ - "urn:li:corpGroup:All%20Employees", - "urn:li:corpGroup:All%20Employees", - "urn:li:corpGroup:Engineering", - "urn:li:corpGroup:Engineering" - ] + "groups": [] } } ] @@ -200,12 +219,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:mary.jane", + "entityUrn": "urn:li:corpuser:good.test", "changeType": "UPSERT", "aspectName": "origin", "aspect": { @@ -216,12 +236,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:mary.jane", + "entityUrn": "urn:li:corpuser:good.test", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -231,30 +252,41 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { - "urn": "urn:li:corpuser:good.test", + "urn": "urn:li:corpuser:mary.jane", "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "customProperties": { + "login": "mary.jane@test.com", + "mobilePhone": "666-415-1337" + }, "active": true, - "displayName": "Good Test", - "email": "good.test@test.com", - "title": "Manager", - "departmentName": "Marketing", - "firstName": "Good", - "lastName": "Test", - "fullName": "Good Test", - "countryCode": "eu" + "displayName": "Mary Jane", + "email": "mary.jane@test.com", + "title": "Software Engineer II", + "departmentName": "Engineering", + "firstName": "Mary", + "lastName": "Jane", + "fullName": "Mary Jane", + "countryCode": "as", + "system": false } }, { "com.linkedin.pegasus2avro.identity.GroupMembership": { - "groups": [] + "groups": [ + "urn:li:corpGroup:All%20Employees", + "urn:li:corpGroup:All%20Employees", + "urn:li:corpGroup:Engineering", + "urn:li:corpGroup:Engineering" + ] } } ] @@ -262,12 +294,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:good.test", + "entityUrn": "urn:li:corpuser:mary.jane", "changeType": "UPSERT", "aspectName": "origin", "aspect": { @@ -278,12 +311,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:good.test", + "entityUrn": "urn:li:corpuser:mary.jane", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -293,33 +327,34 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { - "urn": "urn:li:corpuser:mary.jane", + "urn": "urn:li:corpuser:bad.boyjones", "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "customProperties": { + "login": "bad.boyjones@test.com", + "mobilePhone": "666-415-1337" + }, "active": true, - "displayName": "Mary Jane", - "email": "mary.jane@test.com", - "title": "Software Engineer II", - "departmentName": "Engineering", - "firstName": "Mary", - "lastName": "Jane", - "fullName": "Mary Jane", - "countryCode": "as" + "displayName": "Bad Boy Jones", + "email": "bad.boyjones@test.com", + "firstName": "Bad", + "lastName": "Boy Jones", + "fullName": "Bad Boy Jones", + "system": false } }, { "com.linkedin.pegasus2avro.identity.GroupMembership": { "groups": [ "urn:li:corpGroup:All%20Employees", - "urn:li:corpGroup:All%20Employees", - "urn:li:corpGroup:Engineering", "urn:li:corpGroup:Engineering" ] } @@ -329,12 +364,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:mary.jane", + "entityUrn": "urn:li:corpuser:bad.boyjones", "changeType": "UPSERT", "aspectName": "origin", "aspect": { @@ -345,12 +381,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:mary.jane", + "entityUrn": "urn:li:corpuser:bad.boyjones", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -360,22 +397,28 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": { - "urn": "urn:li:corpuser:bad.boyjones", + "urn": "urn:li:corpuser:john.doe", "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "customProperties": { + "login": "john.doe@test.com", + "mobilePhone": "555-415-1337" + }, "active": true, - "displayName": "Bad Boy Jones", - "email": "bad.boyjones@test.com", - "firstName": "Bad", - "lastName": "Boy Jones", - "fullName": "Bad Boy Jones" + "displayName": "JDoe", + "email": "john.doe@test.com", + "firstName": "John", + "lastName": "Doe", + "fullName": "John Doe", + "system": false } }, { @@ -391,12 +434,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:bad.boyjones", + "entityUrn": "urn:li:corpuser:john.doe", "changeType": "UPSERT", "aspectName": "origin", "aspect": { @@ -407,12 +451,13 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { "entityType": "corpuser", - "entityUrn": "urn:li:corpuser:bad.boyjones", + "entityUrn": "urn:li:corpuser:john.doe", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -422,7 +467,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -432,6 +478,10 @@ "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { + "customProperties": { + "login": "bad.girlriri@test.com", + "mobilePhone": "666-415-1337" + }, "active": true, "displayName": "Bad Girl Riri", "email": "bad.girlriri@test.com", @@ -440,7 +490,8 @@ "firstName": "Bad", "lastName": "Girl Riri", "fullName": "Bad Girl Riri", - "countryCode": "eu" + "countryCode": "eu", + "system": false } }, { @@ -458,7 +509,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -474,7 +526,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -489,7 +542,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "test-okta-usage" + "runId": "test-okta-usage", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/okta/okta_mces_golden_ingest_groups_users.json b/metadata-ingestion/tests/integration/okta/okta_mces_golden_ingest_groups_users.json index fad38cb3398fc3..917e4798e12a2a 100644 --- a/metadata-ingestion/tests/integration/okta/okta_mces_golden_ingest_groups_users.json +++ b/metadata-ingestion/tests/integration/okta/okta_mces_golden_ingest_groups_users.json @@ -118,7 +118,10 @@ "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { - "customProperties": {}, + "customProperties": { + "login": "mary.jane@test.com", + "mobilePhone": "666-415-1337" + }, "active": true, "displayName": "Mary Jane", "email": "mary.jane@test.com", @@ -127,7 +130,8 @@ "firstName": "Mary", "lastName": "Jane", "fullName": "Mary Jane", - "countryCode": "us" + "countryCode": "us", + "system": false } }, { @@ -189,13 +193,17 @@ "aspects": [ { "com.linkedin.pegasus2avro.identity.CorpUserInfo": { - "customProperties": {}, + "customProperties": { + "login": "john.doe@test.com", + "mobilePhone": "555-415-1337" + }, "active": true, "displayName": "JDoe", "email": "john.doe@test.com", "firstName": "John", "lastName": "Doe", - "fullName": "John Doe" + "fullName": "John Doe", + "system": false } }, { From 69981675a5ad1aa24707c776cca74d0c99e2afd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Wed, 5 Mar 2025 17:11:04 +0100 Subject: [PATCH 5/5] feat(mssql): adds subtypes aspect for dataflow and datajobs (#12775) --- .../ingestion/source/common/subtypes.py | 7 ++ .../ingestion/source/sql/mssql/job_models.py | 29 +++++ .../ingestion/source/sql/mssql/source.py | 14 ++- .../golden_mces_mssql_no_db_to_file.json | 108 ++++++++++++++++-- .../golden_mces_mssql_no_db_with_filter.json | 84 +++++++++++++- .../golden_mces_mssql_to_file.json | 106 +++++++++++++++-- ...golden_mces_mssql_with_lower_case_urn.json | 108 ++++++++++++++++-- 7 files changed, 420 insertions(+), 36 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py index 8eb53259df8062..2d9bcfca91f2c2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py @@ -60,8 +60,15 @@ class BIContainerSubTypes(StrEnum): MODE_COLLECTION = "Collection" +class FlowContainerSubTypes(StrEnum): + MSSQL_JOB = "Job" + MSSQL_PROCEDURE_CONTAINER = "Procedures Container" + + class JobContainerSubTypes(StrEnum): NIFI_PROCESS_GROUP = "Process Group" + MSSQL_JOBSTEP = "Job Step" + MSSQL_STORED_PROCEDURE = "Stored Procedure" class BIAssetSubTypes(StrEnum): diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py index 3d2a8af3a54999..f30f898325f463 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py @@ -11,12 +11,17 @@ DatabaseKey, SchemaKey, ) +from datahub.ingestion.source.common.subtypes import ( + FlowContainerSubTypes, + JobContainerSubTypes, +) from datahub.metadata.schema_classes import ( ContainerClass, DataFlowInfoClass, DataJobInfoClass, DataJobInputOutputClass, DataPlatformInstanceClass, + SubTypesClass, ) @@ -211,6 +216,18 @@ def as_datajob_info_aspect(self) -> DataJobInfoClass: status=self.status, ) + @property + def as_subtypes_aspect(self) -> SubTypesClass: + assert isinstance(self.entity, (JobStep, StoredProcedure)) + type = ( + JobContainerSubTypes.MSSQL_JOBSTEP + if isinstance(self.entity, JobStep) + else JobContainerSubTypes.MSSQL_STORED_PROCEDURE + ) + return SubTypesClass( + typeNames=[type], + ) + @property def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]: if self.entity.flow.platform_instance: @@ -276,6 +293,18 @@ def as_dataflow_info_aspect(self) -> DataFlowInfoClass: externalUrl=self.external_url, ) + @property + def as_subtypes_aspect(self) -> SubTypesClass: + assert isinstance(self.entity, (MSSQLJob, MSSQLProceduresContainer)) + type = ( + FlowContainerSubTypes.MSSQL_JOB + if isinstance(self.entity, MSSQLJob) + else FlowContainerSubTypes.MSSQL_PROCEDURE_CONTAINER + ) + return SubTypesClass( + typeNames=[type], + ) + @property def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]: if self.entity.platform_instance: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index 766f94a43e73ff..ed53c34a1ac61d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -638,6 +638,11 @@ def construct_job_workunits( aspect=data_job.as_datajob_info_aspect, ).as_workunit() + yield MetadataChangeProposalWrapper( + entityUrn=data_job.urn, + aspect=data_job.as_subtypes_aspect, + ).as_workunit() + data_platform_instance_aspect = data_job.as_maybe_platform_instance_aspect if data_platform_instance_aspect: yield MetadataChangeProposalWrapper( @@ -676,8 +681,6 @@ def construct_job_workunits( ), ).as_workunit() - # TODO: Add SubType when it appear - def construct_flow_workunits( self, data_flow: MSSQLDataFlow, @@ -687,6 +690,11 @@ def construct_flow_workunits( aspect=data_flow.as_dataflow_info_aspect, ).as_workunit() + yield MetadataChangeProposalWrapper( + entityUrn=data_flow.urn, + aspect=data_flow.as_subtypes_aspect, + ).as_workunit() + data_platform_instance_aspect = data_flow.as_maybe_platform_instance_aspect if data_platform_instance_aspect: yield MetadataChangeProposalWrapper( @@ -700,8 +708,6 @@ def construct_flow_workunits( aspect=data_flow.as_container_aspect, ).as_workunit() - # TODO: Add SubType when it appear - def get_inspectors(self) -> Iterable[Inspector]: # This method can be overridden in the case that you want to dynamically # run on multiple databases. diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index 73f69273836346..beba77afe8df75 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -104,6 +104,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Job" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", @@ -112,11 +130,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "2fc72675-0c68-4260-ab00-c361b96c8c36", + "job_id": "ae341aad-8ab2-421e-b46b-147afd4b0705", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2025-01-31 08:02:41.167000", - "date_modified": "2025-01-31 08:02:41.360000", + "date_created": "2025-03-04 16:55:50.893000", + "date_modified": "2025-03-04 16:55:51.043000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -134,6 +152,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Job Step" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", @@ -2266,6 +2302,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Procedures Container" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", @@ -2279,8 +2333,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2025-01-31 08:02:40.980000", - "date_modified": "2025-01-31 08:02:40.980000" + "date_created": "2025-03-04 16:55:50.720000", + "date_modified": "2025-03-04 16:55:50.720000" }, "name": "DemoData.Foo.Proc.With.SpecialChar", "type": { @@ -2294,6 +2348,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Stored Procedure" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", @@ -2329,8 +2401,8 @@ "depending_on_procedure": "{}", "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", "input parameters": "[]", - "date_created": "2025-01-31 08:02:40.987000", - "date_modified": "2025-01-31 08:02:40.987000" + "date_created": "2025-03-04 16:55:50.727000", + "date_modified": "2025-03-04 16:55:50.727000" }, "name": "DemoData.Foo.NewProc", "type": { @@ -2344,6 +2416,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Stored Procedure" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", @@ -4969,7 +5059,7 @@ "actor": "urn:li:corpuser:_ingestion" }, "lastModified": { - "time": 1738310563767, + "time": 1741107354163, "actor": "urn:li:corpuser:_ingestion" } } @@ -5092,7 +5182,7 @@ "actor": "urn:li:corpuser:_ingestion" }, "lastModified": { - "time": 1738310563770, + "time": 1741107354165, "actor": "urn:li:corpuser:_ingestion" } } diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index 2789ccd3cd5a72..304312968b81b6 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -104,6 +104,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Job" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", @@ -112,11 +130,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "2fc72675-0c68-4260-ab00-c361b96c8c36", + "job_id": "ae341aad-8ab2-421e-b46b-147afd4b0705", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2025-01-31 08:02:41.167000", - "date_modified": "2025-01-31 08:02:41.360000", + "date_created": "2025-03-04 16:55:50.893000", + "date_modified": "2025-03-04 16:55:51.043000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -134,6 +152,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Job Step" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", @@ -2266,6 +2302,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Procedures Container" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", @@ -2279,8 +2333,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2025-01-31 08:02:40.980000", - "date_modified": "2025-01-31 08:02:40.980000" + "date_created": "2025-03-04 16:55:50.720000", + "date_modified": "2025-03-04 16:55:50.720000" }, "name": "DemoData.Foo.Proc.With.SpecialChar", "type": { @@ -2294,6 +2348,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Stored Procedure" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", @@ -2694,7 +2766,7 @@ "actor": "urn:li:corpuser:_ingestion" }, "lastModified": { - "time": 1738310565884, + "time": 1741107356597, "actor": "urn:li:corpuser:_ingestion" } } diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index a9f52e4c97f012..50b9d3618bf8bc 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -111,6 +111,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,my-instance.Weekly Demo Data Backup,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Job" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataFlow", "entityUrn": "urn:li:dataFlow:(mssql,my-instance.Weekly Demo Data Backup,PROD)", @@ -177,11 +195,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "5a260993-c4ce-4bb3-a273-eaf6ef6e0382", + "job_id": "ae341aad-8ab2-421e-b46b-147afd4b0705", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2025-01-28 15:27:31.437000", - "date_modified": "2025-01-28 15:27:31.593000", + "date_created": "2025-03-04 16:55:50.893000", + "date_modified": "2025-03-04 16:55:51.043000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -199,6 +217,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Job Step" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", @@ -2581,6 +2617,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Procedures Container" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataFlow", "entityUrn": "urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD)", @@ -2652,8 +2706,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2025-01-28 15:27:31.257000", - "date_modified": "2025-01-28 15:27:31.257000" + "date_created": "2025-03-04 16:55:50.720000", + "date_modified": "2025-03-04 16:55:50.720000" }, "name": "DemoData.Foo.Proc.With.SpecialChar", "type": { @@ -2667,6 +2721,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Stored Procedure" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", @@ -2764,8 +2836,8 @@ "depending_on_procedure": "{}", "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", "input parameters": "[]", - "date_created": "2025-01-28 15:27:31.263000", - "date_modified": "2025-01-28 15:27:31.263000" + "date_created": "2025-03-04 16:55:50.727000", + "date_modified": "2025-03-04 16:55:50.727000" }, "name": "DemoData.Foo.NewProc", "type": { @@ -2779,6 +2851,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Stored Procedure" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,my-instance.DemoData.Foo.stored_procedures,PROD),NewProc)", @@ -3259,7 +3349,7 @@ "actor": "urn:li:corpuser:_ingestion" }, "lastModified": { - "time": 1738078055642, + "time": 1741107355755, "actor": "urn:li:corpuser:_ingestion" } } diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index 189e673270b007..2f192b275d18d4 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -104,6 +104,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Job" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", @@ -112,11 +130,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "2fc72675-0c68-4260-ab00-c361b96c8c36", + "job_id": "ae341aad-8ab2-421e-b46b-147afd4b0705", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2025-01-31 08:02:41.167000", - "date_modified": "2025-01-31 08:02:41.360000", + "date_created": "2025-03-04 16:55:50.893000", + "date_modified": "2025-03-04 16:55:51.043000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -134,6 +152,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Job Step" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD),Weekly Demo Data Backup)", @@ -2266,6 +2302,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Procedures Container" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", @@ -2279,8 +2333,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2025-01-31 08:02:40.980000", - "date_modified": "2025-01-31 08:02:40.980000" + "date_created": "2025-03-04 16:55:50.720000", + "date_modified": "2025-03-04 16:55:50.720000" }, "name": "DemoData.Foo.Proc.With.SpecialChar", "type": { @@ -2294,6 +2348,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Stored Procedure" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", @@ -2329,8 +2401,8 @@ "depending_on_procedure": "{}", "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", "input parameters": "[]", - "date_created": "2025-01-31 08:02:40.987000", - "date_modified": "2025-01-31 08:02:40.987000" + "date_created": "2025-03-04 16:55:50.727000", + "date_modified": "2025-03-04 16:55:50.727000" }, "name": "DemoData.Foo.NewProc", "type": { @@ -2344,6 +2416,24 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Stored Procedure" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", @@ -5019,7 +5109,7 @@ "actor": "urn:li:corpuser:_ingestion" }, "lastModified": { - "time": 1738310566860, + "time": 1741107357875, "actor": "urn:li:corpuser:_ingestion" } } @@ -5166,7 +5256,7 @@ "actor": "urn:li:corpuser:_ingestion" }, "lastModified": { - "time": 1738310566866, + "time": 1741107357882, "actor": "urn:li:corpuser:_ingestion" } }