Skip to content

Commit

Permalink
feat(sdk): add support for institutional memory links (#12770)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Mar 4, 2025
1 parent 3b4f383 commit 6097820
Show file tree
Hide file tree
Showing 9 changed files with 327 additions and 14 deletions.
89 changes: 87 additions & 2 deletions metadata-ingestion/src/datahub/sdk/_shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
Callable,
List,
Optional,
Sequence,
Tuple,
Union,
)
Expand Down Expand Up @@ -49,6 +50,8 @@

# An actor is identified by either a user URN or a group URN.
ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]

# Actor URN string placed on the audit stamps that this module builds
# (see the `AuditStampClass(..., actor=_DEFAULT_ACTOR_URN)` call sites).
_DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()


def make_time_stamp(ts: Optional[datetime]) -> Optional[models.TimeStampClass]:
if ts is None:
Expand Down Expand Up @@ -438,8 +441,7 @@ def _parse_glossary_term_association_class(
def _terms_audit_stamp(self) -> models.AuditStampClass:
    """Return an audit stamp with time 0 and the module-wide default actor.

    Returns:
        A new `models.AuditStampClass` whose `actor` is `_DEFAULT_ACTOR_URN`.
    """
    return models.AuditStampClass(
        time=0,
        # Single source of truth for the actor; the previous inline
        # `CorpUserUrn("__ingestion").urn()` argument is superseded by the
        # shared constant (passing `actor=` twice would be a SyntaxError).
        actor=_DEFAULT_ACTOR_URN,
    )

def set_terms(self, terms: TermsInputType) -> None:
Expand Down Expand Up @@ -493,3 +495,86 @@ def domain(self) -> Optional[DomainUrn]:
def set_domain(self, domain: DomainInputType) -> None:
    """Replace the entity's domains aspect with the single given domain.

    Args:
        domain: The domain to assign; it is passed through
            `DomainUrn.from_string` before being stored.
    """
    # Going through DomainUrn is basically a type assertion on the input.
    validated_urn = DomainUrn.from_string(domain)
    domains_aspect = models.DomainsClass(domains=[str(validated_urn)])
    self._set_aspect(domains_aspect)


# Accepted spellings for a single link: a bare URL string, a
# (url, description) tuple, or an already-built metadata object.
LinkInputType: TypeAlias = Union[
str,
Tuple[str, str], # url, description
models.InstitutionalMemoryMetadataClass,
]
# A sequence of links, each in any of the forms accepted by LinkInputType.
LinksInputType: TypeAlias = Sequence[LinkInputType]


class HasInstitutionalMemory(Entity):
    """Mixin exposing an entity's links (the institutionalMemory aspect)."""

    __slots__ = ()

    # Internally the aspect is called institutionalMemory, and so much of the code
    # uses that name. However, the public-facing API is called "links", since
    # that's what we call these in the UI.

    def _ensure_institutional_memory(
        self,
    ) -> List[models.InstitutionalMemoryMetadataClass]:
        """Return the element list of the institutionalMemory aspect.

        The aspect is obtained through `_setdefault_aspect`, with an empty
        `InstitutionalMemoryClass` supplied as the default.
        """
        return self._setdefault_aspect(
            models.InstitutionalMemoryClass(elements=[])
        ).elements

    @property
    def links(self) -> Optional[List[models.InstitutionalMemoryMetadataClass]]:
        """The aspect's link elements, or None when `_get_aspect` yields nothing."""
        if institutional_memory := self._get_aspect(models.InstitutionalMemoryClass):
            return institutional_memory.elements
        return None

    @classmethod
    def _institutional_memory_audit_stamp(cls) -> models.AuditStampClass:
        """Return the audit stamp used as `createStamp` for links built here."""
        return models.AuditStampClass(
            time=0,
            actor=_DEFAULT_ACTOR_URN,
        )

    @classmethod
    def _parse_link_association_class(
        cls, link: LinkInputType
    ) -> models.InstitutionalMemoryMetadataClass:
        """Normalize any accepted link input into a metadata object.

        Args:
            link: A metadata object (returned unchanged), a URL string (the
                URL doubles as the description), or a `(url, description)`
                tuple.

        Returns:
            The corresponding `models.InstitutionalMemoryMetadataClass`.
        """
        if isinstance(link, models.InstitutionalMemoryMetadataClass):
            return link
        elif isinstance(link, str):
            return models.InstitutionalMemoryMetadataClass(
                url=link,
                # No separate description was given, so reuse the URL.
                description=link,
                createStamp=cls._institutional_memory_audit_stamp(),
            )
        elif isinstance(link, tuple) and len(link) == 2:
            url, description = link
            return models.InstitutionalMemoryMetadataClass(
                url=url,
                description=description,
                createStamp=cls._institutional_memory_audit_stamp(),
            )
        else:
            # Anything else (including tuples of the wrong length) lands here.
            assert_never(link)

    def set_links(self, links: LinksInputType) -> None:
        """Replace the institutionalMemory aspect with the given links.

        Args:
            links: Links in any form accepted by `LinkInputType`; each one is
                normalized via `_parse_link_association_class`.
        """
        self._set_aspect(
            models.InstitutionalMemoryClass(
                elements=[self._parse_link_association_class(link) for link in links]
            )
        )

    @classmethod
    def _link_key(cls, link: models.InstitutionalMemoryMetadataClass) -> str:
        """Return the identity key for a link: its URL."""
        return link.url

    def add_link(self, link: LinkInputType) -> None:
        """Add a link through `add_list_unique`, keyed by URL.

        Args:
            link: The link to add, in any form accepted by `LinkInputType`.
        """
        add_list_unique(
            self._ensure_institutional_memory(),
            self._link_key,
            self._parse_link_association_class(link),
        )

    def remove_link(self, link: LinkInputType) -> None:
        """Remove a link through `remove_list_unique`, keyed by URL.

        Args:
            link: The link to remove, in any form accepted by `LinkInputType`.
        """
        remove_list_unique(
            self._ensure_institutional_memory(),
            self._link_key,
            self._parse_link_association_class(link),
        )
6 changes: 6 additions & 0 deletions metadata-ingestion/src/datahub/sdk/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@
DomainInputType,
HasContainer,
HasDomain,
HasInstitutionalMemory,
HasOwnership,
HasPlatformInstance,
HasSubtype,
HasTags,
HasTerms,
LinksInputType,
OwnersInputType,
ParentContainerInputType,
TagsInputType,
Expand All @@ -41,6 +43,7 @@ class Container(
HasSubtype,
HasContainer,
HasOwnership,
HasInstitutionalMemory,
HasTags,
HasTerms,
HasDomain,
Expand Down Expand Up @@ -71,6 +74,7 @@ def __init__(
parent_container: Auto | ParentContainerInputType | None = auto,
subtype: Optional[str] = None,
owners: Optional[OwnersInputType] = None,
links: Optional[LinksInputType] = None,
tags: Optional[TagsInputType] = None,
terms: Optional[TermsInputType] = None,
domain: Optional[DomainInputType] = None,
Expand Down Expand Up @@ -133,6 +137,8 @@ def __init__(
self.set_subtype(subtype)
if owners is not None:
self.set_owners(owners)
if links is not None:
self.set_links(links)
if tags is not None:
self.set_tags(tags)
if terms is not None:
Expand Down
6 changes: 6 additions & 0 deletions metadata-ingestion/src/datahub/sdk/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@
DomainInputType,
HasContainer,
HasDomain,
HasInstitutionalMemory,
HasOwnership,
HasPlatformInstance,
HasSubtype,
HasTags,
HasTerms,
LinksInputType,
OwnersInputType,
ParentContainerInputType,
TagInputType,
Expand Down Expand Up @@ -422,6 +424,7 @@ class Dataset(
HasSubtype,
HasContainer,
HasOwnership,
HasInstitutionalMemory,
HasTags,
HasTerms,
HasDomain,
Expand Down Expand Up @@ -453,6 +456,7 @@ def __init__(
parent_container: ParentContainerInputType | Unset = unset,
subtype: Optional[str] = None,
owners: Optional[OwnersInputType] = None,
links: Optional[LinksInputType] = None,
tags: Optional[TagsInputType] = None,
terms: Optional[TermsInputType] = None,
# TODO structured_properties
Expand Down Expand Up @@ -499,6 +503,8 @@ def __init__(
self.set_subtype(subtype)
if owners is not None:
self.set_owners(owners)
if links is not None:
self.set_links(links)
if tags is not None:
self.set_tags(tags)
if terms is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,26 @@
}
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056",
"changeType": "UPSERT",
"aspectName": "institutionalMemory",
"aspect": {
"json": {
"elements": [
{
"url": "https://example.com/doc1",
"description": "https://example.com/doc1",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
}
]
}
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,34 @@
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_instance.my_db.my_schema.my_table,PROD)",
"changeType": "UPSERT",
"aspectName": "institutionalMemory",
"aspect": {
"json": {
"elements": [
{
"url": "https://example.com/doc1",
"description": "https://example.com/doc1",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
},
{
"url": "https://example.com/doc2",
"description": "Documentation 2",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
}
]
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_instance.my_db.my_schema.my_table,PROD)",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,34 @@
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_instance.my_db.my_schema.my_table,PROD)",
"changeType": "UPSERT",
"aspectName": "institutionalMemory",
"aspect": {
"json": {
"elements": [
{
"url": "https://example.com/doc1",
"description": "https://example.com/doc1",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
},
{
"url": "https://example.com/doc2",
"description": "Documentation 2",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
}
]
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_instance.my_db.my_schema.my_table,PROD)",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
[
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,proj.dataset.table,PROD)",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:bigquery"
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,proj.dataset.table,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
"json": {
"schemaName": "",
"platform": "urn:li:dataPlatform:bigquery",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.schema.Schemaless": {}
},
"fields": [
{
"fieldPath": "field1",
"nullable": false,
"type": {
"type": {
"com.linkedin.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false
},
{
"fieldPath": "field2",
"nullable": false,
"description": "field2 description",
"type": {
"type": {
"com.linkedin.schema.NullType": {}
}
},
"nativeDataType": "int64",
"recursive": false,
"isPartOfKey": false
}
]
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,proj.dataset.table,PROD)",
"changeType": "UPSERT",
"aspectName": "institutionalMemory",
"aspect": {
"json": {
"elements": [
{
"url": "https://example.com/doc2",
"description": "Documentation 2",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
},
{
"url": "https://example.com/doc3",
"description": "Documentation 3",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
}
]
}
}
}
]
Loading

0 comments on commit 6097820

Please sign in to comment.