Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(sdk): add support for institutional memory links #12770

Merged
merged 3 commits into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 87 additions & 2 deletions metadata-ingestion/src/datahub/sdk/_shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
Callable,
List,
Optional,
Sequence,
Tuple,
Union,
)
Expand Down Expand Up @@ -49,6 +50,8 @@

ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]

_DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not in the scope of this PR, but should this "ingestion actor" be configurable?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe - we don't show it anywhere in the UI, so it's not particularly relevant. It's here mainly because a couple of fields require it for backwards compatibility.



def make_time_stamp(ts: Optional[datetime]) -> Optional[models.TimeStampClass]:
if ts is None:
Expand Down Expand Up @@ -438,8 +441,7 @@ def _parse_glossary_term_association_class(
def _terms_audit_stamp(self) -> models.AuditStampClass:
return models.AuditStampClass(
time=0,
# TODO figure out what to put here
actor=CorpUserUrn("__ingestion").urn(),
actor=_DEFAULT_ACTOR_URN,
)

def set_terms(self, terms: TermsInputType) -> None:
Expand Down Expand Up @@ -493,3 +495,86 @@ def domain(self) -> Optional[DomainUrn]:
def set_domain(self, domain: DomainInputType) -> None:
    """Set this entity's domains aspect to exactly one domain.

    Args:
        domain: The domain to assign; it is parsed with
            ``DomainUrn.from_string`` before being stored.
    """
    # Parsing doubles as validation: it is basically a type assertion that
    # the input really is a domain URN.
    parsed_urn = DomainUrn.from_string(domain)
    domains_aspect = models.DomainsClass(domains=[str(parsed_urn)])
    self._set_aspect(domains_aspect)


# Accepted ways to specify a single link (institutional memory element).
LinkInputType: TypeAlias = Union[
# A bare URL; the URL is reused as the description when parsed.
str,
Tuple[str, str], # url, description
# An already-built aspect element; used as-is when parsed.
models.InstitutionalMemoryMetadataClass,
]
# A collection of links, as accepted by `set_links`.
LinksInputType: TypeAlias = Sequence[LinkInputType]


class HasInstitutionalMemory(Entity):
__slots__ = ()

# Internally the aspect is called institutionalMemory, and so much of the code
# uses that name. However, the public-facing API is called "links", since
# that's what we call these in the UI.
Comment on lines +511 to +513
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In GraphQL and OpenAPI we use InstitutionalMemory, should we align here with the UI or other APIs? 🤔

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because this sdk is for end users, we should align with the UI


def _ensure_institutional_memory(
    self,
) -> List[models.InstitutionalMemoryMetadataClass]:
    """Return the link elements list of the institutionalMemory aspect.

    An empty aspect is handed to ``_setdefault_aspect`` as the default.
    NOTE(review): presumably the existing aspect is returned when one is
    already set, and the default is stored otherwise - confirm against
    ``Entity._setdefault_aspect``.

    Returns:
        The ``elements`` list of the aspect returned by
        ``_setdefault_aspect``.
    """
    empty_aspect = models.InstitutionalMemoryClass(elements=[])
    aspect = self._setdefault_aspect(empty_aspect)
    return aspect.elements

@property
def links(self) -> Optional[List[models.InstitutionalMemoryMetadataClass]]:
    """The entity's links, or ``None`` when no usable aspect is present.

    Returns:
        The ``elements`` list of the institutionalMemory aspect, or
        ``None`` if ``_get_aspect`` returns a falsy value.
    """
    aspect = self._get_aspect(models.InstitutionalMemoryClass)
    if not aspect:
        return None
    return aspect.elements

@classmethod
def _institutional_memory_audit_stamp(cls) -> models.AuditStampClass:
    """Build the audit stamp attached to links created through this mixin.

    Returns:
        An ``AuditStampClass`` with ``time=0`` and the module-level
        default actor (``_DEFAULT_ACTOR_URN``).
    """
    # The first parameter of a classmethod receives the class, so it is
    # named ``cls`` (PEP 8) rather than ``self``.
    return models.AuditStampClass(
        time=0,
        actor=_DEFAULT_ACTOR_URN,
    )

@classmethod
def _parse_link_association_class(
    cls, link: LinkInputType
) -> models.InstitutionalMemoryMetadataClass:
    """Normalize any supported link input into an aspect element.

    Args:
        link: One of: an ``InstitutionalMemoryMetadataClass`` (returned
            unchanged), a URL string (also used as the description), or a
            ``(url, description)`` 2-tuple.

    Returns:
        The corresponding ``InstitutionalMemoryMetadataClass``. Elements
        built here carry the stamp from
        ``_institutional_memory_audit_stamp``.
    """
    # Already in aspect form: pass it through untouched.
    if isinstance(link, models.InstitutionalMemoryMetadataClass):
        return link

    # Bare URL: there is no separate description, so the URL doubles as one.
    if isinstance(link, str):
        return models.InstitutionalMemoryMetadataClass(
            url=link,
            description=link,
            createStamp=cls._institutional_memory_audit_stamp(),
        )

    # (url, description) pair.
    if isinstance(link, tuple) and len(link) == 2:
        link_url, link_description = link
        return models.InstitutionalMemoryMetadataClass(
            url=link_url,
            description=link_description,
            createStamp=cls._institutional_memory_audit_stamp(),
        )

    # Anything else is outside LinkInputType.
    assert_never(link)

def set_links(self, links: LinksInputType) -> None:
    """Set the entity's institutionalMemory aspect from the given links.

    Args:
        links: Links in any form accepted by ``LinkInputType``; each is
            normalized with ``_parse_link_association_class``.
    """
    parsed_links = [self._parse_link_association_class(item) for item in links]
    self._set_aspect(models.InstitutionalMemoryClass(elements=parsed_links))

@classmethod
def _link_key(cls, link: models.InstitutionalMemoryMetadataClass) -> str:
    """Return the key that identifies a link: its URL.

    Args:
        link: The link element to key.

    Returns:
        ``link.url``.
    """
    # The first parameter of a classmethod receives the class, so it is
    # named ``cls`` (PEP 8) rather than ``self``.
    return link.url

def add_link(self, link: LinkInputType) -> None:
    """Add one link to the entity, keyed by URL.

    The link is normalized and handed to ``add_list_unique`` together with
    the current elements list and ``_link_key``.
    NOTE(review): how an existing entry with the same URL is treated
    (replaced vs. kept) is decided by ``add_list_unique`` - confirm there.

    Args:
        link: The link in any form accepted by ``LinkInputType``.
    """
    # Fetch the elements list first, then parse, matching the order in
    # which the arguments were originally evaluated.
    elements = self._ensure_institutional_memory()
    key_fn = self._link_key
    new_element = self._parse_link_association_class(link)
    add_list_unique(elements, key_fn, new_element)

def remove_link(self, link: LinkInputType) -> None:
    """Remove one link from the entity, matched by URL key.

    The link is normalized and handed to ``remove_list_unique`` together
    with the current elements list and ``_link_key``.
    NOTE(review): behavior when no matching entry exists is decided by
    ``remove_list_unique`` - confirm there.

    Args:
        link: The link in any form accepted by ``LinkInputType``.
    """
    # Fetch the elements list first, then parse, matching the order in
    # which the arguments were originally evaluated.
    elements = self._ensure_institutional_memory()
    key_fn = self._link_key
    target = self._parse_link_association_class(link)
    remove_list_unique(elements, key_fn, target)
6 changes: 6 additions & 0 deletions metadata-ingestion/src/datahub/sdk/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@
DomainInputType,
HasContainer,
HasDomain,
HasInstitutionalMemory,
HasOwnership,
HasPlatformInstance,
HasSubtype,
HasTags,
HasTerms,
LinksInputType,
OwnersInputType,
ParentContainerInputType,
TagsInputType,
Expand All @@ -41,6 +43,7 @@ class Container(
HasSubtype,
HasContainer,
HasOwnership,
HasInstitutionalMemory,
HasTags,
HasTerms,
HasDomain,
Expand Down Expand Up @@ -71,6 +74,7 @@ def __init__(
parent_container: Auto | ParentContainerInputType | None = auto,
subtype: Optional[str] = None,
owners: Optional[OwnersInputType] = None,
links: Optional[LinksInputType] = None,
tags: Optional[TagsInputType] = None,
terms: Optional[TermsInputType] = None,
domain: Optional[DomainInputType] = None,
Expand Down Expand Up @@ -133,6 +137,8 @@ def __init__(
self.set_subtype(subtype)
if owners is not None:
self.set_owners(owners)
if links is not None:
self.set_links(links)
if tags is not None:
self.set_tags(tags)
if terms is not None:
Expand Down
6 changes: 6 additions & 0 deletions metadata-ingestion/src/datahub/sdk/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@
DomainInputType,
HasContainer,
HasDomain,
HasInstitutionalMemory,
HasOwnership,
HasPlatformInstance,
HasSubtype,
HasTags,
HasTerms,
LinksInputType,
OwnersInputType,
ParentContainerInputType,
TagInputType,
Expand Down Expand Up @@ -422,6 +424,7 @@ class Dataset(
HasSubtype,
HasContainer,
HasOwnership,
HasInstitutionalMemory,
HasTags,
HasTerms,
HasDomain,
Expand Down Expand Up @@ -453,6 +456,7 @@ def __init__(
parent_container: ParentContainerInputType | Unset = unset,
subtype: Optional[str] = None,
owners: Optional[OwnersInputType] = None,
links: Optional[LinksInputType] = None,
tags: Optional[TagsInputType] = None,
terms: Optional[TermsInputType] = None,
# TODO structured_properties
Expand Down Expand Up @@ -499,6 +503,8 @@ def __init__(
self.set_subtype(subtype)
if owners is not None:
self.set_owners(owners)
if links is not None:
self.set_links(links)
if tags is not None:
self.set_tags(tags)
if terms is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,26 @@
}
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056",
"changeType": "UPSERT",
"aspectName": "institutionalMemory",
"aspect": {
"json": {
"elements": [
{
"url": "https://example.com/doc1",
"description": "https://example.com/doc1",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
}
]
}
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,34 @@
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_instance.my_db.my_schema.my_table,PROD)",
"changeType": "UPSERT",
"aspectName": "institutionalMemory",
"aspect": {
"json": {
"elements": [
{
"url": "https://example.com/doc1",
"description": "https://example.com/doc1",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
},
{
"url": "https://example.com/doc2",
"description": "Documentation 2",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
}
]
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_instance.my_db.my_schema.my_table,PROD)",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,34 @@
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_instance.my_db.my_schema.my_table,PROD)",
"changeType": "UPSERT",
"aspectName": "institutionalMemory",
"aspect": {
"json": {
"elements": [
{
"url": "https://example.com/doc1",
"description": "https://example.com/doc1",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
},
{
"url": "https://example.com/doc2",
"description": "Documentation 2",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
}
]
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_instance.my_db.my_schema.my_table,PROD)",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
[
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,proj.dataset.table,PROD)",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:bigquery"
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,proj.dataset.table,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
"json": {
"schemaName": "",
"platform": "urn:li:dataPlatform:bigquery",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.schema.Schemaless": {}
},
"fields": [
{
"fieldPath": "field1",
"nullable": false,
"type": {
"type": {
"com.linkedin.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false
},
{
"fieldPath": "field2",
"nullable": false,
"description": "field2 description",
"type": {
"type": {
"com.linkedin.schema.NullType": {}
}
},
"nativeDataType": "int64",
"recursive": false,
"isPartOfKey": false
}
]
}
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,proj.dataset.table,PROD)",
"changeType": "UPSERT",
"aspectName": "institutionalMemory",
"aspect": {
"json": {
"elements": [
{
"url": "https://example.com/doc2",
"description": "Documentation 2",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
},
{
"url": "https://example.com/doc3",
"description": "Documentation 3",
"createStamp": {
"time": 0,
"actor": "urn:li:corpuser:__ingestion"
}
}
]
}
}
}
]
Loading
Loading