Skip to content

Commit

Permalink
Merge branch 'master' into urn-validation-3
Browse files Browse the repository at this point in the history
  • Loading branch information
david-leifker authored Dec 3, 2024
2 parents 81ee794 + aca1cd7 commit bead6ca
Show file tree
Hide file tree
Showing 75 changed files with 1,004 additions and 75 deletions.
1 change: 1 addition & 0 deletions datahub-graphql-core/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ plugins {
id "io.github.kobylynskyi.graphql.codegen" version "4.1.1"
}

apply from: '../gradle/coverage/java-coverage.gradle'

dependencies {
implementation project(':metadata-service:restli-client-api')
Expand Down
1 change: 1 addition & 0 deletions datahub-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ plugins {
}

apply from: "../gradle/versioning/versioning.gradle"
apply from: "../gradle/coverage/java-coverage.gradle"

ext {
docker_registry = rootProject.ext.docker_registry == 'linkedin' ? 'acryldata' : docker_registry
Expand Down
7 changes: 7 additions & 0 deletions docs/managed-datahub/release-notes/v_0_3_7.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ If you are using an older CLI/SDK version, then please upgrade it. This applies
## Release Changelog
---

### v0.3.7.5

- [GMS] Fix upstream lineage patching when path contained encoded slash
- [UI] Fix merging siblings schema with v1 and v2 fields
- [UI] Fix display of nullable in schema field drawer
- [Ingestion] Reduce Data Product write volume from unset side-effect

### v0.3.7.4

- [#11935](https://github.com/datahub-project/datahub/pull/11935) - Added environment variable for enabling stricter URN validation rules `STRICT_URN_VALIDATION_ENABLED` [[1](https://datahubproject.io/docs/what/urn/#restrictions)].
Expand Down
3 changes: 3 additions & 0 deletions entity-registry/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ plugins {
id 'java-test-fixtures'
}

apply from: "../gradle/coverage/java-coverage.gradle"

dependencies {
implementation spec.product.pegasus.data
implementation spec.product.pegasus.generator
Expand Down Expand Up @@ -53,3 +55,4 @@ dependencies {
testFixturesAnnotationProcessor externalDependency.lombok
}
compileTestJava.dependsOn tasks.getByPath(':entity-registry:custom-test-model:modelDeploy')

Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,8 @@ public void testLargePatchStandard() throws Exception {
UpstreamLineage result = upstreamLineageTemplate.applyPatch(upstreamLineage, jsonPatch);
long end = System.currentTimeMillis();
assertTrue(
end - start < 10000,
String.format("Expected less then 10 seconds patch actual %s ms", end - start));
end - start < 20000,
String.format("Expected less then 20 seconds patch actual %s ms", end - start));

assertEquals(result.getUpstreams().size(), 187, "Expected 1 less upstream");
assertEquals(result.getFineGrainedLineages().size(), 607);
Expand Down
31 changes: 31 additions & 0 deletions gradle/coverage/java-coverage.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Shared JaCoCo coverage configuration, pulled into Java subprojects via `apply from:`.
apply plugin: "jacoco"

jacoco.toolVersion = "0.8.12"

/*
  Configured after evaluation because the jacoco plugin alters the test task based on the test
  lib dependencies (junit or testNG) that each subproject declares.
*/
afterEvaluate {
    // Running the tests always triggers the coverage report afterwards.
    test.finalizedBy(jacocoTestReport)

    jacocoTestReport {
        // The report is generated from the results of the test task, so it must run first.
        dependsOn test

        reports {
            // Only the XML report is produced; CSV and HTML are switched off.
            csv.required = false
            html.required = false
            xml.required = true
            /*
              Coverage aggregation/analysis tools search for the coverage result files. Writing every
              report into one folder under the root build directory minimizes the time they spend
              searching through the full source tree.
            */
            xml.outputLocation = rootProject.layout.buildDirectory.file("coverage-reports/jacoco-${project.name}.xml")
        }
    }
}
18 changes: 18 additions & 0 deletions gradle/coverage/python-coverage.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Builds the coverage-related arguments to be passed to pytest.
// test_name is an optional suffix appended to the report file name (defaults to an empty string).
ext.get_coverage_args = { test_name = "" ->

    /*
      Coverage aggregation/analysis tools search for the coverage result files. Keeping them all under
      one folder in the root build directory minimizes the time spent searching through the full source tree.
    */
    def reportDir = "${rootProject.buildDir}/coverage-reports"

    def reportFile = "pycov-${project.name}${test_name}.xml"

    /*
      --cov=src was added via setup.cfg in many of the python projects but was not getting picked up
      consistently, so it is passed explicitly here. Centralizing these params makes it easier to adjust
      them for all python projects (with overrides living in the sub-project build script).
    */
    "--cov-report xml:${reportDir}/${reportFile} --cov=src"
}
2 changes: 2 additions & 0 deletions ingestion-scheduler/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ plugins {
id 'java'
}

apply from: "../gradle/coverage/java-coverage.gradle"

dependencies {
implementation project(path: ':metadata-models')
implementation project(path: ':metadata-io')
Expand Down
3 changes: 3 additions & 0 deletions li-utils/build.gradle
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@

plugins {
id 'java-library'
id 'pegasus'
}

apply from: "../gradle/coverage/java-coverage.gradle"


dependencies {
api spec.product.pegasus.data
Expand Down
1 change: 1 addition & 0 deletions metadata-auth/auth-api/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ apply plugin: 'signing'
apply plugin: 'maven-publish'
apply plugin: 'io.codearte.nexus-staging'
apply from: '../../metadata-integration/java/versioning.gradle'
apply from: '../../gradle/coverage/java-coverage.gradle'

jar {
archiveClassifier = "lib"
Expand Down
2 changes: 2 additions & 0 deletions metadata-dao-impl/kafka-producer/build.gradle
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
apply plugin: 'java'

apply from: '../../gradle/coverage/java-coverage.gradle'

dependencies {
implementation project(':metadata-events:mxe-avro')
implementation project(':metadata-events:mxe-registration')
Expand Down
2 changes: 2 additions & 0 deletions metadata-events/mxe-utils-avro/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ plugins {
id 'pegasus'
}

apply from: "../../gradle/coverage/java-coverage.gradle"

dependencies {
api project(':metadata-events:mxe-avro')
api project(':metadata-models')
Expand Down
4 changes: 3 additions & 1 deletion metadata-ingestion-modules/airflow-plugin/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ plugins {
id 'base'
}

apply from: "../../gradle/coverage/python-coverage.gradle"

ext {
python_executable = 'python3'
venv_name = 'venv'
Expand Down Expand Up @@ -97,7 +99,7 @@ task testQuick(type: Exec, dependsOn: installTest) {
inputs.files(project.fileTree(dir: "tests/"))
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest --cov-config=setup.cfg --cov-report xml:coverage_quick.xml -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
"pytest --cov-config=setup.cfg ${get_coverage_args('quick')} -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
}


Expand Down
4 changes: 3 additions & 1 deletion metadata-ingestion-modules/dagster-plugin/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ plugins {
id 'base'
}

apply from: "../../gradle/coverage/python-coverage.gradle"

ext {
python_executable = 'python3'
venv_name = 'venv'
Expand Down Expand Up @@ -84,7 +86,7 @@ task testQuick(type: Exec, dependsOn: installDevTest) {
outputs.dir("${venv_name}")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
"pytest -vv ${get_coverage_args('quick')} --continue-on-collection-errors --junit-xml=junit.quick.xml"
}

task buildWheel(type: Exec, dependsOn: [environmentSetup]) {
Expand Down
4 changes: 3 additions & 1 deletion metadata-ingestion-modules/gx-plugin/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ plugins {
id 'base'
}

apply from: "../../gradle/coverage/python-coverage.gradle"

ext {
python_executable = 'python3'
venv_name = 'venv'
Expand Down Expand Up @@ -84,7 +86,7 @@ task testQuick(type: Exec, dependsOn: installDevTest) {
outputs.dir("${venv_name}")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
"pytest -vv ${get_coverage_args('quick')} --continue-on-collection-errors --junit-xml=junit.quick.xml"
}

task buildWheel(type: Exec, dependsOn: [environmentSetup]) {
Expand Down
6 changes: 4 additions & 2 deletions metadata-ingestion-modules/prefect-plugin/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ plugins {
id 'base'
}

apply from: "../../gradle/coverage/python-coverage.gradle"

ext {
python_executable = 'python3'
venv_name = 'venv'
Expand Down Expand Up @@ -82,14 +84,14 @@ task testQuick(type: Exec, dependsOn: installDevTest) {
outputs.dir("${venv_name}")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest --cov-config=setup.cfg --cov-report xml:coverage_quick.xml -vv --continue-on-collection-errors --junit-xml=junit.quick.xml -s"
"pytest --cov-config=setup.cfg ${get_coverage_args('quick')} -vv --continue-on-collection-errors --junit-xml=junit.quick.xml -s"
}


task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml"
"pytest -m 'not slow_integration' -vv ${get_coverage_args('full')} --continue-on-collection-errors --junit-xml=junit.full.xml"
}


Expand Down
14 changes: 6 additions & 8 deletions metadata-ingestion/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ plugins {
id 'base'
}

apply from: "../gradle/coverage/python-coverage.gradle"

ext {
python_executable = 'python3'
venv_name = 'venv'
Expand All @@ -11,10 +13,6 @@ if (!project.hasProperty("extra_pip_requirements")) {
ext.extra_pip_requirements = ""
}

def get_coverage_arg(test_name) {
return "--cov-report xml:coverage_${test_name}.xml "
}

task checkPythonVersion(type: Exec) {
commandLine python_executable, '-c',
'import sys; sys.version_info >= (3, 8), f"Python version {sys.version_info[:2]} not allowed"'
Expand Down Expand Up @@ -134,7 +132,7 @@ task testQuick(type: Exec, dependsOn: [installDev, ':metadata-models:generateJso
inputs.files(project.fileTree(dir: "src/", include: "**/*.py"))
inputs.files(project.fileTree(dir: "tests/"))
outputs.dir("${venv_name}")
def cvg_arg = get_coverage_arg("quick")
def cvg_arg = get_coverage_args("quick")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest ${cvg_arg} tests/unit --random-order --durations=20 -m 'not integration' -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
Expand Down Expand Up @@ -166,19 +164,19 @@ task testSingle(dependsOn: [installDevTest]) {
}

task testIntegrationBatch0(type: Exec, dependsOn: [installDevTest]) {
def cvg_arg = get_coverage_arg("intBatch0")
def cvg_arg = get_coverage_args("intBatch0")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest ${cvg_arg} --durations=50 -m 'integration_batch_0' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch0.xml"
}
task testIntegrationBatch1(type: Exec, dependsOn: [installDevTest]) {
def cvg_arg = get_coverage_arg("intBatch1")
def cvg_arg = get_coverage_args("intBatch1")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest ${cvg_arg} --durations=50 -m 'integration_batch_1' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch1.xml"
}
task testIntegrationBatch2(type: Exec, dependsOn: [installDevTest]) {
def cvg_arg = get_coverage_arg("intBatch2")
def cvg_arg = get_coverage_args("intBatch2")
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"pytest ${cvg_arg} --durations=20 -m 'integration_batch_2' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch2.xml"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import logging
import textwrap
from dataclasses import dataclass
from typing import TYPE_CHECKING, Iterable, List

Expand Down Expand Up @@ -28,6 +30,8 @@
FeatureGroupSummaryTypeDef,
)

logger = logging.getLogger(__name__)


@dataclass
class FeatureGroupProcessor:
Expand Down Expand Up @@ -197,11 +201,12 @@ def get_feature_wu(

full_table_name = f"{glue_database}.{glue_table}"

self.report.report_warning(
full_table_name,
f"""Note: table {full_table_name} is an AWS Glue object.
logging.info(
textwrap.dedent(
f"""Note: table {full_table_name} is an AWS Glue object. This source does not ingest all metadata for Glue tables.
To view full table metadata, run Glue ingestion
(see https://datahubproject.io/docs/metadata-ingestion/#aws-glue-glue)""",
(see https://datahubproject.io/docs/generated/ingestion/sources/glue)"""
)
)

feature_sources.append(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime
Expand Down Expand Up @@ -65,6 +66,8 @@
"Unknown": DeploymentStatusClass.UNKNOWN,
}

logger = logging.getLogger(__name__)


@dataclass
class ModelProcessor:
Expand Down Expand Up @@ -385,6 +388,26 @@ def strip_quotes(string: str) -> str:
model_metrics,
)

@staticmethod
def get_group_name_from_arn(arn: str) -> str:
"""
Extract model package group name from a SageMaker ARN.
Args:
arn (str): Full ARN of the model package group
Returns:
str: Name of the model package group
Example:
>>> ModelProcessor.get_group_name_from_arn("arn:aws:sagemaker:eu-west-1:123456789:model-package-group/my-model-group")
'my-model-group'
"""
logger.debug(
f"Extracting group name from ARN: {arn} because group was not seen before"
)
return arn.split("/")[-1]

def get_model_wu(
self,
model_details: "DescribeModelOutputTypeDef",
Expand Down Expand Up @@ -425,8 +448,14 @@ def get_model_wu(
model_group_arns = model_uri_groups | model_image_groups

model_group_names = sorted(
[self.group_arn_to_name[x] for x in model_group_arns]
[
self.group_arn_to_name[x]
if x in self.group_arn_to_name
else self.get_group_name_from_arn(x)
for x in model_group_arns
]
)

model_group_urns = [
builder.make_ml_model_group_urn("sagemaker", x, self.env)
for x in model_group_names
Expand Down
Loading

0 comments on commit bead6ca

Please sign in to comment.