From 920b16465066f7660f33229697ffe38f449142e9 Mon Sep 17 00:00:00 2001 From: case-k-git Date: Tue, 19 Dec 2023 18:49:47 +0900 Subject: [PATCH 01/16] add schema optionfor test improvement --- dbt/adapters/databricks/connections.py | 2 +- tests/integration/base.py | 1 - tests/profiles.py | 5 +++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/databricks/connections.py b/dbt/adapters/databricks/connections.py index 09292028f..0f10905f8 100644 --- a/dbt/adapters/databricks/connections.py +++ b/dbt/adapters/databricks/connections.py @@ -1258,7 +1258,7 @@ def connect() -> DatabricksSQLConnectionWrapper: http_headers=http_headers if http_headers else None, session_configuration=creds.session_properties, catalog=creds.database, - # schema=creds.schema, # TODO: Explicitly set once DBR 7.3LTS is EOL. + schema=creds.schema, _user_agent_entry=user_agent_entry, **connection_parameters, ) diff --git a/tests/integration/base.py b/tests/integration/base.py index 8bb8c94fb..18867cb78 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -189,7 +189,6 @@ def get_profile(self, adapter_type): "outputs": { "dev": dict( **get_databricks_cluster_target(adapter_type), - schema=self.unique_schema(), ), }, "target": "dev", diff --git a/tests/profiles.py b/tests/profiles.py index 73c660ff4..0d68c1f30 100644 --- a/tests/profiles.py +++ b/tests/profiles.py @@ -16,6 +16,7 @@ def get_databricks_cluster_target(profile_type: str): def _build_databricks_cluster_target( http_path: str, catalog: Optional[str] = None, + schema: Optional[str] = None, session_properties: Optional[Dict[str, str]] = None, ): profile: Dict[str, Any] = { @@ -32,6 +33,8 @@ def _build_databricks_cluster_target( } if catalog is not None: profile["catalog"] = catalog + if schema is not None: + profile["schema"] = schema if session_properties is not None: profile["session_properties"] = session_properties return profile @@ -51,6 +54,7 @@ def databricks_uc_cluster_target(): "DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH", os.getenv("DBT_DATABRICKS_HTTP_PATH") ), catalog=os.getenv("DBT_DATABRICKS_UC_INITIAL_CATALOG", "main"), + schema=os.getenv("DBT_DATABRICKS_UC_INITIAL_SCHEMA", "schema"), ) @@ -61,4 +65,5 @@ def databricks_uc_sql_endpoint_target(): os.getenv("DBT_DATABRICKS_HTTP_PATH"), ), catalog=os.getenv("DBT_DATABRICKS_UC_INITIAL_CATALOG", "main"), + schema=os.getenv("DBT_DATABRICKS_UC_INITIAL_SCHEMA", "schema"), ) From 0cf734c8c27a8a8743dfa6d476565cd550cd8409 Mon Sep 17 00:00:00 2001 From: case-k-git Date: Tue, 19 Dec 2023 18:55:08 +0900 Subject: [PATCH 02/16] add DBT_DATABRICKS_UC_INITIAL_SCHEMA option --- test.env.example | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test.env.example b/test.env.example index 9a8c9b924..0b6a24669 100644 --- a/test.env.example +++ b/test.env.example @@ -4,6 +4,9 @@ DBT_DATABRICKS_HOST_NAME= # Server http-path value DBT_DATABRICKS_HTTP_PATH= +# Set the default schema for the `databricks` profile +DBT_DATABRICKS_UC_INITIAL_SCHEMA= + ## Also able to set the http-path values for each profile ## Server http-path value for `databricks_cluster` profile # DBT_DATABRICKS_CLUSTER_HTTP_PATH= @@ -23,4 +26,4 @@ DBT_TEST_USER_3= # Development # The default is INFO. Log levels can be: DEBUG, INFO, WARNING, ERROR, or CRITICAL -# DBT_DATABRICKS_CONNECTOR_LOG_LEVEL=DEBUG \ No newline at end of file +# DBT_DATABRICKS_CONNECTOR_LOG_LEVEL=DEBUG From c4b9f8bcfc76b580798fcfcc7e20fd08d63c137c Mon Sep 17 00:00:00 2001 From: case-k-git Date: Wed, 20 Dec 2023 18:49:28 +0900 Subject: [PATCH 03/16] overwrite_unique_schema --- tests/integration/base.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/integration/base.py b/tests/integration/base.py index 18867cb78..904c370de 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -167,11 +167,8 @@ def selectors_config(self): return None def unique_schema(self): - schema = self.schema - - to_return = "{}_{}".format(self.prefix, schema) - - return to_return.lower() + schema = self.config.credentials.schema + return schema.lower() @property def default_database(self): From 14ca9407bb00ce4cf4de17add4f8cc7ded8cf5af Mon Sep 17 00:00:00 2001 From: case-k-git Date: Fri, 29 Dec 2023 15:54:38 +0900 Subject: [PATCH 04/16] test by profile schema --- tests/integration/base.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/integration/base.py b/tests/integration/base.py index 904c370de..aa8bedaf2 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -165,10 +165,12 @@ def packages_config(self): @property def selectors_config(self): return None - - def unique_schema(self): - schema = self.config.credentials.schema - return schema.lower() + + # pass profile setting schema and test passed schema. Should not be make lower case to test upper case + def unique_schema(self, schema=None): + if schema is None: + return self.config.credentials.schema + return "{}_{}".format(self.prefix, schema) @property def default_database(self): @@ -180,7 +182,7 @@ def alternative_database(self): return None def get_profile(self, adapter_type): - return { + profile = { "config": {"send_anonymous_usage_stats": False}, "test": { "outputs": { @@ -191,6 +193,8 @@ def get_profile(self, adapter_type): "target": "dev", }, } + profile["test"]["outputs"]["dev"]["schema"] = self.unique_schema(schema = profile["test"]["outputs"]["dev"]["schema"]) + return profile def _pick_profile(self): test_name = self.id().split(".")[-1] From cf747c4f62badad105eb38898983695df6ed3cc4 Mon Sep 17 00:00:00 2001 From: case-k-git Date: Fri, 29 Dec 2023 17:19:36 +0900 Subject: [PATCH 05/16] revert unrelated change --- dbt/adapters/databricks/connections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/adapters/databricks/connections.py b/dbt/adapters/databricks/connections.py index 0f10905f8..09292028f 100644 --- a/dbt/adapters/databricks/connections.py +++ b/dbt/adapters/databricks/connections.py @@ -1258,7 +1258,7 @@ def connect() -> DatabricksSQLConnectionWrapper: http_headers=http_headers if http_headers else None, session_configuration=creds.session_properties, catalog=creds.database, - schema=creds.schema, + # schema=creds.schema, # TODO: Explicitly set once DBR 7.3LTS is EOL. _user_agent_entry=user_agent_entry, **connection_parameters, ) From 2d59088130cb985adfa88314918a62314e8fe173 Mon Sep 17 00:00:00 2001 From: case-k-git Date: Tue, 9 Jan 2024 20:55:39 +0900 Subject: [PATCH 06/16] update test schema handling --- tests/integration/base.py | 8 ++++++-- .../test_incremental_on_schema_change.py | 3 --- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/integration/base.py b/tests/integration/base.py index aa8bedaf2..dd58a305a 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -166,8 +166,11 @@ def packages_config(self): def selectors_config(self): return None - # pass profile setting schema and test passed schema. Should not be make lower case to test upper case + # To test schema related test lower or upper, we can chose if we want to use the default schema or the test define schema def unique_schema(self, schema=None): + if hasattr(self, 'schema'): + schema = self.schema + return "{}_{}".format(self.prefix, schema) if schema is None: return self.config.credentials.schema return "{}_{}".format(self.prefix, schema) @@ -193,7 +196,8 @@ def get_profile(self, adapter_type): "target": "dev", }, } - profile["test"]["outputs"]["dev"]["schema"] = self.unique_schema(schema = profile["test"]["outputs"]["dev"]["schema"]) + if 'schema' in profile["test"]["outputs"]["dev"]: + profile["test"]["outputs"]["dev"]["schema"] = self.unique_schema(schema = profile["test"]["outputs"]["dev"]["schema"]) return profile def _pick_profile(self): diff --git a/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py b/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py index b19e8e2db..f461aeba7 100644 --- a/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py +++ b/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py @@ -2,9 +2,6 @@ class TestIncrementalOnSchemaChange(DBTIntegrationTest): - @property - def schema(self): - return "incremental_on_schema_change" @property def models(self): From b7cf8e6d7c037edc253f09aa978e896a9b032448 Mon Sep 17 00:00:00 2001 From: case-k-git Date: Wed, 10 Jan 2024 19:28:35 +0900 Subject: [PATCH 07/16] add non uc test handling --- tests/integration/base.py | 15 ++++++--------- tests/profiles.py | 7 ++++--- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/tests/integration/base.py b/tests/integration/base.py index dd58a305a..2530c04a7 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -166,14 +166,12 @@ def packages_config(self): def selectors_config(self): return None - # To test schema related test lower or upper, we can chose if we want to use the default schema or the test define schema - def unique_schema(self, schema=None): + # To test schema-related aspects, whether lower or upper, we can choose to use either the default schema or the test-defined schema. + def unique_schema(self, default_schema=None): if hasattr(self, 'schema'): - schema = self.schema - return "{}_{}".format(self.prefix, schema) - if schema is None: - return self.config.credentials.schema - return "{}_{}".format(self.prefix, schema) + return "{}_{}".format(self.prefix, self.schema) + self.schema = default_schema + return "{}_{}".format(self.prefix, self.schema) @property def default_database(self): @@ -196,8 +194,7 @@ def get_profile(self, adapter_type): "target": "dev", }, } - if 'schema' in profile["test"]["outputs"]["dev"]: - profile["test"]["outputs"]["dev"]["schema"] = self.unique_schema(schema = profile["test"]["outputs"]["dev"]["schema"]) + profile["test"]["outputs"]["dev"]["schema"] = self.unique_schema(default_schema=profile["test"]["outputs"]["dev"]["schema"]) return profile def _pick_profile(self): diff --git a/tests/profiles.py b/tests/profiles.py index 0d68c1f30..c00cb8985 100644 --- a/tests/profiles.py +++ b/tests/profiles.py @@ -44,7 +44,8 @@ def databricks_cluster_target(): return _build_databricks_cluster_target( http_path=os.getenv( "DBT_DATABRICKS_CLUSTER_HTTP_PATH", os.getenv("DBT_DATABRICKS_HTTP_PATH") - ) + ), + schema=os.getenv("DBT_DATABRICKS_UC_INITIAL_SCHEMA", "default_schema"), ) @@ -54,7 +55,7 @@ def databricks_uc_cluster_target(): "DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH", os.getenv("DBT_DATABRICKS_HTTP_PATH") ), catalog=os.getenv("DBT_DATABRICKS_UC_INITIAL_CATALOG", "main"), - schema=os.getenv("DBT_DATABRICKS_UC_INITIAL_SCHEMA", "schema"), + schema=os.getenv("DBT_DATABRICKS_UC_INITIAL_SCHEMA", "default_schema"), ) @@ -65,5 +66,5 @@ def databricks_uc_sql_endpoint_target(): os.getenv("DBT_DATABRICKS_HTTP_PATH"), ), catalog=os.getenv("DBT_DATABRICKS_UC_INITIAL_CATALOG", "main"), - schema=os.getenv("DBT_DATABRICKS_UC_INITIAL_SCHEMA", "schema"), + schema=os.getenv("DBT_DATABRICKS_UC_INITIAL_SCHEMA", "default_schema"), ) From 38ac511dc1b383429d6a40f452e7c166fa25691b Mon Sep 17 00:00:00 2001 From: case-k-git Date: Thu, 11 Jan 2024 09:13:02 +0900 Subject: [PATCH 08/16] refactor unique_schema function --- tests/integration/base.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/integration/base.py b/tests/integration/base.py index 2530c04a7..d0f058f2e 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -168,9 +168,8 @@ def selectors_config(self): # To test schema-related aspects, whether lower or upper, we can choose to use either the default schema or the test-defined schema. def unique_schema(self, default_schema=None): - if hasattr(self, 'schema'): - return "{}_{}".format(self.prefix, self.schema) - self.schema = default_schema + if not hasattr(self, 'schema'): + self.schema = default_schema return "{}_{}".format(self.prefix, self.schema) @property From 2700be35615a7441ac5adc62151a151a72aa0c28 Mon Sep 17 00:00:00 2001 From: Ben Cassell Date: Wed, 3 Jan 2024 09:24:43 -0800 Subject: [PATCH 09/16] This fixes the integration runner to correctly find workspace_id --- .github/workflows/build_cluster_http_path.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_cluster_http_path.py b/.github/workflows/build_cluster_http_path.py index 7cc0b7314..264018019 100644 --- a/.github/workflows/build_cluster_http_path.py +++ b/.github/workflows/build_cluster_http_path.py @@ -1,13 +1,19 @@ import os +import re -workspace_id = os.getenv("DBT_DATABRICKS_HOST_NAME")[4:18] +workspace_re = re.compile(r"^.*-(\d+)\..*$") +hostname = os.getenv("DBT_DATABRICKS_HOST_NAME", "") +matches = workspace_re.match(hostname) +if matches: + workspace_id = matches.group(1) + print(workspace_id) cluster_id = os.getenv("TEST_PECO_CLUSTER_ID") uc_cluster_id = os.getenv("TEST_PECO_UC_CLUSTER_ID") http_path = f"sql/protocolv1/o/{workspace_id}/{cluster_id}" uc_http_path = f"sql/protocolv1/o/{workspace_id}/{uc_cluster_id}" # https://stackoverflow.com/a/72225291/5093960 -env_file = os.getenv("GITHUB_ENV") +env_file = os.getenv("GITHUB_ENV", "") with open(env_file, "a") as myfile: myfile.write(f"DBT_DATABRICKS_CLUSTER_HTTP_PATH={http_path}\n") myfile.write(f"DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH={uc_http_path}\n") From 4d8ade8212410942feeca9fbf33a66b583c99491 Mon Sep 17 00:00:00 2001 From: Ben Cassell Date: Fri, 5 Jan 2024 13:42:10 -0800 Subject: [PATCH 10/16] fixing python timeout issue --- dbt/adapters/databricks/connections.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbt/adapters/databricks/connections.py b/dbt/adapters/databricks/connections.py index 09292028f..2e429062f 100644 --- a/dbt/adapters/databricks/connections.py +++ b/dbt/adapters/databricks/connections.py @@ -1070,6 +1070,12 @@ def _cleanup_idle_connections(self) -> None: self.close(conn) conn.handle = LazyHandle(self._open2) + def get_thread_connection(self) -> Connection: + if USE_LONG_SESSIONS: + self._cleanup_idle_connections() + + return super().get_thread_connection() + def add_query( self, sql: str, From cb661a1e48e17e127b986e48a4dabc75e8c7983f Mon Sep 17 00:00:00 2001 From: Ben Cassell Date: Fri, 5 Jan 2024 13:47:21 -0800 Subject: [PATCH 11/16] changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bfab26728..dc3a38152 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ ## dbt-databricks 1.7.4 (TBD) +### Fixes + +- Fix for issue where long-running python models led to invalid session errors ([544](https://github.com/databricks/dbt-databricks/pull/544)) + ## dbt-databricks 1.7.3 (Dec 12, 2023) ### Fixes From 0382ac959e72d324aaf15b0b591510417a75c5b1 Mon Sep 17 00:00:00 2001 From: case-k-git Date: Tue, 19 Dec 2023 18:55:08 +0900 Subject: [PATCH 12/16] add DBT_DATABRICKS_UC_INITIAL_SCHEMA option --- test.env.example | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test.env.example b/test.env.example index a24ff7a5c..cd3aef4fc 100644 --- a/test.env.example +++ b/test.env.example @@ -30,3 +30,7 @@ DBT_TEST_USER_3= # The default is INFO. Log levels can be: DEBUG, INFO, WARNING, ERROR, or CRITICAL # DBT_DATABRICKS_CONNECTOR_LOG_LEVEL=DEBUG + + +# The default is INFO. Log levels can be: DEBUG, INFO, WARNING, ERROR, or CRITICAL +# DBT_DATABRICKS_CONNECTOR_LOG_LEVEL=DEBUG From 301d57515ef5a2cd94e727b0d2d75cdddf5e4904 Mon Sep 17 00:00:00 2001 From: case-k-git Date: Wed, 20 Dec 2023 18:49:28 +0900 Subject: [PATCH 13/16] overwrite_unique_schema --- tests/integration/base.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integration/base.py b/tests/integration/base.py index d0f058f2e..d3bdce990 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -165,10 +165,10 @@ def packages_config(self): @property def selectors_config(self): return None - + # To test schema-related aspects, whether lower or upper, we can choose to use either the default schema or the test-defined schema. def unique_schema(self, default_schema=None): - if not hasattr(self, 'schema'): + if not hasattr(self, "schema"): self.schema = default_schema return "{}_{}".format(self.prefix, self.schema) @@ -193,8 +193,6 @@ def get_profile(self, adapter_type): "target": "dev", }, } - profile["test"]["outputs"]["dev"]["schema"] = self.unique_schema(default_schema=profile["test"]["outputs"]["dev"]["schema"]) - return profile def _pick_profile(self): test_name = self.id().split(".")[-1] From 6d4a47b8a9f6166c655e50bdca23b7518506a0d9 Mon Sep 17 00:00:00 2001 From: Ben Cassell Date: Thu, 11 Jan 2024 12:57:48 -0800 Subject: [PATCH 14/16] missing code --- tests/integration/base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/integration/base.py b/tests/integration/base.py index d3bdce990..ef3bc0698 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -194,6 +194,11 @@ def get_profile(self, adapter_type): }, } + profile["test"]["outputs"]["dev"]["schema"] = self.unique_schema( + default_schema=profile["test"]["outputs"]["dev"]["schema"] + ) + return profile + def _pick_profile(self): test_name = self.id().split(".")[-1] return _profile_from_test_name(test_name) From 2404df329974688d4e2233abc5c3a298e5bbc071 Mon Sep 17 00:00:00 2001 From: Ben Cassell Date: Thu, 11 Jan 2024 14:08:28 -0800 Subject: [PATCH 15/16] passing linter --- tests/integration/base.py | 3 ++- .../test_incremental_on_schema_change.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/base.py b/tests/integration/base.py index ef3bc0698..8d27fb5b8 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -166,7 +166,8 @@ def packages_config(self): def selectors_config(self): return None - # To test schema-related aspects, whether lower or upper, we can choose to use either the default schema or the test-defined schema. + # To test schema-related aspects, whether lower or upper, we can choose to use either the + # default schema or the test-defined schema. def unique_schema(self, default_schema=None): if not hasattr(self, "schema"): self.schema = default_schema diff --git a/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py b/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py index f461aeba7..323c9d908 100644 --- a/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py +++ b/tests/integration/incremental_on_schema_change/test_incremental_on_schema_change.py @@ -2,7 +2,6 @@ class TestIncrementalOnSchemaChange(DBTIntegrationTest): - @property def models(self): return "models" From a147b0a8e81d0e76f161cc8bd8713f4a97093eda Mon Sep 17 00:00:00 2001 From: Ben Cassell Date: Thu, 11 Jan 2024 14:16:27 -0800 Subject: [PATCH 16/16] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc3a38152..f1d0df1d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### Fixes - Fix for issue where long-running python models led to invalid session errors ([544](https://github.com/databricks/dbt-databricks/pull/544)) +- Allow schema to be specified in testing (thanks @case-k-git!) ([538](https://github.com/databricks/dbt-databricks/pull/538)) ## dbt-databricks 1.7.3 (Dec 12, 2023)