From 902ea04a115e98788d2544c1f983c241c11915f4 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Wed, 26 Jun 2024 13:44:34 -0700 Subject: [PATCH] Add SQL deployment scripts for Snowflake object hierarchy --- snowflake/objects/databases/deploy.sql | 17 ++++++++++++ .../objects/databases/recover/deploy.sql | 10 +++++++ .../databases/recover/schemas/deploy.sql | 6 +++++ .../recover/schemas/parquet/deploy.sql | 26 +++++++++++++++++++ .../schemas/parquet/file_format/deploy.sql | 8 ++++++ .../parquet/file_format/parquet_format.sql | 7 +++++ .../recover/schemas/parquet/stages/deploy.sql | 9 +++++++ .../schemas/parquet/stages/parquet_s3.sql | 6 +++++ .../recover/schemas/parquet/tables/deploy.sql | 14 ++++++++++ .../parquet/tables/enrolled_participants.sql | 2 +- 10 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 snowflake/objects/databases/deploy.sql create mode 100644 snowflake/objects/databases/recover/deploy.sql create mode 100644 snowflake/objects/databases/recover/schemas/deploy.sql create mode 100644 snowflake/objects/databases/recover/schemas/parquet/deploy.sql create mode 100644 snowflake/objects/databases/recover/schemas/parquet/file_format/deploy.sql create mode 100644 snowflake/objects/databases/recover/schemas/parquet/file_format/parquet_format.sql create mode 100644 snowflake/objects/databases/recover/schemas/parquet/stages/deploy.sql create mode 100644 snowflake/objects/databases/recover/schemas/parquet/stages/parquet_s3.sql create mode 100644 snowflake/objects/databases/recover/schemas/parquet/tables/deploy.sql diff --git a/snowflake/objects/databases/deploy.sql b/snowflake/objects/databases/deploy.sql new file mode 100644 index 00000000..91395615 --- /dev/null +++ b/snowflake/objects/databases/deploy.sql @@ -0,0 +1,17 @@ +/* + The current maximum allowed execution depth of EXECUTE IMMEDIATE FROM + statements is 5. 
Unfortunately, that makes a call stack which looks like: + + depth + 0 deploy.sql => + 1 databases/deploy.sql => + 2 databases/recover/deploy.sql => + 3 databases/recover/schemas/deploy.sql => + 4 databases/recover/schemas/parquet/deploy.sql => + 5 databases/recover/schemas/parquet/tables/deploy.sql => + 6 databases/recover/schemas/parquet/tables/enrolled_participants.sql + + not possible. To circumvent this issue, we omit the highest level of + abstraction (databases/deploy.sql) and instead EXECUTE IMMEDIATE FROM + database deployments individually in the primary deployment script (deploy.sql) +*/ diff --git a/snowflake/objects/databases/recover/deploy.sql b/snowflake/objects/databases/recover/deploy.sql new file mode 100644 index 00000000..892fb2ed --- /dev/null +++ b/snowflake/objects/databases/recover/deploy.sql @@ -0,0 +1,10 @@ +/* + Create the recover_<environment> database (if it doesn't yet exist), make it the + current database, and run ./schemas/deploy.sql to deploy all child objects. +*/ +CREATE DATABASE IF NOT EXISTS recover_{{ environment }}; +USE DATABASE recover_{{ environment }}; + +EXECUTE IMMEDIATE + FROM './schemas/deploy.sql' + USING (environment => '{{ environment }}'); diff --git a/snowflake/objects/databases/recover/schemas/deploy.sql b/snowflake/objects/databases/recover/schemas/deploy.sql new file mode 100644 index 00000000..c73c8abb --- /dev/null +++ b/snowflake/objects/databases/recover/schemas/deploy.sql @@ -0,0 +1,6 @@ +/* + Deploy schemas and their child objects: runs ./parquet/deploy.sql, forwarding `environment`. +*/ +EXECUTE IMMEDIATE + FROM './parquet/deploy.sql' + USING (environment => '{{ environment }}'); diff --git a/snowflake/objects/databases/recover/schemas/parquet/deploy.sql b/snowflake/objects/databases/recover/schemas/parquet/deploy.sql new file mode 100644 index 00000000..093beab6 --- /dev/null +++ b/snowflake/objects/databases/recover/schemas/parquet/deploy.sql @@ -0,0 +1,26 @@ +/* + Create a parquet schema (if it doesn't yet exist) and deploy all child objects. 
+*/ +CREATE SCHEMA IF NOT EXISTS parquet; +USE SCHEMA parquet; + +SET parquet_file_format_name = 'parquet_format'; /* passed to the file_format and tables deployments below */ +SET parquet_stage_name = 'parquet_s3'; /* passed to the stages and tables deployments below */ + +EXECUTE IMMEDIATE + FROM './file_format/deploy.sql' + USING ( + parquet_file_format_name => $parquet_file_format_name + ); +EXECUTE IMMEDIATE + FROM './stages/deploy.sql' + USING ( + environment => '{{ environment }}', + parquet_stage_name => $parquet_stage_name + ); +EXECUTE IMMEDIATE + FROM './tables/deploy.sql' + USING ( + parquet_file_format_name => $parquet_file_format_name, + parquet_stage_name => $parquet_stage_name + ); diff --git a/snowflake/objects/databases/recover/schemas/parquet/file_format/deploy.sql b/snowflake/objects/databases/recover/schemas/parquet/file_format/deploy.sql new file mode 100644 index 00000000..9df36a2e --- /dev/null +++ b/snowflake/objects/databases/recover/schemas/parquet/file_format/deploy.sql @@ -0,0 +1,8 @@ +/* + Deploy all file formats: runs ./parquet_format.sql, forwarding `parquet_file_format_name`. +*/ +EXECUTE IMMEDIATE + FROM './parquet_format.sql' + USING ( + parquet_file_format_name => '{{ parquet_file_format_name }}' + ); diff --git a/snowflake/objects/databases/recover/schemas/parquet/file_format/parquet_format.sql b/snowflake/objects/databases/recover/schemas/parquet/file_format/parquet_format.sql new file mode 100644 index 00000000..e61f943e --- /dev/null +++ b/snowflake/objects/databases/recover/schemas/parquet/file_format/parquet_format.sql @@ -0,0 +1,7 @@ +/* + Create (or replace) the Parquet file format under the name given by the parquet_file_format_name argument +*/ +CREATE OR REPLACE FILE FORMAT {{ parquet_file_format_name }} + TYPE = PARQUET + COMPRESSION = AUTO + USE_VECTORIZED_SCANNER = TRUE; diff --git a/snowflake/objects/databases/recover/schemas/parquet/stages/deploy.sql b/snowflake/objects/databases/recover/schemas/parquet/stages/deploy.sql new file mode 100644 index 00000000..40c00ed7 --- /dev/null +++ b/snowflake/objects/databases/recover/schemas/parquet/stages/deploy.sql @@ -0,0 +1,9 @@ +/* + Deploy all stages under the `parquet` schema. 
+*/ +EXECUTE IMMEDIATE + FROM './parquet_s3.sql' + USING ( + environment => '{{ environment }}', + parquet_stage_name => '{{ parquet_stage_name }}' + ); diff --git a/snowflake/objects/databases/recover/schemas/parquet/stages/parquet_s3.sql b/snowflake/objects/databases/recover/schemas/parquet/stages/parquet_s3.sql new file mode 100644 index 00000000..b6567e51 --- /dev/null +++ b/snowflake/objects/databases/recover/schemas/parquet/stages/parquet_s3.sql @@ -0,0 +1,6 @@ +/* + Create an external stage (named by the parquet_stage_name argument) over the Parquet data in S3 +*/ +CREATE OR REPLACE STAGE {{ parquet_stage_name }} + URL = 's3://recover-processed-data/{{ environment }}/parquet/' + STORAGE_INTEGRATION = recover_prod_s3; diff --git a/snowflake/objects/databases/recover/schemas/parquet/tables/deploy.sql b/snowflake/objects/databases/recover/schemas/parquet/tables/deploy.sql new file mode 100644 index 00000000..0e00dd62 --- /dev/null +++ b/snowflake/objects/databases/recover/schemas/parquet/tables/deploy.sql @@ -0,0 +1,14 @@ +/* + CREATE OR ALTER all tables; columns are inferred from the staged Parquet files via INFER_SCHEMA +*/ +CREATE OR ALTER TABLE enrolledparticipants + USING TEMPLATE ( + SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*)) + WITHIN GROUP(ORDER BY order_id) + FROM TABLE( + INFER_SCHEMA( + LOCATION => '@{{ parquet_stage_name }}/dataset_enrolledparticipants', + FILE_FORMAT => '{{ parquet_file_format_name }}' + ) + ) + ); diff --git a/snowflake/objects/databases/recover/schemas/parquet/tables/enrolled_participants.sql b/snowflake/objects/databases/recover/schemas/parquet/tables/enrolled_participants.sql index e4176bff..ad49716b 100644 --- a/snowflake/objects/databases/recover/schemas/parquet/tables/enrolled_participants.sql +++ b/snowflake/objects/databases/recover/schemas/parquet/tables/enrolled_participants.sql @@ -1,4 +1,4 @@ -create or alter TABLE enrolled_participants_empty ( +CREATE OR ALTER TABLE enrolled_participants ( "id" NUMBER(38,0), "index" NUMBER(38,0), "GlobalKey" VARCHAR(16777216),