diff --git a/config/develop/namespaced/glue-workflow.yaml b/config/develop/namespaced/glue-workflow.yaml
index 6709e61..d488f24 100644
--- a/config/develop/namespaced/glue-workflow.yaml
+++ b/config/develop/namespaced/glue-workflow.yaml
@@ -26,3 +26,4 @@ stack_tags:
   {{ stack_group_config.default_stack_tags }}
 sceptre_user_data:
   dataset_schemas: !file src/glue/resources/table_columns.yaml
+  data_values_expectations: !file src/glue/resources/data_values_expectations.json
diff --git a/templates/glue-workflow.j2 b/templates/glue-workflow.j2
index 1d661f1..f3beb62 100644
--- a/templates/glue-workflow.j2
+++ b/templates/glue-workflow.j2
@@ -308,34 +308,35 @@ Resources:
       StartOnCreation: true
       WorkflowName: !Ref JsonToParquetWorkflow
 
-  GreatExpectationsParquetTrigger:
+
+  # One Great Expectations trigger is emitted per dataset whose lower-cased
+  # data type appears in sceptre_user_data.data_values_expectations.
+  {% for dataset in datasets if dataset["data_type"].lower() in sceptre_user_data.data_values_expectations %}
+  {{ dataset['stackname_prefix'] }}GreatExpectationsParquetTrigger:
     Type: AWS::Glue::Trigger
     Properties:
-      Name: !Sub "${Namespace}-GreatExpectationsParquetTrigger"
+      Name: !Sub "${Namespace}-{{ dataset['stackname_prefix'] }}GreatExpectationsParquetTrigger"
       Actions:
-        {% for dataset in datasets %}
         - JobName: !Sub ${Namespace}-{{ dataset["stackname_prefix"] }}-GreatExpectationsParquetJob
           Arguments:
-            "--data-type": {{ "{}".format(dataset["data_type"].lower()) }}
+            "--data-type": "{{ dataset['data_type'].lower() }}"
             "--namespace": !Ref Namespace
             "--cfn-bucket": !Ref CloudformationBucketName
             "--parquet-bucket": !Ref ParquetBucketName
             "--shareable-artifacts-bucket": !Ref ShareableArtifactsBucketName
             "--expectation-suite-key-prefix": !Sub "${Namespace}/src/glue/resources/data_values_expectations.json"
             "--additional-python-modules": "great_expectations~=0.18,urllib3<2"
-        {% endfor %}
-      Description: This trigger runs the great expectation parquet jobs after completion of all JSON to Parquet jobs
+      Description: This trigger runs the great expectation parquet job for this data type after completion of the JSON to Parquet job for this data type
       Type: CONDITIONAL
       Predicate:
         Conditions:
-          {% for dataset in datasets if "Garmin" in dataset["data_type"] %}
-          - JobName: !Sub "${Namespace}-{{ dataset["stackname_prefix"] }}-Job"
+          - JobName: !Sub "${Namespace}-{{ dataset['stackname_prefix'] }}-Job"
             State: SUCCEEDED
             LogicalOperator: EQUALS
-          {% endfor %}
         Logical: AND
       StartOnCreation: true
       WorkflowName: !Ref JsonToParquetWorkflow
+  {% endfor %}
 
 Outputs:
 