From 7de2c8670bd677c38f950ab5bc26a79e5036f794 Mon Sep 17 00:00:00 2001 From: werner daehn Date: Wed, 18 Dec 2024 09:45:42 +0100 Subject: [PATCH 1/6] save --- README.md | 57 +- WebContent/WEB-INF/web.xml | 23 +- WebContent/index.html | 9 + WebContent/ui5/Config.controller.js | 31 + WebContent/ui5/Config.html | 35 + WebContent/ui5/Config.view.xml | 38 + WebContent/ui5/Rule.controller.js | 254 +++++ WebContent/ui5/Rule.html | 35 + WebContent/ui5/Rule.view.xml | 178 ++++ WebContent/ui5/Rules.controller.js | 52 + WebContent/ui5/Rules.html | 35 + WebContent/ui5/Rules.view.xml | 28 + WebContent/ui5/Sample.controller.js | 128 +++ WebContent/ui5/Sample.html | 35 + WebContent/ui5/Sample.view.xml | 74 ++ WebContent/ui5/Topics.controller.js | 37 + WebContent/ui5/Topics.html | 35 + WebContent/ui5/Topics.view.xml | 90 ++ .../ui5/controller/Ruleset.controller | 0 .../ui5/fragment/xml/leftmenu.fragment.xml | 9 + WebContent/ui5/lib/.library | 18 + WebContent/ui5/lib/FocusInput.js | 20 + WebContent/ui5/lib/JSONViewer.js | 158 +++ WebContent/ui5/lib/RulesToolPage.js | 59 ++ WebContent/ui5/lib/library.js | 32 + .../ui5/lib/themes/sap_fiori_3/library.css | 45 + .../ui5/view/Ruleset.view | 0 pom.xml | 148 ++- .../io/rtdi/appcontainer/utils/HttpUtil.java | 62 ++ .../utils/SSLHostnameVerifierNoop.java | 11 + .../utils/SSLTrustManagerNoop.java | 21 + .../utils/UsageStatisticSender.java | 62 ++ .../appcontainer/utils/UsageStatistics.java | 126 +++ .../foundation/avro/AvroJexlContext.java | 37 + .../foundation/avro/AvroRuleUtils.java | 148 +++ .../pipeline/foundation/avro/JexlArray.java | 212 ++++ .../foundation/avro/JexlAvroDeserializer.java | 80 ++ .../foundation/avro/JexlGenericData.java | 31 + .../avro/JexlGenericDatumReader.java | 42 + .../pipeline/foundation/avro/JexlRecord.java | 266 +++++ .../JerseyApplicationSettings.java | 50 +- .../bigdata/rulesservice/LoggingUtil.java | 61 ++ .../ObjectMapperContextResolver.java | 27 + .../rulesservice/PropertiesException.java | 71 ++ 
.../RolesAllowedDynamicFeature2.java | 155 +++ .../rtdi/bigdata/rulesservice/RuleStep.java | 76 -- .../bigdata/rulesservice/RulesService.java | 586 +++++++++-- .../rulesservice/RulesServiceFactory.java | 46 - .../rulesservice/RulesServiceProperties.java | 29 - .../bigdata/rulesservice/SchemaRuleSet.java | 202 ---- .../rulesservice/config/RuleFileName.java | 46 + .../rulesservice/config/SampleFileName.java | 19 + .../rulesservice/config/ServiceSettings.java | 69 ++ .../rulesservice/config/SubjectName.java | 21 + .../rulesservice/config/TopicName.java | 13 + .../rulesservice/config/TopicRule.java | 127 +++ .../definition/RuleFileDefinition.java | 288 ++++++ .../rulesservice/definition/RuleStep.java | 62 ++ .../rulesservice/rest/ErrorResponse.java | 30 + .../rulesservice/rest/RulesEndpoint.java | 290 ------ .../bigdata/rulesservice/rest/SampleData.java | 192 ++++ .../rest/SchemaListNameEntity.java | 72 -- .../rulesservice/rest/ServiceConfig.java | 957 ++++++++++++++++++ .../rest/ServletSecurityConstants.java | 8 + .../rulesservice/rest/SuccessResponse.java | 25 + .../bigdata/rulesservice/rules/ArrayRule.java | 90 +- .../bigdata/rulesservice/rules/EmptyRule.java | 57 +- .../rulesservice/rules/GenericRules.java | 49 + .../bigdata/rulesservice/rules/Mapping.java | 60 ++ .../rulesservice/rules/PrimitiveMapping.java | 14 + .../rulesservice/rules/PrimitiveRule.java | 193 ++-- .../rulesservice/rules/RecordRule.java | 145 +-- .../rtdi/bigdata/rulesservice/rules/Rule.java | 149 ++- .../bigdata/rulesservice/rules/RuleUtils.java | 63 ++ .../rulesservice/rules/RuleWithName.java | 6 +- .../bigdata/rulesservice/rules/TestSet.java | 41 +- .../rulesservice/rules/TestSetAll.java | 47 +- .../rulesservice/rules/TestSetFirstFail.java | 56 +- .../rulesservice/rules/TestSetFirstPass.java | 54 +- .../bigdata/rulesservice/rules/UnionRule.java | 118 +++ .../servlet/MicroservicePage.java | 19 - .../servlet/ServiceDetailsPage.java | 19 - ...ector.connectorframework.IConnectorFactory | 1 - 
src/main/resources/log4j2.properties | 28 + src/main/resources/rulesservice.properties | 1 - .../ui5/fragment/xml/RuleFormula.xml | 18 - .../ui5/fragment/xml/SubstitutionFormula.xml | 18 - .../rulesservice/GenerateDataTest.java | 130 +++ .../bigdata/rulesservice/RulesServiceIT.java | 83 -- .../rulesservice/TransformationTest.java | 61 ++ src/test/resources/tmp/Kafka.json | 19 - .../CUSTOMER/inactive/Address validation.json | 25 + .../rules/inactive/sample_order-value.json | 107 ++ .../resources/tmp/services/RulesTest1.json | 1 - .../services/Step1/SalesOrder/SalesOrder.json | 77 -- 95 files changed, 6371 insertions(+), 1661 deletions(-) create mode 100644 WebContent/index.html create mode 100644 WebContent/ui5/Config.controller.js create mode 100644 WebContent/ui5/Config.html create mode 100644 WebContent/ui5/Config.view.xml create mode 100644 WebContent/ui5/Rule.controller.js create mode 100644 WebContent/ui5/Rule.html create mode 100644 WebContent/ui5/Rule.view.xml create mode 100644 WebContent/ui5/Rules.controller.js create mode 100644 WebContent/ui5/Rules.html create mode 100644 WebContent/ui5/Rules.view.xml create mode 100644 WebContent/ui5/Sample.controller.js create mode 100644 WebContent/ui5/Sample.html create mode 100644 WebContent/ui5/Sample.view.xml create mode 100644 WebContent/ui5/Topics.controller.js create mode 100644 WebContent/ui5/Topics.html create mode 100644 WebContent/ui5/Topics.view.xml rename {src/main/resources => WebContent}/ui5/controller/Ruleset.controller (100%) create mode 100644 WebContent/ui5/fragment/xml/leftmenu.fragment.xml create mode 100644 WebContent/ui5/lib/.library create mode 100644 WebContent/ui5/lib/FocusInput.js create mode 100644 WebContent/ui5/lib/JSONViewer.js create mode 100644 WebContent/ui5/lib/RulesToolPage.js create mode 100644 WebContent/ui5/lib/library.js create mode 100644 WebContent/ui5/lib/themes/sap_fiori_3/library.css rename {src/main/resources => WebContent}/ui5/view/Ruleset.view (100%) create mode 100644 
src/main/java/io/rtdi/appcontainer/utils/HttpUtil.java create mode 100644 src/main/java/io/rtdi/appcontainer/utils/SSLHostnameVerifierNoop.java create mode 100644 src/main/java/io/rtdi/appcontainer/utils/SSLTrustManagerNoop.java create mode 100644 src/main/java/io/rtdi/appcontainer/utils/UsageStatisticSender.java create mode 100644 src/main/java/io/rtdi/appcontainer/utils/UsageStatistics.java create mode 100644 src/main/java/io/rtdi/bigdata/connector/pipeline/foundation/avro/AvroJexlContext.java create mode 100644 src/main/java/io/rtdi/bigdata/connector/pipeline/foundation/avro/AvroRuleUtils.java create mode 100644 src/main/java/io/rtdi/bigdata/connector/pipeline/foundation/avro/JexlArray.java create mode 100644 src/main/java/io/rtdi/bigdata/connector/pipeline/foundation/avro/JexlAvroDeserializer.java create mode 100644 src/main/java/io/rtdi/bigdata/connector/pipeline/foundation/avro/JexlGenericData.java create mode 100644 src/main/java/io/rtdi/bigdata/connector/pipeline/foundation/avro/JexlGenericDatumReader.java create mode 100644 src/main/java/io/rtdi/bigdata/connector/pipeline/foundation/avro/JexlRecord.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/LoggingUtil.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/ObjectMapperContextResolver.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/PropertiesException.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/RolesAllowedDynamicFeature2.java delete mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/RuleStep.java delete mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/RulesServiceFactory.java delete mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/RulesServiceProperties.java delete mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/SchemaRuleSet.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/config/RuleFileName.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/config/SampleFileName.java 
create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/config/ServiceSettings.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/config/SubjectName.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/config/TopicName.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/config/TopicRule.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/definition/RuleFileDefinition.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/definition/RuleStep.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/rest/ErrorResponse.java delete mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/rest/RulesEndpoint.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/rest/SampleData.java delete mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/rest/SchemaListNameEntity.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/rest/ServiceConfig.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/rest/ServletSecurityConstants.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/rest/SuccessResponse.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/rules/GenericRules.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/rules/Mapping.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/rules/PrimitiveMapping.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/rules/RuleUtils.java create mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/rules/UnionRule.java delete mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/servlet/MicroservicePage.java delete mode 100644 src/main/java/io/rtdi/bigdata/rulesservice/servlet/ServiceDetailsPage.java delete mode 100644 src/main/resources/META-INF/services/io.rtdi.bigdata.connector.connectorframework.IConnectorFactory create mode 100644 src/main/resources/log4j2.properties delete mode 100644 src/main/resources/rulesservice.properties delete 
mode 100644 src/main/resources/ui5/fragment/xml/RuleFormula.xml delete mode 100644 src/main/resources/ui5/fragment/xml/SubstitutionFormula.xml create mode 100644 src/test/java/io/rtdi/bigdata/rulesservice/GenerateDataTest.java delete mode 100644 src/test/java/io/rtdi/bigdata/rulesservice/RulesServiceIT.java create mode 100644 src/test/java/io/rtdi/bigdata/rulesservice/TransformationTest.java delete mode 100644 src/test/resources/tmp/Kafka.json create mode 100644 src/test/resources/tmp/rules/CUSTOMER/inactive/Address validation.json create mode 100644 src/test/resources/tmp/rules/inactive/sample_order-value.json delete mode 100644 src/test/resources/tmp/services/RulesTest1.json delete mode 100644 src/test/resources/tmp/services/Step1/SalesOrder/SalesOrder.json diff --git a/README.md b/README.md index 7c13166..899f934 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,16 @@ Docker image here: [dockerhub](https://hub.docker.com/r/rtdi/rulesservice) * Operational dashboards using the rule results provide information about the data quality * Different types of rules should be supported, validation rules, cleansing rules, data augmentation, standardization rules,... +## Requirements + +* Payload (value) in Avro Format +* Apache Kafka with the permissions to add a KStream +* Schema Registry + ## Installation and testing -On any computer install the Docker Daemon - if it is not already - and download this docker image with +On any computer install the Docker Daemon - if not already done - and download the docker image with docker pull rtdi/rulesservice @@ -24,7 +30,7 @@ Then start the image via docker run. For a quick test this command is sufficient docker run -d -p 80:8080 --rm --name rulesservice rtdi/rulesservice to expose a webserver at port 80 on the host running the container. Make sure to open the web page via the http prefix, as https needs more configuration. 
-For example [http://localhost:80/](http://localhost:80/) might do the trick of the container is hosted on the same computer. +For example [http://localhost:80/](http://localhost:80/) might do the trick if the container is hosted on the same computer. The default login for this startup method is: **rtdi / rtdi!io** @@ -43,46 +49,45 @@ For proper start commands, especially https and security related, see the [Conne ### Connect the Pipeline to Kafka -The first step is to connect the application to a Kafka server, in this example Confluent Cloud. - - +The first step is to connect the application to a Kafka server and the schema registry. ### Define Services -Each Service is a Kafka KStream, a distributed process listening on a topic and validating the data. Hence the first setting of each service are the input and output topic names to use. - -Within one rule service for each schema multiple steps can be performed, a microservice transformation so to speak. These transformation steps happen within the service. For example the first microservice step might check missing data, the next standardize on the different spellings. The result of this step is then put into a third step, validating if the STATUS is valid and consistent with other data. - - +The rules are defined by subject. To help with the rule formulas, sample data can be gathered from the topics and used when defining the rules. +Once a rule file is complete, it must be activated and associated with an input topic. -Once the structure is saved, by clicking on the individual steps the rules themselves are defined. +### Result -Because a topic can contain data for multiple schemas, the rules are defined for each schema individually. If the message schema has no rules defined, it is passed through unchanged. +The input schema is copied as new schema, which has an additional `_audit` field, containing all audit results. 
+There the overall rule result is stored (did it pass all tests?), a list of all rules executed and their individual results. +Querying this data allows detailed reporting which records were processed by what rule and the results. - + -After a schema has been selected, its structure with all nested elements is shown and rules can be defined for each field. +## Architecture -An example would be a rule on the OrderStatus column, which must contain the values C (..Completed) or N (..New). One way would be a a single rule on the column with a OR condition. The other option would be five rules testing for a single status value only and the test type is "Test until first Passes". +This service runs one KStream application per topic, reading the input topic, applying the rules and sending the result to the defined output topic. +While the key can be of any format, the value must be an Avro record. - +## Rules -### Data content +### Rule types -The result of all rule transformations is stored in the audit array of the record. Querying this data allows detailed reporting which records were processed by what rule and the results. -If the input schema does not have such __audit array, a new version of the schema will be created automatically. +Imagine the input has a field `NAME` which can be null but then we want to replace it with a `?`. Such a null value is not wrong as such but we do want to see it as a warning. - +Translated into the UI, we add a rule to the field and as rule condition to be met the formula `NAME != null`. If that condition returns true, the name has a value other than null, then the rule result is `=pass`. Else its rule result is whatever is set in the UI as `if test failed...`, in our case `=warn`. +The `"?"` is entered in the `...change value to`, to assign that text to the field if the condition is not met. -### Rule types +The UI also shows the node `(more)`, which is the place to enter all rules that do not belong to a single field. 
Such generic rules cannot have a replacement value for obvious reasons. -Rules can be applied on fields only and this field acts as the trigger and is used when providing a substitution value. -For example, the SalesOrderHeader has a field SoldTo at root level, hence all rules will be executed once per message. +No matter if a field rule or a generic rule is entered, the formulas have access to all fields of the message. For example, a rule for the input field `NAME` might either use the formula `NAME != null` or `NAME != null && COMPANY_NAME != null`. A field level rule is not limited to formulas using this field only! -A rule on a field of the SalesItems array will be triggered for each item. Thus the field a rule belongs to controls primarily that. +In case of a nested structure as input, rules can be specified at all levels and are executed for each record in that level. +For example, the `CUSTOMER` schema might have an array of `ADDRESSES`. If a rule is added at the root level, it will be executed only once. If it is inside the `ADDRESSES` array, it will be executed once per address - for each address record in the array. -For each field either a single rule "Test a column" can be defined or a Test-Set with multiple rules. +With above capabilities all rules can be defined. The more conditions the more complex the formulas get, which is not good. +For that reason there are more rule types, the Test-Set rule to specify multiple individual rules - Test for all conditions: Every single rule is applied, thus acting as a AND combination of rules. This is convenient for cases where a field needs to be checked for different conditions, e.g. the OrderDate has to be within three months or raise a warning and the OrderDate cannot be later than the expected delivery date. The rule set will return the lowest individual rule result. If all passed the result is pass. If at least one said warning, the result is a warning. And if one is failed, the result is failed. 
- Test until one failed: Each rule will be tested and stopped once the first rule violation is found. It is a AND combination of the rules as well but while in above case all rules are tested, here all other rules are not added to the test result. The rule set will return failed when at least one is failed and pass only if all rules passed. @@ -91,7 +96,7 @@ For each field either a single rule "Test a column" can be defined or a Test-Set For each individual rule the rule result can be specified if the condition is met. This way the rule can specify the severity, e.g. a SoldTo == null shall be treated as failed, SoldTo.length() < 5 as well but SoldTo.length() < 10 shall be a warning only. Other tests might not impact the rule result at all, they return passed always. For those the audit array will show that the rule has been tested but the data is of no harm. For example in a gender column the test could be if the value is either M or F and in all other cases a substitution value of X is used. As the gender does return the values M,F or X only, it is to be considered valid. -A more extreme case would be to assign a column with a fixed value. In that case the condition is the forumla "true", the rules result will be pass and the substitution formula the constant to assign. +A more extreme case would be to assign a column with a fixed value. In that case the condition is the formula "true", the rules result will be pass and the substitution formula the constant to assign. 
### Rule syntax diff --git a/WebContent/WEB-INF/web.xml b/WebContent/WEB-INF/web.xml index 79e59ba..dbfbfa3 100644 --- a/WebContent/WEB-INF/web.xml +++ b/WebContent/WEB-INF/web.xml @@ -4,11 +4,6 @@ RulesService index.html - index.htm - index.jsp - default.html - default.htm - default.jsp jersey-servlet @@ -18,6 +13,7 @@ io.rtdi.bigdata.rulesservice.JerseyApplicationSettings 1 + true jersey-servlet @@ -29,21 +25,22 @@ /ui5/* - connectorview + rulesview + BASIC + + - connectorconfig - connectorview - connectorschema - connectoroperator + rulesview + rulesadmin diff --git a/WebContent/index.html b/WebContent/index.html new file mode 100644 index 0000000..5009a02 --- /dev/null +++ b/WebContent/index.html @@ -0,0 +1,9 @@ + + + + +

Rules Service

+

login

+ + + \ No newline at end of file diff --git a/WebContent/ui5/Config.controller.js b/WebContent/ui5/Config.controller.js new file mode 100644 index 0000000..b62110e --- /dev/null +++ b/WebContent/ui5/Config.controller.js @@ -0,0 +1,31 @@ +/* UI5 controller for the Config view: onInit loads the configuration from ../rest/config into the view model, onSave posts the edited model back to the same endpoint. */sap.ui.define(["sap/ui/core/mvc/Controller"], +function(Controller) {"use strict"; +return Controller.extend("io.rtdi.bigdata.rulesservice.Config", { + /* Creates a JSONModel, registers a request-failed handler that shows the responseText in a MessageToast, loads ../rest/config and sets the model on the view. */onInit : function() { + var model = new sap.ui.model.json.JSONModel(); + model.attachRequestFailed(function(event) { + var text = event.getParameter("responseText"); + sap.m.MessageToast.show("Reading config failed: " + text); + }); + model.loadData("../rest/config"); + this.getView().setModel(model); + }, + /* Serializes the root of the view model to JSON and sends it through a second JSONModel to ../rest/config; a failed request is reported in a MessageToast, and the content of the response model is logged to the console when the request completes. */onSave : function(event) { + var model = this.getView().getModel(); + var post = new sap.ui.model.json.JSONModel(); + post.attachRequestFailed(function(event) { + var text = event.getParameter("responseText"); + sap.m.MessageToast.show("Save failed: " + text); + }); + post.attachRequestCompleted(function() { + console.log(post.getProperty("/")); + }); + var json = JSON.stringify(model.getProperty("/")); + var headers = { + "Content-Type": "application/json;charset=utf-8" + } + /* NOTE(review): positional arguments presumably map to url, parameters, async = true, type = POST, merge = false, cache = true, headers, following the UI5 JSONModel.loadData signature - TODO confirm against the UI5 version in use. */post.loadData("../rest/config", json, true, "POST", false, true, headers); + }, +}); +}); + diff --git a/WebContent/ui5/Config.html b/WebContent/ui5/Config.html new file mode 100644 index 0000000..b762884 --- /dev/null +++ b/WebContent/ui5/Config.html @@ -0,0 +1,35 @@ + + + + + + Connector + + + + + + +

Loading OpenUI5 from here

+ + + diff --git a/WebContent/ui5/Config.view.xml b/WebContent/ui5/Config.view.xml new file mode 100644 index 0000000..90314ec --- /dev/null +++ b/WebContent/ui5/Config.view.xml @@ -0,0 +1,38 @@ + + + + + + +