From c1edbd6bc92bbdd86d44cdd1193eeb9c616f48dd Mon Sep 17 00:00:00 2001 From: Brock Anderson Date: Wed, 20 Nov 2024 12:39:57 -0800 Subject: [PATCH] feat: GEO-1166 - added additional input validation for report rich text input (#848) --- backend/package-lock.json | 122 ++++++++++++++++++ backend/package.json | 1 + backend/src/config/index.ts | 6 + .../src/v1/services/validate-service.spec.ts | 57 ++++++++ backend/src/v1/services/validate-service.ts | 65 ++++++++++ 5 files changed, 251 insertions(+) diff --git a/backend/package-lock.json b/backend/package-lock.json index 66cf1fa11..166cd8011 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -41,6 +41,7 @@ "morgan": "^1.10.0", "nconf": "^0.12.1", "nocache": "^4.0.0", + "node-html-parser": "^6.1.13", "papaparse": "^5.4.1", "passport": "^0.7.0", "passport-jwt": "^4.0.1", @@ -4559,6 +4560,11 @@ "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", "license": "MIT" }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==" + }, "node_modules/bowser": { "version": "2.11.0", "resolved": "https://registry.npmjs.org/bowser/-/bowser-2.11.0.tgz", @@ -5293,6 +5299,32 @@ "node": ">= 8" } }, + "node_modules/css-select": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz", + "integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", + 
"integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/csv-parse": { "version": "5.5.6", "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.5.6.tgz", @@ -5457,6 +5489,57 @@ "node": ">=6.0.0" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ] + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz", + "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": 
"https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/dotenv": { "version": "16.4.5", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", @@ -5563,6 +5646,17 @@ "node": ">=10.13.0" } }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/environment": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/environment/-/environment-1.1.0.tgz", @@ -7009,6 +7103,14 @@ "node": ">= 0.4" } }, + "node_modules/he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "bin": { + "he": "bin/he" + } + }, "node_modules/helmet": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/helmet/-/helmet-8.0.0.tgz", @@ -9141,6 +9243,15 @@ "node": ">=16.0.0" } }, + "node_modules/node-html-parser": { + "version": "6.1.13", + "resolved": "https://registry.npmjs.org/node-html-parser/-/node-html-parser-6.1.13.tgz", + "integrity": "sha512-qIsTMOY4C/dAa5Q5vsobRpOOvPfC4pB61UVW2uSwZNUp0QU/jCekTal1vMmbO0DgdHeLUJpv/ARmDqErVxA3Sg==", + "dependencies": { + "css-select": "^5.1.0", + "he": "1.2.0" + } + }, "node_modules/node-int64": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz", @@ -9261,6 +9372,17 @@ "node": ">=8" } }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": 
"https://github.com/fb55/nth-check?sponsor=1" + } + }, "node_modules/oauth": { "version": "0.9.15", "resolved": "https://registry.npmjs.org/oauth/-/oauth-0.9.15.tgz", diff --git a/backend/package.json b/backend/package.json index b496183b7..c46fa403f 100644 --- a/backend/package.json +++ b/backend/package.json @@ -36,6 +36,7 @@ "morgan": "^1.10.0", "nconf": "^0.12.1", "nocache": "^4.0.0", + "node-html-parser": "^6.1.13", "papaparse": "^5.4.1", "passport": "^0.7.0", "passport-jwt": "^4.0.1", diff --git a/backend/src/config/index.ts b/backend/src/config/index.ts index 0f93d6689..886e3b1ae 100644 --- a/backend/src/config/index.ts +++ b/backend/src/config/index.ts @@ -76,6 +76,12 @@ config.defaults({ retries: { minTimeout: 1000, }, + reportRichText: { + maxParagraphs: + parseInt(process.env.REPORT_RICH_TEXT_MAX_PARAGRAPHS) || 100, + maxItemsPerList: + parseInt(process.env.REPORT_RICH_TEXT_MAX_ITEMS_PER_LIST) || 30, + }, }, oidc: { adminKeycloakUrl: process.env.ADMIN_KEYCLOAK_URL, diff --git a/backend/src/v1/services/validate-service.spec.ts b/backend/src/v1/services/validate-service.spec.ts index b020dd6ad..a4e1cf8d3 100644 --- a/backend/src/v1/services/validate-service.spec.ts +++ b/backend/src/v1/services/validate-service.spec.ts @@ -1046,6 +1046,63 @@ describe('validate-service', () => { }); describe('validate-service-private', () => { + describe('validateRichText', () => { + describe('if rich text is valid', () => { + it('returns an empty list', () => { + const richText = ` +

Some text

: +
    +
  1. Item 1
  2. +
  3. Item 2
  4. +
+

More text

. + `; + const fieldName = 'Employer Statement'; + const errors = validateServicePrivate.validateRichText( + richText, + fieldName, + ); + expect(errors).toStrictEqual([]); + }); + }); + describe('if rich text has too many paragraphs', () => { + it('returns an error', () => { + const maxParagraphs = config.get('server:reportRichText:maxParagraphs'); + const richText = '

'.repeat(maxParagraphs + 1); + const fieldName = 'Employer Statement'; + const errors = validateServicePrivate.validateRichText( + richText, + fieldName, + ); + expect(errors.length).toBeGreaterThan(0); + const hasParagraphBreakError = doesAnyStringContainAll(errors, [ + fieldName, + 'paragraph breaks', + ]); + expect(hasParagraphBreakError).toBeTruthy(); + }); + }); + describe('if rich text has a list with too many bullet points', () => { + it('returns an error', () => { + const maxItemsPerList = config.get( + 'server:reportRichText:maxItemsPerList', + ); + const tooManyListItems = '
  • '.repeat(maxItemsPerList + 1); + const richText = `
      ${tooManyListItems}
    `; + const fieldName = 'Employer Statement'; + const errors = validateServicePrivate.validateRichText( + richText, + fieldName, + ); + expect(errors.length).toBeGreaterThan(0); + const hasParagraphBreakError = doesAnyStringContainAll(errors, [ + fieldName, + 'list with more than the allowable number of items', + ]); + expect(hasParagraphBreakError).toBeTruthy(); + }); + }); + }); describe('validateOvertimePayAndHours', () => { describe("if Overtime Pay is specified, but Overtime Hours isn't", () => { it('returns an error', () => { diff --git a/backend/src/v1/services/validate-service.ts b/backend/src/v1/services/validate-service.ts index ad551a197..a357f5e0b 100644 --- a/backend/src/v1/services/validate-service.ts +++ b/backend/src/v1/services/validate-service.ts @@ -1,4 +1,5 @@ import { LocalDate, TemporalAdjusters } from '@js-joda/core'; +import { parse as htmlParse } from 'node-html-parser'; import { config } from '../../config'; import { JSON_REPORT_DATE_FORMAT } from '../../constants'; import { ISubmission } from './file-upload-service'; @@ -110,6 +111,20 @@ const validateService = { `Start date and end date must always be 12 months apart.`, ); } + const employerStatementErrors = validateServicePrivate.validateRichText( + submission.comments, + 'Employer Statement', + ); + if (employerStatementErrors?.length) { + bodyErrors.push(...employerStatementErrors); + } + const dataConstraintsErrors = validateServicePrivate.validateRichText( + submission.dataConstraints, + 'Data Constraints', + ); + if (dataConstraintsErrors?.length) { + bodyErrors.push(...dataConstraintsErrors); + } const validReportingYears = this.getValidReportingYears(); if (!validReportingYears.includes(submission.reportingYear)) { @@ -406,6 +421,56 @@ const validateService = { }; export const validateServicePrivate = { + /** + * Validates that the given rich text meets the following requirements: + * - The content is not subdivided into more than the allowable number of paragraphs + * - 
If any lists exist, they shouldn't have more bullet points than the allowable amount. + * Returns a list of strings containing any validation error messages. If no validation + * errors were found, returns an empty list. + */ + validateRichText(richText: string, fieldName: string): string[] { + const errorMsgs = []; + + const listTypes = ['ol', 'ul']; + + if (richText) { + try { + const result = htmlParse(richText); + const numParagraphs = result.childNodes.length; + + // Check that there are not too many paragraph breaks + // (because it is too resource-intensive for the doc-gen-service to split + // such content into multiple pages) + if (numParagraphs > config.get('server:reportRichText:maxParagraphs')) { + errorMsgs.push( + `'${fieldName}' contains ${numParagraphs} paragraph breaks which exceeds the limit of ${config.get('server:reportRichText:maxParagraphs')}.`, + ); + } + + // Check that lists don't have too many bullet points. + // (because it would add complexity to the doc-gen-service to split long + // lists at page boundaries.) + result.childNodes.forEach((node) => { + if (listTypes.indexOf(node.rawTagName.toLowerCase()) >= 0) { + if ( + node.childNodes.length > + config.get('server:reportRichText:maxItemsPerList') + ) { + errorMsgs.push( + `'${fieldName}' contains a list with more than the allowable number of items (${config.get('server:reportRichText:maxItemsPerList')}).`, + ); + } + } + }); + } catch (e) { + //if parsing the HTML failed, return a not-very-specific error message + errorMsgs.push(`'${fieldName}' is not valid`); + } + } + + return errorMsgs; + }, + /* Performs partial validation of the given record. Only considers values of the Overtime Pay and Overtime Hours fields.