From 059a09e597bdda7b601b3a3110627d35a997028d Mon Sep 17 00:00:00 2001 From: Nick Walters Date: Fri, 21 Oct 2022 16:07:07 +0100 Subject: [PATCH] Change URL testing (#67) --- Gemfile | 9 +++- Gemfile.lock | 25 +++++----- README.md | 64 ++++++++++++++++++++------ scripts/check-url-links.sh | 13 ++++++ scripts/compile-and-create-artifact.sh | 45 ------------------ scripts/deploy.sh | 17 +++++++ 6 files changed, 100 insertions(+), 73 deletions(-) create mode 100755 scripts/check-url-links.sh delete mode 100755 scripts/compile-and-create-artifact.sh create mode 100755 scripts/deploy.sh diff --git a/Gemfile b/Gemfile index 25f2c9b..a0dd16e 100644 --- a/Gemfile +++ b/Gemfile @@ -1,6 +1,13 @@ source "https://rubygems.org" +# Fixed as Date exception occurs in govuk_tech_docs for review_date +# when use ruby 3.x ruby "2.7.6" gem "govuk_tech_docs" -gem "html-proofer" \ No newline at end of file + +# Fixed as v5.x requires ruby 3.x +gem "html-proofer", "4.4.3" + +# Fixed as v6.x has an issue when run Middleman build command +gem "haml", "5.2.2" diff --git a/Gemfile.lock b/Gemfile.lock index 0ae7a53..edbbaa5 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ GEM remote: https://rubygems.org/ specs: - activesupport (6.1.6) + activesupport (6.1.7) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 1.6, < 2) minitest (>= 5.1) @@ -33,7 +33,7 @@ GEM sass (>= 3.2, < 3.5) concurrent-ruby (1.1.10) contracts (0.13.0) - dotenv (2.7.6) + dotenv (2.8.1) em-websocket (0.5.3) eventmachine (>= 0.12.9) http_parser.rb (~> 0) @@ -137,9 +137,9 @@ GEM middleman-core (>= 3.2) rouge (~> 3.2) mini_portile2 (2.8.0) - minitest (5.15.0) + minitest (5.16.3) multi_json (1.15.0) - nokogiri (1.13.8) + nokogiri (1.13.9) mini_portile2 (~> 2.8.0) racc (~> 1.4) openapi3_parser (0.9.2) @@ -153,31 +153,31 @@ GEM parslet (2.0.0) public_suffix (5.0.0) racc (1.6.0) - rack (2.2.3.1) + rack (2.2.4) rack-livereload (0.3.17) rack rainbow (3.1.1) - rb-fsevent (0.11.1) + rb-fsevent (0.11.2) rb-inotify (0.10.1) ffi (~> 
1.0) redcarpet (3.5.1) rexml (3.2.5) - rouge (3.28.0) + rouge (3.30.0) sass (3.4.25) sassc (2.4.0) ffi (~> 1.9) servolux (0.13.0) - sprockets (4.0.3) + sprockets (4.1.1) concurrent-ruby (~> 1.0) rack (> 1, < 3) temple (0.8.2) thor (1.2.1) - tilt (2.0.10) + tilt (2.0.11) toml (0.3.0) parslet (>= 1.8.0, < 3.0.0) typhoeus (1.4.0) ethon (>= 0.9.0) - tzinfo (2.0.4) + tzinfo (2.0.5) concurrent-ruby (~> 1.0) uglifier (3.2.0) execjs (>= 0.3.0, < 3) @@ -190,10 +190,11 @@ PLATFORMS DEPENDENCIES govuk_tech_docs - html-proofer + haml (= 5.2.2) + html-proofer (= 4.4.3) RUBY VERSION ruby 2.7.6 BUNDLED WITH - 2.1.4 + 2.3.23 diff --git a/README.md b/README.md index 2b11852..b653e74 100644 --- a/README.md +++ b/README.md @@ -4,20 +4,25 @@ This repository creates a Docker image and publishes the image to DockerHub. -The Docker image is pulled by other repository CI/CD and combined with their source code to create documentation websites that meet government technical documents and then push the data to GH Pages. +The Docker image is pulled by other repository CI/CD, uses the container with their source code and call the scripts with the container to create documentation websites that meet government technical documents. The data created, ie html website, is then pushed to GH Pages by the repository CI/CD. The contents of the Docker image will compile embedded ruby and markdown files, ie .html.md.erb, into HTML and assets using the GOV.UK [Technical Documentation Template](https://tdt-documentation.london.cloudapps.digital/) / [Source code](https://github.com/alphagov/tech-docs-template). 
-There are two scripts within the Docker image that both use the middleman gem: +There are three scripts within the Docker image that use the Middleman gem to either build or serve the data: - [preview.sh](scripts/preview.sh) - serve the compiled HTML and assets on a localhost port - useful for previewing the site locally -- [compile-and-create-artifact.sh](scripts/compile-and-create-artifact.sh) - compiles the source code and places the HTML into the /docs folder, tests the links using htmlproofer and and creates a .tar artifact file that other GH Actions will serve to GitHub Pages. +- [deploy.sh](scripts/deploy.sh) - compiles the source code, places the HTML into the ./docs folder, tests the **internal links only** using htmlproofer and creates a .tar artifact file that other GH Actions will serve to GitHub Pages. +- [check-url-links.sh](scripts/check-url-links.sh) - compiles the source code, places the HTML into the ./docs folder, and tests **internal and external URL links** using htmlproofer. This image is used by the [MoJ Template Documentation Site](https://github.com/ministryofjustice/template-documentation-site) repository for MOJ technical documentation that gets published to GitHub Pages. ## Breaking Change in v3 -The inputs to the htmlproofer tool have changed so that the URL checks are stricter than before. It will not check external URLs that start with https://github.com/ministryofjustice ie MoJ GH Org URLs as the tool will return a failure when testing URLs to internal and private repositories. These MoJ GH Org external URLs will have to be manually checked. The tool will fail a URL test when the specified URL is behind a login. The tool will still check that internal links ie [link](hosting.html) within its own repository are still valid. +The scripts in the Docker container have changed. + +`scripts/deploy.sh` is now used to check internal links only during the deploy stage. See `.github/workflows/publish-gh-pages.yml` below. 
+ +[Optional]: Use the `scripts/check-url-links.sh` to test internal and external URLs, it may produce false errors for valid working URLs. Add the `.github/workflows/check-links.yml` below to run the check when the PR is created. The false errors can be ignored. ## Breaking Change in v2 @@ -25,7 +30,7 @@ If you have a branch called gh-pages already rename it to gh-pages-old. In repos ## How to use tool in GH Action -Example of using tech-docs-github-pages-compiler. Add the following code to .github/workflows/publish-gh-pages.yml in your repository. +Example of using tech-docs-github-pages-publisher. Add the following code to `.github/workflows/publish-gh-pages.yml` in your repository. ``` name: Publish gh-pages @@ -58,10 +63,9 @@ jobs: - name: Checkout uses: actions/checkout@v3 - name: Compile Markdown to HTML and create artifact - run: | - /scripts/compile-and-create-artifact.sh + run: /scripts/deploy.sh - name: Upload artifact to be published - uses: actions/upload-artifact@main + uses: actions/upload-artifact@v3 with: name: github-pages path: artifact.tar @@ -78,14 +82,36 @@ jobs: uses: actions/configure-pages@v2 - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@main + uses: actions/deploy-pages@v1 +``` + +Create a `.github/workflows/check-links.yml` file that uses `scripts/check-url-links.sh`. + +``` +name: Check for broken links + +on: + workflow_dispatch: + pull_request: + paths: + - "source/**" + +jobs: + check_links: + runs-on: ubuntu-latest + container: + image: ministryofjustice/tech-docs-github-pages-publisher:v3 + steps: + - uses: actions/checkout@v3 + - name: htmlproofer + run: /scripts/check-url-links.sh ``` ## Local development with another MoJ Documentation repository: This assumes you have Ruby and Bundler already installed on your local machine. See the [installation](https://tdt-documentation.london.cloudapps.digital/create_project/get_started/#get-started) setup. 
-Copy the config.rb file, Gemfile and Gemfile.lock to the checked out repository folder that contains the webpage data. +Copy the `config.rb` file, `Gemfile` and `Gemfile.lock` from this repository to the checked out repository folder that contains the documentation data. ``` gem install middleman @@ -99,7 +125,7 @@ Open a browser at http://127.0.0.1:4567/ ## Locally in Docker -To run the Docker image locally for development, build the image then run the container from the repository containing the webpage data: +To run the Docker image locally for development, build the image then run the container from the repository containing the documentation data: Build the Docker image: @@ -130,10 +156,16 @@ Open a browser at http://127.0.0.1:4567/ Inside the Docker container run the html-proofer tests locally before creating a PR: ``` -../scripts/compile-and-create-artifact.sh +../scripts/deploy.sh ``` -Alternatively use the [makefile](https://github.com/ministryofjustice/technical-guidance/blob/main/makefile) from the technical-guidance repository to run the Docker container locally using the `make preview` and `make check` commands +or + +``` +../scripts/check-url-links.sh +``` + +Alternatively use the [makefile](https://github.com/ministryofjustice/technical-guidance/blob/main/makefile) from the technical-guidance repository to run the Docker container locally using the `make preview`, `make deploy` or `make check` commands. ## CI/CD @@ -143,8 +175,10 @@ A [GitHub Action](.github/workflows/docker-hub.yml) publishes this repository Do The [govuk_tech_docs](https://rubygems.org/gems/govuk_tech_docs) gem is within the Gemfile. -Either dependabot will automatically update the gem or a new PR with the gem updated is required. +Either Dependabot will automatically update the gem or a new PR with the gem updated is required. + +The gems in the `Gemfile` are pinned to specific versions for the reasons given in its comments. This prevents them from being upgraded automatically. 
## Markdown in html -Markdown syntax used within compiled down html files can be found [here](https://daringfireball.net/projects/markdown/) and here [kramdown](https://kramdown.gettalong.org/syntax.html). +Markdown syntax used within the compiled down html files can be found [here](https://daringfireball.net/projects/markdown/) and here [kramdown](https://kramdown.gettalong.org/syntax.html). diff --git a/scripts/check-url-links.sh b/scripts/check-url-links.sh new file mode 100755 index 0000000..d0f8815 --- /dev/null +++ b/scripts/check-url-links.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +set -x +set -euo pipefail + +# Restore the stashed config.rb Gemfile and Gemfile.lock +cp /stashed-files/* . + +# Compile source markdown files into HTML in the `/docs` directory +bundle exec middleman build --build-dir docs --relative-links --verbose + +# Check all URLs +htmlproofer --log-level debug --allow-missing-href true --typhoeus '{"connecttimeout": 10, "timeout": 30, "accept_encoding": "zstd,br,gzip,deflate" }' ./docs diff --git a/scripts/compile-and-create-artifact.sh b/scripts/compile-and-create-artifact.sh deleted file mode 100755 index 7cf306f..0000000 --- a/scripts/compile-and-create-artifact.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/sh - -# Compile source markdown files into HTML in the `/docs` directory - -set -x -set -euo pipefail - -CONFIG_FILE=config/tech-docs.yml - -main() { - # Restore the stashed config.rb Gemfile and Gemfile.lock - cp /stashed-files/* . - - bundle exec middleman build --build-dir docs --relative-links --verbose - - touch docs/.nojekyll - - bundle exec htmlproofer \ - --log-level debug \ - --allow-missing-href true \ - --swap-urls "$(url_swap):" \ - --ignore-urls "/https...github.com.ministryofjustice.*/" \ - ./docs - - tar --dereference --directory docs -cvf artifact.tar --exclude=.git --exclude=.github . 
-} - - -# Convert the `host` value from `config/tech-docs.yml` to the form required in -# the --swap-urls command-line parameter to htmlproofer -# -# e.g. https://ministryofjustice.github.io/modernisation-platform -# => https?\:\/\/ministryofjustice\.github\.io\/modernisation-platform -# -# This is to prevent the link-checker from complaining about any links to pages -# which aren't yet published in the live version of the documentation website. -url_swap() { - grep ^host: ${CONFIG_FILE} \ - | sed 's/host: //' \ - | sed 's/^http.*:/https\?\\:/' \ - | sed 's/\./\\./g' \ - | sed 's/\//\\\//g' -} - -main diff --git a/scripts/deploy.sh b/scripts/deploy.sh new file mode 100755 index 0000000..8821876 --- /dev/null +++ b/scripts/deploy.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +set -x +set -euo pipefail + +# Restore the stashed config.rb Gemfile and Gemfile.lock +cp /stashed-files/* . + +# Compile source markdown files into HTML in the `/docs` directory +bundle exec middleman build --build-dir docs --relative-links --verbose + +touch docs/.nojekyll + +# Internal link check only within the docs folder +htmlproofer --log-level debug --allow-missing-href true --disable_external true ./docs + +tar --dereference --directory docs -cvf artifact.tar --exclude=.git --exclude=.github .