Skip to content

Backup and restore test #53

Backup and restore test

Backup and restore test #53

---
# TODO:
# - [ ] test on XL, also break the puppetdb database
# - [ ] test custom scenario for only restoring primary
# - [ ] test custom scenario for only restoring puppetdb (on external server)
name: "Backup and restore test"
on:
  # Automatic trigger: a pull request being marked "ready for review".
  pull_request_target:
    types:
      - ready_for_review
  # Manual trigger. Every input below has a default; the job additionally
  # carries its own `||` fallbacks for runs where no inputs are supplied.
  workflow_dispatch:
    inputs:
      image:
        description: "GCP image for test cluster"
        required: true
        default: "almalinux-cloud/almalinux-8"
      architecture:
        description: "PE architecture to test"
        required: true
        default: "standard"
      version:
        description: "PE version to install"
        required: true
        default: "2023.5.0"
      # Compared against the string 'true' by the steps that use it.
      ssh-debugging:
        description: "Boolean; whether or not to pause for ssh debugging"
        required: true
        default: "false"
jobs:
  # Single end-to-end job: provision a test cluster, install PE, take a peadm
  # backup, deliberately break the cluster, restore it, and report its status.
  # The cluster is torn down at the end regardless of the outcome.
  backup:
    # The `|| '...'` fallbacks supply values when github.event.inputs is empty
    # (i.e. the run was not started through workflow_dispatch).
    name: "Backup cluster: PE ${{ github.event.inputs.version || '2023.5.0' }} ${{ github.event.inputs.architecture || 'extra-large' }} on ${{ github.event.inputs.image || 'almalinux-cloud/almalinux-8' }}"
    runs-on: ubuntu-20.04
    env:
      BOLT_GEM: true
      BOLT_DISABLE_ANALYTICS: true
      LANG: "en_US.UTF-8"
    steps:
      # Optional interactive debugging session, only when requested via input.
      - name: "Start SSH session"
        if: ${{ github.event.inputs.ssh-debugging == 'true' }}
        uses: luchihoratiu/debug-via-ssh@main
        with:
          NGROK_AUTH_TOKEN: ${{ secrets.NGROK_AUTH_TOKEN }}
          SSH_PASS: ${{ secrets.SSH_PASS }}

      - name: "Checkout Source"
        uses: actions/checkout@v2

      - name: "Activate Ruby 2.7"
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: "2.7"
          bundler-cache: true

      - name: "Print bundle environment"
        if: ${{ github.repository_owner == 'puppetlabs' }}
        run: |
          echo ::group::info:bundler
          bundle env
          echo ::endgroup::

      - name: "Provision test cluster"
        timeout-minutes: 15
        run: |
          echo ::group::prepare
          # SSH keep-alives for every host, so long-running sessions are not dropped.
          mkdir -p $HOME/.ssh
          echo 'Host *' > $HOME/.ssh/config
          echo '    ServerAliveInterval 150' >> $HOME/.ssh/config
          echo '    ServerAliveCountMax 2' >> $HOME/.ssh/config
          bundle exec rake spec_prep
          echo ::endgroup::
          echo ::group::provision
          bundle exec bolt plan run peadm_spec::provision_test_cluster \
            --modulepath spec/fixtures/modules \
            provider=provision_service \
            image=${{ github.event.inputs.image || 'almalinux-cloud/almalinux-8' }} \
            architecture=${{ github.event.inputs.architecture || 'extra-large' }}
          echo ::endgroup::
          echo ::group::info:request
          cat request.json || true; echo
          echo ::endgroup::
          echo ::group::info:inventory
          # Print the generated inventory with passwords masked.
          sed -e 's/password: .*/password: "[redacted]"/' < spec/fixtures/litmus_inventory.yaml || true
          echo ::endgroup::

      - name: "Install PE on test cluster"
        timeout-minutes: 120
        run: |
          bundle exec bolt plan run peadm_spec::install_test_cluster \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            architecture=${{ github.event.inputs.architecture || 'extra-large' }} \
            version=${{ github.event.inputs.version || '2023.5.0' }} \
            --stream

      # continue-on-error: a failed backup does not stop the remaining steps.
      - name: Perform peadm backup of cluster
        timeout-minutes: 10
        continue-on-error: true
        run: |
          echo ::group::prepare
          mkdir -p $HOME/.ssh
          echo 'Host *' > $HOME/.ssh/config
          echo '    ServerAliveInterval 150' >> $HOME/.ssh/config
          echo '    ServerAliveCountMax 2' >> $HOME/.ssh/config
          bundle exec rake spec_prep
          echo ::endgroup::
          echo ::group::backup
          bundle exec bolt plan run peadm_spec::test_backup \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            --stream
          echo ::endgroup::

      # yq is used by the following steps to look up targets by role in the
      # litmus inventory.
      - name: Set up yq
        uses: frenck/action-setup-yq@v1
        with:
          version: v4.30.5

      # Deliberately damage the cluster so the restore has something to repair.
      - name: Break PE cluster
        run: |
          primary=$(yq '.groups[].targets[] | select(.vars.role == "primary") | .uri' spec/fixtures/litmus_inventory.yaml)
          echo "Removing ssl directories"
          bundle exec bolt command run "rm -rf /etc/puppetlabs/puppetserver/ca /etc/puppetlabs/puppet/ssl" -t "$primary" --inventoryfile spec/fixtures/litmus_inventory.yaml
          echo "Removing classifier database"
          bundle exec bolt command run "rm -rf /opt/puppetlabs/server/data/postgresql/classifier" -t "$primary" --inventoryfile spec/fixtures/litmus_inventory.yaml
          bundle exec bolt command run "puppet infrastructure status" -t "$primary" --inventoryfile spec/fixtures/litmus_inventory.yaml
          # Extra-large only: also uninstall PE from the primary database node.
          # The expansion is quoted so an unexpected input value cannot break
          # the `[ ... ]` test syntax.
          if [ "${{ github.event.inputs.architecture || 'extra-large' }}" = 'extra-large' ]; then
            echo "Uninstalling PE from primary database"
            primary_db=$(yq '.groups[].targets[] | select(.vars.role == "primary-pdb-postgresql") | .uri' spec/fixtures/litmus_inventory.yaml)
            bundle exec bolt command run "/opt/puppetlabs/bin/puppet-enterprise-uninstaller -d -p -y" -t "$primary_db" --inventoryfile spec/fixtures/litmus_inventory.yaml
          fi

      - name: Reinstall PE primary in a non-peadm way
        continue-on-error: true
        run: |
          primary=$(yq '.groups[].targets[] | select(.vars.role == "primary") | .uri' spec/fixtures/litmus_inventory.yaml)
          # Every line but the last needs a trailing backslash; otherwise the
          # remaining flags are executed as a separate (failing) shell command.
          bundle exec bolt task run peadm_spec::reinstall_pe version=${{ github.event.inputs.version || '2023.5.0' }} -t "$primary" \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            --verbose --stream

      - name: Perform peadm restore of primary server
        timeout-minutes: 30
        continue-on-error: true
        run: |
          echo ::group::prepare
          mkdir -p $HOME/.ssh
          echo 'Host *' > $HOME/.ssh/config
          echo '    ServerAliveInterval 150' >> $HOME/.ssh/config
          echo '    ServerAliveCountMax 2' >> $HOME/.ssh/config
          bundle exec rake spec_prep
          echo ::endgroup::
          echo ::group::restore
          bundle exec bolt plan run peadm_spec::test_restore \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            --stream
          echo ::endgroup::

      # Extra-large only: rebuild the primary database node that
      # "Break PE cluster" uninstalled. In order:
      #   - clean the primary db certificate on the primary server
      #   - run the pe installer on the primary db server
      #   - run the add_database plan on the primary db server
      #   - run the restore plan with restore_type="recovery-db"
      # TODO: add docs to README about actions the customer needs to do to
      #       recover the primary db
      - name: Setup Primary DB in XL
        # The whole comparison must sit inside ${{ }}. With the `== ...` part
        # outside, the condition is a non-empty string and is always truthy.
        if: ${{ (github.event.inputs.architecture || 'extra-large') == 'extra-large' }}
        run: |
          primary=$(yq '.groups[].targets[] | select(.vars.role == "primary") | .uri' spec/fixtures/litmus_inventory.yaml)
          primary_db=$(yq '.groups[].targets[] | select(.vars.role == "primary-pdb-postgresql") | .uri' spec/fixtures/litmus_inventory.yaml)
          # A shell command goes through `bolt command run`, not `bolt plan run`.
          # It runs on the primary and names the database node as the certname.
          # NOTE(review): assumes the inventory uri equals the node's certname - confirm.
          bundle exec bolt command run "puppet ca clean --certname $primary_db" -t "$primary" --inventoryfile spec/fixtures/litmus_inventory.yaml
          # NOTE(review): confirm that peadm::pe_install and peadm::add_database
          # exist with these names and need no parameters beyond the target.
          bundle exec bolt plan run peadm::pe_install -t "$primary_db" \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules
          bundle exec bolt plan run peadm::add_database -t "$primary_db" \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules
          bundle exec bolt plan run peadm_spec::test_restore restore_type="recovery-db" \
            --inventoryfile spec/fixtures/litmus_inventory.yaml \
            --modulepath spec/fixtures/modules \
            --stream

      - name: Output PE cluster status
        run: |
          primary=$(yq '.groups[].targets[] | select(.vars.role == "primary") | .uri' spec/fixtures/litmus_inventory.yaml)
          bundle exec bolt command run "puppet infrastructure status" -t "$primary" --inventoryfile spec/fixtures/litmus_inventory.yaml

      # Holds the job open while ${HOME}/pause exists. always() makes this run
      # even after an earlier step failed.
      - name: "Wait as long as the ${HOME}/pause file is present"
        continue-on-error: true
        if: ${{ always() && github.event.inputs.ssh-debugging == 'true' }}
        # if: github.event.inputs.ssh-debugging == 'true'
        run: |
          while [ -f "${HOME}/pause" ] ; do
            echo "${HOME}/pause present, sleeping for 10 seconds..."
            sleep 10
          done
          echo "${HOME}/pause absent, continuing workflow."

      # Always runs. The inventory file check skips tear-down when no
      # inventory was written.
      - name: "Tear down cluster"
        if: always()
        run: |
          if [ -f spec/fixtures/litmus_inventory.yaml ]; then
            echo ::group::tear_down
            bundle exec rake 'litmus:tear_down'
            echo ::endgroup::
            echo ::group::info:request
            cat request.json || true; echo
            echo ::endgroup::
          fi