Skip to content

Commit

Permalink
A0-4265: Implement retry connection mechanism in weekly snapshot test…
Browse files Browse the repository at this point in the history
…s (#1691)

# Description

The weekly sync-from-snapshot test should not fail the way it did in
https://github.com/Cardinal-Cryptography/aleph-node/actions/runs/8699284871/job/23858108105.
This PR adds a connection retry mechanism instead of sleep-waiting. In
this test, it should take a node no more than 15 minutes to boot on
ParityDB. Timeouts were adjusted accordingly. This PR also adds tests
for Mainnet ParityDB.

Tests for non-ParityDB will be added in the next PRs.

## Type of change

Please delete options that are not relevant.

- Bug fix (non-breaking change which fixes an issue)

# Checklist:

* mainnet db-prunning
https://github.com/Cardinal-Cryptography/aleph-node/actions/runs/8846481836/job/24293106565
  • Loading branch information
Marcin-Radecki authored May 17, 2024
1 parent e81ef28 commit 7bce60a
Show file tree
Hide file tree
Showing 9 changed files with 265 additions and 166 deletions.
20 changes: 17 additions & 3 deletions .github/scripts/get_top_block.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
#!/usr/bin/env python3
import sys
from sys import argv
import time

from substrateinterface import SubstrateInterface
chain = SubstrateInterface(url=argv[1])
number = chain.get_block()['header']['number']
print(number)
# Retry budget: one attempt per minute, for at most 15 attempts.
MAX_CONNECTION_ATTEMPTS = 15
RETRY_DELAY_SECONDS = 60

connection_attempts = 0

# Query the node at argv[1] for its top block number. On success the number is
# printed to stdout and the script exits with status 0; on any error the
# failure is reported on stderr and the query is retried.
while connection_attempts < MAX_CONNECTION_ATTEMPTS:
    try:
        chain = SubstrateInterface(url=argv[1])
        number = chain.get_block()['header']['number']
        print(number)
        sys.exit(0)
    except Exception as e:
        print("An error occurred: " + str(e), file=sys.stderr)
        connection_attempts += 1
        # Do not waste a full delay after the final attempt has already failed.
        if connection_attempts < MAX_CONNECTION_ATTEMPTS:
            time.sleep(RETRY_DELAY_SECONDS)

# Every attempt failed: terminate with an uncaught exception (non-zero exit).
raise ConnectionRefusedError(
    "Could not get the top block from " + argv[1]
    + " after " + str(MAX_CONNECTION_ATTEMPTS) + " attempts"
)

116 changes: 116 additions & 0 deletions .github/scripts/test_parity_db_sync.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/bin/bash
# ParityDB sync-from-snapshot test: argument parsing and configuration.
#
# Usage: test_parity_db_sync.sh [--mainnet | --testnet] [--pruned]
#   --mainnet | --testnet  chain to sync with (Testnet when neither is given)
#   --pruned               use the pruned snapshot and enable pruning on the node
set -euo pipefail
echo "Starting Parity DB sync test."

PRUNING="false"
# Default to Testnet so that ${ENV} is always bound under `set -u`, even when
# neither --mainnet nor --testnet is passed. This matches the else-branches
# below, which treat every non-mainnet value as Testnet.
ENV="testnet"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --pruned)
      echo "Using pruned DB."
      PRUNING="true"
      shift
      ;;
    --mainnet)
      ENV="mainnet"
      shift
      ;;
    --testnet)
      ENV="testnet"
      shift
      ;;
    *)
      echo "Unrecognized argument: $1"
      exit 1
      ;;
  esac
done

BASE_PATH="running/"
CHAINSPEC="${BASE_PATH}/chainspec.json"
# Holds a flag and its value separated by a space; it is expanded unquoted
# where the node is started so that it splits into two arguments.
DB_ARG="--database paritydb"
TOP_BLOCK_SCRIPT="./.github/scripts/get_top_block.py"

# Per-chain settings: chainspec to copy, bootnodes, DB location under the base
# path and the public endpoint used to determine the target block.
# NOTE(review): the bootnode multiaddresses are joined without any separator;
# confirm that the node accepts them in this form.
if [[ "${ENV}" == "mainnet" ]]; then
  SOURCE_CHAINSPEC="./bin/node/src/resources/mainnet_chainspec.json"
  BOOT_NODES=/dns4/bootnode-eu-central-1-0.azero.dev/tcp/30333/p2p/12D3KooWEF1Eo7uFZWdqFsTPP7CehpRt5NeXFwCe3157qpoU5aqd/dns4/bootnode-eu-west-1-0.azero.dev/tcp/30333/p2p/12D3KooWPhi8Qvzvc8iJ4CeQj2vptjc5FTrodKPmra1NS1qfftjr/dns4/bootnode-eu-west-2-0.azero.dev/tcp/30333/p2p/12D3KooWDfUzU64WURE77tXYM9H94xQFAEL6ULQYhzegKsZXjEkC/dns4/bootnode-us-east-1-0.azero.dev/tcp/30333/p2p/12D3KooWFQSGvQii2gRGB5T4M6TXhM83JV4bTEhubCBpdoR6Rkwk/dns4/bootnode-us-east-2-0.azero.dev/tcp/30333/p2p/12D3KooWJnEGVVmnXhVNxV6KWe3EsDPNvPFNcYbQ6amFVGECVAGB
  DB_PATH="chains/mainnet/"
  TARGET_CHAIN="wss://ws.azero.dev"
else
  SOURCE_CHAINSPEC="./bin/node/src/resources/testnet_chainspec.json"
  BOOT_NODES=/dns4/bootnode-eu-central-1-0.test.azero.dev/tcp/30333/p2p/12D3KooWRkGLz4YbVmrsWK75VjFTs8NvaBu42xhAmQaP4KeJpw1L/dns4/bootnode-eu-west-1-0.test.azero.dev/tcp/30333/p2p/12D3KooWFVXnvJdPuGnGYMPn5qLQAQYwmRBgo6SmEQsKZSrDoo2k/dns4/bootnode-eu-west-2-0.test.azero.dev/tcp/30333/p2p/12D3KooWAkqYFFKMEJn6fnPjYnbuBBsBZq6fRFJZYR6rxnuCZWCC/dns4/bootnode-us-east-1-0.test.azero.dev/tcp/30333/p2p/12D3KooWQFkkFr5aM5anGEiUCQiGUdRyWgrdpvSjBgWAUS9srLE4/dns4/bootnode-us-east-2-0.test.azero.dev/tcp/30333/p2p/12D3KooWD5s2dkifJua69RbLwEREDdJjsNHvavNRGxdCvzhoeaLc
  DB_PATH="chains/testnet/"
  TARGET_CHAIN="wss://ws.test.azero.dev"
fi

# Snapshot selection: pruned vs. non-pruned archive of the chosen chain.
if [[ "${PRUNING}" == "true" ]]; then
  # Replaces (does not extend) the default database flag.
  # NOTE(review): confirm that --enable-pruning alone selects ParityDB.
  DB_ARG="--enable-pruning"
  if [[ "${ENV}" == "mainnet" ]]; then
    DB_SNAPSHOT_URL="http://db.azero.dev.s3-website.eu-central-1.amazonaws.com/latest-parity-pruned.html"
  else
    DB_SNAPSHOT_URL="http://db.test.azero.dev.s3-website.eu-central-1.amazonaws.com/latest-parity-pruned.html"
  fi
else
  if [[ "${ENV}" == "mainnet" ]]; then
    DB_SNAPSHOT_URL="http://db.azero.dev.s3-website.eu-central-1.amazonaws.com/latest-parity.html"
  else
    DB_SNAPSHOT_URL="http://db.test.azero.dev.s3-website.eu-central-1.amazonaws.com/latest-parity.html"
  fi
fi

#######################################
# Prepares the environment for the test run.
# Globals:   BASE_PATH (read)
# Outputs:   whatever pip prints
#######################################
initialize() {
  # Python package imported by the top-block helper script.
  pip install substrate-interface

  # Working directory of the node under test.
  mkdir -p "${BASE_PATH}"
}

#######################################
# Downloads the DB snapshot and unpacks it into the node's database directory.
# Globals:   BASE_PATH, DB_PATH, DB_SNAPSHOT_URL (read)
#            DB_SNAPSHOT_PATH (written)
# Outputs:   progress message on stdout, failure message on stderr
# Returns:   exits the script with status 1 when download or unpacking fails
#######################################
get_snapshot () {
  echo "Downloading the snapshot... "
  DB_SNAPSHOT_PATH="${BASE_PATH}/${DB_PATH}"
  mkdir -p "${DB_SNAPSHOT_PATH}"
  pushd "${DB_SNAPSHOT_PATH}" > /dev/null

  # Stream the archive straight into tar so it never has to be stored on disk.
  # Testing the pipeline in an `if` keeps errexit from aborting before the
  # message is printed; a wget failure is only visible here when the caller
  # has enabled `pipefail`.
  if ! wget -q -O - "${DB_SNAPSHOT_URL}" | tar xzf -; then
    echo "Failed to download and unpack the snapshot." >&2
    exit 1
  fi

  popd > /dev/null
}

#######################################
# Places the selected chainspec where the node is told to read it from.
# Globals:   SOURCE_CHAINSPEC, CHAINSPEC (read)
# Outputs:   progress message on stdout
#######################################
copy_chainspec() {
  printf '%s\n' "Copying the chainspec... "
  cp "${SOURCE_CHAINSPEC}" "${CHAINSPEC}"
}

#######################################
# Stores the output of the top-block script, run against the target chain,
# in TARGET_BLOCK.
# Globals:   TOP_BLOCK_SCRIPT, TARGET_CHAIN (read); TARGET_BLOCK (written)
# Outputs:   progress message on stdout
# Returns:   non-zero when the top-block script fails
#######################################
get_target_block() {
  echo "Determining target block... "
  # Quoted so a script path containing spaces or glob characters stays a
  # single word.
  TARGET_BLOCK=$("${TOP_BLOCK_SCRIPT}" "${TARGET_CHAIN}")
}

#######################################
# Stores the output of the top-block script, run against the local node's
# RPC endpoint (ws://localhost:9944), in CURRENT_BLOCK.
# Globals:   TOP_BLOCK_SCRIPT (read); CURRENT_BLOCK (written)
# Outputs:   progress message on stdout
# Returns:   non-zero when the top-block script fails
#######################################
get_current_block() {
  echo "Determining current block... "
  # Quoted so a script path containing spaces or glob characters stays a
  # single word.
  CURRENT_BLOCK=$("${TOP_BLOCK_SCRIPT}" "ws://localhost:9944")
}

#######################################
# Aborts the script unless the given value is a non-negative decimal integer.
# Without this check an empty or malformed block number would make the
# numeric comparison in the sync loop error out, which ends the loop and lets
# the test pass without the node ever having synced.
# Arguments: $1 - label used in the error message
#            $2 - value to validate
#######################################
assert_block_number() {
  if [[ ! "$2" =~ ^[0-9]+$ ]]; then
    echo "Invalid $1 block number: '$2'" >&2
    exit 1
  fi
}

initialize
copy_chainspec
get_snapshot

get_target_block
assert_block_number "target" "${TARGET_BLOCK}"

chmod +x aleph-node
# Start the node in the background; stdout is discarded and stderr goes to
# aleph-node.log under the base path.
# ${DB_ARG} is deliberately left unquoted: it may hold a flag and its value
# separated by a space and has to split into separate arguments.
./aleph-node \
  --chain "${CHAINSPEC}" \
  --base-path "${BASE_PATH}" \
  --rpc-port 9944 \
  --name sync-from-snapshot-tester \
  --bootnodes "${BOOT_NODES}" \
  --node-key-file "${BASE_PATH}/p2p_secret" \
  ${DB_ARG} \
  --no-mdns 1>/dev/null 2> "${BASE_PATH}/aleph-node.log" &

get_current_block
assert_block_number "current" "${CURRENT_BLOCK}"
echo "Syncing to ${TARGET_BLOCK} starting at ${CURRENT_BLOCK}."

# Poll once a minute until the local node has passed the target block.
# `[` is kept (with quoted operands) because it compares the validated values
# as plain decimals.
while [ "${CURRENT_BLOCK}" -le "${TARGET_BLOCK}" ]; do
  sleep 1m
  get_current_block
  assert_block_number "current" "${CURRENT_BLOCK}"
  echo "Sync status: ${CURRENT_BLOCK}/${TARGET_BLOCK}".
done
89 changes: 0 additions & 89 deletions .github/scripts/test_testnet_db_sync.sh

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
---
# This workflow performs sync to Testnet from a pruned snapshot using the latest main version.
# This workflow performs live chain (Mainnet or Testnet) sync from a ParityDB snapshot,
# using the latest main version.

name: Weekly sync from snapshot test, pruned
name: Weekly sync from snapshot test
on:
# At 03:00 on Tuesday
schedule:
- cron: '0 3 * * 2'
workflow_dispatch:

concurrency:
group: ${{ github.ref }}-${{ github.workflow }}
cancel-in-progress: false
workflow_call:
inputs:
timeout:
description: "Test timeout in minutes"
type: number
required: true
args:
description: "Test arguments passed to bash test runner"
type: string
required: true

jobs:
check-vars-and-secrets:
Expand Down Expand Up @@ -38,12 +41,25 @@ jobs:
with:
name: aleph-production-node

- name: Download snapshot
timeout-minutes: 180
- name: Download snapshot and test sync
timeout-minutes: ${{ inputs.timeout }}
run: |
./.github/scripts/test_parity_db_sync.sh ${{ inputs.args }}
- name: Archive logs from failed test
if: ${{ failure() }}
shell: bash
run: |
./.github/scripts/test_testnet_db_sync.sh \
--pruned
tar -czvf aleph-node-logs.tar.gz running/aleph-node.log
- name: Upload logs from failed e2e test
if: ${{ failure() }}
uses: actions/upload-artifact@v4
with:
path: aleph-node-logs.tar.gz
name: aleph-node-logs.tar.gz
if-no-files-found: error
retention-days: 7

slack-notification:
name: Slack notification
Expand Down
26 changes: 26 additions & 0 deletions .github/workflows/sync-from-snapshot-mainnet-paritydb-pruned.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
# This workflow performs sync to Mainnet from a pruned ParityDB snapshot using the latest
# main version.
#
# For now, this test is not quite correct: it syncs Mainnet with the latest aleph-node binary,
# and we don't guarantee that this will always work.

name: Weekly sync from snapshot, Mainnet, ParityDB pruned
on:
# At 03:00 on Wednesday and Saturday
schedule:
- cron: '0 3 * * 3,6'
workflow_dispatch:

concurrency:
group: ${{ github.ref }}-${{ github.workflow }}
cancel-in-progress: false

jobs:
main:
name: Test sync
uses: ./.github/workflows/_sync-from-snapshot.yml
secrets: inherit
with:
timeout: 60
args: --mainnet --pruned
26 changes: 26 additions & 0 deletions .github/workflows/sync-from-snapshot-mainnet-partidb.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
# This workflow performs sync to Mainnet from a ParityDB non-pruned snapshot using the latest
# main version.
#
# For now, this test is not quite correct: it syncs Mainnet with the latest aleph-node binary,
# and we don't guarantee that this will always work.

name: Sync from snapshot test, Mainnet, ParityDB non-pruned
on:
# At 03:00 on Wednesday and Saturday
schedule:
- cron: '0 3 * * 3,6'
workflow_dispatch:

concurrency:
group: ${{ github.ref }}-${{ github.workflow }}
cancel-in-progress: false

jobs:
main:
name: Test sync
uses: ./.github/workflows/_sync-from-snapshot.yml
secrets: inherit
with:
timeout: 300
args: --mainnet
23 changes: 23 additions & 0 deletions .github/workflows/sync-from-snapshot-testnet-paritydb-pruned.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
# This workflow performs sync to Testnet from a pruned ParityDB snapshot using the latest
# main version.

name: Sync from snapshot, Testnet, ParityDB pruned
on:
# At 03:00 on Wednesday and Saturday
schedule:
- cron: '0 3 * * 3,6'
workflow_dispatch:

concurrency:
group: ${{ github.ref }}-${{ github.workflow }}
cancel-in-progress: false

jobs:
main:
name: Test sync
uses: ./.github/workflows/_sync-from-snapshot.yml
secrets: inherit
with:
timeout: 60
args: --testnet --pruned
27 changes: 27 additions & 0 deletions .github/workflows/sync-from-snapshot-testnet-partidb.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
---
# This workflow performs sync to Testnet from a ParityDB non-pruned snapshot using the latest
# main version.

name: Sync from snapshot test, Testnet, ParityDB non-pruned
on:
  # At 03:00 on Wednesday and Saturday
  schedule:
    - cron: '0 3 * * 3,6'
  # Allows starting the workflow manually.
  workflow_dispatch:

# Runs of this workflow on the same ref share one concurrency group;
# a run that is already in progress is not cancelled.
concurrency:
  group: ${{ github.ref }}-${{ github.workflow }}
  cancel-in-progress: false

jobs:
  main:
    name: Test sync
    # Delegates to the shared sync-from-snapshot workflow.
    uses: ./.github/workflows/_sync-from-snapshot.yml
    secrets: inherit
    with:
      # Test timeout in minutes.
      timeout: 300
      # Arguments passed to the bash test runner.
      args: --testnet
Loading

0 comments on commit 7bce60a

Please sign in to comment.