From abfdd69f36a9885c7ba77986b3cd42a69f17687d Mon Sep 17 00:00:00 2001 From: Ondrej Brablc Date: Fri, 31 May 2024 13:46:08 +0200 Subject: [PATCH] Basic implementation with simulation of alert creation/resolution --- config.sh | 1 - docker-cmd.sh | 44 ++++++++++++++++++++++++++++++-------------- services.sh | 37 ++++++++++++++++++++++++++++++------- test.sh | 12 ++++++++++++ 4 files changed, 72 insertions(+), 22 deletions(-) create mode 100755 test.sh diff --git a/config.sh b/config.sh index 671cc81..4f34490 100755 --- a/config.sh +++ b/config.sh @@ -1,3 +1,2 @@ -OK_FILE="/tmp/ok" LOGGER_USE_TS=1 SCRIPT_NAME=${0##*/} diff --git a/docker-cmd.sh b/docker-cmd.sh index c3023bf..a29ea9b 100755 --- a/docker-cmd.sh +++ b/docker-cmd.sh @@ -5,24 +5,40 @@ source ./logger.sh SLEEP=${SLEEP-10s} -function get_cmd() { - cmd=(./dockerize -timeout 300s -wait-retry-interval 5s) - while read SERVICE; do - cmd+=(-wait $SERVICE) +function check_services() { + while read service network_alias port; do + prefix="/tmp/alert-$(echo "$service $network_alias:$port" | base64)" + alert_file=${prefix}-alert + log_file=${prefix}-log + # used for testing + real_port=$port + if [[ -f test-change-port-$port ]]; then + read real_port < test-change-port-$port + fi + ./dockerize -timeout 5s -wait tcp://$network_alias:$real_port true 2>$log_file + if [ $? -ne 0 ]; then + if [[ -f $alert_file ]]; then + log_warn "$service|$network_alias:$port|Pending alert" + else + log_error "$service|$network_alias:$port|New alert" + echo "$service $network_alias:$port"> $alert_file + cat $log_file + fi + else + if [[ -f $alert_file ]]; then + log_info "$service|$network_alias:$port|Resolved alert" + rm -f $alert_file + fi + fi done < <(./services.sh) - cmd+=(touch $OK_FILE) - echo ${cmd[@]} } +log_info "Initial list of services (run services.sh using docker exec to see actual):" +./services.sh + log_info "Entering loop with ${SLEEP} sleep ..." while true; do - eval $(get_cmd) - if [ -f $OK_FILE ]; then - log_info OK - sleep $SLEEP - rm -f $OK_FILE - else - log_error TIMEOUT - fi + sleep $SLEEP + check_services done diff --git a/services.sh b/services.sh index 5fb3a44..52dbc50 100755 --- a/services.sh +++ b/services.sh @@ -1,18 +1,41 @@ #!/usr/bin/env bash -SOCK=/var/run/docker.sock -URL=http://v1.45/services +source ./logger.sh + +sock=/var/run/docker.sock +url=http://v1.45 LABEL="swarm-health-alerter.port" +curl -s --unix-socket $sock $url/services -o /tmp/services + function get_services() { - curl -s --unix-socket $SOCK $URL \ - | jq -r '.[] | select(.Spec.Labels["com.docker.stack.namespace"] != null) | .Spec.Name' + cat /tmp/services | jq -r '.[] | select(.Spec.Labels["com.docker.stack.namespace"] != null) | .Spec.Name' | sort -u +} + +function get_service() { + local service="$1" + cat /tmp/services | jq -r '.[] | select(.Spec.Name=="'$service'")' } while read service; do - ports=$(curl -s --unix-socket $SOCK $URL/$service | jq -r '.Spec.Labels["'$LABEL'"]') + ports=$(get_service $service | jq -r '.Spec.Labels["'$LABEL'"]') test "$ports" != "null" || continue - network_alias=$(curl -s --unix-socket $SOCK $URL/$service | jq -r '.Spec.TaskTemplate.Networks[].Aliases[]' | sort | head -1) - echo $ports | sed 's/,/\n/g' | while read port; do echo "tcp://$network_alias:$port"; done + + network_alias=$(get_service $service | jq -r '.Spec.TaskTemplate.Networks[].Aliases[]' | sort | head -1) + + echo $ports | sed 's/,/\n/g' | while read port; do + read service_id replicas < <(get_service $service | jq -r '"\(.ID) \(.Spec.Mode.Replicated.Replicas)"') + test "$replicas" != "0" || continue + + if [[ $replicas == "null" ]]; then + filter=$(echo '{"service":["'$service_id'"], "desired-state":["running"]}' | jq -s -R -r @uri) + read state message exit_code < <(curl -s --unix-socket $sock "$url/tasks?filters=$filter" | jq -r '.[] | "\(.Status.State) \(.Status.Message) \(.ContainerStatus.ExitCode)"' | grep running | head -1) + + test "$state" == "running" || break + fi + + echo "$service $network_alias $port" + + done done < <(get_services) diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..41d1660 --- /dev/null +++ b/test.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +set -e + +network=${1?Expecting network name} + +docker run -it --rm \ + --name swarm-health-alerter-test \ + --network $network \ + --volume /var/run/docker.sock:/var/run/docker.sock \ + --volume .:/app/ \ + brablc/swarm-health-alerter:dev