Move alerter state into DATA_DIR and pass alert events as JSON on stdin.

* .gitignore / Dockerfile / test.sh: introduce the DATA_DIR data directory.
* docker-cmd.sh: check_services() keeps its pending/log files under DATA_DIR
  and pipes a JSON event (action, unique_id, swarm_name, service_name,
  network_alias, port, log) into $ALERT_SCRIPT.
* integrations/zenduty.sh: reads that JSON event from stdin instead of
  taking positional arguments.

Review fixes folded into this revision:
* docker-cmd.sh: send the real action (it was hard-coded to "create", so
  resolve events were reported as create).
* docker-cmd.sh: pass the raw dockerize log as the "log" string (it was
  JSON-encoded twice before being embedded in the event).
* integrations/zenduty.sh: exit on an unknown action instead of posting an
  event with an empty alert_type.
* Quote path expansions; create DATA_DIR before calling mktemp.

NOTE(review): the source of this patch had its line breaks collapsed. Blank
context lines and indentation were restored from the hunk line counts and may
need adjusting against the real files; the index hashes are those of the
original revision.

diff --git a/.gitignore b/.gitignore
index d723e9e..2b91c1a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 dockerize
+data
diff --git a/Dockerfile b/Dockerfile
index 5405b65..75e9c36 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,8 +2,12 @@ FROM alpine:latest
 
 ENV DOCKERIZE_VERSION v0.7.0
 
+ENV DATA_DIR=/data
+
 WORKDIR /app
 
+RUN mkdir -p "$DATA_DIR"
+
 COPY *.sh ./
 COPY integrations/ integrations/
 
diff --git a/docker-cmd.sh b/docker-cmd.sh
index a4e0781..9608f6a 100755
--- a/docker-cmd.sh
+++ b/docker-cmd.sh
@@ -1,7 +1,9 @@
 #!/usr/bin/env bash
 
-source ./config.sh
-source ./logger.sh
+script_path=$(readlink -f "$0")
+script_dir=${script_path%/*}
+source "$script_dir/config.sh"
+source "$script_dir/logger.sh"
 
 LOOP_SLEEP=${LOOP_SLEEP:-10s}
 
@@ -13,47 +15,73 @@ fi
 
 test -z "$ALERT_SCRIPT" && log_warn "Env ALERT_SCRIPT not defined - alerting disabled"
 test -z "$SWARM_NAME" && log_warn "Env SWARM_NAME not defined using default"
-SWARM_NAME="${SWARM_NAME:-Swarm}"
+swarm_name="${SWARM_NAME:-Swarm}"
+DATA_DIR=${DATA_DIR:-$script_dir/data}
+mkdir -p "$DATA_DIR"
 
 if [[ -n $ALERT_SCRIPT && ! -f $ALERT_SCRIPT ]]; then
-  log_error "Alert script defined but not accessible on $ALERT_SCRIPT path"
-  ALERT_SCRIPT=""
+  log_error "Alert script defined but not accessible on $ALERT_SCRIPT path!"
+  # Fall back to pretty-printing the JSON event instead of alerting.
+  ALERT_SCRIPT="jq ."
 fi
 
+# Probe every service listed by ./services.sh and pipe a create/resolve
+# JSON event into $ALERT_SCRIPT when a service's availability changes.
 function check_services() {
-  local swarm_name=$SWARM_NAME
-  while read service network_alias port; do
-    read unique_id rest < <(echo "${swarm_name}_${service}_${network_alias}_${port}" | md5sum)
-    prefix="/tmp/alerter-${unique_id}"
-    alert_file=${prefix}.pending
-    log_file=${prefix}.log
+  while read service_name network_alias port; do
+    unique_name="${swarm_name} ${service_name} ${network_alias} ${port}"
+    # File-name safe key: lower-cased, anything outside [a-zA-Z0-9_-] becomes "_".
+    unique_code=$(echo "${unique_name,,}" | sed -e 's/ /_/g' -e 's/[^a-zA-Z0-9_-]/_/g')
+    # unique_id mixes in a random suffix; it is stored in the pending file on create and read back on resolve.
+    random_str=$(tr -dc 'a-zA-Z0-9' < /dev/urandom | head -c 10)
+    read unique_id _ < <(echo -n "$unique_name $random_str" | md5sum)
+    prefix="$DATA_DIR/${unique_code}"
+    pending_file="${prefix}.pending"
+    log_file="${prefix}.log"
     # used for testing
-    real_port=$port
-    if [[ -f test-change-port-$port ]]; then
-      read real_port < test-change-port-$port
+    real_port="$port"
+    if [[ -f "$DATA_DIR/test-change-port-$port" ]]; then
+      real_port=$(< "$DATA_DIR/test-change-port-$port")
     fi
+    action=""
     ./dockerize -timeout 5s -wait tcp://$network_alias:$real_port true 2>$log_file
     if [ $? -ne 0 ]; then
-      if [[ -f $alert_file ]]; then
-        log_warn "$service|$network_alias:$port|Pending alert"
+      if [[ -f "$pending_file" ]]; then
+        log_warn "$service_name|$network_alias:$port|Pending alert"
       else
-        log_error "$service|$network_alias:$port|Creating alert"
-        echo "$service $network_alias:$port"> $alert_file
-        if [[ -n $ALERT_SCRIPT ]]; then
-          cat $log_file | /bin/bash -c "$ALERT_SCRIPT CREATE '$swarm_name' '$service' '$network_alias' '$port'"
-        fi
-        cat $log_file
+        log_error "$service_name|$network_alias:$port|Creating alert"
+        echo "$unique_id" > "$pending_file"
+        action="create"
       fi
     else
-      if [[ -f $alert_file ]]; then
-        log_info "$service|$network_alias:$port|Resolving alert"
-        if [[ -n $ALERT_SCRIPT ]]; then
-          cat $log_file | /bin/bash -c "$ALERT_SCRIPT RESOLVE '$SWARM_NAME' '$service' '$network_alias' '$port'"
-        fi
-        rm -f $alert_file
+      if [[ -f "$pending_file" ]]; then
+        log_info "$service_name|$network_alias:$port|Resolving alert"
+        action="resolve"
+        unique_id=$(< "$pending_file")
+        rm -f "$pending_file"
       fi
-      rm -f $log_file
     fi
+    # Send the event as a JSON document on the alert script's stdin.
+    if [[ -n $action ]]; then
+      jq -n \
+        --arg action "$action" \
+        --arg unique_id "$unique_id" \
+        --arg swarm_name "$swarm_name" \
+        --arg service_name "$service_name" \
+        --arg network_alias "$network_alias" \
+        --arg port "$port" \
+        --arg log "$(< "$log_file")" \
+        '{
+          "action": $action,
+          "unique_id": $unique_id,
+          "swarm_name": $swarm_name,
+          "service_name": $service_name,
+          "network_alias": $network_alias,
+          "port": $port,
+          "log": $log
+        }' | /bin/bash -c "$ALERT_SCRIPT"
+    fi
+    rm -f "$log_file"
   done < <(./services.sh)
 }
 
diff --git a/integrations/zenduty.sh b/integrations/zenduty.sh
index ed59caa..5aea2f5 100755
--- a/integrations/zenduty.sh
+++ b/integrations/zenduty.sh
@@ -1,61 +1,65 @@
 #!/usr/bin/env bash
 
-echo "Reading log file from stdin ..." >&2
-summary=$(cat | jq -Rs .)
-
 script_path=$(readlink -f $0)
 script_dir=${script_path%/*}
-source "$script_dir/../config.sh"
 source "$script_dir/../logger.sh"
 
-if [[ -z $ZENDUTY_API_KEY ]]; then
-  log_error "Expecting ZENDUTY_API_KEY env"
+DATA_DIR=${DATA_DIR:-$script_dir/../data}
+mkdir -p "$DATA_DIR"
+
+# Buffer the JSON event from stdin; timeout aborts the read after 2s.
+input_file=$(mktemp "$DATA_DIR/zenduty-input.XXXXXX")
+trap 'rm -f "$input_file"' EXIT
+
+if ! timeout 2s cat > "$input_file"; then
+  log_error "Reading from stdin timed out."
   exit 1
 fi
 
-if [[ $# != 5 ]]; then
-  log_error "Expecting parameters: ACTION SWARM_NAME SERVICE_NAME NETWORK_ALIAS PORT"
+if [[ -z $ZENDUTY_API_KEY ]]; then
+  log_error "Expecting ZENDUTY_API_KEY env"
   exit 1
 fi
 
-ACTION=$1
-SWARM_NAME=$2
-SERVICE_NAME=$3
-NETWORK_ALIAS=$4
-PORT=$5
+action=$(jq -r .action "$input_file")
+entity_id=$(jq -r .unique_id "$input_file")
 
 alert_type=""
-case $ACTION in
-  CREATE )
+appendix=""
+case $action in
+  create )
     alert_type="critical"
+    appendix="not available"
     ;;
-  RESOLVE )
+  resolve )
     alert_type="resolved"
+    appendix="is available"
     ;;
   *)
-    log_error "Action must be one of: CREATE RESOLVE. Received: '$ACTION'"
+    log_error "Action must be one of: create resolve. Received: '$action'"
+    exit 1
     ;;
 esac
 
-read entity_id rest < <(echo "${SWARM_NAME}_${SERVICE_NAME}_${NETWORK_ALIAS}_${PORT}" | md5sum)
-
-request_file=/tmp/zenduty-request-${entity_id}.json
-response_file=/tmp/zenduty-response-${entity_id}.json
+request_file="$DATA_DIR/${entity_id}-zenduty-request.json"
+response_file="$DATA_DIR/${entity_id}-zenduty-response.json"
 
-cat << __PAYLOAD > $request_file
-{
-  "alert_type": "$alert_type",
-  "entity_id": "$entity_id",
-  "message":"$SWARM_NAME service $SERVICE_NAME ($NETWORK_ALIAS:$PORT) not available",
-  "summary": $summary
-}
-__PAYLOAD
+# Build the Zenduty request from the fields of the incoming JSON event.
+jq -r \
+  --arg alert_type "$alert_type" \
+  --arg appendix "$appendix" \
+  '{
+    "alert_type": $alert_type,
+    "entity_id": .unique_id,
+    "message": "\(.swarm_name) service \(.service_name) (\(.network_alias):\(.port)) \($appendix)",
+    "summary": .log
+  }' "$input_file" > "$request_file"
 
-# log_info "Request file:"
-# jq . $request_file 2>/dev/null || cat $request_file
+log_info "Request file:"
+jq . "$request_file" 2>/dev/null || cat "$request_file"
 
 url="https://www.zenduty.com/api/events/${ZENDUTY_API_KEY}/"
-curl -s -X POST "$url" -H 'Content-Type: application/json' -d @$request_file >$response_file 2>&1
+curl -s -X POST "$url" -H 'Content-Type: application/json' -d @"$request_file" >"$response_file"
 return_code=$?
 
 if [ $return_code -ne 0 ]; then
@@ -65,7 +69,7 @@ fi
 log_info "Response file:"
 jq . $response_file 2>/dev/null || cat $response_file
 
-if [[ $ACTION == "RESOLVE" ]]; then
+if [[ $action == "resolve" ]]; then
   rm -f $request_file
   rm -f $response_file
 fi
diff --git a/test.sh b/test.sh
index 534015e..93c3ccd 100755
--- a/test.sh
+++ b/test.sh
@@ -13,6 +13,7 @@ docker run --rm \
   --env ALERT_SCRIPT="$ALERT_SCRIPT" \
   --env SWARM_NAME="$SWARM_NAME" \
   --env ZENDUTY_API_KEY="$ZENDUTY_API_KEY" \
+  --env DATA_DIR=/app/data \
   --volume /var/run/docker.sock:/var/run/docker.sock \
   --volume .:/app/ \
   brablc/swarm-health-alerter:dev "$@"