Skip to content

Commit

Permalink
Refactored and complete
Browse files Browse the repository at this point in the history
  • Loading branch information
brablc committed Jun 2, 2024
1 parent 8bf10df commit e2da847
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 62 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
dockerize
data
4 changes: 4 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@ FROM alpine:latest

ENV DOCKERIZE_VERSION v0.7.0

ENV DATA_DIR=/data

WORKDIR /app

RUN mkdir -p "$DATA_DIR"

COPY *.sh ./
COPY integrations/ integrations/

Expand Down
80 changes: 51 additions & 29 deletions docker-cmd.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#!/usr/bin/env bash

source ./config.sh
source ./logger.sh
script_path=$(readlink -f $0)
script_dir=${script_path%/*}
source "$script_dir/config.sh"
source "$script_dir/logger.sh"

LOOP_SLEEP=${LOOP_SLEEP:-10s}

Expand All @@ -13,47 +15,67 @@ fi
test -z "$ALERT_SCRIPT" && log_warn "Env ALERT_SCRIPT not defined - alerting disabled"
test -z "$SWARM_NAME" && log_warn "Env SWARM_NAME not defined using default"

SWARM_NAME="${SWARM_NAME:-Swarm}"
swarm_name="${SWARM_NAME:-Swarm}"
DATA_DIR=${DATA_DIR:-$script_dir/data}
mkdir -p $DATA_DIR

if [[ -n $ALERT_SCRIPT && ! -f $ALERT_SCRIPT ]]; then
log_error "Alert script defined but not accessible on $ALERT_SCRIPT path"
ALERT_SCRIPT=""
log_error "Alert script defined but not accessible on $ALERT_SCRIPT path!"
ALERT_SCRIPT="jq ."
fi

function check_services() {
local swarm_name=$SWARM_NAME
while read service network_alias port; do
read unique_id rest < <(echo "${swarm_name}_${service}_${network_alias}_${port}" | md5sum)
prefix="/tmp/alerter-${unique_id}"
alert_file=${prefix}.pending
log_file=${prefix}.log
while read service_name network_alias port; do
unique_name=$(echo "${swarm_name} ${service_name} ${network_alias} ${port}" )
unique_code=$(echo "${unique_name,,}" | sed -e 's/ /_/g' -e 's/[^a-zA-Z0-9_-]/_/g')
random_str=$(tr -dc 'a-zA-Z0-9' < /dev/urandom | head -c 10)
read unique_id _ < <(echo -n "$unique_name $random_str" | md5sum)
prefix="$DATA_DIR/${unique_code}"
pending_file="${prefix}.pending"
log_file="${prefix}.log"
# used for testing
real_port=$port
if [[ -f test-change-port-$port ]]; then
read real_port < test-change-port-$port
real_port="$port"
if [[ -f "$DATA_DIR/test-change-port-$port" ]]; then
real_port=$(< "$DATA_DIR/test-change-port-$port")
fi
action=""
./dockerize -timeout 5s -wait tcp://$network_alias:$real_port true 2>$log_file
if [ $? -ne 0 ]; then
if [[ -f $alert_file ]]; then
log_warn "$service|$network_alias:$port|Pending alert"
if [[ -f $pending_file ]]; then
log_warn "$service_name|$network_alias:$port|Pending alert"
else
log_error "$service|$network_alias:$port|Creating alert"
echo "$service $network_alias:$port"> $alert_file
if [[ -n $ALERT_SCRIPT ]]; then
cat $log_file | /bin/bash -c "$ALERT_SCRIPT CREATE '$swarm_name' '$service' '$network_alias' '$port'"
fi
cat $log_file
log_error "$service_name|$network_alias:$port|Creating alert"
echo "$unique_id" > $pending_file
action="create"
fi
else
if [[ -f $alert_file ]]; then
log_info "$service|$network_alias:$port|Resolving alert"
if [[ -n $ALERT_SCRIPT ]]; then
cat $log_file | /bin/bash -c "$ALERT_SCRIPT RESOLVE '$SWARM_NAME' '$service' '$network_alias' '$port'"
fi
rm -f $alert_file
if [[ -f $pending_file ]]; then
log_info "$service_name|$network_alias:$port|Resolving alert"
action="resolve"
unique_id=$(cat $pending_file)
rm -f $pending_file
fi
rm -f $log_file
fi
# Notify the alert script only when the alert state changed during this pass.
# $action is "create" (service just became unreachable), "resolve" (service is
# reachable again) or empty (nothing to report).
if [[ -n $action ]]; then
  # Build the payload with jq so every value is JSON-escaped, then pass it to
  # the alert script on stdin. The action must be the one decided above:
  # a hard-coded "create" would make every resolve event look like a new alert.
  jq -n \
    --arg action "$action" \
    --arg unique_id "$unique_id" \
    --arg swarm_name "$swarm_name" \
    --arg service_name "$service_name" \
    --arg network_alias "$network_alias" \
    --arg port "$port" \
    --arg log "$(jq -R -s @json "$log_file")" \
    '{
      "action": $action,
      "unique_id": $unique_id,
      "swarm_name": $swarm_name,
      "service_name": $service_name,
      "network_alias": $network_alias,
      "port": $port,
      "log": $log
    }' | /bin/bash -c "$ALERT_SCRIPT"
fi
rm -f $log_file
done < <(./services.sh)
}

Expand Down
66 changes: 33 additions & 33 deletions integrations/zenduty.sh
Original file line number Diff line number Diff line change
@@ -1,61 +1,61 @@
#!/usr/bin/env bash

echo "Reading log file from stdin ..." >&2
summary=$(cat | jq -Rs .)

script_path=$(readlink -f $0)
script_dir=${script_path%/*}
source "$script_dir/../config.sh"
source "$script_dir/../logger.sh"

if [[ -z $ZENDUTY_API_KEY ]]; then
log_error "Expecting ZENDUTY_API_KEY env"
DATA_DIR=${DATA_DIR:-$script_dir/../data}

input_file=$(mktemp $DATA_DIR/zenduty-input.XXXXXX)
trap "rm -f $input_file" EXIT

if ! timeout 2s cat > $input_file; then
log_error "Reading from stdin timed out."
exit 1
fi

if [[ $# != 5 ]]; then
log_error "Expecting parameters: ACTION SWARM_NAME SERVICE_NAME NETWORK_ALIAS PORT"
if [[ -z $ZENDUTY_API_KEY ]]; then
log_error "Expecting ZENDUTY_API_KEY env"
exit 1
fi

ACTION=$1
SWARM_NAME=$2
SERVICE_NAME=$3
NETWORK_ALIAS=$4
PORT=$5
action=$(jq -r .action $input_file)
entity_id=$(jq -r .unique_id $input_file)

alert_type=""
case $ACTION in
CREATE )
appendix=""
case $action in
create )
alert_type="critical"
appendix="not available"
;;
RESOLVE )
resolve )
alert_type="resolved"
appendix="is available"
;;
*)
log_error "Action must be one of: CREATE RESOLVE. Received: '$ACTION'"
# An unknown action leaves alert_type empty; stop here instead of sending a
# meaningless event, as the other validation failures in this script do.
log_error "Action must be one of: create resolve. Received: '$action'"
exit 1
;;
esac

read entity_id rest < <(echo "${SWARM_NAME}_${SERVICE_NAME}_${NETWORK_ALIAS}_${PORT}" | md5sum)

request_file=/tmp/zenduty-request-${entity_id}.json
response_file=/tmp/zenduty-response-${entity_id}.json
request_file=$DATA_DIR/${entity_id}-zenduty-request.json
response_file=$DATA_DIR/${entity_id}-zenduty-response.json

cat << __PAYLOAD > $request_file
{
"alert_type": "$alert_type",
"entity_id": "$entity_id",
"message":"$SWARM_NAME service $SERVICE_NAME ($NETWORK_ALIAS:$PORT) not available",
"summary": $summary
}
__PAYLOAD
jq -r \
--arg alert_type "$alert_type" \
--arg appendix "$appendix" \
'{
"alert_type": $alert_type,
"entity_id": .unique_id,
"message": "\(.swarm_name) service \(.service_name) (\(.network_alias):\(.port)) \($appendix)",
"summary": .log
}' $input_file > $request_file

# log_info "Request file:"
# jq . $request_file 2>/dev/null || cat $request_file
log_info "Request file:"
jq . $request_file 2>/dev/null || cat $request_file

url="https://www.zenduty.com/api/events/${ZENDUTY_API_KEY}/"
curl -s -X POST "$url" -H 'Content-Type: application/json' -d @$request_file >$response_file 2>&1
curl -s -X POST "$url" -H 'Content-Type: application/json' -d @$request_file >$response_file
return_code=$?

if [ $return_code -ne 0 ]; then
Expand All @@ -65,7 +65,7 @@ fi
log_info "Response file:"
jq . $response_file 2>/dev/null || cat $response_file

if [[ $ACTION == "RESOLVE" ]]; then
if [[ $action == "resolve" ]]; then
rm -f $request_file
rm -f $response_file
fi
1 change: 1 addition & 0 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ docker run --rm \
--env ALERT_SCRIPT="$ALERT_SCRIPT" \
--env SWARM_NAME="$SWARM_NAME" \
--env ZENDUTY_API_KEY="$ZENDUTY_API_KEY" \
--env DATA_DIR=/app/data \
--volume /var/run/docker.sock:/var/run/docker.sock \
--volume .:/app/ \
brablc/swarm-health-alerter:dev "$@"

0 comments on commit e2da847

Please sign in to comment.