Skip to content

Commit

Permalink
Fix condition: both destroys and creates must be at or above the threshold
Browse files Browse the repository at this point in the history
  • Loading branch information
brablc committed Jun 3, 2024
1 parent cddf786 commit 56f56bd
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 10 deletions.
5 changes: 2 additions & 3 deletions docker-cmd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ source "./logger.sh"

export LOOP_SLEEP=${LOOP_SLEEP:-10}
export ALERTER_URL=${ALERTER_URL:-http://alerter:80}
export SWARM_NAME=${SWARM_NAME:-Swarm}

if [[ ! -S /var/run/docker.sock ]]; then
log_error "Mount to /var/run/docker.sock missing?"
Expand Down Expand Up @@ -38,10 +39,8 @@ fi

### Manager code only

test -z "$SWARM_NAME" && log_warn "Env SWARM_NAME not defined using default"
swarm_name="${SWARM_NAME:-Swarm}"

function check_services() {
local swarm_name=$SWARM_NAME
while read service_name network_alias port; do
unique_name=$(echo "${swarm_name} ${service_name} ${network_alias} ${port}" )
unique_code=$(echo "${unique_name,,}" | sed -e 's/ /_/g' -e 's/[^a-zA-Z0-9_-]/_/g')
Expand Down
11 changes: 4 additions & 7 deletions event-alerter.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,15 @@ def process_events():
while events and events[0]["ts"] <= current_time - EVENTS_WINDOW:
events.popleft()

# Count events per service
for event in events:
counts[event["service_name"]][event["action"]] += 1
hosts[event["service_name"]].add(event["host"])
seen_services.add(event["service_name"])

for service_name, actions in counts.items():
if (
actions["destroy"] < EVENTS_THRESHOLD
and actions["create"] < EVENTS_THRESHOLD
if not (
actions["destroy"] >= EVENTS_THRESHOLD
and actions["create"] >= EVENTS_THRESHOLD
):
continue

Expand All @@ -63,10 +62,9 @@ def process_events():
f"{SWARM_NAME} {service_name} {get_random_str(10)}"
),
"message": f"{SWARM_NAME} service {service_name} not healthy",
"summary": f"There were {actions["create"]} containers created and {actions["destroy"]} destroyed within {EVENTS_WINDOW} seconds.\nReported by {list(hosts[service_name])} hosts.",
"summary": f"There were {actions["create"]} containers created and {actions["destroy"]} destroyed within {EVENTS_WINDOW} seconds.\nReported by {list(hosts[service_name])} host(s).",
}
pending_alerts[service_name] = data
log_error(f"Creating alert: {data["message"]}")
send_alert(data)

for service_name in list(pending_alerts.keys()):
Expand All @@ -80,7 +78,6 @@ def process_events():
"summary": f"No events in last {EVENTS_WINDOW} seconds, assuming service is healthy (or stopped)",
}
del pending_alerts[service_name]
log_info(f"Resolving alert: {data["message"]}")
send_alert(data)


Expand Down

0 comments on commit 56f56bd

Please sign in to comment.