Skip to content

Commit 5e52461

Browse files
committed
Merge branch 'maint', 4.4.2 release
2 parents a709554 + fa3e186 commit 5e52461

21 files changed

+2429
-1738
lines changed

Changelog

+14
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,20 @@
22
Nagios Core 4 Change Log
33
########################
44

5+
4.4.2 - 2018-08-16
6+
------------------
7+
FIXES
8+
* Fix comment data being duplicated after a `service nagios reload` or similar (#549) (Bryan Heden)
9+
* Fix check_interval and retry_interval not changing at the appropriate times (#551) (Scott Wilkerson)
10+
* Fixed passive checks sending recovery email when host was previously UP (#552) (Scott Wilkerson)
11+
* Fixed flapping comments duplication on nagios reload (#554) (Christian Jung)
12+
* Fix null pointer dereferences (CVE-2018-13441, CVE-2018-13458, CVE-2018-13457) (Trevor McDonald)
13+
* Fixed syntax error in file: default-init.in (#558) (Christian Zettel)
14+
* Reset current notification number and state flags when the host recovers, and reset all service variables when services recover (fixes #557) (Scott Wilkerson)
15+
* Fixed wrong counting of service status totals when showing servicegroup details (#548) (Christian Zettel, Bryan Heden)
16+
* Fixed avail.cgi not printing CSV output when checkbox is checked (for any type: host/service/hostgroup/servicegroup) (#570) (Bryan Heden)
17+
* Fixed nagios not logging SOFT state changes after 1 (Scott Wilkerson)
18+
519
4.4.1 - 2018-06-25
620
------------------
721
FIXES

THANKS

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ wrong, please let me know.
5454
* Chris Kolquist
5555
* Chris Rothecker
5656
* Chris Witterholt
57+
* Christian Jung
5758
* Christian Masopust
5859
* Christian Mies
5960
* Christian Zettel

base/checks.c

+58-5
Original file line numberDiff line numberDiff line change
@@ -893,6 +893,9 @@ static inline void service_state_or_hard_state_type_change(service * svc, int st
893893
if (svc->current_state == STATE_OK) {
894894
svc->last_problem_id = svc->current_problem_id;
895895
svc->current_problem_id = 0L;
896+
svc->current_attempt = 1;
897+
svc->current_notification_number = 0;
898+
svc->host_problem_at_last_check = FALSE;
896899
}
897900

898901
svc->state_type = SOFT_STATE;
@@ -911,6 +914,11 @@ static inline void service_state_or_hard_state_type_change(service * svc, int st
911914

912915
if (state_or_type_change) {
913916

917+
/* check if service should go into downtime from flexible downtime */
918+
if (svc->pending_flex_downtime > 0) {
919+
check_pending_flex_service_downtime(svc);
920+
}
921+
914922
/* reset notification times and suppression option */
915923
svc->last_notification = (time_t)0;
916924
svc->next_notification = (time_t)0;
@@ -941,8 +949,12 @@ static inline void host_state_or_hard_state_type_change(host * hst, int state_ch
941949

942950
log_debug_info(DEBUGL_CHECKS, 2, "Check type passive and passive host checks aren't false\n");
943951

944-
hst->current_attempt = 1;
945-
hard_state_change = TRUE;
952+
if (state_change == TRUE) {
953+
hst->current_attempt = 1;
954+
hard_state_change = TRUE;
955+
}
956+
957+
hst->state_type = HARD_STATE;
946958
}
947959

948960
/* update event and problem ids */
@@ -989,6 +1001,9 @@ static inline void host_state_or_hard_state_type_change(host * hst, int state_ch
9891001

9901002
if (state_or_type_change) {
9911003

1004+
/* check if host should go into downtime from flexible downtime */
1005+
check_pending_flex_host_downtime(hst);
1006+
9921007
/* reset notification times and suppression option */
9931008
hst->last_notification = (time_t)0;
9941009
hst->next_notification = (time_t)0;
@@ -1228,7 +1243,7 @@ int handle_async_service_check_result(service *svc, check_result *cr)
12281243
next_check = (time_t)(svc->last_check + (svc->check_interval * interval_length));
12291244

12301245
/***********************************************/
1231-
/********** SCHEDULE HOST CHECK LOGIC **********/
1246+
/********** SCHEDULE SERVICE CHECK LOGIC **********/
12321247
/***********************************************/
12331248
if (svc->current_state == STATE_OK) {
12341249

@@ -1269,6 +1284,7 @@ int handle_async_service_check_result(service *svc, check_result *cr)
12691284

12701285
svc->host_problem_at_last_check = TRUE;
12711286
}
1287+
12721288
}
12731289
else {
12741290

@@ -1368,6 +1384,9 @@ int handle_async_service_check_result(service *svc, check_result *cr)
13681384
else {
13691385

13701386
log_debug_info(DEBUGL_CHECKS, 1, "Service is a non-OK state (%s)!", service_state_name(svc->current_state));
1387+
1388+
svc->state_type = SOFT_STATE;
1389+
svc->current_attempt = 1;
13711390

13721391
handle_event = TRUE;
13731392
}
@@ -1395,6 +1414,12 @@ int handle_async_service_check_result(service *svc, check_result *cr)
13951414

13961415
log_debug_info(DEBUGL_CHECKS, 1, "Service experienced a SOFT recovery.\n");
13971416
}
1417+
1418+
1419+
/* reset all service variables because its ok now... */
1420+
svc->state_type = HARD_STATE;
1421+
state_change = TRUE;
1422+
hard_state_change = TRUE;
13981423
}
13991424

14001425
/***** SERVICE IS STILL IN PROBLEM STATE *****/
@@ -1418,6 +1443,14 @@ int handle_async_service_check_result(service *svc, check_result *cr)
14181443
}
14191444
}
14201445
}
1446+
1447+
/* soft states should be using retry_interval */
1448+
if (svc->state_type == SOFT_STATE) {
1449+
1450+
log_debug_info(DEBUGL_CHECKS, 2, "Service state type is soft, using retry_interval\n");
1451+
1452+
next_check = (unsigned long) (current_time + svc->retry_interval * interval_length);
1453+
}
14211454

14221455
/* check for a state change */
14231456
if (svc->current_state != svc->last_state || (svc->current_state == STATE_OK && svc->state_type == SOFT_STATE)) {
@@ -1454,6 +1487,8 @@ int handle_async_service_check_result(service *svc, check_result *cr)
14541487
if (svc->current_attempt >= svc->max_attempts && svc->current_state != svc->last_hard_state) {
14551488

14561489
log_debug_info(DEBUGL_CHECKS, 2, "Service had a HARD STATE CHANGE!!\n");
1490+
1491+
next_check = (unsigned long)(current_time + (svc->check_interval * interval_length));
14571492

14581493
hard_state_change = TRUE;
14591494

@@ -1464,6 +1499,11 @@ int handle_async_service_check_result(service *svc, check_result *cr)
14641499
/* handle some acknowledgement things and update last_state_change */
14651500
service_state_or_hard_state_type_change(svc, state_change, hard_state_change, &log_event, &handle_event);
14661501

1502+
/* fix edge cases where log_event wouldn't have been set or won't be */
1503+
if (svc->current_state != STATE_OK && svc->state_type == SOFT_STATE) {
1504+
log_event = TRUE;
1505+
}
1506+
14671507
record_last_service_state_ended(svc);
14681508

14691509
check_for_service_flapping(svc, TRUE, TRUE);
@@ -2197,6 +2237,9 @@ int handle_async_host_check_result(host *hst, check_result *cr)
21972237
else {
21982238

21992239
log_debug_info(DEBUGL_CHECKS, 1, "Host is no longer UP (%s)!\n", host_state_name(hst->current_state));
2240+
2241+
hst->state_type = SOFT_STATE;
2242+
hst->current_attempt = 1;
22002243

22012244
/* propagate checks to immediate parents if they are UP */
22022245
host_propagate_checks_to_immediate_parents(hst, FALSE, current_time);
@@ -2276,7 +2319,9 @@ int handle_async_host_check_result(host *hst, check_result *cr)
22762319
if (hst->current_state != HOST_UP && (hst->check_type == CHECK_TYPE_ACTIVE || translate_passive_host_checks == TRUE)) {
22772320

22782321
hst->current_state = determine_host_reachability(hst);
2279-
next_check = (unsigned long)(current_time + (hst->retry_interval * interval_length));
2322+
if (hst->state_type == SOFT_STATE)
2323+
next_check = (unsigned long)(current_time + (hst->retry_interval * interval_length));
2324+
22802325
}
22812326

22822327
/* check for state change */
@@ -2310,7 +2355,9 @@ int handle_async_host_check_result(host *hst, check_result *cr)
23102355

23112356
log_debug_info(DEBUGL_CHECKS, 2, "Host had a HARD STATE CHANGE!!\n");
23122357

2313-
hard_state_change = TRUE;
2358+
next_check = (unsigned long)(current_time + (hst->check_interval * interval_length));
2359+
2360+
hard_state_change = TRUE;
23142361
send_notification = TRUE;
23152362
}
23162363

@@ -2372,6 +2419,12 @@ int handle_async_host_check_result(host *hst, check_result *cr)
23722419
}
23732420
}
23742421

2422+
/* the host recovered, so reset the current notification number and state flags (after the recovery notification has gone out) */
2423+
if(hst->current_state == HOST_UP && hst->state_type == HARD_STATE && hard_state_change == TRUE) {
2424+
hst->current_notification_number = 0;
2425+
hst->notified_on = 0;
2426+
}
2427+
23752428
if (obsess_over_hosts == TRUE) {
23762429
obsessive_compulsive_host_check_processor(hst);
23772430
}

base/flapping.c

+12-8
Original file line numberDiff line numberDiff line change
@@ -308,10 +308,12 @@ void set_service_flap(service *svc, double percent_change, double high_threshold
308308
/* log a notice - this one is parsed by the history CGI */
309309
logit(NSLOG_RUNTIME_WARNING, FALSE, "SERVICE FLAPPING ALERT: %s;%s;STARTED; Service appears to have started flapping (%2.1f%% change >= %2.1f%% threshold)\n", svc->host_name, svc->description, percent_change, high_threshold);
310310

311-
/* add a non-persistent comment to the service */
312-
asprintf(&temp_buffer, "Notifications for this service are being suppressed because it was detected as having been flapping between different states (%2.1f%% change >= %2.1f%% threshold). When the service state stabilizes and the flapping stops, notifications will be re-enabled.", percent_change, high_threshold);
313-
add_new_service_comment(FLAPPING_COMMENT, svc->host_name, svc->description, time(NULL), "(Nagios Process)", temp_buffer, 0, COMMENTSOURCE_INTERNAL, FALSE, (time_t)0, &(svc->flapping_comment_id));
314-
my_free(temp_buffer);
311+
if (svc->flapping_comment_id == 0) {
312+
/* add a non-persistent comment to the service */
313+
asprintf(&temp_buffer, "Notifications for this service are being suppressed because it was detected as having been flapping between different states (%2.1f%% change >= %2.1f%% threshold). When the service state stabilizes and the flapping stops, notifications will be re-enabled.", percent_change, high_threshold);
314+
add_new_service_comment(FLAPPING_COMMENT, svc->host_name, svc->description, time(NULL), "(Nagios Process)", temp_buffer, 0, COMMENTSOURCE_INTERNAL, FALSE, (time_t)0, &(svc->flapping_comment_id));
315+
my_free(temp_buffer);
316+
}
315317

316318
/* set the flapping indicator */
317319
svc->is_flapping = TRUE;
@@ -396,10 +398,12 @@ void set_host_flap(host *hst, double percent_change, double high_threshold, doub
396398
/* log a notice - this one is parsed by the history CGI */
397399
logit(NSLOG_RUNTIME_WARNING, FALSE, "HOST FLAPPING ALERT: %s;STARTED; Host appears to have started flapping (%2.1f%% change > %2.1f%% threshold)\n", hst->name, percent_change, high_threshold);
398400

399-
/* add a non-persistent comment to the host */
400-
asprintf(&temp_buffer, "Notifications for this host are being suppressed because it was detected as having been flapping between different states (%2.1f%% change > %2.1f%% threshold). When the host state stabilizes and the flapping stops, notifications will be re-enabled.", percent_change, high_threshold);
401-
add_new_host_comment(FLAPPING_COMMENT, hst->name, time(NULL), "(Nagios Process)", temp_buffer, 0, COMMENTSOURCE_INTERNAL, FALSE, (time_t)0, &(hst->flapping_comment_id));
402-
my_free(temp_buffer);
401+
if (hst->flapping_comment_id == 0) {
402+
/* add a non-persistent comment to the host */
403+
asprintf(&temp_buffer, "Notifications for this host are being suppressed because it was detected as having been flapping between different states (%2.1f%% change > %2.1f%% threshold). When the host state stabilizes and the flapping stops, notifications will be re-enabled.", percent_change, high_threshold);
404+
add_new_host_comment(FLAPPING_COMMENT, hst->name, time(NULL), "(Nagios Process)", temp_buffer, 0, COMMENTSOURCE_INTERNAL, FALSE, (time_t)0, &(hst->flapping_comment_id));
405+
my_free(temp_buffer);
406+
}
403407

404408
/* set the flapping indicator */
405409
hst->is_flapping = TRUE;

base/nagios.c

+3
Original file line numberDiff line numberDiff line change
@@ -878,6 +878,9 @@ int main(int argc, char **argv) {
878878
/* clean up the scheduled downtime data */
879879
cleanup_downtime_data();
880880

881+
/* clean up comment data */
882+
free_comment_data();
883+
881884
/* clean up the status data if we are not restarting */
882885
if(sigrestart == FALSE) {
883886
cleanup_status_data(TRUE);

base/notifications.c

+27-27
Original file line numberDiff line numberDiff line change
@@ -567,29 +567,6 @@ int check_service_notification_viability(service *svc, int type, int options) {
567567
return ERROR;
568568
}
569569

570-
/***** RECOVERY NOTIFICATIONS ARE GOOD TO GO AT THIS POINT *****/
571-
if(svc->current_state == STATE_OK)
572-
return OK;
573-
574-
/* don't notify contacts about this service problem again if the notification interval is set to 0 */
575-
if(svc->no_more_notifications == TRUE) {
576-
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "We shouldn't re-notify contacts about this service problem.\n");
577-
return ERROR;
578-
}
579-
580-
/* if the host is down or unreachable, don't notify contacts about service failures */
581-
if(temp_host->current_state != STATE_UP && temp_host->state_type == HARD_STATE) {
582-
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "The host is either down or unreachable, so we won't notify contacts about this service.\n");
583-
return ERROR;
584-
}
585-
586-
/* don't notify if we haven't waited long enough since the last time (and the service is not marked as being volatile) */
587-
if((current_time < svc->next_notification) && svc->is_volatile == FALSE) {
588-
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "We haven't waited long enough to re-notify contacts about this service.\n");
589-
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "Next valid notification time: %s", ctime(&svc->next_notification));
590-
return ERROR;
591-
}
592-
593570
/* if this service is currently in a scheduled downtime period, don't send the notification */
594571
if(svc->scheduled_downtime_depth > 0) {
595572
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "This service is currently in a scheduled downtime, so we won't send notifications.\n");
@@ -614,6 +591,29 @@ int check_service_notification_viability(service *svc, int type, int options) {
614591
return ERROR;
615592
}
616593

594+
/***** RECOVERY NOTIFICATIONS ARE GOOD TO GO AT THIS POINT *****/
595+
if(svc->current_state == STATE_OK)
596+
return OK;
597+
598+
/* don't notify contacts about this service problem again if the notification interval is set to 0 */
599+
if(svc->no_more_notifications == TRUE) {
600+
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "We shouldn't re-notify contacts about this service problem.\n");
601+
return ERROR;
602+
}
603+
604+
/* if the host is down or unreachable, don't notify contacts about service failures */
605+
if(temp_host->current_state != STATE_UP && temp_host->state_type == HARD_STATE) {
606+
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "The host is either down or unreachable, so we won't notify contacts about this service.\n");
607+
return ERROR;
608+
}
609+
610+
/* don't notify if we haven't waited long enough since the last time (and the service is not marked as being volatile) */
611+
if((current_time < svc->next_notification) && svc->is_volatile == FALSE) {
612+
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "We haven't waited long enough to re-notify contacts about this service.\n");
613+
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "Next valid notification time: %s", ctime(&svc->next_notification));
614+
return ERROR;
615+
}
616+
617617
return OK;
618618
}
619619

@@ -1495,16 +1495,16 @@ int check_host_notification_viability(host *hst, int type, int options) {
14951495
return ERROR;
14961496
}
14971497

1498-
/***** RECOVERY NOTIFICATIONS ARE GOOD TO GO AT THIS POINT *****/
1499-
if(hst->current_state == HOST_UP)
1500-
return OK;
1501-
15021498
/* if this host is currently in a scheduled downtime period, don't send the notification */
15031499
if(hst->scheduled_downtime_depth > 0) {
15041500
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "This host is currently in a scheduled downtime, so we won't send notifications.\n");
15051501
return ERROR;
15061502
}
15071503

1504+
/***** RECOVERY NOTIFICATIONS ARE GOOD TO GO AT THIS POINT *****/
1505+
if(hst->current_state == HOST_UP)
1506+
return OK;
1507+
15081508
/* check if we shouldn't renotify contacts about the host problem */
15091509
if(hst->no_more_notifications == TRUE) {
15101510
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "We shouldn't re-notify contacts about this host problem.\n");

base/query-handler.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ static int qh_echo(int sd, char *buf, unsigned int len)
2626
{
2727
int result = 0;
2828

29-
if (!strcmp(buf, "help")) {
29+
if (buf == NULL || !strcmp(buf, "help")) {
3030

3131
nsock_printf_nul(sd,
3232
"Query handler that simply echoes back what you send it.");
@@ -371,7 +371,7 @@ static int qh_help(int sd, char *buf, unsigned int len)
371371
{
372372
struct query_handler *qh = NULL;
373373

374-
if (!*buf || !strcmp(buf, "help")) {
374+
if (buf == NULL || !strcmp(buf, "help")) {
375375
nsock_printf_nul(sd,
376376
" help <name> show help for handler <name>\n"
377377
" help list list registered handlers\n");
@@ -405,7 +405,7 @@ static int qh_core(int sd, char *buf, unsigned int len)
405405
{
406406
char *space;
407407

408-
if (*buf == 0 || !strcmp(buf, "help")) {
408+
if (buf == NULL || !strcmp(buf, "help")) {
409409

410410
nsock_printf_nul(sd,
411411
"Query handler for manipulating nagios core.\n"

0 commit comments

Comments
 (0)