Skip to content

Commit 85b8d28

Browse files
authored
Fixes skupperproject#1385: Log inter-inter connection failures only on pn condition description change (skupperproject#1386)
1 parent 93abd7c commit 85b8d28

File tree

2 files changed

+49
-21
lines changed

2 files changed

+49
-21
lines changed

src/server.c

+22-3
Original file line numberDiff line numberDiff line change
@@ -1065,6 +1065,7 @@ static bool handle(qd_server_t *qd_server, pn_event_t *e, pn_connection_t *pn_co
10651065
if (ctx && ctx->connector) { /* Outgoing connection */
10661066
const qd_server_config_t *config = &ctx->connector->config;
10671067
char conn_msg[QD_CXTR_CONN_MSG_BUF_SIZE]; // avoid holding connector lock when logging
1068+
char conn_msg_1[QD_CXTR_CONN_MSG_BUF_SIZE]; // this connection message does not contain the connection id
10681069

10691070
sys_mutex_lock(&ctx->connector->lock);
10701071
qd_increment_conn_index_lh(ctx);
@@ -1074,14 +1075,32 @@ static bool handle(qd_server_t *qd_server, pn_event_t *e, pn_connection_t *pn_co
10741075
qd_format_string(conn_msg, sizeof(conn_msg), "[C%"PRIu64"] Connection to %s failed: %s %s",
10751076
ctx->connection_id, config->host_port, pn_condition_get_name(condition),
10761077
pn_condition_get_description(condition));
1078+
1079+
qd_format_string(conn_msg_1, sizeof(conn_msg_1), "Connection to %s failed: %s %s",
1080+
config->host_port, pn_condition_get_name(condition), pn_condition_get_description(condition));
10771081
} else {
10781082
qd_format_string(conn_msg, sizeof(conn_msg), "[C%"PRIu64"] Connection to %s failed",
10791083
ctx->connection_id, config->host_port);
1084+
qd_format_string(conn_msg_1, sizeof(conn_msg_1), "Connection to %s failed", config->host_port);
1085+
}
1086+
//
1087+
// This is a fix for https://github.com/skupperproject/skupper-router/issues/1385
1088+
// The router will repeatedly try to connect to the host/port specified in the connector
1089+
// If it is unable to connect, an error message will be logged only once and more error messages
1090+
// from connection failures will only be logged if the error message changes.
1091+
// This is done so we don't flood the log with connection failure error messages
1092+
// Even though we restrict the number of times the error message is displayed, the
1093+
// router will still keep trying to connect to the host/port specified in the connector.
1094+
//
1095+
bool log_error_message = false;
1096+
if (strcmp(ctx->connector->conn_msg, conn_msg_1) != 0) {
1097+
strncpy(ctx->connector->conn_msg, conn_msg_1, QD_CXTR_CONN_MSG_BUF_SIZE);
1098+
log_error_message = true;
10801099
}
1081-
strncpy(ctx->connector->conn_msg, conn_msg, QD_CXTR_CONN_MSG_BUF_SIZE);
10821100
sys_mutex_unlock(&ctx->connector->lock);
1083-
1084-
qd_log(LOG_SERVER, QD_LOG_ERROR, "%s", conn_msg);
1101+
if (log_error_message) {
1102+
qd_log(LOG_SERVER, QD_LOG_ERROR, "%s", conn_msg);
1103+
}
10851104

10861105
} else if (ctx && ctx->listener) { /* Incoming connection */
10871106
if (condition && pn_condition_is_set(condition)) {

tests/system_tests_sasl_plain.py

+27-18
Original file line numberDiff line numberDiff line change
@@ -148,20 +148,27 @@ def test_inter_router_sasl_fail(self):
148148

149149
# There was no inter-router connection established.
150150
self.assertFalse(passed)
151-
152151
qd_manager = QdManager(address=self.routers[1].addresses[0])
153-
logs = qd_manager.get_log()
154152

155-
sasl_failed = False
156-
file_open_failed = False
157-
for log in logs:
158-
if log[0] == 'SERVER' and log[1] == "error" and "amqp:unauthorized-access Authentication failed [mech=PLAIN]" in log[2]:
159-
sasl_failed = True
160-
if log[0] == "CONN_MGR" and log[1] == "error" and "Unable to open password file" in log[2] and "error: No such file or directory" in log[2]:
161-
file_open_failed = True
153+
def check_sasl_failed():
154+
logs = qd_manager.get_log()
155+
sasl_failed = False
156+
for log in logs:
157+
if log[0] == 'SERVER' and log[1] == "error" and "amqp:unauthorized-access Authentication failed [mech=PLAIN]" in log[2]:
158+
sasl_failed = True
159+
break
160+
self.assertTrue(sasl_failed)
161+
162+
def check_file_open_failed():
163+
logs = qd_manager.get_log()
164+
file_open_failed = False
165+
for log in logs:
166+
if log[0] == "CONN_MGR" and log[1] == "error" and "Unable to open password file" in log[2] and "error: No such file or directory" in log[2]:
167+
file_open_failed = True
168+
self.assertTrue(file_open_failed)
162169

163-
self.assertTrue(sasl_failed)
164-
self.assertTrue(file_open_failed)
170+
retry_assertion(check_sasl_failed, delay=2)
171+
retry_assertion(check_file_open_failed, delay=2)
165172

166173

167174
class RouterTestPlainSaslFailureUsingLiteral(RouterTestPlainSaslCommon):
@@ -244,14 +251,16 @@ def test_inter_router_sasl_fail(self):
244251

245252
# There was no inter-router connection established.
246253
self.assertFalse(passed)
247-
logs = qd_manager.get_log()
248-
249-
sasl_failed = False
250-
for log in logs:
251-
if log[0] == 'SERVER' and log[1] == "error" and "amqp:unauthorized-access Authentication failed [mech=PLAIN]" in log[2]:
252-
sasl_failed = True
253254

254-
self.assertTrue(sasl_failed)
255+
def check_sasl_failed():
256+
logs = qd_manager.get_log()
257+
sasl_failed = False
258+
for log in logs:
259+
if log[0] == 'SERVER' and log[1] == "error" and "amqp:unauthorized-access Authentication failed [mech=PLAIN]" in log[2]:
260+
sasl_failed = True
261+
break
262+
self.assertTrue(sasl_failed)
263+
retry_assertion(check_sasl_failed, delay=2)
255264

256265

257266
class RouterTestPlainSasl(RouterTestPlainSaslCommon):

0 commit comments

Comments
 (0)