forked from rsyslog/rsyslog
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdiag.sh
executable file
·3007 lines (2733 loc) · 97.4 KB
/
diag.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#
# this shell script provides commands to the common diag system. It enables
# test scripts to wait for certain conditions and initiate certain actions.
# needs support in config file.
# NOTE: this file should be included with ". diag.sh", as it otherwise is
# not always able to convey back states to the upper-level test driver
# begun 2009-05-27 by rgerhards
# This file is part of the rsyslog project, released under GPLv3
#
# This file can be customized to environment specifics via environment
# variables:
# SUDO either blank, or the command to use for sudo (usually "sudo").
# This env var is sometimes used to increase the chance that we
# get extra information, e.g. when trying to analyze running
# processes. Bottom line: if sudo is possible and you are OK with
# the testbench utilizing this, use
# export SUDO=sudo
# to activate the extra functionality.
# RS_SORTCMD Sort command to use (must support $RS_SORT_NUMERIC_OPT option). If unset,
# "sort" is used. E.g. Solaris needs "gsort"
# RS_SORT_NUMERIC_OPT option to use for numerical sort, If unset "-g" is used.
# RS_CMPCMD cmp command to use. If unset, "cmd" is used.
# E.g. Solaris needs "gcmp"
# RS_HEADCMD head command to use. If unset, "head" is used.
# E.g. Solaris needs "ghead"
# RSTB_GLOBAL_INPUT_SHUTDOWN_TIMEOUT
# global input timeout shutdown, default 60000 (60) sec. This should
# only be set specifically if there is good need to do so, e.g. if
# a test needs to timeout.
# USE_VALGRIND if set to "YES", the test will be run under valgrind control, with
# "default" settings (memcheck including leak check, termination on error).
# This permits to have valgrind and non-valgrind versions of the same test
# without the need to write it twice: just have a 2-liner -vg.sh which
# does:
# export USE_VALGRIND="YES"
# source original-test.sh
# sample can be seen in imjournal-basic[.vg].sh
# You may also use USE_VALGRIND="YES-NOLEAK" to request valgrind without
# leakcheck (this sometimes is needed).
# ABORT_ALL_ON_TEST_FAIL
# if set to "YES" and one test fails, all others are not executed but skipped.
# This is useful in long-running CI jobs where we are happy with seeing the
# first failure (to save time).
#
#
# EXIT STATES
# 0 - ok
# 1 - FAIL
# 77 - SKIP
# 100 - Testbench failure
export TB_ERR_TIMEOUT=101
# 177 - internal state: test failed, but in a way that makes us strongly believe
# this is caused by environment. This will lead to exit 77 (SKIP), but report
# the failure if failure reporting is active
# environment variables:
# USE_AUTO_DEBUG "on" --> enables automatic debugging, anything else
# turns it off
# diag system internal environment variables
# these variables are for use by test scripts - they CANNOT be
# overridden by the user
# TCPFLOOD_EXTRA_OPTS enables to set extra options for tcpflood, usually
# used in tests that have a common driver where it
# is too hard to set these options otherwise
# CONFIG
export ZOOPIDFILE="$(pwd)/zookeeper.pid"
#valgrind="valgrind --malloc-fill=ff --free-fill=fe --log-fd=1"
#valgrind="valgrind --tool=callgrind" # for kcachegrind profiling
# **** use the line below for very hard to find leaks! *****
#valgrind="valgrind --leak-check=full --show-leak-kinds=all --malloc-fill=ff --free-fill=fe --log-fd=1"
#valgrind="valgrind --tool=drd --log-fd=1"
#valgrind="valgrind --tool=helgrind --log-fd=1 --suppressions=$srcdir/linux_localtime_r.supp --gen-suppressions=all"
#valgrind="valgrind --tool=exp-ptrcheck --log-fd=1"
#set -o xtrace
#export RSYSLOG_DEBUG="debug nologfuncflow noprintmutexaction nostdout"
#export RSYSLOG_DEBUGLOG="log"
TB_TIMEOUT_STARTSTOP=400 # timeout for start/stop rsyslogd in tenths (!) of a second 400 => 40 sec
# note that 40sec for the startup should be sufficient even on very slow machines. we changed this from 2min on 2017-12-12
TB_TEST_TIMEOUT=90 # number of seconds after which test checks timeout (eg. waits)
TB_TEST_MAX_RUNTIME=${TEST_MAX_RUNTIME:-580} # maximum runtime in seconds for a test;
# default TEST_MAX_RUNTIME e.g. for long-running tests or special
# testbench use. Testbench will abort test
# after that time (iff it has a chance to, not strictly enforced)
# Note: 580 is slightly below the rsyslog-ci required max non-stdout writing timeout
# This is usually at 600 (10 minutes) and processes will be force-terminated if they
# go over it. This is especially bad because we do not receive notifications in this
# case.
export RSYSLOG_DEBUG_TIMEOUTS_TO_STDERR="on" # we want to know when we loose messages due to timeouts
if [ "$TESTTOOL_DIR" == "" ]; then
export TESTTOOL_DIR="${srcdir:-.}"
fi
# newer functionality is preferably introduced via bash functions
# rgerhards, 2018-07-03
rsyslog_testbench_test_url_access() {
local missing_requirements=
if ! hash curl 2>/dev/null ; then
missing_requirements="'curl' is missing in PATH; Make sure you have cURL installed! Skipping test ..."
fi
if [ -n "${missing_requirements}" ]; then
printf '%s\n' "${missing_requirements}"
exit 77
fi
local http_endpoint="$1"
if ! curl --fail --max-time 30 "${http_endpoint}" 1>/dev/null 2>&1; then
echo "HTTP endpoint '${http_endpoint}' is not reachable. Skipping test ..."
exit 77
else
echo "HTTP endpoint '${http_endpoint}' is reachable! Starting test ..."
fi
}
# function to skip a test on a specific platform
# $1 is what we check in uname, $2 (optional) is a reason message
skip_platform() {
if [ "$(uname)" == "$1" ]; then
printf 'platform is "%s" - test does not work under "%s"\n' "$(uname)" "$1"
if [ "$2" != "" ]; then
printf 'reason: %s\n' "$2"
fi
exit 77
fi
}
# function to skip a test if TSAN is enabled
# This is necessary as TSAN does not properly handle thread creation
# after fork() - which happens regularly in rsyslog if backgrounding
# is activated.
# $1 is the reason why TSAN is not supported
# note: we depend on CFLAGS to properly reflect build options (what
# usually is the case when the testbench is run)
skip_TSAN() {
echo skip:_TSAN, CFLAGS $CFLAGS
if [[ "$CFLAGS" == *"sanitize=thread"* ]]; then
printf 'test incompatible with TSAN because of %s\n' "$1"
exit 77
fi
}
# a consistent format to output testbench timestamps
tb_timestamp() {
printf '%s[%s] ' "$(date +%H:%M:%S)" "$(( $(date +%s) - TB_STARTTEST ))"
}
# override the test timeout, but only if the new value is higher
# than the previous one. This is necessary for slow test systems
# $1 is timeout in seconds
override_test_timeout() {
if [ "${1:=0}" == "" ]; then
printf 'FAIL: invalid testbench call, override_test_timeout needs value\n'
error_exit 100
fi
if [ "$1" -gt "$TB_TEST_TIMEOUT" ]; then
TB_TEST_TIMEOUT=$1
printf 'info: TB_TEST_TIMEOUT increased to %s\n' "$TB_TEST_TIMEOUT"
fi
}
# set special tests status. States ($1) are:
# unreliable -- as the name says, test does not work reliably; $2 must be github issue URL
# depending on CI configuration, "unreliable" tests are skipped and not failed
# or not executed at all. Test reports may also be amended to github issue.
test_status() {
if [ "$1" == "unreliable" ]; then
if [ "$2" == "" ]; then
printf 'TESTBENCH_ERROR: github issue URL must be given\n'
error_exit 100
fi
export TEST_STATUS="$1"
export TEST_GITHUB_ISSUE="$2"
else
printf 'TESTBENCH_ERROR: test_status "%s" unknown\n' "$1"
error_exit 100
fi
}
setvar_RS_HOSTNAME() {
printf '### Obtaining HOSTNAME (prerequisite, not actual test) ###\n'
generate_conf ""
add_conf 'module(load="../plugins/imtcp/.libs/imtcp")
input(type="imtcp" port="0" listenPortFileName="'$RSYSLOG_DYNNAME'.tcpflood_port")
$template hostname,"%hostname%"
local0.* ./'${RSYSLOG_DYNNAME}'.HOSTNAME;hostname
'
rm -f "${RSYSLOG_DYNNAME}.HOSTNAME"
startup ""
tcpflood -m1 -M "\"<128>\""
shutdown_when_empty
wait_shutdown ""
export RS_HOSTNAME="$(cat ${RSYSLOG_DYNNAME}.HOSTNAME)"
rm -f "${RSYSLOG_DYNNAME}.HOSTNAME"
echo HOSTNAME is: $RS_HOSTNAME
}
# begin a new testconfig
# 2018-09-07: Incremented inputs.timeout.shutdown to 60000 because kafka tests may not be
# finished under stress otherwise
# $1 is the instance id, if given
generate_conf() {
if [ "$RSTB_GLOBAL_QUEUE_SHUTDOWN_TIMEOUT" == "" ]; then
RSTB_GLOBAL_QUEUE_SHUTDOWN_TIMEOUT="10000"
fi
if [ "$RSTB_GLOBAL_INPUT_SHUTDOWN_TIMEOUT" == "" ]; then
RSTB_GLOBAL_INPUT_SHUTDOWN_TIMEOUT="60000"
fi
if [ "$RSTB_ACTION_DEFAULT_Q_TO_SHUTDOWN" == "" ]; then
RSTB_ACTION_DEFAULT_Q_TO_SHUTDOWN="20000"
fi
if [ "$RSTB_ACTION_DEFAULT_Q_TO_ENQUEUE" == "" ]; then
RSTB_ACTION_DEFAULT_Q_TO_ENQUEUE="20000"
fi
export TCPFLOOD_PORT="$(get_free_port)"
if [ "$1" == "" ]; then
export TESTCONF_NM="${RSYSLOG_DYNNAME}_" # this basename is also used by instance 2!
export RSYSLOG_OUT_LOG="${RSYSLOG_DYNNAME}.out.log"
export RSYSLOG2_OUT_LOG="${RSYSLOG_DYNNAME}_2.out.log"
export RSYSLOG_PIDBASE="${RSYSLOG_DYNNAME}:" # also used by instance 2!
mkdir $RSYSLOG_DYNNAME.spool
fi
echo 'module(load="../plugins/imdiag/.libs/imdiag")
global(inputs.timeout.shutdown="'$RSTB_GLOBAL_INPUT_SHUTDOWN_TIMEOUT'"
default.action.queue.timeoutshutdown="'$RSTB_ACTION_DEFAULT_Q_TO_SHUTDOWN'"
default.action.queue.timeoutEnqueue="'$RSTB_ACTION_DEFAULT_Q_TO_ENQUEUE'"
debug.abortOnProgramError="on")
# use legacy-style for the following settings so that we can override if needed
$MainmsgQueueTimeoutEnqueue 20000
$MainmsgQueueTimeoutShutdown '$RSTB_GLOBAL_QUEUE_SHUTDOWN_TIMEOUT'
$IMDiagListenPortFileName '$RSYSLOG_DYNNAME.imdiag$1.port'
$IMDiagServerRun 0
$IMDiagAbortTimeout '$TB_TEST_MAX_RUNTIME'
:syslogtag, contains, "rsyslogd" ./'${RSYSLOG_DYNNAME}$1'.started
###### end of testbench instrumentation part, test conf follows:' > ${TESTCONF_NM}$1.conf
}
# add more data to config file. Note: generate_conf must have been called
# $1 is config fragment, $2 the instance id, if given
add_conf() {
printf '%s' "$1" >> ${TESTCONF_NM}$2.conf
}
rst_msleep() {
$TESTTOOL_DIR/msleep $1
}
# compare file to expected exact content
# $1 is file to compare, default $RSYSLOG_OUT_LOG
cmp_exact() {
filename=${1:-"$RSYSLOG_OUT_LOG"}
if [ "$filename" == "" ]; then
printf 'Testbench ERROR, cmp_exact() does not have a filename at ALL!\n'
error_exit 100
fi
if [ "$EXPECTED" == "" ]; then
printf 'Testbench ERROR, cmp_exact() needs to have env var EXPECTED set!\n'
error_exit 100
fi
printf '%s\n' "$EXPECTED" | cmp - "$filename"
if [ $? -ne 0 ]; then
printf 'invalid response generated\n'
printf '################# %s is:\n' "$filename"
cat -n $filename
printf '################# EXPECTED was:\n'
cat -n <<< "$EXPECTED"
printf '\n#################### diff is:\n'
diff - "$filename" <<< "$EXPECTED"
error_exit 1
fi;
}
# code common to all startup...() functions
startup_common() {
instance=
if [ "$1" == "2" ]; then
CONF_FILE="${TESTCONF_NM}2.conf"
instance=2
elif [ "$1" == "" ] || [ "$1" == "1" ]; then
CONF_FILE="${TESTCONF_NM}.conf"
else
CONF_FILE="$srcdir/testsuites/$1"
instance=$2
fi
# we need to remove the imdiag port file as there are some
# tests that start multiple times. These may get the old port
# number if the file still exists AND timing is bad so that
# imdiag does not generate the port file quickly enough on
# startup.
rm -f $RSYSLOG_DYNNAME.imdiag$instance.port
if [ ! -f $CONF_FILE ]; then
echo "ERROR: config file '$CONF_FILE' not found!"
error_exit 1
fi
echo config $CONF_FILE is:
cat -n $CONF_FILE
}
# wait for appearance of a specific pid file, given as $1
wait_startup_pid() {
if [ "$1" == "" ]; then
echo "FAIL: testbench bug: wait_startup_called without \$1"
error_exit 100
fi
while test ! -f $1; do
$TESTTOOL_DIR/msleep 100 # wait 100 milliseconds
if [ $(date +%s) -gt $(( TB_STARTTEST + TB_TEST_MAX_RUNTIME )) ]; then
printf '%s ABORT! Timeout waiting on startup (pid file %s)\n' "$(tb_timestamp)" "$1"
ls -l "$1"
ps -fp $($SUDO cat "$1")
error_exit 1
fi
done
printf '%s %s found, pid %s\n' "$(tb_timestamp)" "$1" "$(cat $1)"
}
# special version of wait_startup_pid() for rsyslog startup
wait_rsyslog_startup_pid() {
wait_startup_pid $RSYSLOG_PIDBASE$1.pid
}
# wait for startup of an arbitrary process
# $1 - pid file name
# $2 - startup file name (optional, only checked if given)
wait_process_startup() {
wait_startup_pid $1.pid
i=0
if [ "$2" != "" ]; then
while test ! -f "$2"; do
$TESTTOOL_DIR/msleep 100 # wait 100 milliseconds
ps -p $(cat $1.pid) &> /dev/null
if [ $? -ne 0 ]
then
echo "ABORT! pid in $1 no longer active during startup!"
error_exit 1
fi
(( i++ ))
if [ $(date +%s) -gt $(( TB_STARTTEST + TB_TEST_MAX_RUNTIME )) ]; then
printf '%s ABORT! Timeout waiting on file %s\n' "$(tb_timestamp)" "$2"
error_exit 1
fi
done
printf '%s %s seen, associated pid %s\n' "$(tb_timestamp)" "$2" "$(cat $1)"
fi
}
# wait for the pid in $1 to terminate, abort on timeout
wait_pid_termination() {
out_pid="$1"
if [[ "$out_pid" == "" ]]; then
printf 'TESTBENCH error: pidfile name not specified in wait_pid_termination\n'
error_exit 100
fi
terminated=0
while [[ $terminated -eq 0 ]]; do
ps -p $out_pid &> /dev/null
if [[ $? != 0 ]]; then
terminated=1
fi
$TESTTOOL_DIR/msleep 100
if [ $(date +%s) -gt $(( TB_STARTTEST + TB_TEST_MAX_RUNTIME )) ]; then
printf '%s ABORT! Timeout waiting on shutdown (pid %s)\n' "$(tb_timestamp)" $out_pid
ps -fp $out_pid
printf 'Instance is possibly still running and may need manual cleanup.\n'
error_exit 1
fi
done
unset terminated
unset out_pid
}
# wait for file $1 to exist AND be non-empty
# $1 : file to wait for
# $2 (optional): error message to show if timeout occurs
wait_file_exists() {
echo waiting for file $1
i=0
while true; do
if [ -f $1 ] && [ "$(cat $1 2> /dev/null)" != "" ]; then
break
fi
$TESTTOOL_DIR/msleep 100 # wait 100 milliseconds
((i++))
if test $i -gt $TB_TIMEOUT_STARTSTOP; then
echo "ABORT! Timeout waiting for file $1"
ls -l $1
if [ "$2" != "" ]; then
echo "$2"
fi
error_exit 1
fi
done
}
# kafka special wait function: we wait for the output file in order
# to ensure Kafka/Zookeeper is actually ready to go. This is NOT
# a generic check function and must only used with those kafka tests
# that actually need it.
kafka_wait_group_coordinator() {
echo We are waiting for kafka/zookeeper being ready to deliver messages
wait_file_exists $RSYSLOG_OUT_LOG "
Non-existence of $RSYSLOG_OUT_LOG can be caused
by a problem inside zookeeper. If debug output in the receiver is enabled, one
may see this message:
\"GroupCoordinator response error: Broker: Group coordinator not available\"
In this case you may want to do a web search and/or have a look at
https://github.com/edenhill/librdkafka/issues/799
The question, of course, is if there is nevertheless a problem in imkafka.
Usually, the wait we do inside the testbench is sufficient to handle all
Zookeeper/Kafka startup. So if the issue reoccurs, it is suggested to enable
debug output in the receiver and check for actual problems.
"
}
# check if kafka itself failed. $1 is the message file name.
kafka_check_broken_broker() {
failed=0
if grep "Broker transport failure" < "$1" ; then
failed=1
fi
if grep "broker connections are down" < "$1" ; then
failed=1
fi
if [ $failed -eq 1 ]; then
printf '\n\nenvironment-induced test error - kafka broker failed - skipping test\n'
printf 'content of %s:\n' "$1"
cat -n "$1"
error_exit 177
fi
}
# inject messages via kafkacat tool (for imkafka tests)
# $1 == "--wait" means wait for rsyslog to receive TESTMESSAGES lines in RSYSLOG_OUT_LOG
# $TESTMESSAGES contains number of messages to inject
# $RANDTOPIC contains topic to produce to
injectmsg_kafkacat() {
if [ "$1" == "--wait" ]; then
wait="YES"
shift
fi
if [ "$TESTMESSAGES" == "" ]; then
printf 'TESTBENCH ERROR: TESTMESSAGES env var not set!\n'
error_exit 1
fi
MAXATONCE=25000 # how many msgs should kafkacat send? - hint: current version errs out above ~70000
i=1
while (( i<=TESTMESSAGES )); do
currmsgs=0
while ((i <= $TESTMESSAGES && currmsgs != MAXATONCE)); do
printf ' msgnum:%8.8d\n' $i;
i=$((i + 1))
currmsgs=$((currmsgs+1))
done > "$RSYSLOG_DYNNAME.kafkacat.in"
set -e
kafkacat -P -b localhost:29092 -t $RANDTOPIC <"$RSYSLOG_DYNNAME.kafkacat.in" 2>&1 | tee >$RSYSLOG_DYNNAME.kafkacat.log
set +e
printf 'kafkacat injected %d msgs so far\n' $((i - 1))
kafka_check_broken_broker $RSYSLOG_DYNNAME.kafkacat.log
check_not_present "ERROR" $RSYSLOG_DYNNAME.kafkacat.log
cat $RSYSLOG_DYNNAME.kafkacat.log
done
if [ "$wait" == "YES" ]; then
wait_seq_check "$@"
fi
}
# wait for rsyslogd startup ($1 is the instance)
wait_startup() {
wait_rsyslog_startup_pid $1
while test ! -f ${RSYSLOG_DYNNAME}$1.started; do
$TESTTOOL_DIR/msleep 100 # wait 100 milliseconds
ps -p $(cat $RSYSLOG_PIDBASE$1.pid) &> /dev/null
if [ $? -ne 0 ]
then
echo "ABORT! rsyslog pid no longer active during startup!"
error_exit 1 stacktrace
fi
if [ $(date +%s) -gt $(( TB_STARTTEST + TB_TEST_MAX_RUNTIME )) ]; then
printf '%s ABORT! Timeout waiting startup file %s\n' "$(tb_timestamp)" "${RSYSLOG_DYNNAME}.started"
error_exit 1
fi
done
echo "$(tb_timestamp) rsyslogd$1 startup msg seen, pid " $(cat $RSYSLOG_PIDBASE$1.pid)
wait_file_exists $RSYSLOG_DYNNAME.imdiag$1.port
eval export IMDIAG_PORT$1=$(cat $RSYSLOG_DYNNAME.imdiag$1.port)
eval PORT='$IMDIAG_PORT'$1
echo "imdiag$1 port: $PORT"
if [ "$PORT" == "" ]; then
echo "TESTBENCH ERROR: imdiag port not found!"
ls -l $RSYSLOG_DYNNAME*
exit 100
fi
}
# reassign ports after rsyslog startup; must be called from all
# functions that startup rsyslog
reassign_ports() {
if grep -q 'listenPortFileName="'$RSYSLOG_DYNNAME'\.tcpflood_port"' $CONF_FILE; then
assign_tcpflood_port $RSYSLOG_DYNNAME.tcpflood_port
fi
if grep -q '$InputTCPServerListenPortFile.*\.tcpflood_port' $CONF_FILE; then
assign_tcpflood_port $RSYSLOG_DYNNAME.tcpflood_port
fi
}
# start rsyslogd with default params. $1 is the config file name to use
# returns only after successful startup, $2 is the instance (blank or 2!)
# RS_REDIR maybe set to redirect rsyslog output
# env var RSTB_DAEMONIZE" == "YES" means rsyslogd shall daemonize itself;
# any other value or unset means it does not do that.
startup() {
if [ "$USE_VALGRIND" == "YES" ]; then
startup_vg "$1" "$2"
return
elif [ "$USE_VALGRIND" == "YES-NOLEAK" ]; then
startup_vg_noleak "$1" "$2"
return
fi
startup_common "$1" "$2"
if [ "$RSTB_DAEMONIZE" == "YES" ]; then
n_option=""
else
n_option="-n"
fi
eval LD_PRELOAD=$RSYSLOG_PRELOAD $valgrind ../tools/rsyslogd -C $n_option -i$RSYSLOG_PIDBASE$instance.pid -M../runtime/.libs:../.libs -f$CONF_FILE $RS_REDIR &
wait_startup $instance
reassign_ports
}
# assign TCPFLOOD_PORT from port file
# $1 - port file
assign_tcpflood_port() {
wait_file_exists "$1"
export TCPFLOOD_PORT=$(cat "$1")
echo "TCPFLOOD_PORT now: $TCPFLOOD_PORT"
if [ "$TCPFLOOD_PORT" == "" ]; then
echo "TESTBENCH ERROR: TCPFLOOD_PORT not found!"
ls -l $RSYSLOG_DYNNAME*
exit 100
fi
}
# assign TCPFLOOD_PORT2 from port file
# $1 - port file
assign_tcpflood_port2() {
wait_file_exists "$1"
export TCPFLOOD_PORT2=$(cat "$1")
echo "TCPFLOOD_PORT2 now: $TCPFLOOD_PORT2"
if [ "$TCPFLOOD_PORT2" == "" ]; then
echo "TESTBENCH ERROR: TCPFLOOD_PORT2 not found!"
ls -l $RSYSLOG_DYNNAME*
exit 100
fi
}
# assign RS_PORT from port file - this is meant as generic way to
# obtain additional port variables
# $1 - port file
assign_rs_port() {
wait_file_exists "$1"
export RS_PORT=$(cat "$1")
echo "RS_PORT now: $RS_PORT"
if [ "$RS_PORT" == "" ]; then
echo "TESTBENCH ERROR: RS_PORT not found!"
ls -l $RSYSLOG_DYNNAME*
exit 100
fi
}
# wait for a file to exist, then export it's content to env var
# intended to be used for very small files, e.g. listenPort files
# $1 - env var name
# $2 - port file
assign_file_content() {
wait_file_exists "$2"
content=$(cat "$2")
if [ "$content" == "" ]; then
echo "TESTBENCH ERROR: get_file content had empty file $2"
ls -l $RSYSLOG_DYNNAME*
exit 100
fi
eval export $1="$content"
printf 'exported: %s=%s\n' $1 "$content"
}
# start a minitcpsrvr with the regular parameters
# $1 is the output file name
# $2 is the instance name (1, 2)
start_minitcpsrvr() {
instance=${2:-1}
./minitcpsrv -t127.0.0.1 -p 0 -P "$RSYSLOG_DYNNAME.minitcpsrvr_port$instance" -f "$1" $MINITCPSRV_EXTRA_OPTS &
BGPROCESS=$!
wait_file_exists "$RSYSLOG_DYNNAME.minitcpsrvr_port$instance"
if [ "$instance" == "1" ]; then
export MINITCPSRVR_PORT1="$(cat $RSYSLOG_DYNNAME.minitcpsrvr_port$instance)"
else
export MINITCPSRVR_PORT2="$(cat $RSYSLOG_DYNNAME.minitcpsrvr_port$instance)"
fi
echo "### background minitcpsrv process id is $BGPROCESS port $(cat $RSYSLOG_DYNNAME.minitcpsrvr_port$instance) ###"
}
# same as startup_vg, BUT we do NOT wait on the startup message!
startup_vg_waitpid_only() {
startup_common "$1" "$2"
if [ "$RS_TESTBENCH_LEAK_CHECK" == "" ]; then
RS_TESTBENCH_LEAK_CHECK=full
fi
# add --keep-debuginfo=yes for hard to find cases; this cannot be used generally,
# because it is only supported by newer versions of valgrind (else CI will fail
# on older platforms).
LD_PRELOAD=$RSYSLOG_PRELOAD valgrind $RS_TEST_VALGRIND_EXTRA_OPTS $RS_TESTBENCH_VALGRIND_EXTRA_OPTS --suppressions=$srcdir/known_issues.supp ${EXTRA_VALGRIND_SUPPRESSIONS:-} --gen-suppressions=all --log-fd=1 --error-exitcode=10 --malloc-fill=ff --free-fill=fe --leak-check=$RS_TESTBENCH_LEAK_CHECK ../tools/rsyslogd -C -n -i$RSYSLOG_PIDBASE$instance.pid -M../runtime/.libs:../.libs -f$CONF_FILE &
wait_rsyslog_startup_pid $1
}
# start rsyslogd with default params under valgrind control. $1 is the config file name to use
# returns only after successful startup, $2 is the instance (blank or 2!)
startup_vg() {
startup_vg_waitpid_only $1 $2
wait_startup $instance
reassign_ports
}
# same as startup-vg, except that --leak-check is set to "none". This
# is meant to be used in cases where we have to deal with libraries (and such
# that) we don't can influence and where we cannot provide suppressions as
# they are platform-dependent. In that case, we can't test for leak checks
# (obviously), but we can check for access violations, what still is useful.
startup_vg_noleak() {
RS_TESTBENCH_LEAK_CHECK=no
startup_vg "$@"
}
# same as startup-vgthread, BUT we do NOT wait on the startup message!
startup_vgthread_waitpid_only() {
startup_common "$1" "$2"
valgrind --tool=helgrind $RS_TEST_VALGRIND_EXTRA_OPTS $RS_TESTBENCH_VALGRIND_EXTRA_OPTS --log-fd=1 --error-exitcode=10 --suppressions=$srcdir/linux_localtime_r.supp --suppressions=$srcdir/known_issues.supp ${EXTRA_VALGRIND_SUPPRESSIONS:-} --suppressions=$srcdir/CI/gcov.supp --gen-suppressions=all ../tools/rsyslogd -C -n -i$RSYSLOG_PIDBASE$2.pid -M../runtime/.libs:../.libs -f$CONF_FILE &
wait_rsyslog_startup_pid $2
}
# start rsyslogd with default params under valgrind thread debugger control.
# $1 is the config file name to use, $2 is the instance (blank or 2!)
# returns only after successful startup
startup_vgthread() {
startup_vgthread_waitpid_only $1 $2
wait_startup $2
reassign_ports
}
# inject messages via our inject interface (imdiag)
# $1 is start message number, env var NUMMESSAGES is number of messages to inject
injectmsg() {
if [ "$3" != "" ] ; then
printf 'error: injectmsg only has two arguments, extra arg is %s\n' "$3"
fi
msgs=${2:-$NUMMESSAGES}
echo injecting $msgs messages
echo injectmsg "${1:-0}" "$msgs" | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $?
}
# inject messages in INSTANCE 2 via our inject interface (imdiag)
injectmsg2() {
msgs=${2:-$NUMMESSAGES}
echo injecting $msgs messages into instance 2
echo injectmsg "${1:-0}" "$msgs" $3 $4 | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT2 || error_exit $?
# TODO: some return state checking? (does it really make sense here?)
}
# inject literal payload via our inject interface (imdiag)
injectmsg_literal() {
printf 'injecting msg payload: %s\n' "$1"
sed -e 's/^/injectmsg literal /g' <<< "$1" | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $?
}
# inject literal payload via our inject interface (imdiag)
injectmsg_file() {
printf 'injecting msg payload: %s\n' "$1"
sed -e 's/^/injectmsg literal /g' < "$1" | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $?
}
# show the current main queue size. $1 is the instance.
get_mainqueuesize() {
if [ "$1" == "2" ]; then
echo getmainmsgqueuesize | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT2 || error_exit $?
else
echo getmainmsgqueuesize | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $?
fi
}
# get pid of rsyslog instance $1
getpid() {
printf '%s' "$(cat $RSYSLOG_PIDBASE$1.pid)"
}
# grep for (partial) content. $1 is the content to check for, $2 the file to check
# option --check-only just returns success/fail but does not terminate on fail
# this is meant for checking during queue shutdown processing.
# option --regex is understood, in which case $1 is a regex
content_check() {
if [ "$1" == "--check-only" ]; then
check_only="yes"
shift
else
check_only="no"
fi
if [ "$1" == "--regex" ]; then
grep_opt=
shift
else
grep_opt=-F
fi
if [ "$1" == "--output-results" ]; then
output_results="yes"
shift
else
output_results="no"
fi
file=${2:-$RSYSLOG_OUT_LOG}
if ! grep -q $grep_opt -- "$1" < "${file}"; then
if [ "$check_only" == "yes" ]; then
printf 'content_check did not yet succeed\n'
return 1
fi
printf '\n============================================================\n'
printf 'FILE "%s" content:\n' "$file"
cat -n ${file}
printf 'FAIL: content_check failed to find "%s"\n' "$1"
error_exit 1
else
if [ "$output_results" == "yes" ]; then
# Output GREP results
echo "SUCCESS: content_check found results for '$1'\n"
grep "$1" "${file}"
fi
fi
if [ "$check_only" == "yes" ]; then
return 0
fi
}
# grep for (partial) content. this checks the count of the content
# $1 is the content to check for
# $2 required count
# $3 the file to check (if default not used)
# option --regex is understood, in which case $1 is a regex
content_count_check() {
if [ "$1" == "--regex" ]; then
grep_opt=
shift
else
grep_opt=-F
fi
file=${3:-$RSYSLOG_OUT_LOG}
count=$(grep -c $grep_opt -- "$1" <${RSYSLOG_OUT_LOG})
if [ ${count:=0} -ne "$2" ]; then
grep -c -F -- "$1" <${RSYSLOG_OUT_LOG}
printf '\n============================================================\n'
printf 'FILE "%s" content:\n' "$file"
cat -n ${file}
printf 'FAIL: content count required %d but was %d\n' "$2" $count
printf 'FAIL: content_check failed to find "%s"\n' "$1"
error_exit 1
fi
}
# $1 - content to check for
# $2 - number of times content must appear
# $3 - timeout (default: 1)
content_check_with_count() {
timeoutend=${3:-1}
timecounter=0
while [ $timecounter -lt $timeoutend ]; do
(( timecounter=timecounter+1 ))
count=0
if [ -f "${RSYSLOG_OUT_LOG}" ]; then
count=$(grep -c -F -- "$1" <${RSYSLOG_OUT_LOG})
fi
if [ ${count:=0} -eq $2 ]; then
echo content_check_with_count SUCCESS, \"$1\" occurred $2 times
break
else
if [ "$timecounter" == "$timeoutend" ]; then
shutdown_when_empty ""
wait_shutdown ""
echo "$(tb_timestamp)" content_check_with_count failed, expected \"$1\" to occur $2 times, but found it "$count" times
echo file $RSYSLOG_OUT_LOG content is:
if [ $(wc -l < "$RSYSLOG_OUT_LOG") -gt 10000 ]; then
printf 'truncation, we have %d lines, which is way too much\n' \
$(wc -l < "$RSYSLOG_OUT_LOG")
printf 'showing first and last 5000 lines\n'
head -n 5000 < "$RSYSLOG_OUT_LOG"
print '\n ... CUT ..................................................\n\n'
tail -n 5000 < "$RSYSLOG_OUT_LOG"
else
cat -n "$RSYSLOG_OUT_LOG"
fi
error_exit 1
else
printf '%s content_check_with_count, try %d have %d, wait for %d, search for: "%s"\n' \
"$(tb_timestamp)" "$timecounter" "$count" "$2" "$1"
$TESTTOOL_DIR/msleep 1000
fi
fi
printf '%s **** content_check_with_count DEBUG (timeout %s, need %s lines):\n' "$(tb_timestamp)" "$3" "$2" # rger: REMOVE ME when problems are fixed
if [ -f "${RSYSLOG_OUT_LOG}" ]; then cat -n "$RSYSLOG_OUT_LOG"; fi
done
}
custom_content_check() {
grep -qF -- "$1" < $2
if [ "$?" -ne "0" ]; then
echo FAIL: custom_content_check failed to find "'$1'" inside "'$2'"
echo "file contents:"
cat -n $2
error_exit 1
fi
}
# check that given content $1 is not present in file $2 (default: RSYSLOG_OUT_LOG)
# regular expressions may be used
check_not_present() {
if [ "$2" == "" ]; then
file=$RSYSLOG_OUT_LOG
else
file="$2"
fi
grep -q -- "$1" < "$file"
if [ "$?" -eq "0" ]; then
echo FAIL: check_not present found
echo $1
echo inside file $file of $(wc -l < $file) lines
echo samples:
cat -n "$file" | grep -- "$1" | head -10
error_exit 1
fi
}
# check if mainqueue spool files exist, if not abort (we just check .qi).
check_mainq_spool() {
printf 'There must exist some files now:\n'
ls -l $RSYSLOG_DYNNAME.spool
printf '.qi file:\n'
cat $RSYSLOG_DYNNAME.spool/mainq.qi
if [ ! -f $RSYSLOG_DYNNAME.spool/mainq.qi ]; then
printf 'error: mainq.qi does not exist where expected to do so!\n'
error_exit 1
fi
}
# check that no spool file exists. Abort if they do.
# This situation must exist after a successful termination of rsyslog
# where the disk queue has properly been drained and shut down.
check_spool_empty() {
if [ "$(ls $RSYSLOG_DYNNAME.spool/* 2> /dev/null)" != "" ]; then
printf 'error: spool files exists where they are not permitted to do so:\n'
ls -l $RSYSLOG_DYNNAME.spool/*
error_exit 1
fi
}
# general helper for imjournal tests: check that we got hold of the
# injected test message. This is pretty lengthy as the journal has played
# "a bit" with us and also seems to sometimes have a heavy latency in
# forwarding messages. So let's centralize the check code.
#
# $TESTMSG must contain the test message
check_journal_testmsg_received() {
printf 'checking that journal indeed contains test message - may take a short while...\n'
# search reverse, gets us to our message (much) faster .... if it is there...
journalctl -a -r | grep -qF "$TESTMSG"
if [ $? -ne 0 ]; then
print 'SKIP: cannot read journal - our testmessage not found via journalctl\n'
exit 77
fi
printf 'journal contains test message\n'
echo "INFO: $(wc -l < $RSYSLOG_OUT_LOG) lines in $RSYSLOG_OUT_LOG"
grep -qF "$TESTMSG" < $RSYSLOG_OUT_LOG
if [ $? -ne 0 ]; then
echo "FAIL: $RSYSLOG_OUT_LOG content (tail -n200):"
tail -n200 $RSYSLOG_OUT_LOG
echo "======="
echo "searching journal for testbench messages:"
journalctl -a | grep -qF "TestBenCH-RSYSLog imjournal"
echo "======="
echo "NOTE: showing only last 200 lines, may be insufficient on busy systems!"
echo "last entries from journal:"
journalctl -an 200
echo "======="
echo "NOTE: showing only last 200 lines, may be insufficient on busy systems!"
echo "However, the test check all of the journal, we are just limiting the output"
echo "to 200 lines to not spam CI systems too much."
echo "======="
echo "FAIL: imjournal test message could not be found!"
echo "Expected message content was:"
echo "$TESTMSG"
error_exit 1
fi;
}
# checks that among the open files found in /proc/<PID>/fd/*
# there is or is not, depending on the calling mode,
# a link with the specified suffix in the target name
check_fd_for_pid() {
local pid="$1" mode="$2" suffix="$3" target seen
seen="false"
for fd in $(echo /proc/$pid/fd/*); do
target="$(readlink -m "$fd")"
if [[ "$target" != *$RSYSLOG_DYNNAME* ]]; then
continue
fi
if ((i % 10 == 0)); then
echo "INFO: check target='$target'"
fi
if [[ "$target" == *$suffix ]]; then
seen="true"
if [[ "$mode" == "exists" ]]; then
echo "PASS: check fd for pid=$pid mode='$mode' suffix='$suffix'"
return 0
fi
fi
done
if [[ "$seen" == "false" ]] && [[ "$mode" == "absent" ]]; then
echo "PASS: check fd for pid=$pid mode='$mode' suffix='$suffix'"
return 0
fi
echo "FAIL: check fd for pid=$pid mode='$mode' suffix='$suffix'"
if [[ "$mode" != "ignore" ]]; then
return 1
fi
return 0
}
# wait for main message queue to be empty. $1 is the instance.
# we run in a loop to ensure rsyslog is *really* finished when a
# function for the "finished predicate" is defined. This is done
# by setting env var QUEUE_EMPTY_CHECK_FUNC to the bash
# function name.
wait_queueempty() {
while [ $(date +%s) -le $(( TB_STARTTEST + TB_TEST_MAX_RUNTIME )) ]; do
if [ "$1" == "2" ]; then
echo WaitMainQueueEmpty | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT2 || error_exit $?
else
echo WaitMainQueueEmpty | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $?
fi
if [ "$QUEUE_EMPTY_CHECK_FUNC" == "" ]; then
return
else
if $QUEUE_EMPTY_CHECK_FUNC ; then
return
fi
fi
done
error_exit $TB_ERR_TIMEOUT
}
# shut rsyslogd down when main queue is empty. $1 is the instance.
shutdown_when_empty() {
printf '%s Shutting down instance %s\n' "$(date +%H:%M:%S)" ${1:-1}
wait_queueempty $1
if [ "$RSYSLOG_PIDBASE" == "" ]; then
echo "RSYSLOG_PIDBASE is EMPTY! - bug in test? (instance $1)"
error_exit 1
fi
cp $RSYSLOG_PIDBASE$1.pid $RSYSLOG_PIDBASE$1.pid.save
$TESTTOOL_DIR/msleep 500 # wait a bit (think about slow testbench machines!)
kill $(cat $RSYSLOG_PIDBASE$1.pid) # note: we do not wait for the actual termination!
}
# shut rsyslogd down without emptying the queue. $2 is the instance.
shutdown_immediate() {
pidfile=$RSYSLOG_PIDBASE${1:-}.pid