From ee1ae48336bd1f224b3570fe98c95218760eb08a Mon Sep 17 00:00:00 2001 From: Reuben Miller Date: Sat, 20 Jul 2024 19:13:51 +0200 Subject: [PATCH 1/3] add legacy monit config and automatically detect which it should be used --- nfpm.yaml | 4 -- src/conf.d/tedge-monitoring-legacy.conf | 85 +++++++++++++++++++++++++ src/conf.d/tedge-monitoring.conf | 1 + src/packaging/postinstall | 43 +++++++++++-- src/packaging/postremove | 31 +++++++-- 5 files changed, 151 insertions(+), 13 deletions(-) create mode 100644 src/conf.d/tedge-monitoring-legacy.conf diff --git a/nfpm.yaml b/nfpm.yaml index 48cc01e..8e4451a 100644 --- a/nfpm.yaml +++ b/nfpm.yaml @@ -27,10 +27,6 @@ contents: dst: /etc/monit/conf.d/tedge.conf type: symlink - - src: /usr/share/tedge-monit-setup/tedge-monitoring.conf - dst: /etc/monit/conf.d/tedge-monitoring.conf - type: symlink - - src: ./src/tedge-monit-setup/env dst: /etc/tedge-monit-setup/env type: config|noreplace diff --git a/src/conf.d/tedge-monitoring-legacy.conf b/src/conf.d/tedge-monitoring-legacy.conf new file mode 100644 index 0000000..5d90c99 --- /dev/null +++ b/src/conf.d/tedge-monitoring-legacy.conf @@ -0,0 +1,85 @@ +# compatible with monit >= 5.26 +# System monitoring +# +check system $HOST + if memory usage >= 90% for 1 cycles then exec "/usr/bin/monit-tedge-message event system_mem_hi" + if loadavg (5min) per core > 1.5 for 10 cycles then exec "/usr/bin/monit-tedge-message event system_loadavg_hi" + +# +# Service level monitoring +# +# Don't bother trying to send a message to MQTT if MQTT is failing ;) + +# match against mosquitto service so it does not rely on the mosquitto.pid which is not +# always used in all installations +check process mosquitto MATCHING "mosquitto -c .+" + if failed port 1883 protocol mqtt then alert + +# Note: the tedge-agent memory usage can spike when using package managers such as apt as +# they generally require a lot of memory (>100MB) if you access the indexes from any public software +# repositories. 
+check process tedge-agent with pidfile /run/lock/tedge-agent.lock + if memory usage > 20 MB for 10 cycles then exec "/usr/bin/monit-tedge-message event tedge-agent_mem_hi" + + + +######################################################################## +# Monitor cloud connections +######################################################################## +# +# -enabled - Checks if the mapper is activated +# -connectivity - Checks if the mapper is connected or not, and reconnects it if required +# +# Note: If the path needs to use double quotes, use the hex value \0x22 instead +# + +# +# Cumulocity IoT +# +check program c8y-enabled with path "/bin/sh -c '[ -n \0x22$(tedge config get c8y.url)\0x22 ]'" + with timeout 5 seconds + every 2 cycles + if status != 0 then unmonitor + group c8y + +check program c8y-connectivity with path "/usr/bin/tedge connect c8y --test" + with timeout 60 seconds + every 120 cycles + if status != 0 then alert + if status != 0 for 10 cycles then exec "/usr/bin/monit-tedge-reconnect c8y" + depends on c8y-enabled + group c8y + +# +# Azure IoT +# +check program az-enabled with path "/bin/sh -c '[ -n \0x22$(tedge config get az.url)\0x22 ]'" + with timeout 5 seconds + every 2 cycles + if status != 0 then unmonitor + group az + +check program az-connectivity with path "/usr/bin/tedge connect az --test" + with timeout 60 seconds + every 120 cycles + if status != 0 then alert + if status != 0 for 10 cycles then exec "/usr/bin/monit-tedge-reconnect az" + depends on az-enabled + group az + +# +# AWS +# +check program aws-enabled with path "/bin/sh -c '[ -n \0x22$(tedge config get aws.url)\0x22 ]'" + with timeout 5 seconds + every 2 cycles + if status != 0 then unmonitor + group aws + +check program aws-connectivity with path "/usr/bin/tedge connect aws --test" + with timeout 60 seconds + every 120 cycles + if status != 0 then alert + if status != 0 for 10 cycles then exec "/usr/bin/monit-tedge-reconnect aws" + depends on aws-enabled + group aws diff 
--git a/src/conf.d/tedge-monitoring.conf b/src/conf.d/tedge-monitoring.conf index c536890..23cead9 100644 --- a/src/conf.d/tedge-monitoring.conf +++ b/src/conf.d/tedge-monitoring.conf @@ -1,3 +1,4 @@ +# compatible with monit >= 5.27 # # System monitoring # diff --git a/src/packaging/postinstall b/src/packaging/postinstall index 3433791..1387bf3 100755 --- a/src/packaging/postinstall +++ b/src/packaging/postinstall @@ -16,12 +16,47 @@ do_systemd() { fi } +MONITRC=/etc/monit/monitrc +if [ -f /etc/monitrc ]; then + MONITRC=/etc/monitrc +fi + # In Yocto, monit looks at the /etc/monit.d/ directory # instead of /etc/monit/conf.d/, so let's add the other directory to as well (for normalization) -if [ -f /etc/monitrc ]; then - if ! grep -q "include /etc/monit/conf.d/\*" /etc/monitrc; then - echo "Adding /etc/monit/conf.d/ to the monit config (/etc/monitrc)" >&2 - echo 'include /etc/monit/conf.d/*.conf' >> /etc/monitrc +if [ -f "$MONITRC" ]; then + if ! grep -q "include /etc/monit/conf.d/\*" "$MONITRC"; then + echo "Adding /etc/monit/conf.d/ to the monit config ($MONITRC)" >&2 + echo 'include /etc/monit/conf.d/*.conf' >> "$MONITRC" + fi +fi + +add_config_if_valid() { + # + # Only add a configuration file if it is accepted by monit + # It is verified by adding the config file, running "monit -t", and removing the config file if the test fails + # + CONF_FILE="$1" + if [ $# -gt 1 ]; then + name="$2" + else + name=$(basename "$CONF_FILE") + fi + + if monit -c "$MONITRC" -t >/dev/null 2>&1; then + ln -sf "$CONF_FILE" "/etc/monit/conf.d/$name" + if ! monit -c "$MONITRC" -t; then + echo "Warning: Excluding $name as the monit version does not support the syntax used in it" >&2 + rm -f "/etc/monit/conf.d/$name" + return 1 + fi + fi + return 0 +} + +# Add configuration +if [ -f "$MONITRC" ]; then + if ! 
add_config_if_valid /usr/share/tedge-monit-setup/tedge-monitoring.conf "tedge-monitoring.conf"; then + add_config_if_valid /usr/share/tedge-monit-setup/tedge-monitoring-legacy.conf "tedge-monitoring.conf" ||: fi fi diff --git a/src/packaging/postremove b/src/packaging/postremove index c3dfd68..5c40442 100755 --- a/src/packaging/postremove +++ b/src/packaging/postremove @@ -13,8 +13,29 @@ do_initd() { service monit stop ||: } -if command -V systemctl >/dev/null 2>&1; then - do_systemd -else - do_initd -fi +remove_or_purge() { + rm -f /etc/monit/conf.d/tedge-monitoring-extended.conf + rm -f /usr/share/tedge-monit-setup/tedge-monitoring-legacy.conf + + if command -V systemctl >/dev/null 2>&1; then + do_systemd + else + do_initd + fi +} + +action="$1" +case "$action" in + "0" | "remove") + remove_or_purge + ;; + "1" | "upgrade") + # Do nothing on upgrade + ;; + "purge") + remove_or_purge + ;; + *) + remove_or_purge + ;; +esac From 11af1dc11bbf2334a356cc3eed42ab94ac6165d9 Mon Sep 17 00:00:00 2001 From: Reuben Miller Date: Sat, 20 Jul 2024 19:14:42 +0200 Subject: [PATCH 2/3] extend test matrix targets --- .github/workflows/test.yaml | 3 +++ test-images/ubuntu-20.04/Dockerfile | 40 +++++++++++++++++++++++++++++ test-images/ubuntu-22.04/Dockerfile | 40 +++++++++++++++++++++++++++++ test-images/ubuntu-24.04/Dockerfile | 40 +++++++++++++++++++++++++++++ 4 files changed, 123 insertions(+) create mode 100644 test-images/ubuntu-20.04/Dockerfile create mode 100644 test-images/ubuntu-22.04/Dockerfile create mode 100644 test-images/ubuntu-24.04/Dockerfile diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 6ff95bb..36354dd 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -41,6 +41,9 @@ jobs: job: - { image: debian-11 } - { image: debian-12 } + - { image: ubuntu-20.04 } + - { image: ubuntu-22.04 } + - { image: ubuntu-24.04 } steps: - name: Checkout diff --git a/test-images/ubuntu-20.04/Dockerfile 
b/test-images/ubuntu-20.04/Dockerfile new file mode 100644 index 0000000..541dd0a --- /dev/null +++ b/test-images/ubuntu-20.04/Dockerfile @@ -0,0 +1,40 @@ +FROM ubuntu:20.04 + +# Install +RUN apt-get -y update \ + && DEBIAN_FRONTEND=noninteractive apt-get -y --no-install-recommends install \ + wget \ + curl \ + gnupg2 \ + sudo \ + apt-transport-https \ + ca-certificates \ + ssh \ + systemd \ + dbus \ + systemd-sysv + +# Remove unnecessary systemd services +RUN rm -f /lib/systemd/system/multi-user.target.wants/* \ + /etc/systemd/system/*.wants/* \ + /lib/systemd/system/local-fs.target.wants/* \ + /lib/systemd/system/sockets.target.wants/*udev* \ + /lib/systemd/system/sockets.target.wants/*initctl* \ + /lib/systemd/system/systemd-update-utmp* \ + # Remove policy-rc.d file which prevents services from starting + && rm -f /usr/sbin/policy-rc.d + +RUN wget -O - https://thin-edge.io/install.sh | sh -s + +COPY test-images/debian-11/bootstrap.sh /usr/bin/ + +WORKDIR /build + +COPY dist/tedge-monit-setup_*.deb . 
+RUN apt-get install -y --no-install-recommends ./tedge-monit-setup_*.deb + +# Reference: https://developers.redhat.com/blog/2019/04/24/how-to-run-systemd-in-a-container#enter_podman +# STOPSIGNAL SIGRTMIN+3 (=37) +STOPSIGNAL 37 + +CMD ["/lib/systemd/systemd"] \ No newline at end of file diff --git a/test-images/ubuntu-22.04/Dockerfile b/test-images/ubuntu-22.04/Dockerfile new file mode 100644 index 0000000..e1d5978 --- /dev/null +++ b/test-images/ubuntu-22.04/Dockerfile @@ -0,0 +1,40 @@ +FROM ubuntu:22.04 + +# Install +RUN apt-get -y update \ + && DEBIAN_FRONTEND=noninteractive apt-get -y --no-install-recommends install \ + wget \ + curl \ + gnupg2 \ + sudo \ + apt-transport-https \ + ca-certificates \ + ssh \ + systemd \ + dbus \ + systemd-sysv + +# Remove unnecessary systemd services +RUN rm -f /lib/systemd/system/multi-user.target.wants/* \ + /etc/systemd/system/*.wants/* \ + /lib/systemd/system/local-fs.target.wants/* \ + /lib/systemd/system/sockets.target.wants/*udev* \ + /lib/systemd/system/sockets.target.wants/*initctl* \ + /lib/systemd/system/systemd-update-utmp* \ + # Remove policy-rc.d file which prevents services from starting + && rm -f /usr/sbin/policy-rc.d + +RUN wget -O - https://thin-edge.io/install.sh | sh -s + +COPY test-images/debian-11/bootstrap.sh /usr/bin/ + +WORKDIR /build + +COPY dist/tedge-monit-setup_*.deb . 
+RUN apt-get install -y --no-install-recommends ./tedge-monit-setup_*.deb + +# Reference: https://developers.redhat.com/blog/2019/04/24/how-to-run-systemd-in-a-container#enter_podman +# STOPSIGNAL SIGRTMIN+3 (=37) +STOPSIGNAL 37 + +CMD ["/lib/systemd/systemd"] \ No newline at end of file diff --git a/test-images/ubuntu-24.04/Dockerfile b/test-images/ubuntu-24.04/Dockerfile new file mode 100644 index 0000000..ad4f1b7 --- /dev/null +++ b/test-images/ubuntu-24.04/Dockerfile @@ -0,0 +1,40 @@ +FROM ubuntu:24.04 + +# Install +RUN apt-get -y update \ + && DEBIAN_FRONTEND=noninteractive apt-get -y --no-install-recommends install \ + wget \ + curl \ + gnupg2 \ + sudo \ + apt-transport-https \ + ca-certificates \ + ssh \ + systemd \ + dbus \ + systemd-sysv + +# Remove unnecessary systemd services +RUN rm -f /lib/systemd/system/multi-user.target.wants/* \ + /etc/systemd/system/*.wants/* \ + /lib/systemd/system/local-fs.target.wants/* \ + /lib/systemd/system/sockets.target.wants/*udev* \ + /lib/systemd/system/sockets.target.wants/*initctl* \ + /lib/systemd/system/systemd-update-utmp* \ + # Remove policy-rc.d file which prevents services from starting + && rm -f /usr/sbin/policy-rc.d + +RUN wget -O - https://thin-edge.io/install.sh | sh -s + +COPY test-images/debian-11/bootstrap.sh /usr/bin/ + +WORKDIR /build + +COPY dist/tedge-monit-setup_*.deb . 
+RUN apt-get install -y --no-install-recommends ./tedge-monit-setup_*.deb + +# Reference: https://developers.redhat.com/blog/2019/04/24/how-to-run-systemd-in-a-container#enter_podman +# STOPSIGNAL SIGRTMIN+3 (=37) +STOPSIGNAL 37 + +CMD ["/lib/systemd/systemd"] \ No newline at end of file From 8f791e56e20b9fc2cce1350807c7a4c27d4b72b7 Mon Sep 17 00:00:00 2001 From: Reuben Miller Date: Sun, 21 Jul 2024 11:37:38 +0200 Subject: [PATCH 3/3] add support for ubuntu-24.04 and custom monit.service definition --- nfpm.yaml | 10 ++++++++++ src/packaging/postinstall | 9 +++++++++ src/packaging/postremove | 1 + src/service/monit.service | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+) create mode 100644 src/service/monit.service diff --git a/nfpm.yaml b/nfpm.yaml index 8e4451a..c02f487 100644 --- a/nfpm.yaml +++ b/nfpm.yaml @@ -27,6 +27,16 @@ contents: dst: /etc/monit/conf.d/tedge.conf type: symlink + # Optional service definition which has compatible capability settings + # otherwise the reconnect command will fail + # See https://salsa.debian.org/debian/monit/-/blob/debian/main/debian/patches/040_hardening-monit.service.patch?ref_type=heads + - src: ./src/service/monit.service + dst: /usr/share/tedge-monit-setup/service/ + file_info: + mode: 0644 + group: root + owner: root + - src: ./src/tedge-monit-setup/env dst: /etc/tedge-monit-setup/env type: config|noreplace diff --git a/src/packaging/postinstall b/src/packaging/postinstall index 1387bf3..fddecf4 100755 --- a/src/packaging/postinstall +++ b/src/packaging/postinstall @@ -60,6 +60,15 @@ if [ -f "$MONITRC" ]; then fi fi +# newer monit service definitions are sometimes too strict on the "hardening" of the service +# which results in the "tedge reconnect c8y" command failing. +if grep -q "CapabilityBoundingSet=" /usr/lib/systemd/system/monit.service; then + if [ ! 
-e /etc/systemd/system/monit.service ]; then + echo "Using custom monit.service definition with custom capabilities set" + ln -sf /usr/share/tedge-monit-setup/service/monit.service /etc/systemd/system/monit.service + fi +fi + do_initd() { if command -V chkconfig >/dev/null 2>&1; then chkconfig --add monit ||: diff --git a/src/packaging/postremove b/src/packaging/postremove index 5c40442..e96c91a 100755 --- a/src/packaging/postremove +++ b/src/packaging/postremove @@ -16,6 +16,9 @@ do_initd() { remove_or_purge() { rm -f /etc/monit/conf.d/tedge-monitoring-extended.conf rm -f /usr/share/tedge-monit-setup/tedge-monitoring-legacy.conf + # The postinstall script creates this symlink itself (it is not part of the package contents), so remove it here to avoid leaving a dangling link + rm -f /etc/monit/conf.d/tedge-monitoring.conf + rm -f /etc/systemd/system/monit.service if command -V systemctl >/dev/null 2>&1; then do_systemd diff --git a/src/service/monit.service b/src/service/monit.service new file mode 100644 index 0000000..3cccc76 --- /dev/null +++ b/src/service/monit.service @@ -0,0 +1,36 @@ +[Unit] +Description=Pro-active monitoring utility for unix systems +After=network-online.target +Documentation=man:monit(1) https://mmonit.com/wiki/Monit/HowTo + +[Service] +Type=simple +KillMode=process +ExecStart=/usr/bin/monit -I +ExecStop=/usr/bin/monit quit +ExecReload=/usr/bin/monit reload +Restart=on-abnormal +StandardOutput=null + +# hardening options +# details: https://www.freedesktop.org/software/systemd/man/systemd.exec.html +CapabilityBoundingSet=~CAP_SYS_ADMIN +LockPersonality=true +MemoryDenyWriteExecute=true +NoNewPrivileges=true +PrivateTmp=true +ProtectClock=true +ProtectControlGroups=true +ProtectHome=yes +ProtectHostname=true +ProtectKernelLogs=true +ProtectKernelModules=true +ProtectKernelTunables=true +ProtectSystem=strict +ReadWritePaths=/run/ /var/lib/monit/ /var/log/ /etc/tedge/ /opt/tedge-monit/ +RestrictRealtime=true +RestrictSUIDSGID=true +SystemCallArchitectures=native + +[Install] +WantedBy=multi-user.target