diff --git a/README.md b/README.md index cc8b678..b8f037c 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,12 @@ ``check_omd`` is a Nagios / Icinga plugin for checking a particular [OMD](http://www.omdistro.org) site's services. -# Requirements +## Requirements + I successfully tested the plugin with OMD site versions 1.20 to 2.70. As the plugin needs to be executed **by the site user**, a sudo rule is needed. A template (*``check_omd-sudo-template``*) is part of the repository. -# Usage +## Usage + By default, the script checks all services of the site - it is also possible to exclude services if they are predicted to fail in your environment (*``-x`` / ``--exclude`` parameters*). The following parameters can be specified: @@ -22,33 +24,40 @@ The following parameters can be specified: | `--version` | prints program version and quits | ## Examples + The following example indicates a running OMD site: -``` -$ ./check_omd.py + +```shell +$ ./check_omd.py OK: OMD site 'stankowic' services are running. ``` A site with a failed ``nagios`` service: -``` -$ ./check_omd.py + +```shell +$ ./check_omd.py CRITICAL: OMD site 'hansel' has failed service(s): 'nagios' ``` OMD site ``giertz`` with a well-known daemon, that's crashing sometimes: -``` + +```shell $ ./check_omd.py -x npcd OK: OMD site 'giertz' services are running. ``` OMD site ``clpmchn``, excluding npcd from throwing critical states: -``` + +```shell $ ./check_omd.py -w npcd WARNING: OMD site 'clpmchn' has service(s) in warning state: 'npcd' ``` -# Installation +## Installation + To install the plugin, move the Python script, the agent configuration and sudo rule into their appropriate directories. The paths may vary, depending on your Linux distribution and architecture. 
For RPM-based distributions, proceed with the following steps: -``` + +```shell # mv check_omd.py /usr/lib64/nagios/plugins # mv check_omd-sudo-template /etc/sudoers.d/ # chmod +x /usr/lib64/nagios/plugins/check_omd.py @@ -56,33 +65,41 @@ To install the plugin, move the Python script, the agent configuration and sudo ``` When using NRPE, copy the appropriate configuration and restart the daemon: -``` + +```shell # mv check_omd.cfg /etc/nrpe.d/ # service nrpe restart ``` When using Icinga2, copy the configuration to **ITL** (*Icinga Template Library*), e.g.: -``` + +```shell # cp check_omd.conf /usr/share/icinga2/include/plugins-contrib.d/ # service icinga2 restart ``` Make sure to alter the sudo configuration to match your OMD site name, e.g.: -``` + +```shell nrpe ALL = (stankowic) NOPASSWD: /usr/lib64/nagios/plugins/check_omd.py ``` It is also possible to create an RPM file for your Linux distribution with the RPM spec file: -``` + +```shell $ rpmbuild -ba nagios-plugins-check_omd.spec +... ``` + The RPM spec has been tested on Enterprise Linux 5 to 7, i386 and x86_64. Currently, the RPM package only includes NRPE-related configuration, Icinga2 will follow. -# Configuration +## Configuration + +### Nagios / Icinga 1.x -## Nagios / Icinga 1.x Inside Nagios / Icinga you will need to configure a remote check command, e.g. 
for NRPE: -``` + +```text #check_nrpe_omd define command{ command_name check_nrpe_omd @@ -91,7 +108,8 @@ define command{ ``` Configure the check for a particular host, e.g.: -``` + +```text #SRV: omd stankowic define service{ use generic-service @@ -101,9 +119,11 @@ define service{ } ``` -## Icinga2 +### Icinga2 + Define a service like this: -``` + +```text apply Service for (SITE => config in host.vars.omd_sites) { import "generic-service" check_command = "check_omd" @@ -117,7 +137,8 @@ apply Service for (SITE => config in host.vars.omd_sites) { ``` Create ``omd_site`` dictionaries for your hosts and assign the ``app`` variable: -``` + +```text object Host "st-mon04.stankowic.loc" { import "linux-host" ... @@ -132,23 +153,28 @@ object Host "st-mon04.stankowic.loc" { ``` Validate the configuration and reload the Icinga2 daemon: -``` + +```shell # icinga2 daemon -C # service icinga2 reload ``` -# Troubleshooting -## Plugin not executed as OMD site user +## Troubleshooting + +### Plugin not executed as OMD site user + The plugin will not work if it is not executed as the site user: -``` + +```shell $ whoami taylor -$ ./check_omd.py +$ ./check_omd.py UNKNOWN: unable to check site: 'omd: no such site: taylor' - did you miss running this plugin as OMD site user? ``` -An error message like this will be displayed if multiple OMD sites are available and you're running the plugin as root: -``` +An error message like this will be displayed if multiple OMD sites are available and you're running the plugin as `root`: + +```shell # ./check_omd.py UNKOWN: unable to check site, it seems this plugin is executed as root (use OMD site context!) 
```` diff --git a/check_omd.py b/check_omd.py index d9083e0..662285d 100755 --- a/check_omd.py +++ b/check_omd.py @@ -13,6 +13,7 @@ from optparse import OptionParser import subprocess import io +import sys import logging __version__ = "1.1.1" @@ -50,13 +51,13 @@ def get_site_status(): "running this plugin as OMD site user?".format(err.rstrip())) else: print("UNKNOWN: unable to check site: '{0}'".format(err.rstrip())) - exit(3) + sys.exit(3) if res: #try to find out whether omd was executed as root if res.count(bytes("OVERALL", "utf-8")) > 1: print("UNKOWN: unable to check site, it seems this plugin is " \ "executed as root (use OMD site context!)") - exit(3) + sys.exit(3) #check all services fail_srvs = [] @@ -80,7 +81,12 @@ def get_site_status(): if OPTIONS.heal: cmd = ['omd', 'restart', service] LOGGER.debug("running command '%s'", cmd) - proc = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + proc = subprocess.Popen( + cmd, + stderr=subprocess.PIPE, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE + ) res2, err2 = proc.communicate() print("{}".format(res2.rstrip().decode("utf-8"))) restarted_srvs.append(service) @@ -96,21 +102,25 @@ def get_site_status(): ) if OPTIONS.heal: if len(restarted_srvs) > 0: - print("WARNING: Restarted services on site '{0}': '{1}'".format(site, ' '.join(restarted_srvs))) - exit(1) + print( + "WARNING: Restarted services on site '{0}': '{1}'".format( + site, ' '.join(restarted_srvs) + ) + ) + sys.exit(1) else: - exit(0) + sys.exit(0) if len(fail_srvs) == 0 and len(warn_srvs) == 0: print("OK: OMD site '{0}' services are running.".format(site)) - exit(0) + sys.exit(0) elif len(fail_srvs) > 0: print("CRITICAL: OMD site '{0}' has failed service(s): " \ "'{1}'".format(site, ' '.join(fail_srvs))) - exit(2) + sys.exit(2) else: print("WARNING: OMD site '{0}' has service(s) in warning state: " \ "'{1}'".format(site, ' '.join(warn_srvs))) - exit(1) + sys.exit(1) diff --git a/test/README.md 
b/test/README.md index d4bfc8f..b4e97fc 100644 --- a/test/README.md +++ b/test/README.md @@ -16,6 +16,7 @@ Open a shell, move to this folder and run the following command to create the tes ```shell $ vagrant up +... ``` This will take a couple of minutes, as a CentOS 7 template is downloaded and deployed. It will also automatically install and configure OMD. @@ -25,4 +26,6 @@ Afterwards, create an SSH configuration and run the tests using `pytest`: ```shell $ vagrant ssh-config > .vagrant/ssh_config $ py.test --connection=ssh --ssh-config .vagrant/ssh_config --hosts=omd test_plugin.py --sudo +... +=== 3 passed in 4.01s === ``` diff --git a/test/test_plugin.py b/test/test_plugin.py index bbc3e94..0be9c32 100644 --- a/test/test_plugin.py +++ b/test/test_plugin.py @@ -1,7 +1,7 @@ """ Plugin unit tests """ -import os + def test_exclude(host): """