2 # Check_MK Agent for Linux
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # Remove locale settings to eliminate localized outputs where possible
31 export MK_LIBDIR
=${MK_LIBDIR:-/usr/lib/check_mk_agent}
32 export MK_CONFDIR
=${MK_CONFDIR:-/etc/check_mk}
33 export MK_VARDIR
=${MK_VARDIR:-/var/lib/check_mk_agent}
35 # Optionally set a tempdir for all subsequent calls
38 # Provide information about the remote host. That helps when data
39 # is being sent only once to each remote host.
40 if [ "$REMOTE_HOST" ] ; then
41 export REMOTE
=$REMOTE_HOST
42 elif [ "$SSH_CLIENT" ] ; then
43 export REMOTE
=${SSH_CLIENT%% *}
46 # The package name gets patched for baked agents to either
47 # "check-mk-agent" or the name set by the "name of agent packages" rule
48 XINETD_SERVICE_NAME
=check_mk
50 # Make sure, locally installed binaries are found
51 PATH
=$PATH:/usr
/local
/bin
# All executables in PLUGINSDIR will simply be executed and their
# output appended to the output of the agent. Plugins define their own
# sections and must output headers with '<<<' and '>>>'
56 PLUGINSDIR
=$MK_LIBDIR/plugins
# All executables in LOCALDIR will be executed and their
# output inserted into the section <<<local>>>. Please
# refer to online documentation for details about local checks.
61 LOCALDIR
=$MK_LIBDIR/local
# All files in SPOOLDIR will simply be appended to the agent
# output if they are not outdated (see below)
65 SPOOLDIR
=$MK_VARDIR/spool
67 # close standard input (for security reasons) and stderr when not
68 # explicitly in debug mode.
# When the node's agent is executed by e.g. a docker node in a container,
70 # then don't close stdin, because the agent is piped through it in this
75 elif [ -z "$MK_FROM_NODE" ]; then
76 exec </dev
/null
2>/dev
/null
79 # Detect whether or not the agent is being executed in a container
81 if [ -f /.dockerenv
]; then
87 # Prefer (relatively) new /usr/bin/timeout from coreutils against
88 # our shipped waitmax. waitmax is statically linked and crashes on
89 # some Ubuntu versions recently.
90 if type timeout
>/dev
/null
2>&1 ; then
97 if [ -f "$MK_CONFDIR/encryption.cfg" ] ; then
98 source "$MK_CONFDIR/encryption.cfg"
101 if [ "$ENCRYPTED" == "yes" ] ; then
102 echo -n "00" # protocol version
103 exec > >(openssl enc
-aes-256-cbc -md md5
-k "$PASSPHRASE" -nosalt)
108 if [ -e "$MK_CONFDIR/real_time_checks.cfg" ]; then
109 .
"$MK_CONFDIR/real_time_checks.cfg"
117 function section_mem
()
119 if [ -z "$MK_IN_CONTAINER" ]; then
121 grep -E -v '^Swap:|^Mem:|total:' < /proc
/meminfo
123 echo '<<<docker_container_mem>>>'
124 cat /sys
/fs
/cgroup
/memory
/memory.stat
125 echo -n "usage_in_bytes "
126 cat /sys
/fs
/cgroup
/memory
/memory.usage_in_bytes
127 echo -n "limit_in_bytes "
128 cat /sys
/fs
/cgroup
/memory
/memory.limit_in_bytes
129 grep -F 'MemTotal:' /proc
/meminfo
133 function section_cpu
()
135 if [ "$(uname -m)" = "armv7l" ]; then
136 CPU_REGEX
='^processor'
138 CPU_REGEX
='^CPU|^processor'
140 NUM_CPUS
=$
(grep -c -E $CPU_REGEX < /proc
/cpuinfo
)
142 if [ -z "$MK_IN_CONTAINER" ]; then
144 echo "$(cat /proc/loadavg) $NUM_CPUS"
146 echo '<<<docker_container_cpu>>>'
147 grep "^cpu " /proc
/stat
148 echo "num_cpus $NUM_CPUS"
149 cat /sys
/fs
/cgroup
/cpuacct
/cpuacct.stat
153 function section_uptime
()
156 if [ -z "$MK_IN_CONTAINER" ]; then
159 echo "$(($(date +%s) - $(stat -c %Z /dev/pts)))"
163 # Print out Partitions / Filesystems. (-P gives non-wrapped POSIXed output)
# Heads up: NFS mounts are generally suppressed to avoid agent hangs.
165 # If hard NFS mounts are configured or you have too large nfs retry/timeout
166 # settings, accessing those mounts from the agent would leave you with
167 # thousands of agent processes and, ultimately, a dead monitored system.
168 # These should generally be monitored on the NFS server, not on the clients.
169 function section_df
()
171 if [ -n "$MK_IN_CONTAINER" ]; then
# The exclusion list is becoming a bit of a problem. -l should hide any remote FS but seems
# to be anything but working.
178 local excludefs
="-x smbfs -x cifs -x iso9660 -x udf -x nfsv4 -x nfs -x mvfs -x zfs -x prl_fs -x squashfs"
179 df
-PTlk $excludefs |
sed 1d
181 # df inodes information
183 echo '[df_inodes_start]'
184 df
-PTli $excludefs |
sed 1d
185 echo '[df_inodes_end]'
188 function sections_systemd
()
190 if type systemctl
>/dev
/null
2>&1 ; then
191 echo '<<<systemd_units>>>'
192 systemctl
--all --no-pager
196 function run_mrpe
() {
203 PLUGIN
=${cmdline%% *}
204 OUTPUT
=$
(eval "$cmdline")
206 echo -n "(${PLUGIN##*/}) $descr $? $OUTPUT" |
tr \\n
\\1
212 # Runs a command asynchronous by use of a cache file. Usage:
213 # run_cached [-s] NAME MAXAGE
214 # -s creates the section header <<<$NAME>>>
215 # -m mrpe-mode: stores exit code with the cache
216 # -ma mrpe-mode with age: stores exit code with the cache and adds the cache age
217 # NAME is the name of the section (also used as cache file name)
# MAXAGE is the maximum cache lifetime in seconds
219 function run_cached
() {
# TODO: this function is unable to handle multiple args at once
226 # for example: -s -m won't work, it is read as single token "-s -m"
227 if [ "$1" = -s ] ; then local section
="echo '<<<$2:cached($NOW,$3)>>>' ; " ; shift ; fi
228 if [ "$1" = -m ] ; then local mrpe
=1 ; shift ; fi
229 if [ "$1" = "-ma" ] ; then local mrpe
=1 ; local append_age
=1 ; shift ; fi
233 local CMDLINE
=$section$
*
235 if [ ! -d "$MK_VARDIR/cache" ]; then mkdir
-p "$MK_VARDIR/cache" ; fi
236 if [ "$mrpe" = 1 ] ; then
237 CACHEFILE
="$MK_VARDIR/cache/mrpe_$NAME.cache"
239 CACHEFILE
="$MK_VARDIR/cache/$NAME.cache"
242 # Check if the creation of the cache takes suspiciously long and kill the
243 # process if the age (access time) of $CACHEFILE.new is twice the MAXAGE.
# Output any already cached section anyway and start the cache
246 if [ -e "$CACHEFILE.new" ] ; then
248 CF_ATIME
=$
(stat
-c %X
"$CACHEFILE.new")
249 if [ $
((NOW
- CF_ATIME
)) -ge $
((MAXAGE
* 2)) ] ; then
250 # Kill the process still accessing that file in case
251 # it is still running. This avoids overlapping processes!
252 fuser
-k -9 "$CACHEFILE.new" >/dev
/null
2>&1
253 rm -f "$CACHEFILE.new"
258 # Check if cache file exists and is recent enough
259 if [ -s "$CACHEFILE" ] ; then
261 MTIME
=$
(stat
-c %Y
"$CACHEFILE")
264 if [ "$AGE" -le "$MAXAGE" ] ; then local USE_CACHEFILE
=1 ; fi
265 # Output the file in any case, even if it is
266 # outdated. The new file will not yet be available
267 if [ $append_age -eq 1 ] ; then
268 # insert the cached-string before the pipe (first -e)
269 # or, if no pipe found (-e t) append it (third -e),
270 # but only once and on the second line (2!b) (first line is section header,
271 # all further lines are long output)
272 sed -e "2s/|/ (Cached: ${AGE}\/${MAXAGE}s)|/" -e t -e "2s/$/ (Cached: ${AGE}\/${MAXAGE}s)/" < "$CACHEFILE"
278 # Cache file outdated and new job not yet running? Start it
279 if [ -z "$USE_CACHEFILE" ] && [ ! -e "$CACHEFILE.new" ] ; then
# When the command fails, the output is thrown away
281 if [ $mrpe -eq 1 ] ; then
282 echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; run_mrpe $NAME \"$CMDLINE\" && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup
/bin
/bash
>/dev
/null
2>&1 &
284 echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; $CMDLINE && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup
/bin
/bash
>/dev
/null
2>&1 &
289 # Make run_cached available for subshells (plugins, local checks, etc.)
292 # Implements Real-Time Check feature of the Check_MK agent which can send
293 # some section data in 1 second resolution. Useful for fast notifications and
294 # detailed graphing (if you configure your RRDs to this resolution).
295 function run_real_time_checks
()
297 PIDFILE
=$MK_VARDIR/real_time_checks.pid
301 if [ "$PASSPHRASE" != "" ] ; then
302 # new mechanism to set the passphrase has priority
303 RTC_SECRET
=$PASSPHRASE
306 if [ "$ENCRYPTED_RT" != "no" ] ; then
314 # terminate when pidfile is gone or other Real-Time Check process started or configured timeout
315 if [ ! -e "$PIDFILE" ] ||
[ "$(<"$PIDFILE")" -ne $$
] ||
[ "$RTC_TIMEOUT" -eq 0 ]; then
319 for SECTION
in $RTC_SECTIONS; do
320 # Be aware of maximum packet size. Maybe we need to check the size of the section
321 # output and do some kind of nicer error handling.
322 # 2 bytes: protocol version, 10 bytes: timestamp, rest: encrypted data
323 # dd is used to concatenate the output of all commands to a single write/block => udp packet
324 { echo -n $PROTOCOL ;
325 date +%s |
tr -d '\n' ;
326 if [ "$ENCRYPTED_RT" != "no" ] ; then
327 export RTC_SECRET
=$RTC_SECRET ; section_
"$SECTION" | openssl enc
-aes-256-cbc -md md5
-pass env
:RTC_SECRET
-nosalt ;
331 } |
dd bs
=9999 iflag
=fullblock
2>/dev
/null
>"/dev/udp/${REMOTE}/${RTC_PORT}"
336 if cd "$PLUGINSDIR" ; then
337 for PLUGIN
in $RTC_PLUGINS; do
338 if [ ! -f $PLUGIN ] ; then
342 # Be aware of maximum packet size. Maybe we need to check the size of the section
343 # output and do some kind of nicer error handling.
344 # 2 bytes: protocol version, 10 bytes: timestamp, rest: encrypted data
345 # dd is used to concatenate the output of all commands to a single write/block => udp packet
346 { echo -n $PROTOCOL ;
347 date +%s |
tr -d '\n' ;
348 if [ "$ENCRYPTED_RT" != "no" ] ; then
349 export RTC_SECRET
=$RTC_SECRET ; .
/$PLUGIN | openssl enc
-aes-256-cbc -md md5
-pass env
:RTC_SECRET
-nosalt ;
353 } |
dd bs
=9999 iflag
=fullblock
2>/dev
/null
>"/dev/udp/${REMOTE}/${RTC_PORT}"
358 RTC_TIMEOUT
=$
((RTC_TIMEOUT-1
))
362 echo "<<<check_mk>>>"
363 echo "Version: 1.6.0i1"
364 echo "AgentOS: linux"
365 echo "Hostname: $(hostname)"
366 echo "AgentDirectory: $MK_CONFDIR"
367 echo "DataDirectory: $MK_VARDIR"
368 echo "SpoolDirectory: $SPOOLDIR"
369 echo "PluginsDirectory: $PLUGINSDIR"
370 echo "LocalDirectory: $LOCALDIR"
372 # If we are called via xinetd, try to find only_from configuration
373 if [ -n "$REMOTE_HOST" ]
376 sed -n '/^service[[:space:]]*'$XINETD_SERVICE_NAME'/,/}/s/^[[:space:]]*only_from[[:space:]]*=[[:space:]]*\(.*\)/\1/p' /etc
/xinetd.d
/* |
head -n1; echo
383 # Filesystem usage for ZFS
384 if type zfs
> /dev
/null
2>&1 ; then
386 zfs get
-Hp name
,quota
,used
,avail
,mountpoint
,type -t filesystem
,volume || \
387 zfs get
-Hp name
,quota
,used
,avail
,mountpoint
,type
389 df
-PTlk -t zfs |
sed 1d
392 # Check NFS mounts by accessing them with stat -f (System
# call statfs()). If this lasts more than 2 seconds we
394 # consider it as hanging. We need waitmax.
395 if type waitmax
>/dev
/null
397 STAT_VERSION
=$
(stat
--version |
head -1 | cut
-d" " -f4)
400 echo '<<<nfsmounts>>>'
401 sed -n '/ nfs4\? /s/[^ ]* \([^ ]*\) .*/\1/p' < /proc
/mounts |
405 if [ "$STAT_VERSION" != "$STAT_BROKE" ]; then
406 waitmax
-s 9 5 stat
-f -c "$MP ok %b %f %a %s" "$MP" || \
407 echo "$MP hanging 0 0 0 0"
409 waitmax
-s 9 5 stat
-f -c "$MP ok %b %f %a %s" "$MP" && \
410 printf '\n'||
echo "$MP hanging 0 0 0 0"
414 echo '<<<cifsmounts>>>'
415 sed -n '/ cifs\? /s/[^ ]* \([^ ]*\) .*/\1/p' < /proc
/mounts |
419 if [ ! -r "$MP" ]; then
420 echo "$MP Permission denied"
421 elif [ "$STAT_VERSION" != "$STAT_BROKE" ]; then
422 waitmax
-s 9 2 stat
-f -c "$MP ok %b %f %a %s" "$MP" || \
423 echo "$MP hanging 0 0 0 0"
425 waitmax
-s 9 2 stat
-f -c "$MP ok %b %f %a %s" "$MP" && \
426 printf '\n'||
echo "$MP hanging 0 0 0 0"
431 # Check mount options. Filesystems may switch to 'ro' in case
434 grep ^
/dev
< /proc
/mounts |
grep -v " squashfs "
436 # processes including username, without kernel processes
438 ps ax
-o user
:32,vsz
,rss
,cputime
,etime
,pid
,command --columns 10000 |
sed -e 1d
-e 's/ *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) */(\1,\2,\3,\4\/\5,\6) /'
443 # Load and number of processes
449 # New variant: Information about speed and state in one section
450 if type ip
> /dev
/null
453 echo "[start_iplink]"
458 echo '<<<lnx_if:sep(58)>>>'
459 sed 1,2d
/proc
/net
/dev
460 if type ethtool
> /dev
/null
462 sed -e 1,2d
/proc
/net
/dev | cut
-d':' -f1 |
sort |
while read eth
; do
464 ethtool
"$eth" |
grep -E '(Speed|Duplex|Link detected|Auto-negotiation):'
465 echo -e "\tAddress: $(cat "/sys
/class
/net
/$eth/address
")\n"
470 # Current state of bonding interfaces
471 if [ -e /proc
/net
/bonding
] ; then
472 echo '<<<lnx_bonding:sep(58)>>>'
473 pushd /proc
/net
/bonding
> /dev
/null
478 # Same for Open vSwitch bonding
479 if type ovs-appctl
> /dev
/null
; then
480 BONDS
=$
(ovs-appctl bond
/list
)
481 COL
=$
(echo "$BONDS" |
awk '{for(i=1;i<=NF;i++) {if($i == "bond") printf("%d", i)} exit 0}')
482 echo '<<<ovs_bonding:sep(58)>>>'
483 for bond
in $
(echo "$BONDS" |
sed -e 1d | cut
-f"${COL}") ; do
485 ovs-appctl bond
/show
"$bond"
490 # Number of TCP connections in the various states
491 if type waitmax
>/dev
/null
; then
492 echo '<<<tcp_conn_stats>>>'
493 THIS
=$
(waitmax
5 cat /proc
/net
/tcp
/proc
/net
/tcp6
2>/dev
/null |
awk ' /:/ { c[$4]++; } END { for (x in c) { print x, c[x]; } }')
494 if [ $?
== 0 ] ; then
496 elif type ss
> /dev
/null
; then
497 ss
-ant |
grep -v ^State |
awk ' /:/ { c[$1]++; } END { for (x in c) { print x, c[x]; } }' |
sed -e 's/^ESTAB/01/g;s/^SYN-SENT/02/g;s/^SYN-RECV/03/g;s/^FIN-WAIT-1/04/g;s/^FIN-WAIT-2/05/g;s/^TIME-WAIT/06/g;s/^CLOSED/07/g;s/^CLOSE-WAIT/08/g;s/^LAST-ACK/09/g;s/^LISTEN/0A/g;s/^CLOSING/0B/g;'
502 if type multipath
>/dev
/null
; then
503 if [ -f /etc
/multipath.conf
] ; then
504 echo '<<<multipath>>>'
# Performance counters for disks
510 if [ -z "$MK_IN_CONTAINER" ]; then
511 echo '<<<diskstat>>>'
513 grep -E ' (x?[shv]d[a-z]*[0-9]*|cciss/c[0-9]+d[0-9]+|emcpower[a-z]+|dm-[0-9]+|VxVM.*|mmcblk.*|dasd[a-z]*|bcache[0-9]+|nvme[0-9]+n[0-9]+) ' < /proc
/diskstats
514 if type dmsetup
>/dev
/null
; then
515 echo '[dmsetup_info]'
516 dmsetup info
-c --noheadings --separator ' ' -o name
,devno
,vg_name
,lv_name
518 if [ -d /dev
/vx
/dsk
] ; then
520 stat
-c "%t %T %n" /dev
/vx
/dsk
/*/*
523 echo '<<<docker_container_diskstat>>>'
526 for F
in io_service_bytes io_serviced
; do
528 cat "/sys/fs/cgroup/blkio/blkio.throttle.$F"
531 for F
in /sys
/block
/*; do
532 echo -n "${F##*/} " ;
# Performance counters for the kernel
539 if [ -z "$MK_IN_CONTAINER" ]; then
542 cat /proc
/vmstat
/proc
/stat
545 # Hardware sensors via IPMI (need ipmitool)
546 if type ipmitool
> /dev
/null
548 run_cached
-s "ipmi:sep(124)" 300 "waitmax 300 ipmitool sensor list | grep -v 'command failed' | egrep -v '^[^ ]+ na ' | grep -v ' discrete '"
549 # readable discrete sensor states
550 run_cached
-s "ipmi_discrete:sep(124)" 300 "waitmax 300 ipmitool sdr elist compact"
# IPMI data via ipmi-sensors (of freeipmi). If you have installed freeipmi,
# please make sure that IPMI is really supported by your hardware.
556 if type ipmi-sensors
>/dev
/null
558 echo '<<<ipmi_sensors>>>'
# Newer ipmi-sensors versions have a new output format; the legacy format can still be used
560 if ipmi-sensors
--help |
grep -q legacy-output
; then
561 IPMI_FORMAT
="--legacy-output"
565 if ipmi-sensors
--help |
grep -q " \-\-groups"; then
571 # At least with ipmi-sensors 0.7.16 this group is Power_Unit instead of "Power Unit"
572 run_cached
-s ipmi_sensors
300 "for class in Temperature Power_Unit Fan
574 ipmi-sensors $IPMI_FORMAT --sdr-cache-directory /var/cache $IPMI_GROUP_OPT \"\$class\" | sed -e 's/ /_/g' -e 's/:_\?/ /g' -e 's@ \([^(]*\)_(\([^)]*\))@ \2_\1@'
575 # In case of a timeout immediately leave loop.
576 if [ $? = 255 ] ; then break ; fi
580 # RAID status of Linux software RAID
584 # RAID status of Linux RAID via device mapper
585 if type dmraid
>/dev
/null
&& DMSTATUS
=$
(waitmax
3 dmraid
-r)
589 # Output name and status
590 waitmax
20 dmraid
-s |
grep -e ^name
-e ^status
592 # Output disk names of the RAID disks
593 DISKS
=$
(echo "$DMSTATUS" | cut
-f1 -d":")
595 for disk
in $DISKS ; do
596 device
=$
(cat /sys
/block
/"$(basename "$disk")"/device
/model
)
597 status
=$
(echo "$DMSTATUS" |
grep "^${disk}")
598 echo "${status} Model: ${device}"
602 # RAID status of LSI controllers via cfggen
603 if type cfggen
> /dev
/null
; then
605 cfggen
0 DISPLAY |
egrep '(Target ID|State|Volume ID|Status of volume)[[:space:]]*:' |
sed -e 's/ *//g' -e 's/:/ /'
608 # RAID status of LSI MegaRAID controller via MegaCli. You can download that tool from:
609 # http://www.lsi.com/downloads/Public/MegaRAID%20Common%20Files/8.02.16_MegaCLI.zip
610 if type MegaCli
>/dev
/null
; then
611 MegaCli_bin
="MegaCli"
612 elif type MegaCli64
>/dev
/null
; then
613 MegaCli_bin
="MegaCli64"
614 elif type megacli
>/dev
/null
; then
615 MegaCli_bin
="megacli"
616 elif type storcli
>/dev
/null
; then
617 MegaCli_bin
="storcli"
618 elif type storcli64
>/dev
/null
; then
619 MegaCli_bin
="storcli64"
621 MegaCli_bin
="unknown"
624 if [ "$MegaCli_bin" != "unknown" ]; then
625 echo '<<<megaraid_pdisks>>>'
626 for part
in $
($MegaCli_bin -EncInfo -aALL -NoLog < /dev
/null \
627 |
sed -rn 's/:/ /g; s/[[:space:]]+/ /g; s/^ //; s/ $//; s/Number of enclosures on adapter ([0-9]+).*/adapter \1/g; /^(Enclosure|Device ID|adapter) [0-9]+$/ p'); do
628 [ "$part" = adapter
] && echo ""
629 [ "$part" = 'Enclosure' ] && echo -ne "\ndev2enc"
633 $MegaCli_bin -PDList -aALL -NoLog < /dev
/null |
egrep 'Enclosure|Raw Size|Slot Number|Device Id|Firmware state|Inquiry|Adapter'
634 echo '<<<megaraid_ldisks>>>'
635 $MegaCli_bin -LDInfo -Lall -aALL -NoLog < /dev
/null |
egrep 'Size|State|Number|Adapter|Virtual'
636 echo '<<<megaraid_bbu>>>'
637 $MegaCli_bin -AdpBbuCmd -GetBbuStatus -aALL -NoLog < /dev
/null |
grep -v Exit
640 # RAID status of 3WARE disk controller (by Radoslaw Bak)
641 if type tw_cli
> /dev
/null
; then
642 for C
in $
(tw_cli show |
awk 'NR < 4 { next } { print $1 }'); do
643 echo '<<<3ware_info>>>'
644 tw_cli
"/$C" show all |
egrep 'Model =|Firmware|Serial'
645 echo '<<<3ware_disks>>>'
646 tw_cli
"/$C" show drivestatus |
egrep 'p[0-9]' |
sed "s/^/$C\//"
647 echo '<<<3ware_units>>>'
648 tw_cli
"/$C" show unitstatus |
egrep 'u[0-9]' |
sed "s/^/$C\//"
652 # RAID controllers from areca (Taiwan)
653 # cli64 can be found at ftp://ftp.areca.com.tw/RaidCards/AP_Drivers/Linux/CLI/
654 if type cli64
>/dev
/null
; then
655 run_cached
-s arc_raid_status
300 "cli64 rsf info | tail -n +3 | head -n -2"
# VirtualBox Guests. The section must always be output. Otherwise the
# check would not be executed in case no guest additions are installed.
# And that is something the check wants to detect
661 echo '<<<vbox_guest>>>'
662 if type VBoxControl
>/dev
/null
2>&1 && lsmod |
grep vboxguest
>/dev
/null
2>&1; then
663 VBoxControl
-nologo guestproperty enumerate | cut
-d, -f1,2
664 [ "${PIPESTATUS[0]}" = 0 ] ||
echo "ERROR"
# OpenVPN Clients. Currently we assume that the configuration is in
# /etc/openvpn. We might find a safer way to find the configuration later.
669 if [ -e /etc
/openvpn
/openvpn-status.log
] ; then
670 echo '<<<openvpn_clients:sep(44)>>>'
671 sed -n -e '/CLIENT LIST/,/ROUTING TABLE/p' < /etc
/openvpn
/openvpn-status.log |
sed -e 1,3d
-e '$d'
674 # Time synchronization with NTP
675 if type ntpq
> /dev
/null
2>&1 ; then
676 # remove heading, make first column space separated
677 run_cached
-s ntp
30 "waitmax 5 ntpq -np | sed -e 1,2d -e 's/^\(.\)/\1 /' -e 's/^ /%/' || true"
680 # Time synchronization with Chrony
681 if type chronyc
> /dev
/null
2>&1 ; then
682 # Force successful exit code. Otherwise section will be missing if daemon not running
684 # The "| cat" has been added for some kind of regression in RedHat 7.5. The
685 # SELinux rules shipped with that release were denying the chronyc call
687 run_cached
-s chrony
30 "waitmax 5 chronyc -n tracking | cat || true"
690 if type nvidia-settings
>/dev
/null
&& [ -S /tmp
/.X11-unix
/X0
]
693 for var
in GPUErrors GPUCoreTemp
695 DISPLAY
=:0 waitmax
2 nvidia-settings
-t -q $var |
sed "s/^/$var: /"
699 if [ -z "$MK_IN_CONTAINER" ] && [ -e /proc
/drbd
]; then
704 # Heartbeat monitoring
705 # Different handling for heartbeat clusters with and without CRM
706 # for the resource state
707 if [ -S /var
/run
/heartbeat
/crm
/cib_ro
-o -S /var
/run
/crm
/cib_ro
] || pgrep crmd
> /dev
/null
2>&1; then
708 echo '<<<heartbeat_crm>>>'
709 TZ
=UTC crm_mon
-1 -r |
grep -v ^$ |
sed 's/^ //; /^\sResource Group:/,$ s/^\s//; s/^\s/_/g'
711 if type cl_status
> /dev
/null
2>&1; then
712 echo '<<<heartbeat_rscstatus>>>'
715 echo '<<<heartbeat_nodes>>>'
716 for NODE
in $
(cl_status listnodes
); do
717 if [ "$NODE" != "$(echo "$HOSTNAME" | tr '[:upper:]' '[:lower:]')" ]; then
718 STATUS
=$
(cl_status nodestatus
"$NODE")
719 echo -n "$NODE $STATUS"
720 for LINK
in $
(cl_status listhblinks
"$NODE" 2>/dev
/null
); do
721 echo -n " $LINK $(cl_status hblinkstatus "$NODE" "$LINK")"
728 # Postfix mailqueue monitoring
729 # Determine the number of mails and their size in several postfix mail queues
730 function read_postfix_queue_dirs
{
732 if [ -n "$postfix_queue_dir" ]; then
733 echo '<<<postfix_mailq>>>'
734 if [ ! -z "$2" ]; then
737 for queue
in deferred active
739 count
=$
(find "${postfix_queue_dir}/$queue" -type f |
wc -l)
740 size
=$
(du
-s "${postfix_queue_dir}/$queue" |
awk '{print $1 }')
741 if [ -z "$size" ]; then
744 if [ -z "$count" ]; then
745 echo "Mail queue is empty"
747 echo "QUEUE_${queue} $size $count"
753 # Postfix mailqueue monitoring
754 # Determine the number of mails and their size in several postfix mail queues
755 if type postconf
>/dev
/null
; then
756 # Check if multi_instance_directories exists in main.cf and is not empty
757 # always takes the last entry, multiple entries possible
758 multi_instances_dirs
=$
(postconf
-c /etc
/postfix
2>/dev
/null |
grep ^multi_instance_directories |
sed 's/.*=[[:space:]]*//g')
759 if [ ! -z "$multi_instances_dirs" ]; then
760 for queue_dir
in $multi_instances_dirs
762 if [ -n "$queue_dir" ]; then
763 postfix_queue_dir
=$
(postconf
-c "$queue_dir" 2>/dev
/null |
grep ^queue_directory |
sed 's/.*=[[:space:]]*//g')
764 read_postfix_queue_dirs
"$postfix_queue_dir" "$queue_dir"
769 postfix_queue_dir
=$
(postconf
-h queue_directory
2>/dev
/null
)
770 read_postfix_queue_dirs
"$postfix_queue_dir"
773 elif [ -x /usr
/sbin
/ssmtp
] ; then
774 echo '<<<postfix_mailq>>>'
775 mailq
2>&1 |
sed 's/^[^:]*: \(.*\)/\1/' |
tail -n 6
779 # Postfix status monitoring. Can handle multiple instances.
780 if type postfix
>/dev
/null
; then
781 echo "<<<postfix_mailq_status:sep(58)>>>"
782 for i
in /var
/spool
/postfix
*/; do
783 if [ -e "$i/pid/master.pid" ]; then
784 if [ -r "$i/pid/master.pid" ]; then
785 postfix_pid
=$
(sed 's/ //g' < "$i/pid/master.pid") # handle possible spaces in output
786 if readlink
-- "/proc/${postfix_pid}/exe" |
grep -q ".*postfix/\(s\?bin/\)\?master.*"; then
787 echo "$i:the Postfix mail system is running:PID:$postfix_pid" |
sed 's/\/var\/spool\///g'
789 echo "$i:PID file exists but instance is not running!" |
sed 's/\/var\/spool\///g'
792 echo "$i:PID file exists but is not readable"
795 echo "$i:the Postfix mail system is not running" |
sed 's/\/var\/spool\///g'
800 # Check status of qmail mailqueue
801 if type qmail-qstat
>/dev
/null
803 echo "<<<qmail_stats>>>"
807 # Nullmailer queue monitoring
808 if type nullmailer-send
>/dev
/null
&& [ -d /var
/spool
/nullmailer
/queue
]
810 echo '<<<nullmailer_mailq>>>'
811 COUNT
=$
(find /var
/spool
/nullmailer
/queue
-type f |
wc -l)
812 SIZE
=$
(du
-s /var
/spool
/nullmailer
/queue |
awk '{print $1 }')
816 # Check status of OMD sites and Check_MK Notification spooler
817 if type omd
>/dev
/null
819 run_cached
-s omd_status
60 "omd status --bare --auto || true"
820 echo '<<<mknotifyd:sep(0)>>>'
821 for statefile
in /omd
/sites
/*/var
/log
/mknotifyd.state
; do
822 if [ -e "$statefile" ] ; then
823 site
=${statefile%/var/log*}
824 site
=${site#/omd/sites/}
826 grep -v '^#' < "$statefile"
830 echo '<<<omd_apache:sep(124)>>>'
831 for statsfile
in /omd
/sites
/*/var
/log
/apache
/stats
; do
832 if [ -e "$statsfile" ] ; then
833 site
=${statsfile%/var/log*}
834 site
=${site#/omd/sites/}
838 # prevent next section to fail caused by a missing newline at the end of the statsfile
845 # Welcome the ZFS check on Linux
# We do not endorse running ZFS on Linux if your vendor doesn't support it ;)
848 if type zpool
>/dev
/null
; then
849 echo "<<<zpool_status>>>"
856 # Veritas Cluster Server
857 # Software is always installed in /opt/VRTSvcs.
858 # Secure mode must be off to allow root to execute commands
859 if [ -x /opt
/VRTSvcs
/bin
/haclus
]
861 echo "<<<veritas_vcs>>>"
862 vcshost
=$
(hostname | cut
-d.
-f1)
863 waitmax
-s 9 2 /opt
/VRTSvcs
/bin
/haclus
-display -localclus |
grep -e ClusterName
-e ClusState
864 waitmax
-s 9 2 /opt
/VRTSvcs
/bin
/hasys
-display -attribute SysState
865 waitmax
-s 9 2 /opt
/VRTSvcs
/bin
/hagrp
-display -sys "$vcshost" -attribute State
-localclus
866 waitmax
-s 9 2 /opt
/VRTSvcs
/bin
/hares
-display -sys "$vcshost" -attribute State
-localclus
867 waitmax
-s 9 2 /opt
/VRTSvcs
/bin
/hagrp
-display -attribute TFrozen
-attribute Frozen
871 # Fileinfo-Check: put patterns for files into /etc/check_mk/fileinfo.cfg
# Replace the date variable of a file name by the current date.
#
# A date variable has the form $DATE:<format>$, e.g. $DATE:%Y%m%d$, where
# <format> is handed to date(1) as its output format. The match is greedy:
# the variable extends up to the last '$' in the file name.
#
# Arguments: $1 - file name (pattern) that may contain a date variable
# Outputs:   the file name with the first occurrence of the date variable
#            replaced; the unchanged input if there is no date variable
function replace_datevariable() {
    local file_name=$1
    # shellcheck disable=SC2016
    local pattern='(\$DATE:(.*)\$)'
    local date_variable format_string current_date result

    # If there's no match just return the input.
    if [[ ! $file_name =~ $pattern ]]; then
        printf '%s\n' "$file_name"
        return
    fi

    date_variable=${BASH_REMATCH[1]}
    format_string=${BASH_REMATCH[2]}
    current_date=$(date +"$format_string")

    # Both sides are quoted on purpose: the search string must be matched
    # literally (a format like [%Y] would otherwise act as a glob and never
    # match), and a '&' in the date output must stay a plain character.
    result=${file_name/"$date_variable"/"$current_date"}
    printf '%s\n' "$result"
}
889 if [ -r "$MK_CONFDIR/fileinfo.cfg" ]; then
890 echo '<<<fileinfo:sep(124)>>>'
895 # let the shell do all the expansion, and pipe all files to perl
896 (cat "$MK_CONFDIR/fileinfo.cfg" "$MK_CONFDIR/fileinfo.d/*" 2>/dev
/null
) |
while read -r pattern
; do
898 /*) pattern
=$
(replace_datevariable
"$pattern")
899 for f
in $pattern; do echo $f; done
904 print "[[[header]]]\n";
905 print "name|status|size|time\n";
906 print "[[[content]]]\n";
911 print "$_|missing\n";
914 ($device, $inode, $mode, $nlink, $uid, $gid, $rdev, $size,
915 $atime, $mtime, $ctime, $blksize, $blocks) = stat($_);
917 print "$_|stat failed\n";
919 print "$_|ok|$size|$mtime\n";
922 set +vx
; eval "$old_state"
926 # Get stats about OMD monitoring cores running on this machine.
927 # Since cd is a shell builtin the check does not affect the performance
928 # on non-OMD machines.
931 echo '<<<livestatus_status:sep(59)>>>'
934 if [ -S "/omd/sites/$site/tmp/run/live" ] ; then
936 echo -e "GET status" | \
937 waitmax
3 "/omd/sites/$site/bin/unixcat" "/omd/sites/$site/tmp/run/live"
941 echo '<<<livestatus_ssl_certs:sep(124)>>>'
945 for PEM_PATH
in "/omd/sites/$site/etc/ssl/ca.pem" "/omd/sites/$site/etc/ssl/sites/$site.pem"; do
946 if [ -f "$PEM_PATH" ]; then
947 CERT_DATE
=$
(openssl x509
-enddate -noout -in "$PEM_PATH")
948 CERT_DATE
=${CERT_DATE/notAfter=/}
949 echo "$PEM_PATH|$(date --date="$CERT_DATE" --utc +%s)"
954 echo '<<<mkeventd_status:sep(0)>>>'
957 if [ -S "/omd/sites/$site/tmp/run/mkeventd/status" ] ; then
959 echo -e "GET status\nOutputFormat: json" \
960 | waitmax
3 "/omd/sites/$site/bin/unixcat" "/omd/sites/$site/tmp/run/mkeventd/status"
965 # Collect states of configured Check_MK site backup jobs
966 if ls /omd
/sites
/*/var
/check_mk
/backup
/*.state
>/dev
/null
2>&1; then
967 echo "<<<mkbackup>>>"
968 for F
in /omd
/sites
/*/var
/check_mk
/backup
/*.state
; do
972 JOB_IDENT
=${F%.state}
973 JOB_IDENT
=${JOB_IDENT##*/}
975 if [ "$JOB_IDENT" != "restore" ]; then
976 echo "[[[site:$SITE:$JOB_IDENT]]]"
983 # Collect states of configured CMA backup jobs
984 if type mkbackup
>/dev
/null
&& ls /var
/lib
/mkbackup
/*.state
>/dev
/null
2>&1; then
985 echo "<<<mkbackup>>>"
986 for F
in /var
/lib
/mkbackup
/*.state
; do
987 JOB_IDENT
=${F%.state}
988 JOB_IDENT
=${JOB_IDENT##*/}
990 if [ "$JOB_IDENT" != "restore" ]; then
991 echo "[[[system:$JOB_IDENT]]]"
998 # Get statistics about monitored jobs. Below the job directory there
999 # is a sub directory per user that ran a job. That directory must be
1000 # owned by the user so that a symlink or hardlink attack for reading
1001 # arbitrary files can be avoided.
1002 if pushd "$MK_VARDIR/job" >/dev
/null
; then
1006 if [ -d "$username" ] && cd "$username" ; then
1007 if [ $EUID -eq 0 ]; then
1008 su
-s "$SHELL" "$username" -c "head -n -0 -v *"
1018 # Gather thermal information provided e.g. by acpi
1019 # At the moment only supporting thermal sensors
1020 if [ -z "$MK_IN_CONTAINER" ] && ls /sys
/class
/thermal
/thermal_zone
* >/dev
/null
2>&1; then
1021 echo '<<<lnx_thermal:sep(124)>>>'
1022 for F
in /sys
/class
/thermal
/thermal_zone
*; do
1024 if [ ! -e "$F/mode" ] ; then line
="${line}|-" ; else line
="${line}|$(cat "$F"/mode)"; fi
1025 line
="${line}|$(cat "$F"/{type,temp} | tr \\n "|
")"
1026 for G
in $
(ls "$F"/trip_point_
*_
{temp
,type}); do
1027 line
="${line}$(< "$G" tr \\n "|
")"
1033 # Libelle Business Shadow
1034 if type trd
>/dev
/null
; then
1035 echo "<<<libelle_business_shadow:sep(58)>>>"
1039 # HTTP Accelerator Statistics
1040 if type varnishstat
>/dev
/null
; then
1041 echo "<<<varnish>>>"
1046 if type pvecm
> /dev
/null
2>&1 ; then
1047 echo "<<<pvecm_status:sep(58)>>>"
1049 echo "<<<pvecm_nodes>>>"
1053 for HAPROXY_SOCK
in /run
/haproxy
/admin.sock
/var
/lib
/haproxy
/stats
; do
1054 if [ -r "$HAPROXY_SOCK" ] && type socat
>/dev
/null
2>&1; then
1055 echo "<<<haproxy:sep(44)>>>"
1056 echo "show stat" | socat
- "UNIX-CONNECT:$HAPROXY_SOCK"
1060 # Start new liveupdate process in background on each agent execution. Starting
1061 # a new live update process will terminate the old one automatically after
1063 if [ -e "$MK_CONFDIR/real_time_checks.cfg" ]; then
1064 if [ -z "$REMOTE" ]; then
1065 echo "ERROR: \$REMOTE not specified. Not starting Real-Time Checks." >&2
1066 elif ! type openssl
>/dev
/null
; then
1067 echo "ERROR: openssl command is missing. Not starting Real-Time Checks." >&2
1069 run_real_time_checks
>/dev
/null
&
1073 # MK's Remote Plugin Executor
1074 if [ -e "$MK_CONFDIR/mrpe.cfg" ]
1076 grep -Ev '^[[:space:]]*($|#)' "$MK_CONFDIR/mrpe.cfg" | \
1077 while read descr cmdline
1081 # NOTE: Due to an escaping-related bug in some old bash versions
1082 # (3.2.x), we have to use an intermediate variable for the pattern.
1083 pattern
='\(([^\)]*)\)[[:space:]](.*)'
1084 if [[ $cmdline =~
$pattern ]]
1086 parameters
=${BASH_REMATCH[1]}
1087 cmdline
=${BASH_REMATCH[2]}
1089 # split multiple parameter assignments
1090 for par
in $
(echo "$parameters" |
tr ":" "\n")
1092 # split each assignment
1093 key
=$
(echo "$par" | cut
-d= -f1)
1094 value
=$
(echo "$par" | cut
-d= -f2)
1096 if [ "$key" = "interval" ] ; then
1098 elif [ "$key" = "appendage" ] ; then
1104 if [ -z "$interval" ]
1106 run_mrpe
"$descr" "$cmdline"
1108 run_cached
"$args" "$descr" "$interval" "$cmdline"
1113 # MK's runas Executor
1114 if [ -e "$MK_CONFDIR/runas.cfg" ]
1116 grep -Ev '^[[:space:]]*($|#)' "$MK_CONFDIR/runas.cfg" | \
1117 while read type user include
1119 if [ -d "$include" -o \
( "$type" == "mrpe" -a -f "$include" \
) ] ; then
1121 if [ "$user" != "-" ] ; then
1122 PREFIX
="su $user -c "
1126 if [ "$type" == "mrpe" ] ; then
1127 grep -Ev '^[[:space:]]*($|#)' "$include" | \
1128 while read descr cmdline
1131 # NOTE: Due to an escaping-related bug in some old bash
1132 # versions (3.2.x), we have to use an intermediate variable
1134 pattern
='\(([^\)]*)\)[[:space:]](.*)'
1135 if [[ $cmdline =~
$pattern ]]
1137 parameters
=${BASH_REMATCH[1]}
1138 cmdline
=${BASH_REMATCH[2]}
1140 # split multiple parameter assignments
1141 for par
in $
(echo "$parameters" |
tr ":" "\n")
1143 # split each assignment
1144 IFS
='=' read key value
<<< $par
1145 if [ "$key" = "interval" ]
1148 # no other parameters supported currently
1153 if [ -n "$PREFIX" ] ; then
1154 cmdline
="$PREFIX\'$cmdline\'"
1156 if [ -z "$interval" ]
1158 run_mrpe
"$descr" "$cmdline"
1160 run_cached
-m "$descr" "$interval" "$cmdline"
1164 # local and plugin includes
1165 elif [ "$type" == "local" -o "$type" == "plugin" ] ; then
1166 if [ "$type" == "local" ] ; then
1170 find "$include" -executable -type f | \
1173 if [ -n "$PREFIX" ] ; then
1174 cmdline
="$PREFIX\"$filename\""
# Decide whether a file found in a plugin or local check directory
# may be executed by the agent.
#
# Arguments: $1 - path of the candidate file
# Returns:   0 if $1 is a regular, executable file whose name does not end
#            in .dpkg-new, .dpkg-old or .dpkg-temp; 1 otherwise
function is_valid_plugin() {
    # NOTE: Due to an escaping-related bug in some old bash versions
    # (3.2.x), we have to use an intermediate variable for the pattern.
    # It is local so that the caller's own $pattern is left untouched.
    local pattern='\.dpkg-(new|old|temp)$'

    # The exit status of [[ ]] is already 0 or 1.
    [[ -f "$1" && -x "$1" && ! "$1" =~ $pattern ]]
}
1197 if cd "$LOCALDIR" ; then
1198 for skript
in .
/*; do
1199 if is_valid_plugin
"$skript"; then
1203 # Call some plugins only every X'th second
1204 for skript
in [1-9]*/* ; do
1205 if is_valid_plugin
"$skript"; then
1206 run_cached
"local_${skript//\//\\}" "${skript%/*}" "$skript"
1212 if cd "$PLUGINSDIR"; then
1213 for skript
in .
/*; do
1214 if is_valid_plugin
"$skript"; then
1218 # Call some plugins only every Xth second
1219 for skript
in [1-9]*/* ; do
1220 if is_valid_plugin
"$skript"; then
1221 run_cached
"plugins_${skript//\//\\}" "${skript%/*}" "$skript"
1226 # Agent output snippets created by cronjobs, etc.
1227 if [ -d "$SPOOLDIR" ]
1229 pushd "$SPOOLDIR" > /dev
/null
1234 test "$file" = "*" && break
1235 # output every file in this directory. If the file is prefixed
1236 # with a number, then that number is the maximum age of the
1237 # file in seconds. If the file is older than that, it is ignored.
# Eat away all digits from the front of the filename and
1242 # collect them in the variable maxage.
1243 while [ "${part/#[0-9]/}" != "$part" ]
1245 maxage
=$maxage${part:0:1}
# If there is at least one digit, then we honor that.
1250 if [ "$maxage" ] ; then
1251 mtime
=$
(stat
-c %Y
"$file")
1252 if [ $
((now
- mtime
)) -gt "$maxage" ] ; then