2 # Check_MK Agent for Linux
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # Remove locale settings to eliminate localized outputs where possible
31 export MK_LIBDIR
=${MK_LIBDIR:-/usr/lib/check_mk_agent}
32 export MK_CONFDIR
=${MK_CONFDIR:-/etc/check_mk}
33 export MK_VARDIR
=${MK_VARDIR:-/var/lib/check_mk_agent}
35 # Optionally set a tempdir for all subsequent calls
38 # Provide information about the remote host. That helps when data
39 # is being sent only once to each remote host.
40 if [ "$REMOTE_HOST" ] ; then
41 export REMOTE
=$REMOTE_HOST
42 elif [ "$SSH_CLIENT" ] ; then
43 export REMOTE
=${SSH_CLIENT%% *}
46 # The package name gets patched for baked agents to either
47 # "check-mk-agent" or the name set by the "name of agent packages" rule
48 XINETD_SERVICE_NAME
=check_mk
50 # Make sure, locally installed binaries are found
51 PATH
=$PATH:/usr
/local
/bin
53 # All executables in PLUGINSDIR will simply be executed and their
54 # output appended to the output of the agent. Plugins define their own
55 # sections and must output headers with '<<<' and '>>>'
56 PLUGINSDIR
=$MK_LIBDIR/plugins
58 # All executables in LOCALDIR will be executed and their
59 # output inserted into the section <<<local>>>. Please
60 # refer to online documentation for details about local checks.
61 LOCALDIR
=$MK_LIBDIR/local
63 # All files in SPOOLDIR will simply be appended to the agent
64 # output if they are not outdated (see below)
65 SPOOLDIR
=$MK_VARDIR/spool
67 # close standard input (for security reasons) and stderr when not
68 # explicitly in debug mode.
69 # When the node's agent is executed by e.g. a docker node in a container,
70 # then don't close stdin, because the agent is piped through it in this
75 elif [ -z "$MK_FROM_NODE" ]; then
76 exec </dev
/null
2>/dev
/null
79 # Detect whether or not the agent is being executed in a container
81 if [ -f /.dockerenv
]; then
83 elif grep container
=lxc
/proc
/1/environ
>/dev
/null
2>&1; then
84 # Works in lxc environment e.g. on Ubuntu bionic, but does not
85 # seem to work in proxmox (see CMK-1561)
87 elif grep 'lxcfs /proc/cpuinfo fuse.lxcfs' /proc
/mounts
>/dev
/null
2>&1; then
88 # Seems to work in proxmox
92 unset IS_LXC_CONTAINER
95 # Prefer the (relatively) new /usr/bin/timeout from coreutils over
96 # our shipped waitmax. waitmax is statically linked and crashes on
97 # some Ubuntu versions recently.
98 if type timeout
>/dev
/null
2>&1 ; then
105 if [ -f "$MK_CONFDIR/encryption.cfg" ] ; then
106 source "$MK_CONFDIR/encryption.cfg"
109 if [ "$ENCRYPTED" == "yes" ] ; then
110 echo -n "00" # protocol version
111 exec > >(openssl enc
-aes-256-cbc -md md5
-k "$PASSPHRASE" -nosalt)
116 if [ -e "$MK_CONFDIR/real_time_checks.cfg" ]; then
117 .
"$MK_CONFDIR/real_time_checks.cfg"
125 function section_mem
()
127 if [ -z "$IS_DOCKERIZED" ]; then
129 grep -E -v '^Swap:|^Mem:|total:' < /proc
/meminfo
131 echo '<<<docker_container_mem>>>'
132 cat /sys
/fs
/cgroup
/memory
/memory.stat
133 echo -n "usage_in_bytes "
134 cat /sys
/fs
/cgroup
/memory
/memory.usage_in_bytes
135 echo -n "limit_in_bytes "
136 cat /sys
/fs
/cgroup
/memory
/memory.limit_in_bytes
137 grep -F 'MemTotal:' /proc
/meminfo
141 function section_cpu
()
143 if [ "$(uname -m)" = "armv7l" ]; then
144 CPU_REGEX
='^processor'
146 CPU_REGEX
='^CPU|^processor'
148 NUM_CPUS
=$
(grep -c -E $CPU_REGEX < /proc
/cpuinfo
)
150 if [ -z "$IS_DOCKERIZED" ] && [ -z "$IS_LXC_CONTAINER" ]; then
152 echo "$(cat /proc/loadavg) $NUM_CPUS"
153 if [ -f "/proc/sys/kernel/threads-max" ]; then
154 cat /proc
/sys
/kernel
/threads-max
157 if [ -n "$IS_DOCKERIZED" ]; then
158 echo '<<<docker_container_cpu>>>'
160 echo '<<<lxc_container_cpu>>>'
162 grep "^cpu " /proc
/stat
163 echo "num_cpus $NUM_CPUS"
164 cat /sys
/fs
/cgroup
/cpuacct
/cpuacct.stat
168 function section_uptime
()
171 if [ -z "$IS_DOCKERIZED" ]; then
174 echo "$(($(date +%s) - $(stat -c %Z /dev/pts)))"
178 # Print out Partitions / Filesystems. (-P gives non-wrapped POSIXed output)
179 # Heads up: NFS-mounts are generally suppressed to avoid agent hangs.
180 # If hard NFS mounts are configured or you have too large nfs retry/timeout
181 # settings, accessing those mounts from the agent would leave you with
182 # thousands of agent processes and, ultimately, a dead monitored system.
183 # These should generally be monitored on the NFS server, not on the clients.
184 function section_df
()
186 if [ -n "$IS_DOCKERIZED" ]; then
190 # The exclusion list is getting a bit of a problem. -l should hide any remote FS but seems
191 # to be anything but working.
192 local excludefs
="-x smbfs -x cifs -x iso9660 -x udf -x nfsv4 -x nfs -x mvfs -x prl_fs -x squashfs"
193 if [ -n "$IS_LXC_CONTAINER" ]; then
198 df
-PTlk $excludefs |
sed 1d
200 # df inodes information
202 echo '[df_inodes_start]'
203 df
-PTli $excludefs |
sed 1d
204 echo '[df_inodes_end]'
207 function sections_systemd
()
209 if type systemctl
>/dev
/null
2>&1 ; then
210 echo '<<<systemd_units>>>'
211 systemctl
--all --no-pager
215 function run_mrpe
() {
222 PLUGIN
=${cmdline%% *}
223 OUTPUT
=$
(eval "$cmdline")
225 echo -n "(${PLUGIN##*/}) $descr $? $OUTPUT" |
tr \\n
\\1
231 # Runs a command asynchronous by use of a cache file. Usage:
232 # run_cached [-s] NAME MAXAGE
233 # -s creates the section header <<<$NAME>>>
234 # -m mrpe-mode: stores exit code with the cache
235 # -ma mrpe-mode with age: stores exit code with the cache and adds the cache age
236 # NAME is the name of the section (also used as cache file name)
237 # MAXAGE is the maximum cache lifetime in seconds
238 function run_cached
() {
244 # TODO: this function is unable to handle multiple args at once
245 # for example: -s -m won't work, it is read as single token "-s -m"
246 if [ "$1" = -s ] ; then local section
="echo '<<<$2:cached($NOW,$3)>>>' ; " ; shift ; fi
247 if [ "$1" = -m ] ; then local mrpe
=1 ; shift ; fi
248 if [ "$1" = "-ma" ] ; then local mrpe
=1 ; local append_age
=1 ; shift ; fi
252 local CMDLINE
=$section$
*
254 if [ ! -d "$MK_VARDIR/cache" ]; then mkdir
-p "$MK_VARDIR/cache" ; fi
255 if [ "$mrpe" = 1 ] ; then
256 CACHEFILE
="$MK_VARDIR/cache/mrpe_$NAME.cache"
258 CACHEFILE
="$MK_VARDIR/cache/$NAME.cache"
261 # Check if the creation of the cache takes suspiciously long and kill the
262 # process if the age (access time) of $CACHEFILE.new is twice the MAXAGE.
263 # Output the possibly already cached section anyway and start the cache
265 if [ -e "$CACHEFILE.new" ] ; then
267 CF_ATIME
=$
(stat
-c %X
"$CACHEFILE.new")
268 if [ $
((NOW
- CF_ATIME
)) -ge $
((MAXAGE
* 2)) ] ; then
269 # Kill the process still accessing that file in case
270 # it is still running. This avoids overlapping processes!
271 fuser
-k -9 "$CACHEFILE.new" >/dev
/null
2>&1
272 rm -f "$CACHEFILE.new"
277 # Check if cache file exists and is recent enough
278 if [ -s "$CACHEFILE" ] ; then
280 MTIME
=$
(stat
-c %Y
"$CACHEFILE")
283 if [ "$AGE" -le "$MAXAGE" ] ; then local USE_CACHEFILE
=1 ; fi
284 # Output the file in any case, even if it is
285 # outdated. The new file will not yet be available
286 if [ $append_age -eq 1 ] ; then
287 # insert the cached-string before the pipe (first -e)
288 # or, if no pipe found (-e t) append it (third -e),
289 # but only once and on the second line (2!b) (first line is section header,
290 # all further lines are long output)
291 sed -e "2s/|/ (Cached: ${AGE}\/${MAXAGE}s)|/" -e t -e "2s/$/ (Cached: ${AGE}\/${MAXAGE}s)/" < "$CACHEFILE"
293 CACHE_INFO
=":cached($MTIME,$MAXAGE)"
294 # insert the cache info in the section header (^= after '!'),
295 # if none is present (^= before '!')
296 sed -e '/^<<<.*\(:cached(\).*>>>/!s/^<<<\([^>]*\)>>>$/<<<\1'$CACHE_INFO'>>>/' "$CACHEFILE"
300 # Cache file outdated and new job not yet running? Start it
301 if [ -z "$USE_CACHEFILE" ] && [ ! -e "$CACHEFILE.new" ] ; then
302 # When the command fails, the output is thrown away
303 if [ $mrpe -eq 1 ] ; then
304 echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; run_mrpe $NAME \"$CMDLINE\" && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup
/bin
/bash
>/dev
/null
2>&1 &
306 echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; $CMDLINE && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup
/bin
/bash
>/dev
/null
2>&1 &
311 # Make run_cached available for subshells (plugins, local checks, etc.)
314 # Implements Real-Time Check feature of the Check_MK agent which can send
315 # some section data in 1 second resolution. Useful for fast notifications and
316 # detailed graphing (if you configure your RRDs to this resolution).
317 function run_real_time_checks
()
319 PIDFILE
=$MK_VARDIR/real_time_checks.pid
323 if [ "$PASSPHRASE" != "" ] ; then
324 # new mechanism to set the passphrase has priority
325 RTC_SECRET
=$PASSPHRASE
328 if [ "$ENCRYPTED_RT" != "no" ] ; then
336 # terminate when pidfile is gone or other Real-Time Check process started or configured timeout
337 if [ ! -e "$PIDFILE" ] ||
[ "$(<"$PIDFILE")" -ne $$
] ||
[ "$RTC_TIMEOUT" -eq 0 ]; then
341 for SECTION
in $RTC_SECTIONS; do
342 # Be aware of maximum packet size. Maybe we need to check the size of the section
343 # output and do some kind of nicer error handling.
344 # 2 bytes: protocol version, 10 bytes: timestamp, rest: encrypted data
345 # dd is used to concatenate the output of all commands to a single write/block => udp packet
346 { echo -n $PROTOCOL ;
347 date +%s |
tr -d '\n' ;
348 if [ "$ENCRYPTED_RT" != "no" ] ; then
349 export RTC_SECRET
=$RTC_SECRET ; section_
"$SECTION" | openssl enc
-aes-256-cbc -md md5
-pass env
:RTC_SECRET
-nosalt ;
353 } |
dd bs
=9999 iflag
=fullblock
2>/dev
/null
>"/dev/udp/${REMOTE}/${RTC_PORT}"
358 if cd "$PLUGINSDIR" ; then
359 for PLUGIN
in $RTC_PLUGINS; do
360 if [ ! -f $PLUGIN ] ; then
364 # Be aware of maximum packet size. Maybe we need to check the size of the section
365 # output and do some kind of nicer error handling.
366 # 2 bytes: protocol version, 10 bytes: timestamp, rest: encrypted data
367 # dd is used to concatenate the output of all commands to a single write/block => udp packet
368 { echo -n $PROTOCOL ;
369 date +%s |
tr -d '\n' ;
370 if [ "$ENCRYPTED_RT" != "no" ] ; then
371 export RTC_SECRET
=$RTC_SECRET ; .
/$PLUGIN | openssl enc
-aes-256-cbc -md md5
-pass env
:RTC_SECRET
-nosalt ;
375 } |
dd bs
=9999 iflag
=fullblock
2>/dev
/null
>"/dev/udp/${REMOTE}/${RTC_PORT}"
380 RTC_TIMEOUT
=$
((RTC_TIMEOUT-1
))
384 echo "<<<check_mk>>>"
385 echo "Version: 1.6.0i1"
386 echo "AgentOS: linux"
387 echo "Hostname: $(hostname)"
388 echo "AgentDirectory: $MK_CONFDIR"
389 echo "DataDirectory: $MK_VARDIR"
390 echo "SpoolDirectory: $SPOOLDIR"
391 echo "PluginsDirectory: $PLUGINSDIR"
392 echo "LocalDirectory: $LOCALDIR"
394 # If we are called via xinetd, try to find only_from configuration
395 if [ -n "$REMOTE_HOST" ]
398 sed -n '/^service[[:space:]]*'$XINETD_SERVICE_NAME'/,/}/s/^[[:space:]]*only_from[[:space:]]*=[[:space:]]*\(.*\)/\1/p' /etc
/xinetd.d
/* |
head -n1; echo
405 # Filesystem usage for ZFS
406 if type zfs
> /dev
/null
2>&1 ; then
408 zfs get
-t filesystem
,volume
-Hp name
,quota
,used
,avail
,mountpoint
,type 2>/dev
/null
410 df
-PTlk -t zfs |
sed 1d
413 # Check NFS mounts by accessing them with stat -f (System
414 # call statfs()). If this lasts more than 2 seconds we
415 # consider it as hanging. We need waitmax.
416 if type waitmax
>/dev
/null
418 STAT_VERSION
=$
(stat
--version |
head -1 | cut
-d" " -f4)
421 echo '<<<nfsmounts>>>'
422 sed -n '/ nfs4\? /s/[^ ]* \([^ ]*\) .*/\1/p' < /proc
/mounts |
426 if [ "$STAT_VERSION" != "$STAT_BROKE" ]; then
427 waitmax
-s 9 5 stat
-f -c "$MP ok %b %f %a %s" "$MP" || \
428 echo "$MP hanging 0 0 0 0"
430 waitmax
-s 9 5 stat
-f -c "$MP ok %b %f %a %s" "$MP" && \
431 printf '\n'||
echo "$MP hanging 0 0 0 0"
435 echo '<<<cifsmounts>>>'
436 sed -n '/ cifs\? /s/[^ ]* \([^ ]*\) .*/\1/p' < /proc
/mounts |
440 if [ ! -r "$MP" ]; then
441 echo "$MP Permission denied"
442 elif [ "$STAT_VERSION" != "$STAT_BROKE" ]; then
443 waitmax
-s 9 2 stat
-f -c "$MP ok %b %f %a %s" "$MP" || \
444 echo "$MP hanging 0 0 0 0"
446 waitmax
-s 9 2 stat
-f -c "$MP ok %b %f %a %s" "$MP" && \
447 printf '\n'||
echo "$MP hanging 0 0 0 0"
452 # Check mount options. Filesystems may switch to 'ro' in case
455 grep ^
/dev
< /proc
/mounts |
grep -v " squashfs "
457 # processes including username, without kernel processes
459 ps ax
-o user
:32,vsz
,rss
,cputime
,etime
,pid
,command --columns 10000 |
sed -e 1d
-e 's/ *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) */(\1,\2,\3,\4\/\5,\6) /'
464 # Load and number of processes
470 # New variant: Information about speed and state in one section
471 if type ip
> /dev
/null
474 echo "[start_iplink]"
479 echo '<<<lnx_if:sep(58)>>>'
480 sed 1,2d
/proc
/net
/dev
481 if type ethtool
> /dev
/null
483 sed -e 1,2d
/proc
/net
/dev | cut
-d':' -f1 |
sort |
while read eth
; do
485 ethtool
"$eth" |
grep -E '(Speed|Duplex|Link detected|Auto-negotiation):'
486 echo -e "\tAddress: $(cat "/sys
/class
/net
/$eth/address
")\n"
491 # Current state of bonding interfaces
492 if [ -e /proc
/net
/bonding
] ; then
493 echo '<<<lnx_bonding:sep(58)>>>'
494 pushd /proc
/net
/bonding
> /dev
/null
499 # Same for Open vSwitch bonding
500 if type ovs-appctl
> /dev
/null
; then
501 BONDS
=$
(ovs-appctl bond
/list
)
502 COL
=$
(echo "$BONDS" |
awk '{for(i=1;i<=NF;i++) {if($i == "bond") printf("%d", i)} exit 0}')
503 echo '<<<ovs_bonding:sep(58)>>>'
504 for bond
in $
(echo "$BONDS" |
sed -e 1d | cut
-f"${COL}") ; do
506 ovs-appctl bond
/show
"$bond"
511 # Number of TCP connections in the various states
512 if type waitmax
>/dev
/null
; then
513 echo '<<<tcp_conn_stats>>>'
514 THIS
=$
(waitmax
5 cat /proc
/net
/tcp
/proc
/net
/tcp6
2>/dev
/null |
awk ' /:/ { c[$4]++; } END { for (x in c) { print x, c[x]; } }')
515 if [ $?
== 0 ] ; then
517 elif type ss
> /dev
/null
; then
518 ss
-ant |
grep -v ^State |
awk ' /:/ { c[$1]++; } END { for (x in c) { print x, c[x]; } }' |
sed -e 's/^ESTAB/01/g;s/^SYN-SENT/02/g;s/^SYN-RECV/03/g;s/^FIN-WAIT-1/04/g;s/^FIN-WAIT-2/05/g;s/^TIME-WAIT/06/g;s/^CLOSED/07/g;s/^CLOSE-WAIT/08/g;s/^LAST-ACK/09/g;s/^LISTEN/0A/g;s/^CLOSING/0B/g;'
523 if type multipath
>/dev
/null
; then
524 if [ -f /etc
/multipath.conf
] ; then
525 echo '<<<multipath>>>'
530 # Performance counters for disks
531 if [ -z "$IS_DOCKERIZED" ]; then
532 echo '<<<diskstat>>>'
534 grep -E ' (x?[shv]d[a-z]*[0-9]*|cciss/c[0-9]+d[0-9]+|emcpower[a-z]+|dm-[0-9]+|VxVM.*|mmcblk.*|dasd[a-z]*|bcache[0-9]+|nvme[0-9]+n[0-9]+) ' < /proc
/diskstats
535 if type dmsetup
>/dev
/null
; then
536 echo '[dmsetup_info]'
537 dmsetup info
-c --noheadings --separator ' ' -o name
,devno
,vg_name
,lv_name
539 if [ -d /dev
/vx
/dsk
] ; then
541 stat
-c "%t %T %n" /dev
/vx
/dsk
/*/*
544 echo '<<<docker_container_diskstat>>>'
547 for F
in io_service_bytes io_serviced
; do
549 cat "/sys/fs/cgroup/blkio/blkio.throttle.$F"
552 for F
in /sys
/block
/*; do
553 echo -n "${F##*/} " ;
559 # Performance counters of the kernel
560 if [ -z "$IS_DOCKERIZED" ] && [ -z "$IS_LXC_CONTAINER" ]; then
563 cat /proc
/vmstat
/proc
/stat
566 # Hardware sensors via IPMI (need ipmitool)
567 if type ipmitool
> /dev
/null
569 run_cached
-s "ipmi:sep(124)" 300 "waitmax 300 ipmitool sensor list | grep -v 'command failed' | egrep -v '^[^ ]+ na ' | grep -v ' discrete '"
570 # readable discrete sensor states
571 run_cached
-s "ipmi_discrete:sep(124)" 300 "waitmax 300 ipmitool sdr elist compact"
575 # IPMI data via ipmi-sensors (of freeipmi). Please make sure, that if you
576 # have installed freeipmi that IPMI is really supported by your hardware.
577 if (type ipmi-sensors
&& ls /dev
/ipmi
*) &>/dev
/null
579 echo '<<<ipmi_sensors>>>'
580 # Newer ipmi-sensors versions have a new output format; the legacy format can be used
581 if ipmi-sensors
--help |
grep -q legacy-output
; then
582 IPMI_FORMAT
="--legacy-output"
586 if ipmi-sensors
--help |
grep -q " \-\-groups"; then
592 # At least with ipmi-sensors 0.7.16 this group is Power_Unit instead of "Power Unit"
593 run_cached
-s ipmi_sensors
300 "for class in Temperature Power_Unit Fan
595 ipmi-sensors $IPMI_FORMAT --sdr-cache-directory /var/cache $IPMI_GROUP_OPT \"\$class\" | sed -e 's/ /_/g' -e 's/:_\?/ /g' -e 's@ \([^(]*\)_(\([^)]*\))@ \2_\1@'
596 # In case of a timeout immediately leave loop.
597 if [ $? = 255 ] ; then break ; fi
601 # RAID status of Linux software RAID
605 # RAID status of Linux RAID via device mapper
606 if type dmraid
>/dev
/null
&& DMSTATUS
=$
(waitmax
3 dmraid
-r)
610 # Output name and status
611 waitmax
20 dmraid
-s |
grep -e ^name
-e ^status
613 # Output disk names of the RAID disks
614 DISKS
=$
(echo "$DMSTATUS" | cut
-f1 -d":")
616 for disk
in $DISKS ; do
617 device
=$
(cat /sys
/block
/"$(basename "$disk")"/device
/model
)
618 status
=$
(echo "$DMSTATUS" |
grep "^${disk}")
619 echo "${status} Model: ${device}"
623 # RAID status of LSI controllers via cfggen
624 if type cfggen
> /dev
/null
; then
626 cfggen
0 DISPLAY |
egrep '(Target ID|State|Volume ID|Status of volume)[[:space:]]*:' |
sed -e 's/ *//g' -e 's/:/ /'
629 # RAID status of LSI MegaRAID controller via MegaCli. You can download that tool from:
630 # http://www.lsi.com/downloads/Public/MegaRAID%20Common%20Files/8.02.16_MegaCLI.zip
631 if type MegaCli
>/dev
/null
; then
632 MegaCli_bin
="MegaCli"
633 elif type MegaCli64
>/dev
/null
; then
634 MegaCli_bin
="MegaCli64"
635 elif type megacli
>/dev
/null
; then
636 MegaCli_bin
="megacli"
637 elif type storcli
>/dev
/null
; then
638 MegaCli_bin
="storcli"
639 elif type storcli64
>/dev
/null
; then
640 MegaCli_bin
="storcli64"
642 MegaCli_bin
="unknown"
645 if [ "$MegaCli_bin" != "unknown" ]; then
646 echo '<<<megaraid_pdisks>>>'
647 for part
in $
($MegaCli_bin -EncInfo -aALL -NoLog < /dev
/null \
648 |
sed -rn 's/:/ /g; s/[[:space:]]+/ /g; s/^ //; s/ $//; s/Number of enclosures on adapter ([0-9]+).*/adapter \1/g; /^(Enclosure|Device ID|adapter) [0-9]+$/ p'); do
649 [ "$part" = adapter
] && echo ""
650 [ "$part" = 'Enclosure' ] && echo -ne "\ndev2enc"
654 $MegaCli_bin -PDList -aALL -NoLog < /dev
/null |
egrep 'Enclosure|Raw Size|Slot Number|Device Id|Firmware state|Inquiry|Adapter'
655 echo '<<<megaraid_ldisks>>>'
656 $MegaCli_bin -LDInfo -Lall -aALL -NoLog < /dev
/null |
egrep 'Size|State|Number|Adapter|Virtual'
657 echo '<<<megaraid_bbu>>>'
658 $MegaCli_bin -AdpBbuCmd -GetBbuStatus -aALL -NoLog < /dev
/null |
grep -v Exit
661 # RAID status of 3WARE disk controller (by Radoslaw Bak)
662 if type tw_cli
> /dev
/null
; then
663 for C
in $
(tw_cli show |
awk 'NR < 4 { next } { print $1 }'); do
664 echo '<<<3ware_info>>>'
665 tw_cli
"/$C" show all |
egrep 'Model =|Firmware|Serial'
666 echo '<<<3ware_disks>>>'
667 tw_cli
"/$C" show drivestatus |
egrep 'p[0-9]' |
sed "s/^/$C\//"
668 echo '<<<3ware_units>>>'
669 tw_cli
"/$C" show unitstatus |
egrep 'u[0-9]' |
sed "s/^/$C\//"
673 # RAID controllers from areca (Taiwan)
674 # cli64 can be found at ftp://ftp.areca.com.tw/RaidCards/AP_Drivers/Linux/CLI/
675 if type cli64
>/dev
/null
; then
676 run_cached
-s arc_raid_status
300 "cli64 rsf info | tail -n +3 | head -n -2"
679 # VirtualBox Guests. Section must always be output. Otherwise the
680 # check would not be executed in case no guest additions are installed.
681 # And that is something the check wants to detect
682 echo '<<<vbox_guest>>>'
683 if type VBoxControl
>/dev
/null
2>&1 && lsmod |
grep vboxguest
>/dev
/null
2>&1; then
684 VBoxControl
-nologo guestproperty enumerate | cut
-d, -f1,2
685 [ "${PIPESTATUS[0]}" = 0 ] ||
echo "ERROR"
688 # OpenVPN Clients. Currently we assume that the configuration is in
689 # /etc/openvpn. We might find a safer way to find the configuration later.
690 if [ -e /etc
/openvpn
/openvpn-status.log
] ; then
691 echo '<<<openvpn_clients:sep(44)>>>'
692 sed -n -e '/CLIENT LIST/,/ROUTING TABLE/p' < /etc
/openvpn
/openvpn-status.log |
sed -e 1,3d
-e '$d'
695 # Time synchronization with NTP
696 if type ntpq
> /dev
/null
2>&1 ; then
697 # remove heading, make first column space separated
698 run_cached
-s ntp
30 "waitmax 5 ntpq -np | sed -e 1,2d -e 's/^\(.\)/\1 /' -e 's/^ /%/' || true"
701 # Time synchronization with Chrony
702 if type chronyc
> /dev
/null
2>&1 ; then
703 # Force successful exit code. Otherwise section will be missing if daemon not running
705 # The "| cat" has been added for some kind of regression in RedHat 7.5. The
706 # SELinux rules shipped with that release were denying the chronyc call
708 run_cached
-s chrony
30 "waitmax 5 chronyc -n tracking | cat || true"
711 if type nvidia-settings
>/dev
/null
&& [ -S /tmp
/.X11-unix
/X0
]
714 for var
in GPUErrors GPUCoreTemp
716 DISPLAY
=:0 waitmax
2 nvidia-settings
-t -q $var |
sed "s/^/$var: /"
720 if [ -z "$IS_DOCKERIZED" ] && [ -z "$IS_LXC_CONTAINER" ] && [ -e /proc
/drbd
]; then
725 # Heartbeat monitoring
726 # Different handling for heartbeat clusters with and without CRM
727 # for the resource state
728 if [ -S /var
/run
/heartbeat
/crm
/cib_ro
-o -S /var
/run
/crm
/cib_ro
] || pgrep crmd
> /dev
/null
2>&1; then
729 echo '<<<heartbeat_crm>>>'
730 TZ
=UTC crm_mon
-1 -r |
grep -v ^$ |
sed 's/^ //; /^\sResource Group:/,$ s/^\s//; s/^\s/_/g'
732 if type cl_status
> /dev
/null
2>&1; then
733 echo '<<<heartbeat_rscstatus>>>'
736 echo '<<<heartbeat_nodes>>>'
737 for NODE
in $
(cl_status listnodes
); do
738 if [ "$NODE" != "$(echo "$HOSTNAME" | tr '[:upper:]' '[:lower:]')" ]; then
739 STATUS
=$
(cl_status nodestatus
"$NODE")
740 echo -n "$NODE $STATUS"
741 for LINK
in $
(cl_status listhblinks
"$NODE" 2>/dev
/null
); do
742 echo -n " $LINK $(cl_status hblinkstatus "$NODE" "$LINK")"
749 # Postfix mailqueue monitoring
750 # Determine the number of mails and their size in several postfix mail queues
751 function read_postfix_queue_dirs
{
753 if [ -n "$postfix_queue_dir" ]; then
754 echo '<<<postfix_mailq>>>'
755 if [ ! -z "$2" ]; then
758 for queue
in deferred active
760 count
=$
(find "${postfix_queue_dir}/$queue" -type f |
wc -l)
761 size
=$
(du
-s "${postfix_queue_dir}/$queue" |
awk '{print $1 }')
762 if [ -z "$size" ]; then
765 if [ -z "$count" ]; then
766 echo "Mail queue is empty"
768 echo "QUEUE_${queue} $size $count"
774 # Postfix mailqueue monitoring
775 # Determine the number of mails and their size in several postfix mail queues
776 if type postconf
>/dev
/null
; then
777 # Check if multi_instance_directories exists in main.cf and is not empty
778 # always takes the last entry, multiple entries possible
779 multi_instances_dirs
=$
(postconf
-c /etc
/postfix
2>/dev
/null |
grep ^multi_instance_directories |
sed 's/.*=[[:space:]]*//g')
780 if [ ! -z "$multi_instances_dirs" ]; then
781 for queue_dir
in $multi_instances_dirs
783 if [ -n "$queue_dir" ]; then
784 postfix_queue_dir
=$
(postconf
-c "$queue_dir" 2>/dev
/null |
grep ^queue_directory |
sed 's/.*=[[:space:]]*//g')
785 read_postfix_queue_dirs
"$postfix_queue_dir" "$queue_dir"
790 postfix_queue_dir
=$
(postconf
-h queue_directory
2>/dev
/null
)
791 read_postfix_queue_dirs
"$postfix_queue_dir"
794 elif [ -x /usr
/sbin
/ssmtp
] ; then
795 echo '<<<postfix_mailq>>>'
796 mailq
2>&1 |
sed 's/^[^:]*: \(.*\)/\1/' |
tail -n 6
800 # Postfix status monitoring. Can handle multiple instances.
801 if type postfix
>/dev
/null
; then
802 echo "<<<postfix_mailq_status:sep(58)>>>"
803 for i
in /var
/spool
/postfix
*/; do
804 if [ -e "$i/pid/master.pid" ]; then
805 if [ -r "$i/pid/master.pid" ]; then
806 postfix_pid
=$
(sed 's/ //g' < "$i/pid/master.pid") # handle possible spaces in output
807 if readlink
-- "/proc/${postfix_pid}/exe" |
grep -q ".*postfix/\(s\?bin/\)\?master.*"; then
808 echo "$i:the Postfix mail system is running:PID:$postfix_pid" |
sed 's/\/var\/spool\///g'
810 echo "$i:PID file exists but instance is not running!" |
sed 's/\/var\/spool\///g'
813 echo "$i:PID file exists but is not readable"
816 echo "$i:the Postfix mail system is not running" |
sed 's/\/var\/spool\///g'
821 # Check status of qmail mailqueue
822 if type qmail-qstat
>/dev
/null
824 echo "<<<qmail_stats>>>"
828 # Nullmailer queue monitoring
829 if type nullmailer-send
>/dev
/null
&& [ -d /var
/spool
/nullmailer
/queue
]
831 echo '<<<nullmailer_mailq>>>'
832 COUNT
=$
(find /var
/spool
/nullmailer
/queue
-type f |
wc -l)
833 SIZE
=$
(du
-s /var
/spool
/nullmailer
/queue |
awk '{print $1 }')
837 # Check status of OMD sites and Check_MK Notification spooler
838 if type omd
>/dev
/null
840 run_cached
-s omd_status
60 "omd status --bare --auto || true"
841 echo '<<<mknotifyd:sep(0)>>>'
842 for statefile
in /omd
/sites
/*/var
/log
/mknotifyd.state
; do
843 if [ -e "$statefile" ] ; then
844 site
=${statefile%/var/log*}
845 site
=${site#/omd/sites/}
847 grep -v '^#' < "$statefile"
851 echo '<<<omd_apache:sep(124)>>>'
852 for statsfile
in /omd
/sites
/*/var
/log
/apache
/stats
; do
853 if [ -e "$statsfile" ] ; then
854 site
=${statsfile%/var/log*}
855 site
=${site#/omd/sites/}
859 # prevent next section to fail caused by a missing newline at the end of the statsfile
866 # Welcome the ZFS check on Linux
867 # We do not endorse running ZFS on Linux if your vendor doesn't support it ;)
869 if type zpool
>/dev
/null
; then
870 echo "<<<zpool_status>>>"
877 # Veritas Cluster Server
878 # Software is always installed in /opt/VRTSvcs.
879 # Secure mode must be off to allow root to execute commands
880 if [ -x /opt
/VRTSvcs
/bin
/haclus
]
882 echo "<<<veritas_vcs>>>"
883 vcshost
=$
(hostname | cut
-d.
-f1)
884 waitmax
-s 9 2 /opt
/VRTSvcs
/bin
/haclus
-display -localclus |
grep -e ClusterName
-e ClusState
885 waitmax
-s 9 2 /opt
/VRTSvcs
/bin
/hasys
-display -attribute SysState
886 waitmax
-s 9 2 /opt
/VRTSvcs
/bin
/hagrp
-display -sys "$vcshost" -attribute State
-localclus
887 waitmax
-s 9 2 /opt
/VRTSvcs
/bin
/hares
-display -sys "$vcshost" -attribute State
-localclus
888 waitmax
-s 9 2 /opt
/VRTSvcs
/bin
/hagrp
-display -attribute TFrozen
-attribute Frozen
892 # Fileinfo-Check: put patterns for files into /etc/check_mk/fileinfo.cfg
893 function replace_datevariable
()
895 # Replace the date variable of the input, e.g. $DATE:%Y%m%d$, by
896 # the current date. If there's no match just return the input.
898 # shellcheck disable=SC2016
899 local pattern
='(\$DATE:(.*)\$)'
901 if [[ ! $file_name =~
$pattern ]]; then
904 date_variable
="${BASH_REMATCH[1]}"
905 format_string
="${BASH_REMATCH[2]}"
906 echo "${file_name/$date_variable/$(date +"$format_string")}"
910 if [ -r "$MK_CONFDIR/fileinfo.cfg" ]; then
911 echo '<<<fileinfo:sep(124)>>>'
916 # let the shell do all the expansion, and pipe all files to perl
917 (cat "$MK_CONFDIR/fileinfo.cfg" "$MK_CONFDIR/fileinfo.d/*" 2>/dev
/null
) |
while read -r pattern
; do
919 /*) pattern
=$
(replace_datevariable
"$pattern")
920 for f
in $pattern; do echo $f; done
925 print "[[[header]]]\n";
926 print "name|status|size|time\n";
927 print "[[[content]]]\n";
932 print "$_|missing\n";
935 ($device, $inode, $mode, $nlink, $uid, $gid, $rdev, $size,
936 $atime, $mtime, $ctime, $blksize, $blocks) = stat($_);
938 print "$_|stat failed\n";
940 print "$_|ok|$size|$mtime\n";
943 set +vx
; eval "$old_state"
947 # Get stats about OMD monitoring cores running on this machine.
948 # Since cd is a shell builtin the check does not affect the performance
949 # on non-OMD machines.
952 echo '<<<livestatus_status:sep(59)>>>'
955 if [ -S "/omd/sites/$site/tmp/run/live" ] ; then
957 echo -e "GET status" | \
958 waitmax
3 "/omd/sites/$site/bin/unixcat" "/omd/sites/$site/tmp/run/live"
962 echo '<<<livestatus_ssl_certs:sep(124)>>>'
966 for PEM_PATH
in "/omd/sites/$site/etc/ssl/ca.pem" "/omd/sites/$site/etc/ssl/sites/$site.pem"; do
967 if [ -f "$PEM_PATH" ]; then
968 CERT_DATE
=$
(openssl x509
-enddate -noout -in "$PEM_PATH")
969 CERT_DATE
=${CERT_DATE/notAfter=/}
970 echo "$PEM_PATH|$(date --date="$CERT_DATE" --utc +%s)"
975 echo '<<<mkeventd_status:sep(0)>>>'
978 if [ -S "/omd/sites/$site/tmp/run/mkeventd/status" ] ; then
980 echo -e "GET status\nOutputFormat: json" \
981 | waitmax
3 "/omd/sites/$site/bin/unixcat" "/omd/sites/$site/tmp/run/mkeventd/status"
986 # Collect states of configured Check_MK site backup jobs
987 if ls /omd
/sites
/*/var
/check_mk
/backup
/*.state
>/dev
/null
2>&1; then
988 echo "<<<mkbackup>>>"
989 for F
in /omd
/sites
/*/var
/check_mk
/backup
/*.state
; do
993 JOB_IDENT
=${F%.state}
994 JOB_IDENT
=${JOB_IDENT##*/}
996 if [ "$JOB_IDENT" != "restore" ]; then
997 echo "[[[site:$SITE:$JOB_IDENT]]]"
1004 # Collect states of configured CMA backup jobs
1005 if type mkbackup
>/dev
/null
&& ls /var
/lib
/mkbackup
/*.state
>/dev
/null
2>&1; then
1006 echo "<<<mkbackup>>>"
1007 for F
in /var
/lib
/mkbackup
/*.state
; do
1008 JOB_IDENT
=${F%.state}
1009 JOB_IDENT
=${JOB_IDENT##*/}
1011 if [ "$JOB_IDENT" != "restore" ]; then
1012 echo "[[[system:$JOB_IDENT]]]"
1019 # Get statistics about monitored jobs. Below the job directory there
1020 # is a sub directory per user that ran a job. That directory must be
1021 # owned by the user so that a symlink or hardlink attack for reading
1022 # arbitrary files can be avoided.
1023 if pushd "$MK_VARDIR/job" >/dev
/null
; then
1027 if [ -d "$username" ] && cd "$username" ; then
1028 if [ $EUID -eq 0 ]; then
1029 su
-s "$SHELL" "$username" -c "head -n -0 -v *"
1039 # Gather thermal information provided e.g. by acpi
1040 # At the moment only supporting thermal sensors
1041 if [ -z "$IS_DOCKERIZED" ] && [ -z "$IS_LXC_CONTAINER" ] && ls /sys
/class
/thermal
/thermal_zone
* >/dev
/null
2>&1; then
1042 echo '<<<lnx_thermal:sep(124)>>>'
1043 for F
in /sys
/class
/thermal
/thermal_zone
*; do
1045 if [ ! -e "$F/mode" ] ; then line
="${line}|-" ; else line
="${line}|$(cat "$F"/mode)"; fi
1046 line
="${line}|$(cat "$F"/{type,temp} | tr \\n "|
")"
1047 for G
in $
(ls "$F"/trip_point_
*_
{temp
,type}); do
1048 line
="${line}$(< "$G" tr \\n "|
")"
1054 # Libelle Business Shadow
1055 if type trd
>/dev
/null
; then
1056 echo "<<<libelle_business_shadow:sep(58)>>>"
1060 # HTTP Accelerator Statistics
1061 if type varnishstat
>/dev
/null
; then
1062 echo "<<<varnish>>>"
1067 if type pvecm
> /dev
/null
2>&1 ; then
1068 echo "<<<pvecm_status:sep(58)>>>"
1070 echo "<<<pvecm_nodes>>>"
1074 for HAPROXY_SOCK
in /run
/haproxy
/admin.sock
/var
/lib
/haproxy
/stats
; do
1075 if [ -r "$HAPROXY_SOCK" ] && type socat
>/dev
/null
2>&1; then
1076 echo "<<<haproxy:sep(44)>>>"
1077 echo "show stat" | socat
- "UNIX-CONNECT:$HAPROXY_SOCK"
1081 # Start new liveupdate process in background on each agent execution. Starting
1082 # a new live update process will terminate the old one automatically after
1084 if [ -e "$MK_CONFDIR/real_time_checks.cfg" ]; then
1085 if [ -z "$REMOTE" ]; then
1086 echo "ERROR: \$REMOTE not specified. Not starting Real-Time Checks." >&2
1087 elif ! type openssl
>/dev
/null
; then
1088 echo "ERROR: openssl command is missing. Not starting Real-Time Checks." >&2
1090 run_real_time_checks
>/dev
/null
&
1094 # MK's Remote Plugin Executor
1095 if [ -e "$MK_CONFDIR/mrpe.cfg" ]
1097 grep -Ev '^[[:space:]]*($|#)' "$MK_CONFDIR/mrpe.cfg" | \
1098 while read descr cmdline
1102 # NOTE: Due to an escaping-related bug in some old bash versions
1103 # (3.2.x), we have to use an intermediate variable for the pattern.
1104 pattern
='\(([^\)]*)\)[[:space:]](.*)'
1105 if [[ $cmdline =~
$pattern ]]
1107 parameters
=${BASH_REMATCH[1]}
1108 cmdline
=${BASH_REMATCH[2]}
1110 # split multiple parameter assignments
1111 for par
in $
(echo "$parameters" |
tr ":" "\n")
1113 # split each assignment
1114 key
=$
(echo "$par" | cut
-d= -f1)
1115 value
=$
(echo "$par" | cut
-d= -f2)
1117 if [ "$key" = "interval" ] ; then
1119 elif [ "$key" = "appendage" ] ; then
1125 if [ -z "$interval" ]
1127 run_mrpe
"$descr" "$cmdline"
1129 run_cached
"$args" "$descr" "$interval" "$cmdline"
1134 # MK's runas Executor
1135 if [ -e "$MK_CONFDIR/runas.cfg" ]
1137 grep -Ev '^[[:space:]]*($|#)' "$MK_CONFDIR/runas.cfg" | \
1138 while read type user include
1140 if [ -d "$include" -o \
( "$type" == "mrpe" -a -f "$include" \
) ] ; then
1142 if [ "$user" != "-" ] ; then
1143 PREFIX
="su $user -c "
1147 if [ "$type" == "mrpe" ] ; then
1148 grep -Ev '^[[:space:]]*($|#)' "$include" | \
1149 while read descr cmdline
1152 # NOTE: Due to an escaping-related bug in some old bash
1153 # versions (3.2.x), we have to use an intermediate variable
1155 pattern
='\(([^\)]*)\)[[:space:]](.*)'
1156 if [[ $cmdline =~
$pattern ]]
1158 parameters
=${BASH_REMATCH[1]}
1159 cmdline
=${BASH_REMATCH[2]}
1161 # split multiple parameter assignments
1162 for par
in $
(echo "$parameters" |
tr ":" "\n")
1164 # split each assignment
1165 IFS
='=' read key value
<<< $par
1166 if [ "$key" = "interval" ]
1169 # no other parameters supported currently
1174 if [ -n "$PREFIX" ] ; then
1175 cmdline
="$PREFIX\'$cmdline\'"
1177 if [ -z "$interval" ]
1179 run_mrpe
"$descr" "$cmdline"
1181 run_cached
-m "$descr" "$interval" "$cmdline"
1185 # local and plugin includes
1186 elif [ "$type" == "local" -o "$type" == "plugin" ] ; then
1187 if [ "$type" == "local" ] ; then
1191 find "$include" -executable -type f | \
1194 if [ -n "$PREFIX" ] ; then
1195 cmdline
="$PREFIX\"$filename\""
1207 function is_valid_plugin
() {
1208 # NOTE: Due to an escaping-related bug in some old bash versions
1209 # (3.2.x), we have to use an intermediate variable for the pattern.
1210 pattern
='\.dpkg-(new|old|temp)$'
1211 #TODO Maybe we should change this mechanism
1212 # shellcheck disable=SC2015
1213 [[ -f "$1" && -x "$1" && ! "$1" =~
$pattern ]] && true || false
1218 if cd "$LOCALDIR" ; then
1219 for skript
in .
/*; do
1220 if is_valid_plugin
"$skript"; then
1224 # Call some plugins only every X seconds
1225 for skript
in [1-9]*/* ; do
1226 if is_valid_plugin
"$skript"; then
1227 run_cached
"local_${skript//\//\\}" "${skript%/*}" "$skript"
1233 if cd "$PLUGINSDIR"; then
1234 for skript
in .
/*; do
1235 if is_valid_plugin
"$skript"; then
1239 # Call some plugins only every X seconds
1240 for skript
in [1-9]*/* ; do
1241 if is_valid_plugin
"$skript"; then
1242 run_cached
"plugins_${skript//\//\\}" "${skript%/*}" "$skript"
1247 # Agent output snippets created by cronjobs, etc.
1248 if [ -d "$SPOOLDIR" ]
1250 pushd "$SPOOLDIR" > /dev
/null
1255 test "$file" = "*" && break
1256 # output every file in this directory. If the file is prefixed
1257 # with a number, then that number is the maximum age of the
1258 # file in seconds. If the file is older than that, it is ignored.
1262 # Eat away all digits from the front of the filename and
1263 # collect them in the variable maxage.
1264 while [ "${part/#[0-9]/}" != "$part" ]
1266 maxage
=$maxage${part:0:1}
1270 # If there is at least one digit, then we honor that.
1271 if [ "$maxage" ] ; then
1272 mtime
=$
(stat
-c %Y
"$file")
1273 if [ $
((now
- mtime
)) -gt "$maxage" ] ; then