Refactoring: Changed all check parameters starting with an 'o' to the new rulespec...
[check_mk.git] / agents / check_mk_agent.solaris
blob0a2c320995aa4dbf83a277cbef48cb52fdf2f543
1 #!/usr/bin/bash
2 # Check_MK Agent for Solaris
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2013 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
# Remove locale settings to eliminate localized outputs where possible
export LC_ALL=C
unset LANG

# Base directories; each can be overridden from the environment.
export MK_LIBDIR=${MK_LIBDIR:-/usr/lib/check_mk_agent}
export MK_CONFDIR=${MK_CONFDIR:-/etc/check_mk}
export MK_VARDIR=${MK_VARDIR:-/var/lib/check_mk_agent}

# Optionally set a tempdir for all subsequent calls
#export TMPDIR=

# All executables in PLUGINSDIR will simply be executed and their
# output appended to the output of the agent. Plugins define their own
# sections and must output headers with '<<<' and '>>>'
PLUGINSDIR=$MK_LIBDIR/plugins

# All executables in LOCALDIR will be executed and their
# output inserted into the section <<<local>>>. Please refer
# to online documentation for details.
LOCALDIR=$MK_LIBDIR/local

# Debug mode (-d): trace every command. Otherwise close standard
# input (for security reasons) and discard stderr.
if [ "$1" = -d ]; then
    set -xv
else
    exec <&- 2>/dev/null
fi
# Print the age of a file in seconds.
#
# Arguments: $1 - path of the file
# Outputs:   the start time of the perl process minus the modification
#            time of the file (stat field 9), without trailing newline.
#            If $1 is not a regular file perl dies, so nothing is
#            written to stdout and the exit status is non-zero.
function file_age() {
    /usr/bin/perl -e 'if (! -f $ARGV[0]){die "0000000"};$mtime=(stat($ARGV[0]))[9];print ($^T-$mtime);' "$1"
}
# Run one MRPE check and print its result as an <<<mrpe>>> section.
#
# Arguments: $1   - service description
#            $2.. - command line, joined with spaces and run via eval
# Outputs:   the header line '<<<mrpe>>>' followed by one line
#            "DESCR EXITCODE OUTPUT"; newlines inside the command
#            output are replaced by the byte \1 so that the result
#            stays on a single line.
function run_mrpe() {
    local descr=$1
    shift
    local cmdline="$*"
    local output rc

    echo '<<<mrpe>>>'

    # Declaration and assignment are separate so that $? is the exit
    # status of the evaluated command, not of 'local'.
    output=$(eval "$cmdline")
    rc=$?

    printf '%s' "$descr $rc $output" | tr \\n \\1
    echo
}

# Make run_mrpe available to child bash processes.
export -f run_mrpe
# Runs a command asynchronously by use of a cache file.
#
# Usage: run_cached [-s|-m|-ma] NAME MAXAGE COMMAND...
#   -s   prefix the command with "echo '<<<NAME>>>' ; " so that the
#        cached output starts with a section header named NAME
#   -m   run the command through run_mrpe; the cache file name gets
#        the prefix "mrpe_"
#   -ma  like -m, and additionally add " (Cached: AGE/MAXAGEs)" to the
#        second line of the cached output
# NAME selects the cache file below $MK_VARDIR/cache, MAXAGE is the
# maximum age of the cache file in seconds.
#
# Outputs: the content of the cache file if it exists and is not
#          empty, even when it is outdated. If the cache is outdated
#          (or missing) and no "<cachefile>.new" exists, a background
#          bash is started that writes "<cachefile>.new" and renames
#          it to the cache file on success.
function run_cached () {
    local mrpe=0
    local append_age=0
    # Declared unconditionally so that same-named variables of the
    # caller or the environment cannot influence this function.
    local section=
    local USE_CACHEFILE=
    local NAME MAXAGE CMDLINE CACHEFILE AGE
    # TODO: this function is unable to handle multiple args at once
    # for example: -s -m won't work, it is read as single token "-s -m"

    # Note: -s does not consume the name; $2 doubles as NAME below.
    if [ "$1" = -s ] ; then section="echo '<<<$2>>>' ; " ; shift ; fi
    if [ "$1" = -m ] ; then mrpe=1 ; shift ; fi
    if [ "$1" = "-ma" ] ; then mrpe=1 ; append_age=1 ; shift ; fi
    NAME=$1
    MAXAGE=$2
    shift 2
    CMDLINE="$section$*"

    if [ ! -d "$MK_VARDIR/cache" ]; then mkdir -p "$MK_VARDIR/cache" ; fi
    if [ "$mrpe" = 1 ] ; then
        CACHEFILE="$MK_VARDIR/cache/mrpe_$NAME.cache"
    else
        CACHEFILE="$MK_VARDIR/cache/$NAME.cache"
    fi

    # Check if the creation of the cache takes suspiciously long: if the
    # age (modification time) of $CACHEFILE.new is at least twice the
    # MAXAGE, kill the processes using it and remove it.
    if [ -e "$CACHEFILE.new" ] ; then
        AGE=$(file_age "$CACHEFILE.new")
        if [ -n "$AGE" ] && [ "$AGE" -ge $((MAXAGE * 2)) ] ; then
            fuser -k "$CACHEFILE.new" >/dev/null 2>&1
            rm -f "$CACHEFILE.new"
        fi
    fi

    # Check if cache file exists and is recent enough
    if [ -s "$CACHEFILE" ] ; then
        AGE=$(file_age "$CACHEFILE")
        if [ -n "$AGE" ] && [ "$AGE" -le "$MAXAGE" ] ; then USE_CACHEFILE=1 ; fi
        # Output the file in any case, even if it is
        # outdated. The new file will not yet be available
        if [ "$append_age" -eq 1 ] ; then
            # insert the cached-string before the pipe (first -e)
            # or, if no pipe found (-e t) append it (third -e),
            # but only once and on the second line (2) (first line is section header,
            # all further lines are long output)
            sed -e "2s/|/ (Cached: ${AGE}\/${MAXAGE}s)|/" -e t -e "2s/$/ (Cached: ${AGE}\/${MAXAGE}s)/" "$CACHEFILE"
        else
            cat "$CACHEFILE"
        fi
    fi

    # Cache file outdated and new job not yet running? Start it.
    # noclobber makes the redirection fail if $CACHEFILE.new already
    # exists, so a job that lost the race exits immediately.
    if [ -z "$USE_CACHEFILE" ] && [ ! -e "$CACHEFILE.new" ] ; then
        if [ "$mrpe" -eq 1 ] ; then
            echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; run_mrpe $NAME \"$CMDLINE\" && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup /usr/bin/bash >/dev/null 2>&1 &
        else
            echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; $CMDLINE && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup /usr/bin/bash >/dev/null 2>&1 &
        fi
    fi
}
# Agent header section: version, OS, host name and the directories
# this agent works with.
# NOTE(review): SPOOLDIR is not assigned anywhere in the visible part
# of this script - confirm that it is expected from the environment.
printf '%s\n' \
    "<<<check_mk>>>" \
    "Version: 1.6.0i1" \
    "AgentOS: solaris" \
    "Hostname: $(hostname)" \
    "AgentDirectory: $MK_CONFDIR" \
    "DataDirectory: $MK_VARDIR" \
    "SpoolDirectory: $SPOOLDIR" \
    "PluginsDirectory: $PLUGINSDIR" \
    "LocalDirectory: $LOCALDIR"
# Find out what zone we are running in
# Treat all pre-Solaris 10 systems as "global"
#
# Sets: zonename - name of the current zone ("global" if the zonename
#                  command is not available)
#       pszone   - ps option(s) selecting the processes to report
if type zonename &>/dev/null
then
    zonename=$(zonename)
    pszone="-z $zonename"
else
    zonename="global"
    pszone="-A"
fi
# Get statistics about monitored jobs. Below the job directory there
# is a sub directory per user that ran a job. That directory must be
# owned by the user so that a symlink or hardlink attack for reading
# arbitrary files can be avoided.
# All file access below is done via su as the respective user.
# NOTE(review): presumably the stock Solaris su has no -s option -
# confirm that this call works on the target systems.
if pushd "$MK_VARDIR/job" >/dev/null; then
    echo '<<<job>>>'
    for username in *
    do
        if [ -d "$username" ] && cd "$username" ; then
            count=$(su -s "$SHELL" "$username" -c "ls -1 * | wc -l")

            # head prints "==> file <==" headers only for more than one
            # file, so emit the header manually for a single file.
            if [ "$count" -eq "1" ]; then
                filename=$(su -s "$SHELL" "$username" -c "ls -1 *")
                echo "==> $filename <=="
            fi

            su -s "$SHELL" "$username" -c "head -n1000 *"
            cd ..
        fi
    done
    popd > /dev/null
fi
# Filesystem usage for UFS, VXFS, SAMFS, LOFS and TMPFS
echo '<<<df>>>'
for fs in ufs vxfs samfs lofs tmpfs
do
    # Drop the header line and entries whose device is a
    # */lib/*.so.1 path.
    df -l -k -F $fs 2>/dev/null | sed 1d | grep -v "^[^ ]*/lib/[^ ]*\.so\.1 " | \
    while read -r Filesystem kbytes used avail capacity Mountedon
    do
        # Report used + avail as size instead of the kbytes column
        kbytes=$((used + avail))
        echo "$Filesystem $fs $kbytes $used $avail $capacity $Mountedon"
    done
done
# Filesystem usage for ZFS
if type zfs &>/dev/null
then
    echo '<<<zfsget>>>'
    zfs get -Hp name,usedbydataset,avail,mountpoint,type 2>/dev/null | sed 's/usedbydataset/used/g'
    # $? would be the exit status of sed; PIPESTATUS[0] is the one of
    # zfs. If the usedbydataset query failed, fall back to the
    # "referenced" property.
    if [ "${PIPESTATUS[0]}" -ne 0 ] ; then
        zfs get -Hp name,referenced,avail,mountpoint,type | sed 's/referenced/used/g'
    fi
    echo '[df]'
    df -l -k -F zfs 2>/dev/null | sed 1d
fi
# ZFS arc cache
# newer Solaris (>=11.3) do not provide hits and misses via mdb -k,
# so kstat is preferred and mdb is only used if kstat is missing.
echo '<<<zfs_arc_cache>>>'
if type kstat &>/dev/null
then
    # Strip everything up to "arcstats:" and print "name = value"
    kstat -p zfs:0:arcstats | sed -e 's/.*arcstats://g' | awk '{printf "%s = %s\n", $1, $2;}'
elif type mdb &>/dev/null
then
    echo '::arc' | mdb -k
fi
# Processes
echo '<<<ps>>>'
# The default solaris ps command strips the command lines of the processes. But for good process
# matching on the server we really need to whole command line. On linux there are arguments to
# make ps output the whole command line, but on solaris this seems to be missing. We use the ucb
# ps command to get the full command line instead. What a hack.
if [ -x /usr/ucb/ps ]; then
    UCB_PS=$(/usr/ucb/ps -agwwwx)
    # Rewrite each ps line to "(user,vsz,rss,pcpu/etime,pid) args"
    PS=$(ps -o user,vsz,rss,pcpu,etime,pid,args $pszone | \
        sed -e 1d -e 's/ *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) */(\1,\2,\3,\4\/\5,\6) /')
    while read -r LINE; do
        # Cut at the FIRST ") " (longest suffix removal), so that a
        # ") " inside the command line cannot corrupt STATS and PID.
        STATS=${LINE%%) *}
        PID=${STATS##*,}

        # Directly use ps output when the line is short (less than 100
        # characters); no lookup in the ucb ps output is done then.
        if [ ${#LINE} -lt 100 ]; then
            echo "$LINE"
            continue
        fi

        # Look up the command line (fields 5..NF) of this PID in the ucb ps output
        CMD=$(echo "$UCB_PS" | grep "^[ ]*$PID " | head -n1 | \
            awk '{ s = ""; for (i = 5; i <= NF; i++) s = s $i " "; print s }')
        # Only use the ucb ps line when it's not empty (process might already been gone)
        if [ -z "$CMD" ]; then
            echo "$LINE"
        else
            echo "${STATS}) ${CMD}"
        fi
    done <<< "$PS"
else
    ps -o user,vsz,rss,pcpu,args $pszone | \
        sed -e 1d -e 's/ *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) */(\1,\2,\3,\4) /'
fi
# Statgrab
# source: http://www.i-scream.org/libstatgrab/
# binary: http://www.opencsw.org/
if type statgrab &>/dev/null
then
    statgrab_vars="const. cpu. disk. general. mem. page. swap. user."
    statgrab_sections="cpu disk page"

    # Collect net stats in the global zone and in local zones if dlstat is present.
    if [ "$zonename" == "global" ] || type dlstat &>/dev/null
    then
        statgrab_vars="$statgrab_vars net."
        statgrab_sections="$statgrab_sections net"
    fi

    # Keep the output in a variable instead of a file with a
    # predictable name below /tmp.
    statgrab_output=$(statgrab $statgrab_vars | grep -v md)
    for s in $statgrab_sections
    do
        echo "<<<statgrab_$s>>>"
        printf '%s\n' "$statgrab_output" | grep "^$s\." | cut -d. -f2-99 | sed 's/ *= */ /'
    done

    # <<<statgrab_mem>>> info is preferred over <<<solaris_mem>>>
    # since solaris_mem is under suspicion to be buggy.
    echo '<<<statgrab_mem>>>'
    printf '%s\n' "$statgrab_output" | egrep "^(swap|mem)\." | sed 's/ *= */ /'

    unset statgrab_output
fi
# /proc/cpu
# Simulated Output of Linux /proc/cpu
echo '<<<cpu>>>'
# The three load averages from the uptime output
load=$(uptime|sed -e 's;.*average: \([0-9]\{1,\}\.[0-9]\{1,\}\), \([0-9]\{1,\}\.[0-9]\{1,\}\), \([0-9]\{1,\}\.[0-9]\{1,\}\).*;\1 \2 \3;')
# One line per lwp. The header is removed with sed because
# --no-headers is a GNU ps long option. The arithmetic expansion
# strips any padding from the wc output.
nthreads=$(($(ps -AL | sed 1d | wc -l)))
procs=$(($(psrinfo | wc -l)))
echo $load 1/$nthreads $$ $procs
# zpool status (cached for 120 seconds) and zpool list
if [ -x /sbin/zpool ]; then
    run_cached -s zpool_status 120 "/sbin/zpool status -x"

    echo '<<<zpool>>>'
    /sbin/zpool list
fi
# Uptime section: prints the second field of the kstat line(s) that
# contain "boot_time" (kstat statistic unix:::boot_time).
echo '<<<uptime>>>'
btime=$(kstat -p unix:::boot_time 2>&1 | awk '/boot_time/ {print $2}')
echo $btime
# NTP
# Only queried if a process whose command name ends in the word
# "ntpd" is running. The pattern is quoted so that the shell cannot
# expand it to file names of the current directory.
if ps -o comm $pszone | grep -w '.*ntpd' >/dev/null 2>&1
then
    echo '<<<ntp>>>'
    # Drop the two header lines, separate the first character from
    # the rest of the line and show a blank first character as '%'.
    ntpq -np | sed -e 1,2d -e 's/^\(.\)/\1 /' -e 's/^ /%/'
fi
# Memory
# <<<solaris_mem>>> should be used if statgrab is missing and top is available.
# The first executable top found is used; the section header is only
# printed when a top binary exists.
if ! type statgrab &>/dev/null
then
    for top_bin in /usr/bin/top /usr/local/bin/top
    do
        if [ -x "$top_bin" ]; then
            echo "<<<solaris_mem>>>"
            "$top_bin" | grep '^Memory:'
            break
        fi
    done
fi
# Hardware diagnostics: report the exit code of prtdiag, cached for
# 300 seconds.
if type prtdiag >/dev/null 2>&1
then
    # prtdiag does not work in local zones
    if [ "$zonename" == "global" ]
    then
        run_cached -s solaris_prtdiag_status 300 '/usr/sbin/prtdiag 1>/dev/null 2>&1; echo $?'
    fi
fi
# TCP Connection stats
# Skips the first four lines of the netstat output and prints, for
# every distinct value of the 7th column, that value and the number
# of lines carrying it.
echo '<<<tcp_conn_stats>>>'
netstat -n -a -f inet -P tcp \
    | tail +5 \
    | nawk '
        { seen[$7]++ }
        END { for (key in seen) print key, seen[key] }'
# Multipathing (global zone only)
# The output of "mpathadm list LU" is processed in groups of three
# lines: first field of line 1, last field of line 2 and last field
# of line 3 are printed on one line.
if type mpathadm &>/dev/null
then
    if [ "$zonename" == "global" ]
    then
        echo '<<<solaris_multipath>>>'
        mpathadm list LU | nawk '{if(NR%3==1){dev=$1}
                                 if(NR%3==2){tc=$NF}
                                 if(NR%3==0){printf "%s %s %s\n",dev,tc,$NF}}'
    fi
fi
# Fileinfo-Check: put patterns for files into $MK_CONFDIR/fileinfo.cfg
#
# Replace the date variable of the input, e.g. $DATE:%Y%m%d$, by
# the current date. If there's no match just return the input.
#
# Arguments: $1 - file name or pattern
# Outputs:   $1 with the first $DATE:<format>$ replaced by the output
#            of date +<format>
function replace_datevariable()
{
    local file_name="$1"
    # The format ends at the next '$', so a later '$' in the name
    # does not become part of the date format.
    local pattern='(\$DATE:([^$]*)\$)'
    local date_variable format_string current_date replaced

    if [[ ! $file_name =~ $pattern ]]; then
        echo "$file_name"
    else
        date_variable="${BASH_REMATCH[1]}"
        format_string="${BASH_REMATCH[2]}"
        current_date=$(date +"$format_string")
        # Quoted pattern and replacement are taken literally
        replaced=${file_name/"$date_variable"/"$current_date"}
        echo "$replaced"
    fi
}
# Fileinfo-Check: put patterns for files into $MK_CONFDIR/fileinfo.cfg
# (and optionally into files below $MK_CONFDIR/fileinfo.d)
if [ -r "$MK_CONFDIR/fileinfo.cfg" ]; then
    echo '<<<fileinfo:sep(124)>>>'

    # Remember the shell options and make sure globbing is enabled
    old_state=$(set +o)
    set +f

    # let the shell do all the expansion, and pipe all files to perl
    # The glob is outside the quotes so that the files in fileinfo.d
    # are actually expanded.
    cat "$MK_CONFDIR/fileinfo.cfg" "$MK_CONFDIR"/fileinfo.d/* 2>/dev/null | while read -r pattern; do
        case $pattern in
            /*) pattern=$(replace_datevariable "$pattern")
                # $pattern is intentionally unquoted: globbing happens here
                for f in $pattern; do echo "$f"; done
                ;;
        esac
    done | perl -e '
        print time."\n";
        print "[[[header]]]\n";
        print "name|status|size|time\n";
        print "[[[content]]]\n";
        while (<>) {
            chomp $_;
            if (-d $_) { next; }
            if (not -f $_) {
                print "$_|missing\n";
                next;
            }
            # stat returns an empty list on failure. Checking the list
            # avoids acting on a stale errno left by an earlier call.
            my @st = stat($_);
            if (!@st) {
                print "$_|stat failed\n";
            } else {
                # field 7 is the size, field 9 the modification time
                print "$_|ok|$st[7]|$st[9]\n";
            }
        }'
    # Restore the shell options without tracing the restore itself
    set +vx; eval "$old_state"
fi
# Libelle Business Shadow
# Only reported if the trd command is available.
if type trd >/dev/null 2>&1
then
    echo '<<<libelle_business_shadow:sep(58)>>>'
    trd -s
fi
# Displaying Information About Faults or Defects
# If there are no faults the output of this command will be empty.
if type fmadm >/dev/null 2>&1
then
    echo '<<<solaris_fmadm:sep(58)>>>'
    fmadm faulty
fi
# Getting Information About Services Running on Solaris
# We can get a list of all service instances, including disabled
# or incomplete ones by 'svcs -a'
if type svcs > /dev/null 2>&1
then
    echo '<<<solaris_services>>>'
    svcs -a
fi
# MK's Remote Plugin Executor
# Each non-comment, non-empty line of mrpe.cfg has the form
#   DESCR [(key=value[:key=value...])] COMMANDLINE
# Recognized keys: interval (run the check cached with that maximum
# age) and appendage (add the cache age to the cached output).
if test -f "$MK_CONFDIR/mrpe.cfg"
then
    echo '<<<mrpe>>>'
    grep -v '^ *#' "$MK_CONFDIR/mrpe.cfg" | grep -v '^ *$' | \
    while read descr cmdline
    do
        interval=
        args="-m"
        # The parameter group must be at the start of the command
        # line; parentheses later in the command are left alone.
        if [[ $cmdline =~ ^\(([^\)]*)\)[[:space:]](.*) ]]
        then
            parameters=${BASH_REMATCH[1]}
            cmdline=${BASH_REMATCH[2]}

            # split multiple parameter assignments
            for par in $(echo $parameters | tr ":" "\n")
            do
                # split each assignment
                key=$(echo $par | cut -d= -f1)
                value=$(echo $par | cut -d= -f2)

                if [ "$key" = "interval" ] ; then
                    interval=$value
                elif [ "$key" = "appendage" ] ; then
                    args="-ma"
                fi
            done
        fi

        if [ -z "$interval" ]
        then
            run_mrpe "$descr" "$cmdline"
        else
            run_cached "$args" "$descr" "$interval" "$cmdline"
        fi
    done
fi
# Local checks
if cd "$LOCALDIR" 2>/dev/null
then
    echo '<<<local>>>'
    # Run every executable regular file directly in LOCALDIR. A glob
    # is used instead of parsing ls; directories are skipped.
    for skript in *
    do
        if [ -f "$skript" ] && [ -x "$skript" ] ; then
            ./"$skript"
        fi
    done

    # Call some plugins only every X'th second: executables in a sub
    # directory whose name starts with a digit 1-9 are run cached,
    # with the directory name as maximum cache age.
    for skript in [1-9]*/* ; do
        if [ -f "$skript" ] && [ -x "$skript" ] ; then
            # Cache name: path with every '/' replaced by '#'
            cache_name=local_${skript//\//\#}
            run_cached "$cache_name" "${skript%/*}" "$skript"
        fi
    done
fi
# Plugins
if cd "$PLUGINSDIR" 2>/dev/null
then
    # Run every executable regular file directly in PLUGINSDIR. A glob
    # is used instead of parsing ls; directories are skipped.
    for skript in *
    do
        if [ -f "$skript" ] && [ -x "$skript" ] ; then
            ./"$skript"
        fi
    done

    # Call some plugins only every X'th second: executables in a sub
    # directory whose name starts with a digit 1-9 are run cached,
    # with the directory name as maximum cache age.
    for skript in [1-9]*/* ; do
        if [ -f "$skript" ] && [ -x "$skript" ] ; then
            # Cache name: path with every '/' replaced by '#'
            cache_name=plugins_${skript//\//\#}
            run_cached "$cache_name" "${skript%/*}" "$skript"
        fi
    done
fi