Cleanup config.nodes_of
[check_mk.git] / agents / check_mk_agent.solaris
blob74ebf51f2fd2c0c5acba2060279eb8e6caa1b76e
1 #!/usr/bin/bash
2 # Check_MK Agent for Solaris
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2013 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
# Remove locale settings to eliminate localized outputs where possible
export LC_ALL=C
unset LANG

# Base directories of the agent. Each one can be overridden from the
# environment; otherwise the default shown here is used.
export MK_LIBDIR=${MK_LIBDIR:-/usr/lib/check_mk_agent}
export MK_CONFDIR=${MK_CONFDIR:-/etc/check_mk}
export MK_VARDIR=${MK_VARDIR:-/var/lib/check_mk_agent}

# Optionally set a tempdir for all subsequent calls
#export TMPDIR=

# Provide information about the remote host. That helps when data
# is being sent only once to each remote host.
if [ "$REMOTE_HOST" ] ; then
    export REMOTE=$REMOTE_HOST
elif [ "$SSH_CLIENT" ] ; then
    # SSH_CLIENT is "<address> <port> <port>"; keep only the address
    export REMOTE=${SSH_CLIENT%% *}
fi

# All executables in PLUGINSDIR will simply be executed and their
# output appended to the output of the agent. Plugins define their own
# sections and must output headers with '<<<' and '>>>'
PLUGINSDIR=$MK_LIBDIR/plugins

# All executables in LOCALDIR will be executed and their
# output inserted into the section <<<local>>>. Please refer
# to online documentation for details.
LOCALDIR=$MK_LIBDIR/local

# Close standard input (for security reasons) and discard stderr.
# When called with -d, enable shell tracing instead and keep both open.
if [ "$1" = -d ]
then
    set -xv
else
    exec <&- 2>/dev/null
fi
# Print the age of a file in seconds: start time of the perl process ($^T)
# minus the modification time of the file.
# Arguments: $1 - path of the file
# Outputs:   the age on stdout. If $1 is not a regular file perl dies,
#            so nothing is written to stdout and the exit status is non-zero.
function file_age() {
    /usr/bin/perl -e 'if (! -f $ARGV[0]){die "0000000"};$mtime=(stat($ARGV[0]))[9];print ($^T-$mtime);' "$1"
}
# Run a single MRPE check and print its result as an <<<mrpe>>> section.
# Arguments: $1   - service description
#            $2.. - command line; it is joined with spaces and run via eval
# Outputs:   the section header, followed by one line of the form
#            "<description> <exit code> <output>". Newlines inside the
#            command output are replaced by the byte \1 so that the
#            result stays on a single line.
function run_mrpe() {
    local descr=$1
    shift
    local cmdline="$*"
    local output rc

    echo '<<<mrpe>>>'

    # Declared separately above so that $? really is the status of the command
    output=$(eval "$cmdline")
    rc=$?

    printf '%s' "$descr $rc $output" | tr \\n \\1
    echo
}

# run_cached starts a separate bash that calls run_mrpe, hence the export
export -f run_mrpe
# Runs a command asynchronously by use of a cache file.
#
# Usage: run_cached [-s | -m | -ma] NAME MAXAGE COMMAND
#   -s   NAME is also a section name: the command is prefixed with an
#        echo of the header '<<<NAME>>>'
#   -m   COMMAND is an MRPE check; it is run through run_mrpe and cached
#        in mrpe_NAME.cache
#   -ma  like -m, and additionally "(Cached: AGE/MAXAGEs)" is added to
#        the second line of the cached output
#   NAME     name of the cache file below $MK_VARDIR/cache
#   MAXAGE   maximum age of the cache file in seconds
#   COMMAND  command line, passed as one string
#
# Existing cache content is always printed, even when it is outdated. If
# it is outdated (or missing) and no refresh is running, a new job is
# started in the background which replaces the cache file when it succeeds.
function run_cached () {
    local mrpe=0
    local append_age=0
    local section=""
    local NAME MAXAGE CMDLINE CACHEFILE AGE MTIME CACHE_INFO
    local USE_CACHEFILE=""
    # TODO: this function is unable to handle multiple args at once
    # for example: -s -m won't work, it is read as single token "-s -m"

    if [ "$1" = -s ] ; then section="echo '<<<$2>>>' ; " ; shift ; fi
    if [ "$1" = -m ] ; then mrpe=1 ; shift ; fi
    if [ "$1" = "-ma" ] ; then mrpe=1 ; append_age=1 ; shift ; fi
    NAME=$1
    MAXAGE=$2
    shift 2
    CMDLINE="$section$*"

    if [ ! -d "$MK_VARDIR/cache" ]; then mkdir -p "$MK_VARDIR/cache" ; fi
    if [ "$mrpe" = 1 ] ; then
        CACHEFILE="$MK_VARDIR/cache/mrpe_$NAME.cache"
    else
        CACHEFILE="$MK_VARDIR/cache/$NAME.cache"
    fi

    # Check if the creation of the cache takes suspiciously long: if
    # $CACHEFILE.new is at least twice MAXAGE old, kill the processes
    # using it and remove it. An unknown age is treated as 0.
    if [ -e "$CACHEFILE.new" ] ; then
        AGE=$(file_age "$CACHEFILE.new")
        if [ "${AGE:-0}" -ge $((MAXAGE * 2)) ] ; then
            fuser -k "$CACHEFILE.new" >/dev/null 2>&1
            rm -f "$CACHEFILE.new"
        fi
    fi

    # Check if cache file exists and is recent enough
    if [ -s "$CACHEFILE" ] ; then
        AGE=$(file_age "$CACHEFILE")
        # An age that could not be determined counts as outdated
        if [ -n "$AGE" ] && [ "$AGE" -le "$MAXAGE" ] ; then USE_CACHEFILE=1 ; fi
        # Output the file in any case, even if it is
        # outdated. The new file will not yet be available
        if [ "$append_age" -eq 1 ] ; then
            # insert the cached-string before the pipe (first -e)
            # or, if no pipe found (-e t) append it (third -e),
            # but only once and on the second line (2) (first line is section header,
            # all further lines are long output)
            sed -e "2s/|/ (Cached: ${AGE}\/${MAXAGE}s)|/" -e t -e "2s/$/ (Cached: ${AGE}\/${MAXAGE}s)/" "$CACHEFILE"
        else
            # Modification time of the cache file for the cache info
            MTIME=$(/usr/bin/perl -e 'print((stat($ARGV[0]))[9]);' "$CACHEFILE")
            CACHE_INFO=":cached($MTIME,$MAXAGE)"
            # insert the cache info in the section header (^= after '!'),
            # if none is present (^= before '!')
            sed -e '/^<<<.*\(:cached(\).*>>>/!s/^<<<\([^>]*\)>>>$/<<<\1'"$CACHE_INFO"'>>>/' "$CACHEFILE"
        fi
    fi

    # Cache file outdated and new job not yet running? Start it.
    # noclobber makes the redirection fail if $CACHEFILE.new already exists.
    if [ -z "$USE_CACHEFILE" ] && [ ! -e "$CACHEFILE.new" ] ; then
        if [ "$mrpe" -eq 1 ] ; then
            echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; run_mrpe $NAME \"$CMDLINE\" && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup /usr/bin/bash >/dev/null 2>&1 &
        else
            echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; $CMDLINE && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup /usr/bin/bash >/dev/null 2>&1 &
        fi
    fi
}
# Section <<<check_mk>>>: version, operating system and directories of
# this agent.
echo "<<<check_mk>>>"
echo "Version: 1.6.0i1"
echo "AgentOS: solaris"
echo "Hostname: $(hostname)"
echo "AgentDirectory: $MK_CONFDIR"
echo "DataDirectory: $MK_VARDIR"
# NOTE(review): SPOOLDIR is not assigned anywhere in the visible part of
# this file, so this line is empty unless the variable comes from the
# environment - confirm whether that is intended.
echo "SpoolDirectory: $SPOOLDIR"
echo "PluginsDirectory: $PLUGINSDIR"
echo "LocalDirectory: $LOCALDIR"
# Find out what zone we are running in
# Treat all pre-Solaris 10 systems as "global"
#
# Sets: zonename - name of the current zone
#       pszone   - ps option limiting the listing to that zone, or -A
#                  (all processes) if the zonename command is missing
if type zonename &>/dev/null
then
    zonename=$(zonename)
    pszone="-z $zonename"
else
    zonename="global"
    pszone="-A"
fi
# Get statistics about monitored jobs. Below the job directory there
# is a sub directory per user that ran a job. That directory must be
# owned by the user so that a symlink or hardlink attack for reading
# arbitrary files can be avoided. For that reason the files are listed
# and read via su as the respective user.
if pushd "$MK_VARDIR/job" >/dev/null; then
    echo '<<<job>>>'
    for username in *
    do
        if [ -d "$username" ] && cd "$username" ; then
            count=$(su -s "$SHELL" "$username" -c "ls -1 * | wc -l")

            # With exactly one file the "==> name <==" line is printed
            # here (presumably because head omits it for a single file)
            if [ "$count" -eq "1" ]; then
                filename=$(su -s "$SHELL" "$username" -c "ls -1 *")
                echo "==> $filename <=="
            fi

            su -s "$SHELL" "$username" -c "head -n1000 *"
            cd ..
        fi
    done
    popd > /dev/null
fi
# Filesystem usage for UFS, VxFS, SamFS, lofs and tmpfs.
# Each output line is: device, filesystem type, size, used, available,
# capacity, mount point. The size is recomputed as used + available.
# Entries whose device is a path of the form .../lib/....so.1 are skipped.
echo '<<<df>>>'
for fs in ufs vxfs samfs lofs tmpfs
do
    df -l -k -F $fs 2>/dev/null | sed 1d | grep -v "^[^ ]*/lib/[^ ]*\.so\.1 " | \
    while read -r Filesystem kbytes used avail capacity Mountedon
    do
        kbytes=$(($used + $avail))
        echo "$Filesystem $fs $kbytes $used $avail $capacity $Mountedon"
    done
done
# Filesystem usage for ZFS: properties of all filesystems and volumes,
# followed by the df output for zfs in a [df] sub section
if type zfs &>/dev/null
then
    echo '<<<zfsget>>>'
    zfs get -t filesystem,volume -Hp name,quota,used,avail,mountpoint,type 2>/dev/null
    echo '[df]'
    df -l -k -F zfs 2>/dev/null | sed 1d
fi

# ZFS arc cache
# newer Solaris (>=11.3) do not provide hits and misses via mdb -k,
# so kstat is preferred and mdb is only used when kstat is missing
echo '<<<zfs_arc_cache>>>'
if type kstat &>/dev/null
then
    # Strip everything up to "arcstats:" and print "name = value"
    kstat -p zfs:0:arcstats | sed -e 's/.*arcstats://g' | awk '{printf "%s = %s\n", $1, $2;}'

elif type mdb &>/dev/null
then
    echo '::arc' | mdb -k
fi
# Processes
echo '<<<ps>>>'
# The default solaris ps command strips the command lines of the processes. But for good process
# matching on the server we really need the whole command line. On linux there are arguments to
# make ps output the whole command line, but on solaris this seems to be missing. We use the ucb
# ps command to get the full command line instead. What a hack.
if [ -x /usr/ucb/ps ]; then
    UCB_PS=$(/usr/ucb/ps -agwwwx)
    # Turn the first six columns into "(user,vsz,rss,pcpu/etime,pid) "
    PS=$(ps -o user,vsz,rss,pcpu,etime,pid,args $pszone | \
        sed -e 1d -e 's/ *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) */(\1,\2,\3,\4\/\5,\6) /')
    while read -r LINE; do
        # STATS is everything before the closing bracket, PID its last field
        STATS=${LINE%) *}
        PID=${STATS##*,}

        # Directly use ps output when the line is too short to have been stripped
        if [ ${#LINE} -lt 100 ]; then
            echo "$LINE"
            continue
        fi

        # Look up the PID in the ucb ps output and keep fields 5 and up
        CMD=$(echo "$UCB_PS" | grep "^[ ]*$PID " | head -n1 | \
            awk '{ s = ""; for (i = 5; i <= NF; i++) s = s $i " "; print s }')
        # Only use the ucb ps line when it's not empty (process might already be gone)
        if [ -z "$CMD" ]; then
            echo "$LINE"
        else
            echo "${STATS}) ${CMD}"
        fi
    done <<< "$PS"
else
    ps -o user,vsz,rss,pcpu,args $pszone | \
        sed -e 1d -e 's/ *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) */(\1,\2,\3,\4) /'
fi
# Statgrab
# source: http://www.i-scream.org/libstatgrab/
# binary: http://www.opencsw.org/
if type statgrab &>/dev/null
then
    statgrab_vars="const. cpu. disk. general. mem. page. swap. user."
    statgrab_sections="cpu disk page"

    # Collect net stats in the global zone and in local zones if dlstat is present.
    if [ "$zonename" == "global" ] || type dlstat &>/dev/null
    then
        statgrab_vars="$statgrab_vars net."
        statgrab_sections="$statgrab_sections net"
    fi

    # Keep the output in a variable instead of a file with a predictable
    # name in /tmp. Lines containing "md" are dropped.
    statgrab_output=$(statgrab $statgrab_vars | grep -v md)
    for s in $statgrab_sections
    do
        echo "<<<statgrab_$s>>>"
        printf '%s\n' "$statgrab_output" | grep "^$s\." | cut -d. -f2-99 | sed 's/ *= */ /'
    done

    # <<<statgrab_mem>>> info is preferred over <<<solaris_mem>>>
    # since solaris_mem is under suspicion to be buggy.
    echo '<<<statgrab_mem>>>'
    printf '%s\n' "$statgrab_output" | egrep "^(swap|mem)\." | sed 's/ *= */ /'
fi
# /proc/cpu
# Simulated Output of Linux /proc/cpu: the three load averages taken from
# uptime, "1/<line count of ps -AL>", the PID of this agent and the line
# count of psrinfo.
echo '<<<cpu>>>'
load=$(uptime|sed -e 's;.*average: \([0-9]\{1,\}\.[0-9]\{1,\}\), \([0-9]\{1,\}\.[0-9]\{1,\}\), \([0-9]\{1,\}\.[0-9]\{1,\}\).*;\1 \2 \3;')
# The arithmetic expansion strips the padding around the wc output
nthreads=$(($(ps -AL | wc -l)))
procs=$(($(psrinfo | wc -l)))
echo $load 1/$nthreads $$ $procs
# zpool status (cached for 120 seconds) and the list of pools
if [ -x /sbin/zpool ]; then
    run_cached -s zpool_status 120 "/sbin/zpool status -x"

    echo '<<<zpool>>>'
    zpool list
fi
# Solaris doesn't always give a consistent output on uptime, thus include side information
# Tested in VM for solaris 10/11
echo '<<<uptime>>>'
# nawk's srand() is used to obtain the current time - presumably as
# seconds since the epoch, matching the boot_time value from kstat
ctime=$(nawk 'BEGIN{print srand()}')
btime=$(kstat '-p' 'unix:::boot_time' 2>&1|grep 'boot_time'|awk '{print $2}')
# Without both values there is nothing to compute
if [ -n "$ctime" ] && [ -n "$btime" ]; then
    echo $(($ctime - $btime))
fi
echo '[uptime_solaris_start]'
uname -a
zonename
uptime
kstat -p unix:0:system_misc:snaptime
echo '[uptime_solaris_end]'
# NTP
# Only output the section when a process matching ntpd is running.
# The pattern is quoted so that the shell cannot expand it as a glob.
if ps -o comm $pszone | grep -w '.*ntpd' &>/dev/null
then
    echo '<<<ntp>>>'
    # Drop the two header lines, put a blank after the first character
    # of each line and replace a leading blank by '%'
    ntpq -np | sed -e 1,2d -e 's/^\(.\)/\1 /' -e 's/^ /%/'
fi
# Memory
# <<<solaris_mem>>> should be used if statgrab is missing and top is available.
# NOTE(review): if both /usr/bin/top and /usr/local/bin/top exist, both
# are run and the Memory line is printed twice - confirm that is intended.
if ! type statgrab &>/dev/null;
then
    if [ -x /usr/bin/top ] || [ -x /usr/local/bin/top ]
    then
        echo "<<<solaris_mem>>>"
        if [ -x /usr/bin/top ]; then /usr/bin/top | grep '^Memory:'; fi
        if [ -x /usr/local/bin/top ]; then /usr/local/bin/top | grep '^Memory:'; fi
    fi
fi
# Hardware status: only the exit code of prtdiag is reported, cached for
# 300 seconds
if type prtdiag >/dev/null 2>&1
then
    # prtdiag does not work in local zones
    if [ "$zonename" == "global" ]
    then
        run_cached -s solaris_prtdiag_status 300 '/usr/sbin/prtdiag 1>/dev/null 2>&1; echo $?'
    fi
fi
# TCP Connection stats: skip the first four lines of the netstat output
# and count the connections per value of the seventh column
echo '<<<tcp_conn_stats>>>'
netstat -n -a -f inet -P tcp | tail +5 | \
    nawk '{ c[$7]++; } END { for (x in c) { print x, c[x]; } }'
# Multipathing (global zone only)
# The output of mpathadm is read in groups of three lines: the first
# field of line one and the last fields of lines two and three are
# printed together on one line.
if type mpathadm &>/dev/null
then
    if [ "$zonename" == "global" ]
    then
        echo '<<<solaris_multipath>>>'
        mpathadm list LU | nawk '{if(NR%3==1){dev=$1}
                                 if(NR%3==2){tc=$NF}
                                 if(NR%3==0){printf "%s %s %s\n",dev,tc,$NF}}'
    fi
fi
# Fileinfo-Check: put patterns for files into $MK_CONFDIR/fileinfo.cfg
#
# Replace the date variable of the input, e.g. $DATE:%Y%m%d$, by
# the current date. If there's no match just return the input.
# Arguments: $1 - file name pattern
# Outputs:   the pattern with the date variable expanded, on stdout
function replace_datevariable()
{
    local file_name="$1"
    local pattern='(\$DATE:(.*)\$)'
    local date_variable format_string replacement result

    if [[ ! $file_name =~ $pattern ]]; then
        printf '%s\n' "$file_name"
    else
        # Group 1 is the whole variable, group 2 the format for date
        date_variable="${BASH_REMATCH[1]}"
        format_string="${BASH_REMATCH[2]}"
        # Quoted so that a format containing blanks stays one argument
        replacement=$(date "+$format_string")
        # Quoted so that variable and date are taken literally
        result=${file_name/"$date_variable"/"$replacement"}
        printf '%s\n' "$result"
    fi
}
# Fileinfo-Check: put patterns for files into /etc/check_mk/fileinfo.cfg
# Additional pattern files are read from $MK_CONFDIR/fileinfo.d/.
if [ -r "$MK_CONFDIR/fileinfo.cfg" ]; then
    echo '<<<fileinfo:sep(124)>>>'

    # Remember the shell options and make sure globbing is enabled
    old_state=$(set +o)
    set +f

    # let the shell do all the expansion, and pipe all files to perl
    # (the * is outside the quotes, otherwise it would not be expanded)
    (cat "$MK_CONFDIR/fileinfo.cfg" "$MK_CONFDIR"/fileinfo.d/* 2>/dev/null) | while read -r pattern; do
        case $pattern in
            # Only absolute paths are accepted as patterns
            /*) pattern=$(replace_datevariable "$pattern")
                for f in $pattern; do printf '%s\n' "$f"; done
            ;;
        esac
    done | perl -e '
    print time."\n";
    print "[[[header]]]\n";
    print "name|status|size|time\n";
    print "[[[content]]]\n";
    while (<>) {
        chomp $_;
        if (-d $_) { next; }
        if (not -f $_) {
            print "$_|missing\n";
            next;
        }
        # stat returns an empty list on failure; $! is not checked because
        # it may still hold the error of an earlier call
        my @st = stat($_);
        if (!@st) {
            print "$_|stat failed\n";
        } else {
            # fields 7 and 9 of stat are size and mtime
            print "$_|ok|$st[7]|$st[9]\n";
        }
    }'
    set +vx; eval "$old_state"
fi
# Libelle Business Shadow
if type trd >/dev/null 2>&1
then
    echo '<<<libelle_business_shadow:sep(58)>>>'
    trd -s
fi

# Displaying Information About Faults or Defects
# If there are no faults the output of this command will be empty.
if type fmadm >/dev/null 2>&1
then
    echo '<<<solaris_fmadm:sep(58)>>>'
    fmadm faulty
fi

# Getting Information About Services Running on Solaris
# We can get a list of all service instances, including disabled
# or incomplete ones by 'svcs -a'
if type svcs > /dev/null 2>&1
then
    echo '<<<solaris_services>>>'
    svcs -a
fi
# MK's Remote Plugin Executor
# Each line of mrpe.cfg that is neither empty nor a comment has the form
#   <description> [(key=value[:key=value...])] <command line>
# With the parameter "interval" the check is run through run_cached,
# otherwise it is run directly. The parameter "appendage" adds the cache
# age to the output of a cached check.
if test -f "$MK_CONFDIR/mrpe.cfg"
then
    echo '<<<mrpe>>>'
    grep -v '^ *#' "$MK_CONFDIR/mrpe.cfg" | grep -v '^ *$' | \
    while read descr cmdline
    do
        interval=
        args="-m"
        # Parameters are only accepted directly in front of the command,
        # so that brackets inside the command line are left alone
        if [[ $cmdline =~ ^\(([^\)]*)\)[[:space:]](.*) ]]
        then
            parameters=${BASH_REMATCH[1]}
            cmdline=${BASH_REMATCH[2]}

            # split multiple parameter assignments
            for par in $(echo "$parameters" | tr ":" "\n")
            do
                # split each assignment
                key=$(echo "$par" | cut -d= -f1)
                value=$(echo "$par" | cut -d= -f2)

                if [ "$key" = "interval" ] ; then
                    interval=$value
                elif [ "$key" = "appendage" ] ; then
                    args="-ma"
                fi
            done
        fi

        if [ -z "$interval" ]
        then
            run_mrpe "$descr" "$cmdline"
        else
            run_cached "$args" "$descr" "$interval" "$cmdline"
        fi
    done
fi
# Local checks
# An empty LOCALDIR is skipped, since cd without a directory would
# change to the home directory and run everything found there.
if [ -n "$LOCALDIR" ] && cd "$LOCALDIR" 2>/dev/null
then
    echo '<<<local>>>'
    for skript in *
    do
        # Directories are executable as well, hence the test for a file
        if [ -f "$skript" ] && [ -x "$skript" ] ; then
            "./$skript"
        fi
    done

    # Call some plugins only every X'th second: scripts in a sub
    # directory named after a number are cached for that many seconds.
    # The cache name is the path with every '/' replaced by '#'.
    for skript in [1-9]*/* ; do
        if [ -x "$skript" ] ; then
            cache_name=local_${skript//\//\#}
            run_cached "$cache_name" "${skript%/*}" "$skript"
        fi
    done
fi
# Plugins
# An empty PLUGINSDIR is skipped, since cd without a directory would
# change to the home directory and run everything found there.
if [ -n "$PLUGINSDIR" ] && cd "$PLUGINSDIR" 2>/dev/null
then
    for skript in *
    do
        # Directories are executable as well, hence the test for a file
        if [ -f "$skript" ] && [ -x "$skript" ] ; then
            "./$skript"
        fi
    done

    # Call some plugins only every X'th second: scripts in a sub
    # directory named after a number are cached for that many seconds.
    # The cache name is the path with every '/' replaced by '#'.
    for skript in [1-9]*/* ; do
        if [ -x "$skript" ] ; then
            cache_name=plugins_${skript//\//\#}
            run_cached "$cache_name" "${skript%/*}" "$skript"
        fi
    done
fi