3 # Copyright (c) 2013-2016 Red Hat.
4 # Copyright (c) 1998-2000,2003 Silicon Graphics, Inc. All Rights Reserved.
6 # This program is free software; you can redistribute it and/or modify it
7 # under the terms of the GNU General Public License as published by the
8 # Free Software Foundation; either version 2 of the License, or (at your
9 # option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 # Administrative script to check pmie processes are alive, and restart
# Get standard environment: pcp.env and the rc-proc.sh helper library
# are sourced into this shell.
. "$PCP_DIR/etc/pcp.env"
. "$PCP_SHARE_DIR/lib/rc-proc.sh"
# Paths of the pmie and pmieconf executables used by this script.
PMIE="$PCP_BIN_DIR/pmie"
PMIECONF="$PCP_BIN_DIR/pmieconf"
# error messages should go to stderr, not the GUI notifiers
# added to handle the problem when /var/log/pcp is a symlink, as first
# reported by Micah_Altman@harvard.edu in Nov 2001
38 __real_d
=`cd $__d 2>/dev/null && $PWDCMND`
43 echo $__real_d/`basename $1`
# Private scratch directory for this run; give up immediately if it
# cannot be created.
tmp=`mktemp -d /tmp/pcp.XXXXXXXXX` || exit 1
# Default log file for this script's own diagnostics.
PROGLOG=$PCP_LOG_DIR/pmie/$prog.log
58 $USE_SYSLOG && [ $status -ne 0 ] && \
59 $PCP_SYSLOG_PROG -p daemon.error
"$prog failed - see $PROGLOG"
60 [ -s "$PROGLOG" ] ||
rm -f "$PROGLOG"
61 lockfile
=`cat $tmp/lock 2>/dev/null`
# Run _cleanup and exit with $status on shell exit (0) and on signals
# 1, 2, 3 and 15.  The \$ defers expansion of $status until the trap
# fires, so the value current at that time is used.
trap "_cleanup; exit \$status" 0 1 2 3 15
# control files for pmie administration ... edit the entries in this
# file (and optional directory) to reflect your local configuration;
# see also -c option below.
#
# CONTROLDIR is the control file path with a ".d" suffix.
CONTROL=$PCP_PMIECONTROL_PATH
CONTROLDIR=$PCP_PMIECONTROL_PATH.d
74 # determine path for pwd command to override shell built-in
75 PWDCMND
=`which pwd 2>/dev/null | $PCP_AWK_PROG '
78 / aliased to / { i = 1 }
79 { if ( i == 0 ) print }
# Fall back to /bin/pwd when no pwd command path has been determined.
[ -z "$PWDCMND" ] && PWDCMND=/bin/pwd
# Probe whether the chosen pwd accepts -P; if it does, always use it.
eval $PWDCMND -P >/dev/null 2>&1
[ $? -eq 0 ] && PWDCMND="$PWDCMND -P"
101 cat >> $tmp/usage
<< EOF
103 -c=FILE,--control=FILE configuration of pmie instances to manage
104 -l=FILE,--logfile=FILE send important diagnostic messages to FILE
105 -C query system service runlevel information
106 -N,--showme perform a dry run, showing what would be done
107 -s,--stop stop pmie processes instead of starting them
108 -T,--terse produce a terser form of output
109 -V,--verbose increase diagnostic verbosity
# Parse the command line with pmgetopt, driven by the option
# specification in $tmp/usage; exit if pmgetopt reports failure.
ARGS=`pmgetopt --progname=$prog --config=$tmp/usage -- "$@"`
[ $? != 0 ] && exit 1
125 -C) CHECK_RUNLEVEL
=true
153 -\?) pmgetopt
--usage --progname=$prog --config=$tmp/usage
163 pmgetopt
--usage --progname=$prog --config=$tmp/usage
# after argument checking, everything must be logged to ensure no mail is
# accidentally sent from cron.  Close stdout and stderr, then open stdout
# as our logfile and redirect stderr there too.
#
# Make sure the log directory exists (mkdir errors are discarded) and
# keep any existing log as $PROGLOG.prev.
PROGLOGDIR=`dirname "$PROGLOG"`
[ -d "$PROGLOGDIR" ] || mkdir -p -m 775 "$PROGLOGDIR" 2>/dev/null
[ -f "$PROGLOG" ] && mv "$PROGLOG" "$PROGLOG.prev"
180 echo "$prog: [$controlfile:$line]"
182 echo "... automated performance reasoning for host \"$host\" unchanged"
188 echo "$prog [$controlfile:$line]"
194 $PCP_ECHO_PROG $PCP_ECHO_N "Restarting pmie for host \"$host\" ...""$PCP_ECHO_C"
199 # demand mutual exclusion
201 rm -f $tmp/stamp
$tmp/out
202 delay
=200 # tenths of a second
203 while [ $delay -ne 0 ]
205 if pmlock
-v $logfile.lock
>$tmp/out
207 echo $logfile.lock
>$tmp/lock
210 if [ ! -f $tmp/stamp
]
212 touch -t `pmdate -30M %Y%m%d%H%M` $tmp/stamp
214 if [ -n "`find $logfile.lock ! -newer $tmp/stamp -print 2>/dev/null`" ]
216 _warning
"removing lock file older than 30 minutes"
222 delay
=`expr $delay - 1`
227 # failed to gain mutex lock
229 if [ -f $logfile.lock
]
231 _warning
"is another PCP cron job running concurrently?"
234 echo "$prog: `cat $tmp/out`"
236 _warning
"failed to acquire exclusive lock ($logfile.lock) ..."
251 echo "$prog: Error: cannot find pmie output file at \"$logfile\""
256 logdir
=`dirname $logfile`
257 echo "Directory (`cd $logdir; $PWDCMND`) contents:"
258 LC_TIME
=POSIX
ls -la $logdir
261 echo "Contents of pmie output file \"$logfile\" ..."
268 $VERBOSE && $PCP_ECHO_PROG $PCP_ECHO_N " [process $1] ""$PCP_ECHO_C"
270 # wait until pmie process starts, or exits
273 [ ! -z "$PMCD_CONNECT_TIMEOUT" ] && delay
=$PMCD_CONNECT_TIMEOUT
275 [ ! -z "$PMCD_REQUEST_TIMEOUT" ] && x
=$PMCD_REQUEST_TIMEOUT
277 # wait for maximum time of a connection and 20 requests
279 delay
=`expr \( $delay + 20 \* $x \) \* 10` # tenths of a second
280 while [ $delay -ne 0 ]
284 # $logfile was previously removed, if it has appeared again then
285 # we know pmie has started ... if not just sleep and try again
287 if ls "$PCP_TMP_DIR/pmie/$1" >$tmp/out
2>&1
289 if grep "No such file or directory" $tmp/out
>/dev
/null
293 $VERBOSE && echo " done"
298 _plist
=`_get_pids_by_name pmie`
300 for _p
in `echo $_plist`
302 [ $_p -eq $1 ] && _found
=true
307 # process still here, just hasn't created its status file
311 $VERBOSE || _restarting
312 echo " process exited!"
317 echo "$prog: Error: failed to restart pmie"
318 echo "Current pmie processes:"
319 $PCP_PS_PROG $PCP_PS_ALL_FLAGS |
tee $tmp/tmp |
sed -n -e 1p
320 for _p
in `echo $_plist`
322 sed -n -e "/^[ ]*[^ ]* [ ]*$_p /p" < $tmp/tmp
331 delay
=`expr $delay - 1`
332 $VERBOSE && [ `expr $delay % 10` -eq 0 ] && \
333 $PCP_ECHO_PROG $PCP_ECHO_N ".""$PCP_ECHO_C"
335 $VERBOSE || _restarting
336 echo " timed out waiting!"
341 sed -e 's/^/ /' $tmp/out
349 # extract the pmie configuration file (-c) from a list of arguments
355 -e 's/.* -c\([^ ]*\).*/\1/p'
360 # update a pmie configuration file if it should be created/modified
366 if [ -f "$configfile" ]
368 # look for "magic" string at start of file, and ensure we created it
369 sed 1q
"$configfile" |
grep '^// pmieconf-pmie [0-9]' >/dev
/null
371 grep '^// Auto-generated by pmieconf' "$configfile" >/dev
/null
373 if [ $magic -eq 0 -a $owned -eq 0 ]
375 # pmieconf file, see if re-generation is needed
376 cp "$configfile" "$tmpconfig"
377 if $PMIECONF -f "$tmpconfig" -cF >$tmp/diag
2>&1
379 [ $isprimary = y
] && $PMIECONF -f "$tmpconfig" modify primary enabled
yes
380 grep -v "generated by pmieconf" "$configfile" >$tmp/old
381 grep -v "generated by pmieconf" "$tmpconfig" >$tmp/new
382 if ! diff $tmp/old
$tmp/new
>/dev
/null
384 if [ -w "$configfile" ]
386 $VERBOSE && echo "Reconfigured: \"$configfile\" (pmieconf)"
387 eval $CP "$tmpconfig" "$configfile"
389 _warning
"no write access to pmieconf file \"$configfile\", skip reconfiguration"
394 _warning
"pmieconf failed to reconfigure \"$configfile\""
395 sed -e "s;$tmpconfig;$configfile;g" $tmp/diag
396 echo "=== start pmieconf file ==="
398 echo "=== end pmieconf file ==="
401 elif [ ! -e "$configfile" ]
403 # file does not exist, generate it, if possible
406 echo "+ $PMIECONF -f $configfile -cF"
407 elif ! $PMIECONF -f "$configfile" -cF >$tmp/diag
2>&1
409 _warning
"pmieconf failed to generate \"$configfile\""
411 echo "=== start pmieconf file ==="
413 echo "=== end pmieconf file ==="
415 [ $isprimary = y
] && $PMIECONF -f "$configfile" modify primary enabled
yes
416 chown
$PCP_USER:$PCP_GROUP "$configfile" >/dev
/null
2>&1
422 if [ $CHECK_RUNLEVEL = true
]
424 # determine whether to start pmie based on runlevel settings - we
425 # need to do this when running unilaterally from cron, else we'll
426 # always start pmie up (even when we shouldn't).
429 if is_chkconfig_on pmie
437 if [ $STOP_PMIE = true
]
439 # if pmie has never been started, there's no work to do to stop it
440 [ ! -d "$PCP_TMP_DIR/pmie" ] && exit
441 $QUIETLY ||
$PCP_BINADM_DIR/pmpost
"stop pmie from $prog"
442 elif [ $START_PMIE = false
]
447 if [ ! -f "$CONTROL" ]
449 echo "$prog: Error: cannot find control file ($CONTROL)"
# note on control file format version
#  1.0 was the first release, and did not include the primary field
#      [this is the default for backwards compatibility]
#  1.1 adds the primary field (a la the pmlogger control file), indicating
#      that localhost-specific rules should be enabled
462 rm -f $tmp/err
$tmp/pmies
469 if echo "$controlfile" |
grep -q -e '\.rpmsave' -e '\.rpmnew'
471 _warning
"ignored backup control file \"$controlfile\""
475 sed -e "s;PCP_LOG_DIR;$PCP_LOG_DIR;g" $controlfile | \
476 while read host primary socks logfile args
478 # start in one place for each iteration (beware relative paths)
480 line
=`expr $line + 1`
484 \
#*|'') # comment or empty
487 \$
*) # in-line variable assignment
488 $SHOWME && echo "# $host $primary $socks $logfile $args"
489 cmd
=`echo "$host $primary $socks $logfile $args" \
491 -e "/='/s/\(='[^']*'\).*/\1/" \
492 -e '/="/s/\(="[^"]*"\).*/\1/' \
493 -e '/=[^"'"'"']/s/[;&<>|].*$//' \
494 -e '/^\\$[A-Za-z][A-Za-z0-9_]*=/{
496 /^\([A-Za-z][A-Za-z0-9_]*\)=/s//export \1; \1=/p
500 # in-line command, not a variable assignment
501 _warning
"in-line command is not a variable assignment, line ignored"
506 _warning
"cannot change \$PATH, line ignored"
509 _warning
"cannot change \$IFS, line ignored"
512 $SHOWME && echo "+ $cmd"
513 echo eval $cmd >>$tmp/cmd
522 # set the version and other variables
527 if grep 'version=' $tmp/cmd
>/dev
/null
534 _error
"bad version ($version) in control file"
543 _warning
"missing \$version, assuming version 1.0 control format"
546 if [ "$version" = "1.0" ]
548 # handle backwards compatibility
549 # one less field and primary defaults to "n" for version 1.0
553 args
="$logfile $args"
555 # missing "args" ... this is bad, but will be reported below
556 # ... guard avoids setting "args" to " " which would defeat
566 if [ -z "$primary" -o -z "$socks" -o -z "$logfile" -o -z "$args" ]
568 _error
"insufficient fields in control file record"
571 if [ "$primary" != y
-a "$primary" != n
]
573 _error
"primary field in control file record must be y or n, not \"$primary\""
576 if [ "$socks" != y
-a "$socks" != n
]
578 _error
"socks field in control file record must be y or n, not \"$socks\""
582 # args should begin with a hyphen
589 _error
"args field in control file must begin with a hyphen not \"$args\""
594 # substitute LOCALHOSTNAME marker in this config line
595 # (differently for logfile and pcp -h HOST arguments)
597 logfilehost
=`hostname || echo localhost`
598 logfile
=`echo $logfile | sed -e "s;LOCALHOSTNAME;$logfilehost;"`
599 logfile
=`_unsymlink_path $logfile`
600 [ $primary = y
-o "x$host" = xLOCALHOSTNAME
] && host=local:
602 dir
=`dirname $logfile`
603 $VERY_VERBOSE && echo "Check pmie -h $host -l $logfile ..."
605 # make sure output directory exists
609 mkdir
-p -m 755 "$dir" >$tmp/err
2>&1
613 _error
"cannot create directory ($dir) for pmie log file"
618 # and the user pcp can write there
620 chown
$PCP_USER:$PCP_GROUP "$dir" >/dev
/null
2>&1
622 # and the logfile is writeable, if it exists
624 [ -f "$logfile" ] && chown
$PCP_USER:$PCP_GROUP "$logfile" >/dev
/null
2>&1
630 _error
"cannot chdir to directory ($dir) for pmie log file"
634 $SHOWME && echo "+ cd $dir"
638 _warning
"no write access in $dir, skip lock file processing"
644 # match $logfile from control file to running pmies
646 for pidfile
in $PCP_TMP_DIR/pmie
/[0-9]*
648 [ "$pidfile" = "$PCP_TMP_DIR/pmie/[0-9]*" ] && continue
649 $VERY_VERBOSE && $PCP_ECHO_PROG $PCP_ECHO_N "... try $pidfile: ""$PCP_ECHO_C"
651 p_id
=`echo $pidfile | sed -e 's,.*/,,'`
655 # throw away stderr in case $pidfile has been removed by now
656 eval `$PCP_BINADM_DIR/pmiestatus $pidfile 2>/dev/null | $PCP_AWK_PROG '
657 NR == 2 { printf "p_logfile=\"%s\"\n", $0; next }
658 NR == 3 { printf "p_pmcd_host=\"%s\"\n", $0; next }
661 p_logfile
=`_unsymlink_path $p_logfile`
662 if [ "$p_logfile" != $logfile ]
664 $VERY_VERBOSE && echo "different logfile, skip"
665 $VERY_VERBOSE && echo " $p_logfile differs to $logfile"
666 elif _get_pids_by_name pmie |
grep "^$p_id\$" >/dev
/null
668 $VERY_VERBOSE && echo "pmie process $p_id identified, OK"
672 $VERY_VERBOSE && echo "pmie process $p_id not running, skip"
673 $VERY_VERBOSE && _get_pids_by_name pmie
681 echo "No current pmie process exists for:"
683 echo "Found pmie process $pid monitoring:"
686 echo " log file = $logfile"
689 if [ -z "$pid" -a $START_PMIE = true
]
691 configfile
=`_get_configfile $args`
692 if [ ! -z "$configfile" ]
694 # if this is a relative path and not relative to cwd,
695 # substitute in the default pmie search location.
697 if [ ! -f "$configfile" -a "`basename $configfile`" = "$configfile" ]
699 configfile
="$PCP_VAR_DIR/config/pmie/$configfile"
702 # check configuration file exists and is up to date
703 _configure_pmie
"$configfile" "$host" "$primary"
706 args
="-h $host -l $logfile $args"
708 $VERBOSE && _restarting
713 # only check for pmsocks if it's specified in the control file
715 if which pmsocks
>/dev
/null
2>&1
717 # check if pmsocks has been set up correctly
718 if pmsocks
ls >/dev
/null
2>&1
728 echo "$prog: Warning: no pmsocks available, would run without"
733 [ -f "$logfile" ] && eval $MV -f "$logfile" "$logfile.prior"
738 echo "+ ${sock_me}$PMIE -b $args"
742 # since this is launched as a sort of daemon, any output should
743 # go on pmie's stderr, i.e. $logfile ... use -b for this
745 $VERY_VERBOSE && ( echo; $PCP_ECHO_PROG $PCP_ECHO_N "+ ${sock_me}$PMIE -b $args""$PCP_ECHO_C"; echo "..." )
746 $PCP_BINADM_DIR/pmpost
"start pmie from $prog for host $host"
747 ${sock_me}$PMIE -b $args &
751 # wait for pmie to get started, and check on its health
754 elif [ ! -z "$pid" -a $STOP_PMIE = true
]
756 # Send pmie a SIGTERM, which is noted as a pending shutdown.
757 # Add pid to list of pmies sent SIGTERM - may need SIGKILL later.
759 $VERY_VERBOSE && echo "+ $KILL -s TERM $pid"
760 eval $KILL -s TERM
$pid
761 $PCP_ECHO_PROG $PCP_ECHO_N "$pid ""$PCP_ECHO_C" >> $tmp/pmies
768 _parse_control
$CONTROL
769 append
=`ls $CONTROLDIR 2>/dev/null | LC_COLLATE=POSIX sort`
770 for controlfile
in $append
772 _parse_control
$CONTROLDIR/$controlfile
775 # check all the SIGTERM'd pmies really died - if not, use a bigger hammer.
780 elif [ $STOP_PMIE = true
-a -s $tmp/pmies
]
782 pmielist
=`cat $tmp/pmies`
783 if $PCP_PS_PROG -p "$pmielist" >/dev
/null
2>&1
785 $VERY_VERBOSE && ( echo; $PCP_ECHO_PROG $PCP_ECHO_N "+ $KILL -KILL `cat $tmp/pmies` ...""$PCP_ECHO_C" )
786 eval $KILL -s KILL
$pmielist >/dev
/null
2>&1
787 delay
=30 # tenths of a second
788 while $PCP_PS_PROG -f -p "$pmielist" >$tmp/alive
2>&1
793 delay
=`expr $delay - 1`
796 echo "$prog: Error: pmie process(es) will not die"
804 [ -f $tmp/err
] && status
=1