qa/check

   1 #! /bin/sh
   2 #
   3 # Control script for running PCP QA tests
   4 #
   5 # Copyright (c) 1997-2002 Silicon Graphics, Inc.  All Rights Reserved.
   6 #
   7
   # Run-wide bookkeeping.  $status is the exit code handed to "exit" by the
   # exit trap; try/n_bad/bad/notrun accumulate the results reported in the
   # final summary.
   mypid=$$
   status=0
   needwrap=true       # set to false once _wrapup has written its summary
   try=0
   n_bad=0
   bad=""
   notrun=""
   seq=''
   aborted=true        # cleared only after the whole test list was processed
   # quote $0 so a script path containing whitespace is handled
   myname=`basename "$0"`
   iam=$myname  #  a synonym

   # status and log files
   CHECKLOCK=/tmp/check-LOCK
   CHECKSTS=/tmp/check.sts                 #  If you change these, hangcheck.pcpqa
   CHECKPID=/tmp/check.pid                 #  will need to change, too.
   CHECKSLOG=/var/tmp/check-start.log      #  A check.log already exists for
                                           #  another reason.
  26
  27
  # Print the current local time of day as a number of seconds since
  # midnight, computed from the hour, minute and second fields of date(1).
  _wallclock()
  {
      date "+%H %M %S" \
      | $PCP_AWK_PROG '{ secs = $1 * 3600 + $2 * 60 + $3; print secs }'
  }
  32
  # Write " [<date>-<time>]" (date(1) formats %D and %T) using the PCP echo
  # wrapper; newline suppression is whatever $PCP_ECHO_N / $PCP_ECHO_C
  # provide.  Leaves the formatted time in the global $now.
  _timestamp()
  {
      now=`date "+%D-%T"`
      $PCP_ECHO_PROG $PCP_ECHO_N " [${now}]${PCP_ECHO_C}"
  }
  38
  # Remove the lock file $CHECKLOCK, but only when it records our own pid
  # ($mypid).  A missing or unreadable lock file is not an error; the
  # function always returns 0.
  _release_lock()
  {
      # nothing to release
      [ -f "$CHECKLOCK" ] || return 0

      # the file may vanish between the test above and the read
      LOCKOWNER=`cat "$CHECKLOCK" 2>/dev/null` || return 0

      if [ "$LOCKOWNER" = "$mypid" ]
      then
          rm -f "$CHECKLOCK"
      fi

      return 0
  }
  49
  # Exit-time cleanup, installed as the exit/signal trap handler.
  #
  # Removes the hangcheck state files that belong to this process, releases
  # the check lock, removes the saved config copies under $tmp/checksums,
  # merges this run's timings ($tmp.time) into check.time and reports the
  # run summary on stdout and in check.log.  $needwrap is cleared after the
  # summary so a second invocation does not repeat it.  Finally all $tmp.*
  # scratch files are removed.
  #
  # Reads: HANGCHECK USER CHECKPID CHECKSTS mypid tmp showme needwrap list
  #        aborted seq notrun n_bad bad try color
  _wrapup()
  {
      # for hangcheck ...
      # remove files that were used by hangcheck
      #
      if [ "$HANGCHECK" = true -a "$USER" = pcpqa ]
      then
          # the pid file is absent when we exit before it was created, so
          # do not let cat complain about that
          checkpid=`cat "$CHECKPID" 2>/dev/null`
          [ "$checkpid" = "$mypid" -a -f "$CHECKSTS" ] && rm -f "$CHECKSTS"
          [ "$checkpid" = "$mypid" -a -f "$CHECKPID" ] && rm -f "$CHECKPID"
      fi

      if [ -z "$tmp" ]
      then
          # did not get very far into the initialization!
          :
      else
          # release the lock and remove backup files
          _release_lock
          [ -d "$tmp" ] && ( rm -rf "$tmp/checksums" ; rmdir "$tmp" )

          if $showme
          then
              :
          elif $needwrap
          then
              if [ -f check.time -a -f "$tmp.time" ]
              then
                  # later lines win, so times from this run replace the
                  # older entries for the same test
                  cat check.time "$tmp.time" \
                  | $PCP_AWK_PROG '
          { t[$1] = $2 }
  END     { if (NR > 0) {
              for (i in t) print i " " t[i]
            }
          }' \
                  | sort -n >"$tmp.out"
                  mv "$tmp.out" check.time
              fi

              echo "" >>check.log
              date >>check.log
              # $list is deliberately unquoted: collapse it to single spaces
              echo $list | fmt | sed -e 's/^/    /' >>check.log
              if $aborted
              then
                  if [ -z "$seq" ]
                  then
                      echo "Aborted! [during setup]" >>check.log
                  else
                      echo "Aborted! [running $seq]" >>check.log
                  fi
              fi

              # $color is quoted throughout: it may still be unset if the
              # trap fires early, and an unquoted test would then print a
              # "[: unary operator expected" error
              if [ ! -z "$notrun" ]
              then
                  [ "$color" = true ] && tput bold && tput setaf 4 # blue
                  echo "Not run:$notrun"
                  [ "$color" = true ] && tput sgr0 # reset
                  echo "Not run:$notrun" | fmt >>check.log
              fi
              if [ ! -z "$n_bad" -a "$n_bad" != 0 ]
              then
                  [ "$color" = true ] && tput bold && tput setaf 1 # red
                  echo "Failures:$bad"
                  echo "Failed $n_bad of $try tests"
                  [ "$color" = true ] && tput sgr0 # reset
                  echo "Failures:$bad" | fmt >>check.log
                  echo "Failed $n_bad of $try tests" >>check.log
              else
                  if [ "$try" != 0 ]
                  then
                      [ "$color" = true ] && tput bold && tput setaf 2 # green
                      echo "Passed all $try tests"
                      [ "$color" = true ] && tput sgr0 # reset
                      echo "Passed all $try tests" >>check.log
                  fi
              fi
              needwrap=false
          fi

          rm -f "$tmp".*
      fi
  }
 132
 # Usage: _addfiles listfile name...
 #
 # Append each name to listfile (one per line) unless listfile already has a
 # line that is exactly that name.  listfile is created if it does not exist.
 # Returns 1 if no listfile was given, 0 otherwise.
 _addfiles ()
 {
     af=$1
     [ "$af" = "" ] && return 1
     [ ! -f "$af" ] && touch "$af"
     shift

     for fn in "$@"
     do
         # an empty name would only add a blank line
         [ -z "$fn" ] && continue

         # -F -x: literal, whole-line match, so a name that is merely a
         # substring of an existing entry (/etc/pcp vs /etc/pcp.conf) is
         # still added; -e protects names starting with "-"
         grep -F -x -s -e "$fn" "$af" >/dev/null
         # only status 1 means "not found"; 2 is a read error
         [ $? = 1 ] && echo "$fn" >>"$af"
     done

     return 0
 }
 148
 # Remove a stale lock: if $CHECKLOCK names a process ID for which ps shows
 # no process with "check" in its listing, delete the lock file (via $sudo).
 # An empty lock file is treated as stale too.  Always returns 0.
 _check_lock() {
     #  Check that a check process of that process ID found in
     #  $CHECKLOCK exists, and if not, release the lock.

     [ ! -f "$CHECKLOCK" ] && return 0
     PID=`cat "$CHECKLOCK" 2>/dev/null` || return 0

     #  Compare the pid column exactly; a substring match would also pick
     #  up other processes (e.g. 1234 when looking for 123) and make a live
     #  owner look dead.
     CCNT=`ps -e -o "pid args" |
       $PCP_AWK_PROG -v pid="$PID" '$1 == pid && /check/ { print $1 }'`
     if [ -z "$PID" ] || [ "$PID" != "$CCNT" ]
     then
         #  We can remove the lock; no check process found with that ID
         $sudo rm -f "$CHECKLOCK"
     fi

     return 0
 }
 166
 # Acquire the check lock ($CHECKLOCK) for this process ($mypid).
 #
 # Waits while another process owns the lock, following the sleep schedule
 # below, and returns 1 if the lock is still held by someone else when the
 # schedule is exhausted.  Returns 0 otherwise.
 _get_lock()
 {
     #  Does someone else have a lock on check at this time?  If so, we
     #  can't run a test until the lock is removed.
     #
     #  NOTE: the use of check-LOCK rather than check.pid was done so that
     #  people running check manually (rather than run.pcpqa running check)
     #  can have tests running between themselves.  This is better than
     #  having people waiting on one long series of tests passed to check
     #  and having spent 10 minutes waiting for nothing.

     #  Check that an instance of check who claims to have the lock actually
     #  exists!
     _check_lock

     #  Get (make) a lock
     echomessage=true
     for sleeptime in \
       1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
       1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
       1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
       1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
       2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \
       2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \
       2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \
       5 5 5 5 5 5 5 5 5 5 5 5 \
       5 5 5 5 5 5 5 5 5 5 5 5 \
       5 5 5 5 5 5 5 5 5 5 5 5 \
       5 5 5 5 5 5 5 5 5 5 5 5 \
       5 5 5 5 5 5 5 5 5 5 5 5 0  #  10 minutes waiting time per test...
     do
         if [ -f "$CHECKLOCK" ]
         then
             LOCKOWNER=`cat "$CHECKLOCK" 2>/dev/null` || continue

             if [ "$LOCKOWNER" != "$mypid" ]
             then
                 #  wait until lock disappears...
                 if [ "$sleeptime" = 0 ]
                 then
                     #  We should leave... something's terribly wrong.
                     echo ""
                     return 1
                 else
                     $echomessage && \
                       $PCP_ECHO_PROG $PCP_ECHO_N " waiting for lock [owner pid=$LOCKOWNER]... ""$PCP_ECHO_C" && \
                       echomessage=false
                     sleep $sleeptime
                 fi
             else
                 #  already have lock
                 break
             fi
         else
             #  make lock; with noclobber (set -C) the redirection fails if
             #  another check created the file since the -f test above, so
             #  two processes cannot both believe they own the lock.  On
             #  failure just go around again and wait like everyone else.
             if ( set -C; echo "$mypid" >"$CHECKLOCK" ) 2>/dev/null
             then
                 chmod a+r "$CHECKLOCK"
                 break
             fi
         fi
     done
     $echomessage || echo "got it; proceeding: $seq"

     return 0
 }
 231
 # Create $tmp.checkfiles, the list of configuration and control files that
 # _checksums watches, unless that file already exists.  Also supplies
 # defaults (under $PCP_SYSCONF_DIR) for the pmcd options and the pmlogger
 # and pmie control file paths when they are not already set.
 _make_checkfiles()
 {
     if [ ! -f "$tmp.checkfiles" ]
     then
         [ -z "$PCP_PMCDOPTIONS_PATH" ] && \
                 PCP_PMCDOPTIONS_PATH="$PCP_SYSCONF_DIR/pmcd/pmcd.options"
         [ -z "$PCP_PMLOGGERCONTROL_PATH" ] && \
                 PCP_PMLOGGERCONTROL_PATH="$PCP_SYSCONF_DIR/pmlogger/control"
         [ -z "$PCP_PMIECONTROL_PATH" ] && \
                 PCP_PMIECONTROL_PATH="$PCP_SYSCONF_DIR/pmie/control"
         # the pmie control file is listed here; the pmlogger control file
         # used to appear twice instead
         _checkfiles="$PCP_PMCDCONF_PATH \
                 $PCP_PMLOGGERCONTROL_PATH \
                 $PCP_VAR_DIR/config/pmlogger/config.default \
                 $PCP_PMIECONTROL_PATH \
                 $PCP_PMCDOPTIONS_PATH \
                 $PCP_DIR/etc/init.d/pcp \
                 $PCP_DIR/etc/pcp.conf $PCP_DIR/etc/pcp.env \
                 $PCP_PMDAS_DIR/sample/dsohelp.dir \
                 $PCP_PMDAS_DIR/sample/dsohelp.pag \
                 $PCP_PMDAS_DIR/sample/help.dir \
                 $PCP_PMDAS_DIR/sample/help.pag \
                 $PCP_PMDAS_DIR/simple/simple.conf"
         # actually record the list; $_checkfiles is deliberately unquoted
         # so that it splits into one argument per path
         _addfiles "$tmp.checkfiles" $_checkfiles
     fi
 }
 256
 257 _checksums()
 258 {
 259     cmd="$1"
 260
 261     _make_checkfiles
 262
 263     case "$cmd"
 264     in
 265         get)
 266             mkdir -p $tmp/checksums
 267             chmod a+w $tmp/checksums
 268             for f in `cat $tmp.checkfiles`
 269             do
 270                 buf=`echo $f | sed -e 's;/;+;g'`
 271                 buf=$tmp/checksums/$buf
 272                 [ -f $f ] && sum $f
 273                 [ -f $f -a ! -f $buf ] && $sudo cp $f $buf
 274             done
 275             ;;
 276
 277         check)
 278             for f in `cat $tmp.checkfiles`
 279             do
 280                 buf=`echo $f | sed -e 's;/;+;g'`
 281                 buf=$tmp/checksums/$buf
 282                 if [ ! -f $f ]
 283                 then
 284                     if fgrep "$f" $2 >/dev/null 2>&1
 285                     then
 286                         echo "    Missing: \"$f\""
 287                         [ -f $buf ] && $sudo cp -f $buf $f
 288                     fi
 289                 else
 290                     _cs=`sum $f`
 291                     if fgrep "$_cs" $2 >/dev/null 2>&1
 292                     then
 293                         $sudo rm -f $f.$seq.O
 294                     else
 295                         echo "    Changed: \"$f\""
 296                         $sudo cp -f $f $f.$seq.O
 297                         [ -f $buf ] && $sudo cp -f $buf $f
 298                     fi
 299                 fi
 300
 301             done
 302             ;;
 303
 304         *)
 305             bozo
 306             ;;
 307     esac
 308     return 0
 309 }
 310
 # Always finish through _wrapup: on normal exit (0) and on signals 1, 2, 3
 # and 15.  $status is expanded when the trap runs, not when it is set.
 trap '_wrapup; exit $status' 0 1 2 3 15

 # timestamps in the output are off unless asked for
 timestamp=false

 # QA run tracing, switched by $qatrace
 qatrace=false
 qadepot=mazur.melbourne
 qasrc=`hostname`

 # state values reported via pmtrace - meaningful as state transitions
 # in qavis
 qanotyet=1      # test not yet started
 qarunning=2     # test still going
 qafailed=3      # test failed
 qapassed=4      # test passed

 PCP_TRACE_TIMEOUT=15
 export PCP_TRACE_TIMEOUT
 329
 330 # generic initialization... this may take a while to run, because (unless
 331 # $quick is true) make is run.
 332 . ./common
 333
 # we have to cheat a bit... but we need to create a check.[pid|sts] file
 # to tell hangcheck that we are alive, but not ready to run yet.
 #
 # Only done when $HANGCHECK is true and we run as user pcpqa.  A leftover
 # state file owned by anyone else is removed; one owned by pcpqa means
 # another run is probably active, so we refuse to start (exit status 1 via
 # the exit trap).
 if [ "$HANGCHECK" = true -a "$USER" = pcpqa ]
 then
     # for hangcheck ...
     # Save pid of check in a well known place, so that hangcheck can be sure it
     # has the right pid (getting the pid from ps output is not reliable enough).
     #
     if [ -f "$CHECKPID" ]
     then
         # owner is the third field of "ls -l"; run ls directly, since
         # /bin/sh "ls -l file" looks for a script with that name and
         # always produced an empty owner
         checkpidowner=`ls -l "$CHECKPID" | $PCP_AWK_PROG '{ print $3 }'`
         if [ "$checkpidowner" != pcpqa ]
         then
             $sudo rm -f "$CHECKPID"
         else
             #  There should be a BIG FAT WARNING here if QA is trying to
             #  run tests twice!
             echo "$myname: a check.pid file already exists... are you already running tests?!" >&2
             status=1
             exit
         fi
     fi
     [ ! -f "$CHECKPID" ] && echo "$mypid" >"$CHECKPID"

     # for hangcheck ...
     # Save the status of check in a well known place, so that hangcheck can be
     # sure to know where check is up to (getting test number from ps output is
     # not reliable enough since the trace stuff has been introduced).
     #
     if [ -f "$CHECKSTS" ]
     then
         checkpidowner=`ls -l "$CHECKSTS" | $PCP_AWK_PROG '{ print $3 }'`
         if [ "$checkpidowner" != pcpqa ]
         then
             $sudo rm -f "$CHECKSTS"
         else
             echo "$myname: a check.sts file already exists... are you already running tests?!" >&2
             status=1
             exit
         fi
     fi
     [ ! -f "$CHECKSTS" ] && echo "preamble" >"$CHECKSTS"
 fi
 377
 # make sure the per-test timing history exists
 if [ ! -f check.time ]
 then
     touch check.time
 fi

 # pmcd has to be configured on before any test runs
 if [ "`_get_config pmcd`" != on ]
 then
     _change_config pmcd on
 fi

 # no tracing when we are only listing the tests
 $showme && qatrace=false

 if $qatrace
 then
     # if tracing turned on, make sure trace agent running ok
     switchon=`pmprobe -h $qadepot trace.control.reset 2>&1 | $PCP_AWK_PROG '{ print $2 }'`
     if [ "$switchon" = "1" ]
     then
         # tag every test as "not yet started"
         for seq in $list
         do
             $verbose && printf "Preparing pmtrace tags: %-.16s:%s\r" \
               "$qasrc" "$seq"
             pmtrace -qh $qadepot -v $qanotyet "$qasrc:$seq" 2>/dev/null
         done
         # blank out the progress line
         $verbose && printf "%68s\r" " "
     else
         qatrace=false
     fi
 fi

 # progress counters for the main loop
 haverun=0
 torun=`echo $list | wc -w | sed -e 's/ //g'`
 407
 # Main loop: run every test in $list.
 #
 # For each test: take the check lock, optionally record config-file
 # checksums, run ./$seq with its output captured in $tmp.out, verify the
 # config files and release the lock, then judge the result.  A test fails
 # ($err) when it dumps core, exits non-zero, has no reference output
 # ($seq.out), differs from it, or when the permissions check (test 994)
 # reports anything.  Failures are collected in $bad/$n_bad, skipped tests
 # in $notrun, and timings of passing tests in $tmp.time.
 for seq in $list
 do
     err=false
     if $showme
     then
         echo $seq
         continue
     fi
     if [ $torun -gt 9 ]
     then
         # progress indicator, only shown for runs of 10 or more tests
         pct=`expr 100 \* $haverun / $torun`
         haverun=`expr $haverun + 1`
         $PCP_ECHO_PROG $PCP_ECHO_N "[$pct%] ""$PCP_ECHO_C"
     fi
     $PCP_ECHO_PROG $PCP_ECHO_N "$seq""$PCP_ECHO_C"
     if [ ! -f $seq ]
     then
         echo " [not run, missing]"
         notrun="$notrun $seq"
         continue
     else
         # really going to try and run this one
         #
         rm -f $seq.out.bad
         # elapsed time recorded for this test by an earlier run, if any
         lasttime=`sed -n -e "/^$seq /s/.* //p" <check.time`
         [ "X$lasttime" != X ] && $PCP_ECHO_PROG $PCP_ECHO_N " ${lasttime}s ...""$PCP_ECHO_C"
         rm -f core $seq.notrun

         # acquire lock
         _get_lock
         if [ $? != 0 ]
         then
             # diagnostics belong on stderr
             echo "$myname: could not acquire lock; exiting" >&2
             status=1
             exit
         fi

         if $check_config
         then
             # save checksums for critical conf and control files
             [ ! -f $tmp.checksums ] && _checksums get >$tmp.checksums
         fi

         start=`_wallclock`
         $timestamp && _timestamp

         # for hangcheck ...
         [ "$HANGCHECK" = true -a "$USER" = pcpqa ] && echo "$seq" >"$CHECKSTS"

         if $qatrace
         then
             pmtrace -qh $qadepot -v $qarunning "$qasrc:$seq" 2>/dev/null
             pmtrace -qh $qadepot -e "./$seq" "$qasrc:$seq" >$tmp.out.1 2>&1
             sts=$?
             # check for trace errors on first line of test & blow them away
             $PCP_AWK_PROG '/pmtrace: / {if (NR != 1) print $0; next} {print $0}' $tmp.out.1 > $tmp.out
         else
             ./$seq >$tmp.out 2>&1
             sts=$?
         fi
         $timestamp && _timestamp
         stop=`_wallclock`

         # for hangcheck ...
         [ "$HANGCHECK" = true -a "$USER" = pcpqa ] && echo "working" >"$CHECKSTS"

         if $check_config
         then
             # check the saved checksums
             _checksums check $tmp.checksums >$tmp.check
             if [ -s $tmp.check ]
             then
                 echo "$myname: $seq: ERROR: test failed to restore the following config files:" >>$tmp.out
                 cat $tmp.check >>$tmp.out
                 $PCP_ECHO_PROG $PCP_ECHO_N " [config not restored]""$PCP_ECHO_C"
             fi
         fi

         # remove the lock
         _release_lock

         if [ -f core ]
         then
             $PCP_ECHO_PROG $PCP_ECHO_N " [dumped core]""$PCP_ECHO_C"
             mv core $seq.core
             err=true
         fi

         if [ -f $seq.notrun ]
         then
             # the test decided not to run and left its reason in $seq.notrun
             [ "$color" = true ] && tput bold && tput setaf 4 # blue
             echo " [not run] `cat $seq.notrun`"
             [ "$color" = true ] && tput sgr0 # reset
             notrun="$notrun $seq"
         else
             if [ $sts -ne 0 ]
             then
                 $PCP_ECHO_PROG $PCP_ECHO_N " [failed, exit status $sts]""$PCP_ECHO_C"
                 err=true
             fi
             if [ ! -f $seq.out ]
             then
                 $PCP_ECHO_PROG $PCP_ECHO_N " - no qualified output""$PCP_ECHO_C"
                 mv $tmp.out $seq.out.bad
                 err=true
             else
                 if diff $seq.out $tmp.out >/dev/null 2>&1
                 then
                     if $err
                     then
                         :
                     else
                         # only clean passes contribute to the timing history
                         echo "$seq `expr $stop - $start`" >>$tmp.time
                     fi
                 else
                     [ "$color" = true ] && tput bold && tput setaf 1 # red
                     $PCP_ECHO_PROG $PCP_ECHO_N " - output mismatch (see $seq.out.bad)""$PCP_ECHO_C"
                     [ "$color" = true ] && tput sgr0 # reset
                     mv $tmp.out $seq.out.bad
                     $PCP_ECHO_PROG
                     # $diff is the command used to display differences
                     $diff $seq.out $seq.out.bad
                     err=true
                 fi
             fi

             # make sure this test did not muck up the permissions or
             # ownership of key installed files and directories
             #
             sh 994 --fix >$tmp.out
             if [ -s $tmp.out ]
             then
                 $PCP_ECHO_PROG $PCP_ECHO_N " - failed permissions check""$PCP_ECHO_C"
                 echo >$tmp.head
                 echo "*** Failed permissions/ownership checks ***" >>$tmp.head
                 if [ -f $seq.out.bad ]
                 then
                     cat $tmp.head $tmp.out >>$seq.out.bad
                 elif [ -f $seq.out ]
                 then
                     cp $seq.out $seq.out.bad
                     cat $tmp.head $tmp.out >>$seq.out.bad
                 else
                     cat $tmp.head $tmp.out >$seq.out.bad
                 fi
                 err=true
             fi
             $PCP_ECHO_PROG ""

             # really tried to run the test, update the state
             #
             # NOTE(review): these tags carry an extra ":$qaown" component
             # that the earlier pmtrace calls for the same test do not, and
             # $qaown is not set anywhere in the visible part of this
             # script - confirm the tags are meant to differ.
             if $qatrace
             then
                 if $err
                 then
                     pmtrace -qh $qadepot -v $qafailed "$qasrc:$seq:$qaown" 2>/dev/null
                 else
                     pmtrace -qh $qadepot -v $qapassed "$qasrc:$seq:$qaown" 2>/dev/null
                 fi
             fi
         fi
     fi

     # come here for each test, except when $showme is true
     #
     if $err
     then
         bad="$bad $seq"
         n_bad=`expr $n_bad + 1`
         quick=false
         # quoted: $diff may be empty or consist of several words
         [ "$diff" = true ] || echo "Check local PMCD is still alive ..."
         $OPTION_AGENTS && _haveagents
         $OPTION_LOGGER && _havelogger
     fi
     [ -f $seq.notrun ] || try=`expr $try + 1`
     rm -f $seq.notrun

     # optional callback
     #
     # run the script that was tested for, in the current directory,
     # rather than whatever check.callback $PATH might find
     [ -x ./check.callback ] && ./check.callback $seq
 done
 588
 # Normal completion: the whole list was processed, so the summary written
 # by _wrapup must not say "Aborted!".
 aborted=false
 # The exit status is the number of failed tests.  Exit codes are taken
 # modulo 256, so cap the value; otherwise 256 failures would be reported
 # as success.
 if [ "$n_bad" -gt 255 ] 2>/dev/null
 then
     status=255
 else
     status=$n_bad
 fi
 exit