qa/check

   1 #! /bin/sh
   2 #
   3 # Control script for running PCP QA tests
   4 #
   5 # Copyright (c) 1997-2002 Silicon Graphics, Inc.  All Rights Reserved.
   6 #
   7
   8 mypid=$$
   9 status=0
  10 needwrap=true
  11 try=0
  12 n_bad=0
  13 bad=""
  14 notrun=""
  15 interrupt=true
  16 myname=`basename $0`
  17 iam=$myname  #  a synonym
  18
  19 # status and log files
  20 CHECKLOCK=/tmp/check-LOCK
  21 CHECKSTS=/tmp/check.sts                 #  If you change these, hangcheck.pcpqa
  22 CHECKPID=/tmp/check.pid                 #  will need to change, too.
  23 CHECKSLOG=/var/tmp/check-start.log      #  A check.log already exists for
  24                                         #  another reason.
  25
  26
  27 _wallclock()
  28 {
  29     date "+%H %M %S" | $PCP_AWK_PROG '{ print $1*3600 + $2*60 + $3 }'
  30 }
  31
  32 _timestamp()
  33 {
  34     now=`date "+%D-%T"`
  35     $PCP_ECHO_PROG $PCP_ECHO_N " [$now]""$PCP_ECHO_C"
  36 }
  37
  38 _release_lock()
  39 {
  40     if [ -f "$CHECKLOCK" ]
  41     then
  42         LOCKOWNER=`cat "$CHECKLOCK" 2>/dev/null` || return 0
  43         [ "$LOCKOWNER" = "$mypid" ] && rm -f "$CHECKLOCK"
  44     fi
  45
  46     return 0
  47 }
  48
  49 _wrapup()
  50 {
  51     # for hangcheck ...
  52     # remove files that were used by hangcheck
  53     #
  54     if [ "$HANGCHECK" = true -a "$USER" = pcpqa ]
  55     then
  56         checkpid=`cat "$CHECKPID"`
  57         [ "$checkpid" = "$mypid" -a -f "$CHECKSTS" ] && rm -f "$CHECKSTS"
  58         [ "$checkpid" = "$mypid" -a -f "$CHECKPID" ] && rm -f "$CHECKPID"
  59     fi
  60
  61     if [ -z "$tmp" ]
  62     then
  63         # did not get very far into the intialization!
  64         :
  65     else
  66         # release the lock and remove backup files
  67         _release_lock
  68         [ -d $tmp ] && ( rm -rf $tmp/checksums ; rmdir $tmp )
  69
  70         if $showme
  71         then
  72             :
  73         elif $needwrap
  74         then
  75             if [ -f check.time -a -f $tmp.time ]
  76             then
  77                 cat check.time $tmp.time \
  78                 | $PCP_AWK_PROG '
  79         { t[$1] = $2 }
  80 END     { if (NR > 0) {
  81             for (i in t) print i " " t[i]
  82           }
  83         }' \
  84                 | sort -n >$tmp.out
  85                 mv $tmp.out check.time
  86             fi
  87
  88             echo "" >>check.log
  89             date >>check.log
  90             echo $list | fmt | sed -e 's/^/    /' >>check.log
  91             $interrupt && echo "Interrupted! [running $seq]" >>check.log
  92
  93             if [ ! -z "$notrun" ]
  94             then
  95                 [ $color = true ] && tput bold && tput setaf 4 # blue
  96                 echo "Not run:$notrun"
  97                 [ $color = true ] && tput sgr0 # reset
  98                 echo "Not run:$notrun" | fmt >>check.log
  99             fi
 100             if [ ! -z "$n_bad" -a "$n_bad" != 0 ]
 101             then
 102                 [ $color = true ] && tput bold && tput setaf 1 # red
 103                 echo "Failures:$bad"
 104                 echo "Failed $n_bad of $try tests"
 105                 [ $color = true ] && tput sgr0 # reset
 106                 echo "Failures:$bad" | fmt >>check.log
 107                 echo "Failed $n_bad of $try tests" >>check.log
 108             else
 109                 if [ $try != 0 ]
 110                 then
 111                     [ $color = true ] && tput bold && tput setaf 2 # green
 112                     echo "Passed all $try tests"
 113                     [ $color = true ] && tput sgr0 # reset
 114                     echo "Passed all $try tests" >>check.log
 115                 fi
 116             fi
 117             needwrap=false
 118         fi
 119
 120         rm -f $tmp.*
 121     fi
 122 }
 123
 124 _addfiles ()
 125 {
 126     af=$1
 127     [ "$af" = "" ] && return 1
 128     [ ! -f "$af" ] && touch "$af"
 129     shift
 130
 131     for fn in "$@"
 132     do
 133         fgrep -s "$fn" "$af" >/dev/null
 134         [ $? = 1 ] && echo "$fn" >>"$af"
 135     done
 136
 137     return 0
 138 }
 139
 140 _check_lock() {
 141     #  Check that a check process of that process ID found in
 142     #  $CHECKLOCK exists, and if not, release the lock.
 143
 144     [ ! -f "$CHECKLOCK" ] && return 0
 145     PID=`cat "$CHECKLOCK" 2>/dev/null` || return 0
 146
 147     CCNT=`ps -e -o "pid args" | grep -v grep | grep "$PID" | grep check | \
 148       $PCP_AWK_PROG '{ print $1 }'`
 149     if [ "$PID" != "$CCNT" ]
 150     then
 151         #  We can remove the lock; no check process found with that ID
 152         $sudo rm -f "$CHECKLOCK"
 153     fi
 154
 155     return 0
 156 }
 157
 158 _get_lock()
 159 {
 160     #  Does someone else have a lock on check at this time?  If so, we
 161     #  can't run a test until the lock is removed.
 162     #
 163     #  NOTE: the use of check-LOCK rather than check.pid was done so that
 164     #  people running check manually (rather than run.pcpqa running check)
 165     #  can have tests running between themselves.  This is better than
 166     #  having people waiting on one long series of tests passed to check
 167     #  and having spent 10 minutes waiting for nothing.
 168
 169     #  Check that an instance of check who claims to have the lock actually
 170     #  exists!
 171     _check_lock
 172
 173     #  Get (make) a lock
 174     echomessage=true
 175     for sleeptime in \
 176       1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
 177       1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
 178       1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
 179       1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
 180       2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \
 181       2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \
 182       2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \
 183       5 5 5 5 5 5 5 5 5 5 5 5 \
 184       5 5 5 5 5 5 5 5 5 5 5 5 \
 185       5 5 5 5 5 5 5 5 5 5 5 5 \
 186       5 5 5 5 5 5 5 5 5 5 5 5 \
 187       5 5 5 5 5 5 5 5 5 5 5 5 0  #  10 minutes waiting time per test...
 188     do
 189         if [ -f "$CHECKLOCK" ]
 190         then
 191             LOCKOWNER=`cat "$CHECKLOCK" 2>/dev/null` || continue
 192
 193             if [ "$LOCKOWNER" != $mypid ]
 194             then
 195                 #  wait until lock disappears...
 196                 if [ "$sleeptime" = 0 ]
 197                 then
 198                     #  We should leave... something's terribly wrong.
 199                     echo ""
 200                     return 1
 201                 else
 202                     $echomessage && \
 203                       $PCP_ECHO_PROG $PCP_ECHO_N " waiting for lock [owner pid=$LOCKOWNER]... ""$PCP_ECHO_C" && \
 204                       echomessage=false
 205                     sleep $sleeptime
 206                 fi
 207             else
 208                 #  already have lock
 209                 break
 210             fi
 211         else
 212             #  make lock
 213             echo "$mypid" >"$CHECKLOCK"
 214             chmod a+r "$CHECKLOCK"
 215             break
 216         fi
 217     done
 218     $echomessage || echo "got it; proceeding: $seq"
 219
 220     return 0
 221 }
 222
 223 _make_checkfiles()
 224 {
 225     if [ ! -f $tmp.checkfiles ]
 226     then
 227         [ -z "$PCP_PMCDOPTIONS_PATH" ] && \
 228                 PCP_PMCDOPTIONS_PATH="$PCP_SYSCONF_DIR/pmcd/pmcd.options"
 229         [ -z "$PCP_PMLOGGERCONTROL_PATH" ] && \
 230                 PCP_PMLOGGERCONTROL_PATH="$PCP_SYSCONF_DIR/pmlogger/control"
 231         [ -z "$PCP_PMIECONTROL_PATH" ] && \
 232                 PCP_PMIECONTROL_PATH="$PCP_SYSCONF_DIR/pmie/control"
 233         _checkfiles="$PCP_PMCDCONF_PATH \
 234                 $PCP_PMLOGGERCONTROL_PATH \
 235                 $PCP_VAR_DIR/config/pmlogger/config.default \
 236                 $PCP_PMLOGGERCONTROL_PATH \
 237                 $PCP_PMCDOPTIONS_PATH \
 238                 $PCP_DIR/etc/init.d/pcp \
 239                 $PCP_DIR/etc/pcp.conf $PCP_DIR/etc/pcp.env \
 240                 $PCP_PMDAS_DIR/sample/dsohelp.dir \
 241                 $PCP_PMDAS_DIR/sample/dsohelp.pag \
 242                 $PCP_PMDAS_DIR/sample/help.dir \
 243                 $PCP_PMDAS_DIR/sample/help.pag \
 244                 $PCP_PMDAS_DIR/simple/simple.conf"
 245     fi
 246 }
 247
 248 _checksums()
 249 {
 250     cmd="$1"
 251
 252     _make_checkfiles
 253
 254     case "$cmd"
 255     in
 256         get)
 257             mkdir -p $tmp/checksums
 258             chmod a+w $tmp/checksums
 259             for f in `cat $tmp.checkfiles`
 260             do
 261                 buf=`echo $f | sed -e 's;/;+;g'`
 262                 buf=$tmp/checksums/$buf
 263                 [ -f $f ] && sum $f
 264                 [ -f $f -a ! -f $buf ] && $sudo cp $f $buf
 265             done
 266             ;;
 267
 268         check)
 269             for f in `cat $tmp.checkfiles`
 270             do
 271                 buf=`echo $f | sed -e 's;/;+;g'`
 272                 buf=$tmp/checksums/$buf
 273                 if [ ! -f $f ]
 274                 then
 275                     if fgrep "$f" $2 >/dev/null 2>&1
 276                     then
 277                         echo "    Missing: \"$f\""
 278                         [ -f $buf ] && $sudo cp -f $buf $f
 279                     fi
 280                 else
 281                     _cs=`sum $f`
 282                     if fgrep "$_cs" $2 >/dev/null 2>&1
 283                     then
 284                         $sudo rm -f $f.$seq.O
 285                     else
 286                         echo "    Changed: \"$f\""
 287                         $sudo cp -f $f $f.$seq.O
 288                         [ -f $buf ] && $sudo cp -f $buf $f
 289                     fi
 290                 fi
 291
 292             done
 293             ;;
 294
 295         *)
 296             bozo
 297             ;;
 298     esac
 299     return 0
 300 }
 301
 302 trap "_wrapup; exit \$status" 0 1 2 3 15
 303
 304 # by default don't output timestamps
 305 timestamp=false
 306
 307 # extra stuff for tracing QA runs       - off/on via $qatrace
 308 qatrace=false
 309 qadepot=mazur.melbourne
 310 qasrc=`hostname`
 311 # constants - meaningful as state transitions in qavis
 312 qanotyet=1      # test not yet started
 313 qarunning=2     # test still going
 314 qafailed=3      # test failed
 315 qapassed=4      # test passed
 316
 317
 318 PCP_TRACE_TIMEOUT=15
 319 export PCP_TRACE_TIMEOUT
 320
 321 # generic initialization... this may take a while to run, because (unless
 322 # $quick is true) make is run.
 323 . ./common
 324
 325 # we have to cheat a bit... but we need to create a check.[pid|sts] file
 326 # to tell hangcheck that we are alive, but not ready to run yet.
 327 if [ "$HANGCHECK" = true -a "$USER" = pcpqa ]
 328 then
 329     # for hangcheck ...
 330     # Save pid of check in a well known place, so that hangcheck can be sure it
 331     # has the right pid (getting the pid from ps output is not reliable enough).
 332     #
 333     if [ -f "$CHECKPID" ]
 334     then
 335         checkpidowner=`/bin/sh "ls -l $CHECKPID" | $PCP_AWK_PROG '{ print $3 }'`
 336         if [ "$checkpidowner" != pcpqa ]
 337         then
 338             $sudo rm -f "$CHECKPID"
 339         else
 340             #  There should be a BIG FAT WARNING here if QA is trying to
 341             #  run tests twice!
 342             echo "$myname: a check.pid file already exists... are you already running tests?!" >&2
 343             exit 1
 344         fi
 345     fi
 346     [ ! -f "$CHECKPID" ] && echo "$mypid" >"$CHECKPID"
 347
 348     # for hangcheck ...
 349     # Save the status of check in a well known place, so that hangcheck can be
 350     # sure to know where check is up to (getting test number from ps output is
 351     # not reliable enough since the trace stuff has been introduced).
 352     #
 353     if [ -f "$CHECKSTS" ]
 354     then
 355         checkpidowner=`/bin/sh "ls -l $CHECKSTS" | $PCP_AWK_PROG '{ print $3 }'`
 356         if [ "$checkpidowner" != pcpqa ]
 357         then
 358             $sudo rm -f "$CHECKSTS"
 359         else
 360             echo "$myname: a check.sts file already exists... are you already running tests?!" >&2
 361             exit 1
 362         fi
 363     fi
 364     [ ! -f "$CHECKSTS" ] && echo "preamble" >"$CHECKSTS"
 365 fi
 366
 367 [ -f check.time ] || touch check.time
 368
 369 [ "`_get_config pmcd`" != on ] && _change_config pmcd on
 370
 371 if $showme
 372 then
 373     qatrace=false
 374 fi
 375
 376 if $qatrace
 377 then
 378     # if tracing turned on, make sure trace agent running ok
 379     switchon=`pmprobe -h $qadepot trace.control.reset 2>&1 | $PCP_AWK_PROG '{ print $2 }'`
 380     [ "$switchon" != "1" ] && qatrace=false
 381 fi
 382
 383 if $qatrace
 384 then
 385     for seq in $list
 386     do
 387         $verbose && printf "Preparing pmtrace tags: %-.16s:%s\r" \
 388           "$qasrc" "$seq"
 389         pmtrace -qh $qadepot -v $qanotyet "$qasrc:$seq" 2>/dev/null
 390     done
 391     $verbose && printf "%68s\r" " "
 392 fi
 393
 394 torun=`echo $list | wc -w | sed -e 's/ //g'`
 395 haverun=0
 396
 397 for seq in $list
 398 do
 399     err=false
 400     if $showme
 401     then
 402         echo $seq
 403         continue
 404     fi
 405     if [ $torun -gt 9 ]
 406     then
 407         pct=`expr 100 \* $haverun / $torun`
 408         haverun=`expr $haverun + 1`
 409         $PCP_ECHO_PROG $PCP_ECHO_N "[$pct%] ""$PCP_ECHO_C"
 410     fi
 411     $PCP_ECHO_PROG $PCP_ECHO_N "$seq""$PCP_ECHO_C"
 412     if [ ! -f $seq ]
 413     then
 414         echo " [not run, missing]"
 415         notrun="$notrun $seq"
 416         continue
 417     else
 418         # really going to try and run this one
 419         #
 420         rm -f $seq.out.bad
 421         lasttime=`sed -n -e "/^$seq /s/.* //p" <check.time`
 422         [ "X$lasttime" != X ] && $PCP_ECHO_PROG $PCP_ECHO_N " ${lasttime}s ...""$PCP_ECHO_C"
 423         rm -f core $seq.notrun
 424
 425         # acquire lock
 426         _get_lock
 427         if [ $? != 0 ]
 428         then echo "$myname: could not acquire lock; exiting" 2>&1
 429         fi
 430
 431         if $check_config
 432         then
 433             # save checksums for critical conf and control files
 434             [ ! -f $tmp.checksums ] && _checksums get >$tmp.checksums
 435         fi
 436
 437         start=`_wallclock`
 438         $timestamp && _timestamp
 439
 440         # for hangcheck ...
 441         [ "$HANGCHECK" = true -a "$USER" = pcpqa ] && echo "$seq" >"$CHECKSTS"
 442
 443         if $qatrace
 444         then
 445             pmtrace -qh $qadepot -v $qarunning "$qasrc:$seq" 2>/dev/null
 446             pmtrace -qh $qadepot -e "./$seq" "$qasrc:$seq" >$tmp.out.1 2>&1
 447             sts=$?
 448             # check for trace errors on first line of test & blow them away
 449             $PCP_AWK_PROG '/pmtrace: / {if (NR != 1) print $0; next} {print $0}' $tmp.out.1 > $tmp.out
 450         else
 451             ./$seq >$tmp.out 2>&1
 452             sts=$?
 453         fi
 454         $timestamp && _timestamp
 455         stop=`_wallclock`
 456
 457         # for hangcheck ...
 458         [ "$HANGCHECK" = true -a "$USER" = pcpqa ] && echo "working" >"$CHECKSTS"
 459
 460         if $check_config
 461         then
 462             # check the saved checksums
 463             _checksums check $tmp.checksums >$tmp.check
 464             if [ -s $tmp.check ]
 465             then
 466                 echo "$myname: $seq: ERROR: test failed to restore the following config files:" >>$tmp.out
 467                 cat $tmp.check >>$tmp.out
 468                 $PCP_ECHO_PROG $PCP_ECHO_N " [config not restored]""$PCP_ECHO_C"
 469             fi
 470         fi
 471
 472         # remove the lock
 473         _release_lock
 474
 475         if [ -f core ]
 476         then
 477             $PCP_ECHO_PROG $PCP_ECHO_N " [dumped core]""$PCP_ECHO_C"
 478             mv core $seq.core
 479             err=true
 480         fi
 481
 482         if [ -f $seq.notrun ]
 483         then
 484             [ $color = true ] && tput bold && tput setaf 4 # blue
 485             echo " [not run] `cat $seq.notrun`"
 486             [ $color = true ] && tput sgr0 # reset
 487             notrun="$notrun $seq"
 488         else
 489             if [ $sts -ne 0 ]
 490             then
 491                 $PCP_ECHO_PROG $PCP_ECHO_N " [failed, exit status $sts]""$PCP_ECHO_C"
 492                 err=true
 493             fi
 494             if [ ! -f $seq.out ]
 495             then
 496                 $PCP_ECHO_PROG $PCP_ECHO_N " - no qualified output""$PCP_ECHO_C"
 497                 mv $tmp.out $seq.out.bad
 498                 err=true
 499             else
 500                 if diff $seq.out $tmp.out >/dev/null 2>&1
 501                 then
 502                     if $err
 503                     then
 504                         :
 505                     else
 506                         echo "$seq `expr $stop - $start`" >>$tmp.time
 507                     fi
 508                 else
 509                     [ $color = true ] && tput bold && tput setaf 1 # red
 510                     $PCP_ECHO_PROG $PCP_ECHO_N " - output mismatch (see $seq.out.bad)""$PCP_ECHO_C"
 511                     [ $color = true ] && tput sgr0 # reset
 512                     mv $tmp.out $seq.out.bad
 513                     $PCP_ECHO_PROG
 514                     $diff $seq.out $seq.out.bad
 515                     err=true
 516                 fi
 517             fi
 518
 519             # make sure this test did not muck up the permissions or
 520             # ownership of key installed files and directories
 521             #
 522             sh 994 --fix >$tmp.out
 523             if [ -s $tmp.out ]
 524             then
 525                 $PCP_ECHO_PROG $PCP_ECHO_N " - failed permissions check""$PCP_ECHO_C"
 526                 echo >$tmp.head
 527                 echo "*** Failed permissions/ownership checks ***" >>$tmp.head
 528                 if [ -f $seq.out.bad ]
 529                 then
 530                     cat $tmp.head $tmp.out >>$seq.out.bad
 531                 elif [ -f $seq.out ]
 532                 then
 533                     cp $seq.out $seq.out.bad
 534                     cat $tmp.head $tmp.out >>$seq.out.bad
 535                 else
 536                     cat $tmp.head $tmp.out >$seq.out.bad
 537                 fi
 538                 err=true
 539             fi
 540             $PCP_ECHO_PROG ""
 541
 542             # really tried to run the test, update the state
 543             #
 544             if $qatrace
 545             then
 546                 if $err
 547                 then
 548                     pmtrace -qh $qadepot -v $qafailed "$qasrc:$seq:$qaown" 2>/dev/null
 549                 else
 550                     pmtrace -qh $qadepot -v $qapassed "$qasrc:$seq:$qaown" 2>/dev/null
 551                 fi
 552             fi
 553         fi
 554     fi
 555
 556     # come here for each test, except when $showme is true
 557     #
 558     if $err
 559     then
 560         bad="$bad $seq"
 561         n_bad=`expr $n_bad + 1`
 562         quick=false
 563         [ $diff = true ] || echo "Check local PMCD is still alive ..."
 564         $OPTION_AGENTS && _haveagents
 565         $OPTION_LOGGER && _havelogger
 566     fi
 567     [ -f $seq.notrun ] || try=`expr $try + 1`
 568     rm -f $seq.notrun
 569
 570     # optional callback
 571     #
 572     [ -x check.callback ] && check.callback $seq
 573 done
 574
 575 interrupt=false
 576 status=$n_bad
 577 exit