qa: revert some change to expected 244 outputs (from pmda-restart)
[pcp.git] / qa / check
blob5d563197a8e7c736621bd8e97009c20d6b817a00
#! /bin/sh
#
# Control script for running PCP QA tests
#
# Copyright (c) 1997-2002 Silicon Graphics, Inc. All Rights Reserved.
#

# Run state.  These are updated as tests are run and are read back by
# _wrapup when the script exits.
mypid=$$            # pid of this check; identifies lock and pid file owner
status=0            # exit status handed to "exit" by the exit trap
needwrap=true       # set to false once _wrapup has written its summary
try=0               # number of tests attempted
n_bad=0             # number of tests that failed
bad=""              # space separated list of failed tests
notrun=""           # space separated list of tests that were not run
seq=''              # test currently being run, empty during setup
aborted=true        # set to false only after the test loop completes
# quote $0 so an invocation path containing white space does not get
# split into several basename arguments
myname=`basename "$0"`
iam=$myname         # a synonym

# status and log files
CHECKLOCK=/tmp/check-LOCK
CHECKSTS=/tmp/check.sts             # If you change these, hangcheck.pcpqa
CHECKPID=/tmp/check.pid             # will need to change, too.
CHECKSLOG=/var/tmp/check-start.log  # A check.log already exists for
                                    # another reason.
# _wallclock - print the local time of day on stdout as a count of
# seconds since midnight (hours*3600 + minutes*60 + seconds).
#
# The value restarts from 0 at midnight, so the difference between two
# readings taken either side of midnight is negative.
#
# Uses $PCP_AWK_PROG, or plain awk when that variable is not set.
#
_wallclock()
{
    date "+%H %M %S" | ${PCP_AWK_PROG:-awk} '{ print $1*3600 + $2*60 + $3 }'
}
33 _timestamp()
35 now=`date "+%D-%T"`
36 $PCP_ECHO_PROG $PCP_ECHO_N " [$now]""$PCP_ECHO_C"
# _release_lock - remove $CHECKLOCK, but only if this process owns it.
#
# The lock file holds the pid of its owner; it is removed only when that
# pid equals $mypid.  A missing or unreadable lock file is not an error.
# Always returns 0.
#
_release_lock()
{
    if [ -f "$CHECKLOCK" ]
    then
        # the file may vanish between the test and the read
        LOCKOWNER=`cat "$CHECKLOCK" 2>/dev/null` || return 0
        [ "$LOCKOWNER" = "$mypid" ] && rm -f "$CHECKLOCK"
    fi

    return 0
}
# _wrapup - clean up and report; run from the exit/signal trap.
#
# Steps, in order:
#  - when running under hangcheck as user pcpqa, remove $CHECKSTS and
#    $CHECKPID if $CHECKPID names this process
#  - if $tmp is set: release the lock, remove the $tmp directory and its
#    checksums, and (unless $showme, and only while $needwrap is true)
#    merge $tmp.time into check.time, append a summary to check.log and
#    print the not-run / failure / pass summary
#  - remove all $tmp.* files
#
# Reads: HANGCHECK USER CHECKPID CHECKSTS mypid tmp showme needwrap list
#        aborted seq notrun n_bad bad try color PCP_AWK_PROG
# Sets:  needwrap=false once the summary has been written, so a second
#        call does not repeat it.
#
_wrapup()
{
    # for hangcheck ...
    # remove files that were used by hangcheck
    #
    if [ "$HANGCHECK" = true -a "$USER" = pcpqa ]
    then
        # the pid file may already be gone, that is not worth a message
        checkpid=`cat "$CHECKPID" 2>/dev/null`
        [ "$checkpid" = "$mypid" -a -f "$CHECKSTS" ] && rm -f "$CHECKSTS"
        [ "$checkpid" = "$mypid" -a -f "$CHECKPID" ] && rm -f "$CHECKPID"
    fi

    if [ -z "$tmp" ]
    then
        # did not get very far into the initialization!
        :
    else
        # release the lock and remove backup files
        _release_lock
        [ -d "$tmp" ] && ( rm -rf "$tmp/checksums" ; rmdir "$tmp" )

        if $showme
        then
            :
        elif $needwrap
        then
            if [ -f check.time -a -f "$tmp.time" ]
            then
                # later entries win, so times from this run replace
                # the older ones for the same test
                cat check.time "$tmp.time" \
                | $PCP_AWK_PROG '
        { t[$1] = $2 }
END     { if (NR > 0) {
            for (i in t) print i " " t[i]
          }
        }' \
                | sort -n >"$tmp.out"
                mv "$tmp.out" check.time
            fi

            echo "" >>check.log
            date >>check.log
            echo $list | fmt | sed -e 's/^/ /' >>check.log
            if $aborted
            then
                if [ -z "$seq" ]
                then
                    echo "Aborted! [during setup]" >>check.log
                else
                    echo "Aborted! [running $seq]" >>check.log
                fi
            fi

            if [ ! -z "$notrun" ]
            then
                [ "$color" = true ] && tput bold && tput setaf 4 # blue
                echo "Not run:$notrun"
                [ "$color" = true ] && tput sgr0 # reset
                echo "Not run:$notrun" | fmt >>check.log
            fi
            if [ ! -z "$n_bad" -a "$n_bad" != 0 ]
            then
                [ "$color" = true ] && tput bold && tput setaf 1 # red
                echo "Failures:$bad"
                echo "Failed $n_bad of $try tests"
                [ "$color" = true ] && tput sgr0 # reset
                echo "Failures:$bad" | fmt >>check.log
                echo "Failed $n_bad of $try tests" >>check.log
            else
                if [ "$try" != 0 ]
                then
                    [ "$color" = true ] && tput bold && tput setaf 2 # green
                    echo "Passed all $try tests"
                    [ "$color" = true ] && tput sgr0 # reset
                    echo "Passed all $try tests" >>check.log
                fi
            fi
            needwrap=false
        fi

        rm -f "$tmp".*
    fi
}
# _addfiles - add names to a list file, one per line, skipping any name
# that is already listed.
#
# Usage: _addfiles listfile name ...
#
# The list file is created if it does not exist.  A name counts as
# already present only when some line of the file is exactly that name;
# a substring match would wrongly treat "/etc/pcp" as present once
# "/etc/pcp.conf" has been added.
#
# Returns 1 if no list file was given, else 0.
#
_addfiles ()
{
    af=$1
    [ "$af" = "" ] && return 1
    [ ! -f "$af" ] && touch "$af"
    shift

    for fn in "$@"
    do
        # -F: fixed string, -x: whole line, --: name may start with "-"
        grep -F -x -- "$fn" "$af" >/dev/null 2>&1
        # status 1 is "no match"; on a grep error (status 2) add nothing
        [ $? = 1 ] && echo "$fn" >>"$af"
    done

    return 0
}
# _check_lock - remove a stale $CHECKLOCK.
#
# The lock file holds the pid of the check process that owns it.  If no
# process with exactly that pid and with "check" somewhere in its ps
# line exists, the lock is removed (via $sudo).  An empty lock file is
# treated as stale.
#
# The pid comparison is exact: with a substring match a lock owned by
# pid 123 would also match pid 1234, and the mismatch between the lock
# pid and the multi-pid result would delete a live owner's lock.
#
# Always returns 0.
#
_check_lock() {
    # Check that a check process of that process ID found in
    # $CHECKLOCK exists, and if not, release the lock.
    #
    [ ! -f "$CHECKLOCK" ] && return 0
    PID=`cat "$CHECKLOCK" 2>/dev/null` || return 0

    CCNT=""
    if [ -n "$PID" ]
    then
        CCNT=`ps -e -o "pid args" \
            | ${PCP_AWK_PROG:-awk} -v pid="$PID" '$1 == pid && /check/ { print $1; exit }'`
    fi
    if [ -z "$PID" -o "$PID" != "$CCNT" ]
    then
        # We can remove the lock; no check process found with that ID
        $sudo rm -f "$CHECKLOCK"
    fi

    return 0
}
# _get_lock - acquire the check lock ($CHECKLOCK) for this process.
#
# A stale lock is first cleared by _check_lock.  Then, for up to ten
# minutes (120 x 1s, 90 x 2s, 60 x 5s), the lock file is polled:
#  - no lock file: create it holding $mypid and stop
#  - lock file holds $mypid: we already own it, stop
#  - lock file holds another pid: report once, sleep and retry
# The final list entry, 0, marks the end of the waiting period.
#
# Returns 0 when the loop ends normally, 1 if another process still
# holds the lock once the waiting time is used up.
#
# NOTE(review): creating the lock is a test followed by a write, so two
# check processes starting together could both believe they hold the
# lock - confirm whether that matters for the way check is run.
#
_get_lock()
{
    # Does someone else have a lock on check at this time? If so, we
    # can't run a test until the lock is removed.
    #
    # NOTE: the use of check-LOCK rather than check.pid was done so that
    # people running check manually (rather than run.pcpqa running check)
    # can have tests running between themselves. This is better than
    # having people waiting on one long series of tests passed to check
    # and having spent 10 minutes waiting for nothing.
    #
    # Check that an instance of check who claims to have the lock actually
    # exists!
    _check_lock

    # Get (make) a lock
    echomessage=true
    for sleeptime in \
        1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
        1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
        1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
        1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \
        2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \
        2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \
        2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \
        5 5 5 5 5 5 5 5 5 5 5 5 \
        5 5 5 5 5 5 5 5 5 5 5 5 \
        5 5 5 5 5 5 5 5 5 5 5 5 \
        5 5 5 5 5 5 5 5 5 5 5 5 \
        5 5 5 5 5 5 5 5 5 5 5 5 0       # 10 minutes waiting time per test...
    do
        if [ -f "$CHECKLOCK" ]
        then
            # the owner may remove the lock between the test and the read
            LOCKOWNER=`cat "$CHECKLOCK" 2>/dev/null` || continue

            if [ "$LOCKOWNER" != "$mypid" ]
            then
                # wait until lock disappears...
                if [ "$sleeptime" = 0 ]
                then
                    # We should leave... something's terribly wrong.
                    echo ""
                    return 1
                else
                    # only announce the wait once
                    $echomessage && \
                        $PCP_ECHO_PROG $PCP_ECHO_N " waiting for lock [owner pid=$LOCKOWNER]... ""$PCP_ECHO_C" && \
                        echomessage=false
                    sleep $sleeptime
                fi
            else
                # already have lock
                break
            fi
        else
            # make lock
            echo "$mypid" >"$CHECKLOCK"
            chmod a+r "$CHECKLOCK"
            break
        fi
    done
    # finish the "waiting for lock" line if one was started
    $echomessage || echo "got it; proceeding: $seq"

    return 0
}
# _make_checkfiles - create $tmp.checkfiles, the list of configuration
# and control files whose contents _checksums saves and verifies.
#
# Does nothing if $tmp.checkfiles already exists.  Otherwise the pmcd
# options, pmlogger control and pmie control paths are given defaults
# below $PCP_SYSCONF_DIR if they are not set, the list is built in
# $_checkfiles and each name is added to $tmp.checkfiles by _addfiles.
#
# The pmie control file is included in the list; its default is set up
# here for that purpose, and the pmlogger control file is listed once.
#
_make_checkfiles()
{
    if [ ! -f "$tmp.checkfiles" ]
    then
        [ -z "$PCP_PMCDOPTIONS_PATH" ] && \
            PCP_PMCDOPTIONS_PATH="$PCP_SYSCONF_DIR/pmcd/pmcd.options"
        [ -z "$PCP_PMLOGGERCONTROL_PATH" ] && \
            PCP_PMLOGGERCONTROL_PATH="$PCP_SYSCONF_DIR/pmlogger/control"
        [ -z "$PCP_PMIECONTROL_PATH" ] && \
            PCP_PMIECONTROL_PATH="$PCP_SYSCONF_DIR/pmie/control"
        _checkfiles="$PCP_PMCDCONF_PATH \
            $PCP_PMLOGGERCONTROL_PATH \
            $PCP_VAR_DIR/config/pmlogger/config.default \
            $PCP_PMIECONTROL_PATH \
            $PCP_PMCDOPTIONS_PATH \
            $PCP_DIR/etc/init.d/pcp \
            $PCP_DIR/etc/pcp.conf $PCP_DIR/etc/pcp.env \
            $PCP_PMDAS_DIR/sample/dsohelp.dir \
            $PCP_PMDAS_DIR/sample/dsohelp.pag \
            $PCP_PMDAS_DIR/sample/help.dir \
            $PCP_PMDAS_DIR/sample/help.pag \
            $PCP_PMDAS_DIR/simple/simple.conf"
        # $_checkfiles is deliberately unquoted: one argument per file
        _addfiles "$tmp.checkfiles" $_checkfiles
    fi
}
# _checksums - save or verify checksums of the files in $tmp.checkfiles.
#
# Usage: _checksums get
#        _checksums check sumsfile
#
# get    For every listed file that exists, write its sum(1) output to
#        stdout and, if no backup exists yet, copy the file to
#        $tmp/checksums (the "/" in its path replaced by "+").
# check  Compare every listed file with "sumsfile", the saved output of
#        an earlier "get":
#         - a file that has gone missing, and whose name appears in
#           sumsfile, is reported and restored from its backup
#         - a file whose sum(1) output is not in sumsfile is reported,
#           saved as <file>.$seq.O and restored from its backup
#         - for an unchanged file any old <file>.$seq.O is removed
#        Reports are written to stdout.
#
# Any other command writes a diagnostic to stderr.  Always returns 0.
#
# NOTE(review): the "missing" report needs the file name to appear in
# the saved sum(1) output; whether sum prints the name for a single
# file differs between platforms - confirm on the systems QA runs on.
#
_checksums()
{
    cmd="$1"

    _make_checkfiles

    case "$cmd"
    in
        get)
            mkdir -p "$tmp/checksums"
            chmod a+w "$tmp/checksums"
            for f in `cat "$tmp.checkfiles"`
            do
                # backup name: the path with every "/" turned into "+"
                buf=`echo "$f" | sed -e 's;/;+;g'`
                buf=$tmp/checksums/$buf
                [ -f "$f" ] && sum "$f"
                [ -f "$f" -a ! -f "$buf" ] && $sudo cp "$f" "$buf"
            done
            ;;

        check)
            for f in `cat "$tmp.checkfiles"`
            do
                buf=`echo "$f" | sed -e 's;/;+;g'`
                buf=$tmp/checksums/$buf
                if [ ! -f "$f" ]
                then
                    if grep -F -- "$f" "$2" >/dev/null 2>&1
                    then
                        echo " Missing: \"$f\""
                        [ -f "$buf" ] && $sudo cp -f "$buf" "$f"
                    fi
                else
                    _cs=`sum "$f"`
                    if grep -F -- "$_cs" "$2" >/dev/null 2>&1
                    then
                        $sudo rm -f "$f.$seq.O"
                    else
                        echo " Changed: \"$f\""
                        # keep the modified copy for post-mortem
                        $sudo cp -f "$f" "$f.$seq.O"
                        [ -f "$buf" ] && $sudo cp -f "$buf" "$f"
                    fi
                fi
            done
            ;;

        *)
            echo "_checksums: unknown command \"$cmd\"" >&2
            ;;
    esac
    return 0
}
# Always finish through _wrapup, on normal exit (0) as well as on
# HUP, INT, QUIT and TERM, then exit with whatever $status holds at
# that time.
trap '_wrapup; exit $status' 0 1 2 3 15

# timestamps are not output unless asked for
timestamp=false

# tracing of QA runs - switched by $qatrace, off by default
qatrace=false
qasrc=`hostname`
qadepot=mazur.melbourne

# test states - meaningful as state transitions in qavis
qanotyet=1      # test not yet started
qarunning=2     # test still going
qafailed=3      # test failed
qapassed=4      # test passed

PCP_TRACE_TIMEOUT=15
export PCP_TRACE_TIMEOUT

# generic initialization... this may take a while to run, because (unless
# $quick is true) make is run.
. ./common
# we have to cheat a bit... but we need to create a check.[pid|sts] file
# to tell hangcheck that we are alive, but not ready to run yet.
#
# Only done when $HANGCHECK is true and the user is pcpqa.  For each of
# $CHECKPID and $CHECKSTS: a left-over file owned by somebody other than
# pcpqa is removed, while one owned by pcpqa means another QA run looks
# to be active, so we complain and exit with status 1.
#
# The owner is the third field of "ls -l".  ls has to be run directly:
# handing the string "ls -l file" to /bin/sh as a script name fails,
# which leaves the owner empty and so never detects a running check.
#
if [ "$HANGCHECK" = true -a "$USER" = pcpqa ]
then
    # for hangcheck ...
    # Save pid of check in a well known place, so that hangcheck can be sure it
    # has the right pid (getting the pid from ps output is not reliable enough).
    #
    if [ -f "$CHECKPID" ]
    then
        checkpidowner=`ls -l "$CHECKPID" | $PCP_AWK_PROG '{ print $3 }'`
        if [ "$checkpidowner" != pcpqa ]
        then
            $sudo rm -f "$CHECKPID"
        else
            # There should be a BIG FAT WARNING here if QA is trying to
            # run tests twice!
            echo "$myname: a check.pid file already exists... are you already running tests?!" >&2
            status=1
            exit
        fi
    fi
    [ ! -f "$CHECKPID" ] && echo "$mypid" >"$CHECKPID"

    # for hangcheck ...
    # Save the status of check in a well known place, so that hangcheck can be
    # sure to know where check is up to (getting test number from ps output is
    # not reliable enough since the trace stuff has been introduced).
    #
    if [ -f "$CHECKSTS" ]
    then
        checkpidowner=`ls -l "$CHECKSTS" | $PCP_AWK_PROG '{ print $3 }'`
        if [ "$checkpidowner" != pcpqa ]
        then
            $sudo rm -f "$CHECKSTS"
        else
            echo "$myname: a check.sts file already exists... are you already running tests?!" >&2
            status=1
            exit
        fi
    fi
    [ ! -f "$CHECKSTS" ] && echo "preamble" >"$CHECKSTS"
fi
# check.time holds the elapsed time of earlier runs; make sure it exists
if [ ! -f check.time ]
then
    touch check.time
fi

# pmcd has to be configured on
if [ "`_get_config pmcd`" != on ]
then
    _change_config pmcd on
fi

# nothing is run with $showme, so there is nothing to trace
$showme && qatrace=false

if $qatrace
then
    # if tracing turned on, make sure trace agent running ok
    switchon=`pmprobe -h $qadepot trace.control.reset 2>&1 | $PCP_AWK_PROG '{ print $2 }'`
    if [ "$switchon" = "1" ]
    then
        # mark every test in the list as not yet started
        for seq in $list
        do
            $verbose && printf "Preparing pmtrace tags: %-.16s:%s\r" \
                "$qasrc" "$seq"
            pmtrace -qh $qadepot -v $qanotyet "$qasrc:$seq" 2>/dev/null
        done
        $verbose && printf "%68s\r" " "
    else
        qatrace=false
    fi
fi

# progress counters: number of tests in the list, number started so far
haverun=0
torun=`echo $list | wc -w | sed -e 's/ //g'`
# Main loop: run every test named in $list.
#
# With $showme the test names are only echoed.  Otherwise, for each
# test that exists as a file in the current directory:
#  - take the check lock, save config checksums (if $check_config)
#  - run ./$seq (through pmtrace when $qatrace) capturing its output in
#    $tmp.out and its exit status in $sts
#  - verify the config checksums, release the lock
#  - fail the test on a core file, a non-zero exit status, a missing
#    $seq.out, output that differs from $seq.out, or a failed
#    permissions check (test 994); a $seq.notrun file instead marks the
#    test as not run
#  - record the elapsed time of a passing test in $tmp.time
# $bad / $n_bad / $notrun / $try accumulate the results that _wrapup
# reports when the script exits.
#
for seq in $list
do
    err=false
    if $showme
    then
        echo $seq
        continue
    fi
    # show a percentage done, but only for runs of 10 or more tests
    if [ $torun -gt 9 ]
    then
        pct=`expr 100 \* $haverun / $torun`
        haverun=`expr $haverun + 1`
        $PCP_ECHO_PROG $PCP_ECHO_N "[$pct%] ""$PCP_ECHO_C"
    fi
    $PCP_ECHO_PROG $PCP_ECHO_N "$seq""$PCP_ECHO_C"
    if [ ! -f $seq ]
    then
        echo " [not run, missing]"
        notrun="$notrun $seq"
        continue
    else
        # really going to try and run this one
        #
        rm -f $seq.out.bad
        # elapsed time of this test the last time it passed, if known
        lasttime=`sed -n -e "/^$seq /s/.* //p" <check.time`
        [ "X$lasttime" != X ] && $PCP_ECHO_PROG $PCP_ECHO_N " ${lasttime}s ...""$PCP_ECHO_C"
        rm -f core $seq.notrun

        # acquire lock
        _get_lock
        if [ $? != 0 ]
        then
            # this is a diagnostic, so it belongs on stderr
            echo "$myname: could not acquire lock; exiting" >&2
            status=1
            exit
        fi

        if $check_config
        then
            # save checksums for critical conf and control files
            [ ! -f $tmp.checksums ] && _checksums get >$tmp.checksums
        fi

        start=`_wallclock`
        $timestamp && _timestamp

        # for hangcheck ...
        [ "$HANGCHECK" = true -a "$USER" = pcpqa ] && echo "$seq" >"$CHECKSTS"

        if $qatrace
        then
            pmtrace -qh $qadepot -v $qarunning "$qasrc:$seq" 2>/dev/null
            pmtrace -qh $qadepot -e "./$seq" "$qasrc:$seq" >$tmp.out.1 2>&1
            sts=$?
            # check for trace errors on first line of test & blow them away
            $PCP_AWK_PROG '/pmtrace: / {if (NR != 1) print $0; next} {print $0}' $tmp.out.1 > $tmp.out
        else
            ./$seq >$tmp.out 2>&1
            sts=$?
        fi
        $timestamp && _timestamp
        stop=`_wallclock`

        # for hangcheck ...
        [ "$HANGCHECK" = true -a "$USER" = pcpqa ] && echo "working" >"$CHECKSTS"

        if $check_config
        then
            # check the saved checksums
            _checksums check $tmp.checksums >$tmp.check
            if [ -s $tmp.check ]
            then
                # appended to the test output, so the comparison with
                # $seq.out below will not match
                echo "$myname: $seq: ERROR: test failed to restore the following config files:" >>$tmp.out
                cat $tmp.check >>$tmp.out
                $PCP_ECHO_PROG $PCP_ECHO_N " [config not restored]""$PCP_ECHO_C"
            fi
        fi

        # remove the lock
        _release_lock

        if [ -f core ]
        then
            $PCP_ECHO_PROG $PCP_ECHO_N " [dumped core]""$PCP_ECHO_C"
            mv core $seq.core
            err=true
        fi

        if [ -f $seq.notrun ]
        then
            [ $color = true ] && tput bold && tput setaf 4 # blue
            echo " [not run] `cat $seq.notrun`"
            [ $color = true ] && tput sgr0 # reset
            notrun="$notrun $seq"
        else
            if [ $sts -ne 0 ]
            then
                $PCP_ECHO_PROG $PCP_ECHO_N " [failed, exit status $sts]""$PCP_ECHO_C"
                err=true
            fi
            if [ ! -f $seq.out ]
            then
                $PCP_ECHO_PROG $PCP_ECHO_N " - no qualified output""$PCP_ECHO_C"
                mv $tmp.out $seq.out.bad
                err=true
            else
                if diff $seq.out $tmp.out >/dev/null 2>&1
                then
                    if $err
                    then
                        :
                    else
                        # only a clean pass updates the timing data
                        echo "$seq `expr $stop - $start`" >>$tmp.time
                    fi
                else
                    [ $color = true ] && tput bold && tput setaf 1 # red
                    $PCP_ECHO_PROG $PCP_ECHO_N " - output mismatch (see $seq.out.bad)""$PCP_ECHO_C"
                    [ $color = true ] && tput sgr0 # reset
                    mv $tmp.out $seq.out.bad
                    $PCP_ECHO_PROG
                    $diff $seq.out $seq.out.bad
                    err=true
                fi
            fi

            # make sure this test did not muck up the permissions or
            # ownership of key installed files and directories
            #
            sh 994 --fix >$tmp.out
            if [ -s $tmp.out ]
            then
                $PCP_ECHO_PROG $PCP_ECHO_N " - failed permissions check""$PCP_ECHO_C"
                echo >$tmp.head
                echo "*** Failed permissions/ownership checks ***" >>$tmp.head
                if [ -f $seq.out.bad ]
                then
                    cat $tmp.head $tmp.out >>$seq.out.bad
                elif [ -f $seq.out ]
                then
                    cp $seq.out $seq.out.bad
                    cat $tmp.head $tmp.out >>$seq.out.bad
                else
                    cat $tmp.head $tmp.out >$seq.out.bad
                fi
                err=true
            fi
            $PCP_ECHO_PROG ""

            # really tried to run the test, update the state
            #
            # NOTE(review): $qaown is not assigned in the visible part
            # of this script and the tags written earlier have no
            # ":$qaown" suffix - confirm where it is set.
            if $qatrace
            then
                if $err
                then
                    pmtrace -qh $qadepot -v $qafailed "$qasrc:$seq:$qaown" 2>/dev/null
                else
                    pmtrace -qh $qadepot -v $qapassed "$qasrc:$seq:$qaown" 2>/dev/null
                fi
            fi
        fi
    fi

    # come here for each test, except when $showme is true
    #
    if $err
    then
        bad="$bad $seq"
        n_bad=`expr $n_bad + 1`
        quick=false
        [ $diff = true ] || echo "Check local PMCD is still alive ..."
        $OPTION_AGENTS && _haveagents
        $OPTION_LOGGER && _havelogger
    fi
    # a test that declared itself "not run" does not count as tried
    [ -f $seq.notrun ] || try=`expr $try + 1`
    rm -f $seq.notrun

    # optional callback
    #
    # run the script that was tested for, in the current directory,
    # rather than whatever check.callback $PATH might find
    [ -x check.callback ] && ./check.callback $seq
done

# ran to completion; the exit trap reports the results and exits with
# the number of failed tests
aborted=false
status=$n_bad
exit