initial commit for version 1.6.x patch release
[OpenFOAM-1.6.x.git] / bin / foamCheckJobs
bloba2696e1f8066ebc8e99d08ef11cfaf44562a8b63
1 #!/bin/sh
2 #------------------------------------------------------------------------------
3 # ========= |
4 # \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
5 # \\ / O peration |
6 # \\ / A nd | Copyright (C) 1991-2009 OpenCFD Ltd.
7 # \\/ M anipulation |
8 #-------------------------------------------------------------------------------
9 # License
10 # This file is part of OpenFOAM.
12 # OpenFOAM is free software; you can redistribute it and/or modify it
13 # under the terms of the GNU General Public License as published by the
14 # Free Software Foundation; either version 2 of the License, or (at your
15 # option) any later version.
17 # OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
18 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 # for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with OpenFOAM; if not, write to the Free Software Foundation,
24 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
26 # Script
27 # foamCheckJobs
29 # Description
30 # Uses runningJobs/, finishedJobs/ and foamProcessInfo to create stateFile.
31 # stateFile contains per pid information on state of process. Format:
32 # pid state command
34 # where state is one of 'RUNN', 'SUSP', 'OTHR', 'FINI', 'ABRT' ('PEND')
35 # (first three are from foamProcessInfo, others from jobInfo files)
36 # (PEND is special state from when user has submitted but no jobInfo
37 # file yet. Not supported by this script yet)
39 #------------------------------------------------------------------------------
#- Name this script was invoked as (directory part stripped). Used in
#  messages and to build the work file / work directory names below.
PROGNAME=${0##*/}

#-------------------------------------------------------------------------------
#- User settings

#- Number of days for files to be considered old
NDAYSLIMIT=7
#-------------------------------------------------------------------------------

#- work file. Created through mktemp so that its name is not predictable;
#  falls back to the historical pid-based name when mktemp is not available.
#  The trap removes it on every exit path, including the early 'exit 1's.
TMPFILE=$(mktemp "${TMPDIR:-/tmp}/${PROGNAME}.XXXXXX" 2>/dev/null) \
    || TMPFILE=/tmp/${PROGNAME}$$.tmp
trap 'rm -f "$TMPFILE"' EXIT
#- work dir. Needs to be accessible for all machines
MACHDIR=$HOME/.OpenFOAM/${PROGNAME}
DEFSTATEFILE=$HOME/.OpenFOAM/foamCheckJobs.out

#- Command used to print messages; always invoked as: $ECHO "one string".
#  The prompts rely on backslash escapes ('\c' = no trailing newline).
#  Whether 'echo' honours them, with or without '-e', differs between
#  /bin/sh implementations (some print the '-e' itself), so printf's %b
#  conversion is used instead: it behaves the same on every system.
#  $ECHO must stay unquoted at the call sites so that it splits into the
#  command name and its format string.
ECHO='printf %b\n'
65 #-------------------------------------------------------------------------------
67 # Functions
69 #-------------------------------------------------------------------------------
# getRawEntry dictionary entry
#    Prints the raw value of 'entry' in the file 'dictionary': for every line
#    that starts (after optional blanks/tabs) with the keyword 'entry'
#    followed by whitespace, the rest of the line is printed.
#    Lines starting with '//' are skipped.
#    Note: 'entry' is pasted into a regular expression, so it must not
#    contain regex metacharacters or '/'.
getRawEntry() {
    # [[:space:]] instead of '[ \t]': inside a bracket expression '\t' is
    # not a tab but the two characters '\' and 't', which made e.g. the
    # keyword 'ime' match a line starting with 'time'.
    grep -v '^//' "$1" \
        | sed -n -e "s/^[[:space:]]*$2[[:space:]][[:space:]]*//p"
}
# getEntry dictionary entry
#    Like getRawEntry, but strips one leading '"', a trailing ';' and then
#    one trailing '"' from the value, so that
#        endDate    "Jan 10 2002";
#    is printed as
#        Jan 10 2002
getEntry() {
    # Arguments are quoted so a dictionary path containing blanks is passed
    # on as a single word.
    getRawEntry "$1" "$2" | sed -e 's/^"//' -e 's/;$//' -e 's/"$//'
}
# notEmpty directory
#    Returns 0 if 'ls' lists anything for the directory (i.e. it contains
#    files/directories; entries starting with '.' are not listed and so do
#    not count), 1 otherwise.
notEmpty() {
    # Quoted and terminated with '--' so directory names containing blanks
    # or starting with '-' are handled as one path.
    [ -n "$(ls -- "$1")" ]
}
# dayDiff <date string 1> <date string 2>
#    Prints number of days from date 2 to date 1 (negative if date 1 is the
#    earlier one).
#    Eg. dayDiff "Jan 10 2002" "Dec 28 2001"
#    ==> 13
#    Prints 0 if either date cannot be converted, e.g. because the '-d'
#    option of 'date' is not supported on this system.
dayDiff() {
    # Both dates are converted to seconds since the epoch and subtracted, so
    # leap years are accounted for (the former 'years * 365 + day-of-year
    # difference' was off by one day per leap day in between).
    # -u: interpret the strings as UTC so that every day is exactly 86400 s
    # and daylight-saving changes cannot disturb the division.
    if ddSec1=$(date -u -d "$1" '+%s' 2>/dev/null) &&
       ddSec2=$(date -u -d "$2" '+%s' 2>/dev/null)
    then
        echo $(( (ddSec1 - ddSec2) / 86400 ))
    else
        #- option '-d' on date not supported (or unparsable date). Give up.
        echo "0"
    fi
}
#dayDiff "`date '+%b %d %Y'`" "Dec 28 2001"
# getAllJobs jobInfoDirectory
#    Prints list of all jobs in directory (e.g. runningJobs/)
#    Also handles 'slaves' entries in jobInfo:
#       slaves 1 ( penfold.23766 );
#
#    The output is one line: the pattern '<directory>/*' (NOT expanded here)
#    followed by the machine.pid names found in the 'slaves' entries. The
#    caller is expected to use the result unquoted ('for f in $list') so the
#    pattern is expanded and the line is split into words at that point.
#    Prints an empty line if the directory is empty.
#    Meant to be called through command substitution: it sets the plain
#    variables jobs, f, line and slaveJobs.
getAllJobs() {
    if notEmpty "$1"; then
        jobs="$1/*"
        for f in "$1"/*
        do
            # grep fails for job files without a 'slaves' entry (and for
            # unreadable files, whose error message is discarded)
            if line=$(grep '^[ ]*slaves' "$f" 2>/dev/null); then
                # keep what is between the parentheses
                slaveJobs=$(echo "$line" | sed -e 's/.*(\(.*\)).*/\1/')
                jobs="$jobs $slaveJobs"
            fi
        done
    else
        jobs=''
    fi
    echo "$jobs"
}
# releaseLock jobId lockFile
#    Releases lock on jobId: if lockFile is an existing regular file it is
#    moved to $FOAM_JOB_DIR/finishedJobs/. A confirmation is printed in
#    either case (jobs taken from a 'slaves' entry have no lock file of
#    their own).
#    Returns non-zero, without printing the confirmation, if the move fails.
releaseLock () {
    if [ -f "$2" ]; then
        #- move lock to finishedJobs
        mv -- "$2" "$FOAM_JOB_DIR/finishedJobs/" || return 1
    fi
    # $ECHO is the script-wide print command; plain echo if it is not set
    ${ECHO:-echo} "Lock on job $1 released."
}
# printUsage
#    Prints the usage message on standard output.
#    The here-document is expanded by the shell, which is why the literal
#    '$FOAM_JOB_DIR' in the FILES section is escaped.
printUsage() {
    cat << LABEL
Usage: ${PROGNAME:-${0##*/}} [stateFile]

This program checks all the locks in the license directory to see if
their processes are still running. Processes will not release their
lock if they exit abnormally. This program will try to obtain process
information on the machine the process ran on and release the lock
if the program is no longer running.

Requirements: the environment variable FOAM_JOB_DIR needs to point to the
license directory and all machines have to be reachable using ssh.

The output from checking all running jobs is collected in an optional
file.

FILES:
    \$FOAM_JOB_DIR/runningJobs     locks for running processes
                  /finishedJobs    ,,        finished processes
LABEL
}
172 #-------------------------------------------------------------------------------
174 # Main
176 #-------------------------------------------------------------------------------
#- Check a few things
#  (diagnostics go to standard error; every failure exits with status 1)

if [ -z "$FOAM_JOB_DIR" ]; then
    $ECHO "$PROGNAME : FOAM_JOB_DIR environment variable not set." >&2
    $ECHO "This should point to your central license directory." >&2
    exit 1
fi

#- the license directory itself and both of its job directories must exist
for dir in \
    "$FOAM_JOB_DIR" \
    "$FOAM_JOB_DIR/runningJobs" \
    "$FOAM_JOB_DIR/finishedJobs"
do
    if [ ! -d "$dir" ]; then
        $ECHO "$PROGNAME : The license directory according to FOAM_JOB_DIR is not valid." >&2
        $ECHO "FOAM_JOB_DIR: $FOAM_JOB_DIR" >&2
        exit 1
    fi
done

#- state file: the single optional argument, else a STATEFILE inherited from
#  the environment, else the default
case $# in
0)
    STATEFILE=${STATEFILE:-$DEFSTATEFILE}
    ;;
1)
    STATEFILE=$1
    ;;
*)
    printUsage
    exit 1
    ;;
esac
#- obtain rsh method
RSH='ssh'
echo "Using remote shell type : $RSH"

echo ""
echo "Collecting information on jobs in"
echo " $FOAM_JOB_DIR"
echo ""


#- Collect machine names into $TMPFILE, one name per line, no duplicates.
#  Also handles 'slaves' entry in jobInfo:

rm -f -- "$TMPFILE"; touch -- "$TMPFILE"
RUNJOBS=$(getAllJobs "$FOAM_JOB_DIR/runningJobs")
#- $RUNJOBS is left unquoted on purpose: it holds the pattern
#  'runningJobs/*' plus slave job names and has to be expanded and split.
for f in $RUNJOBS
do
    #- job names are <machine>.<pid>
    machinePid=$(basename -- "$f")
    machine=$(echo "$machinePid" | sed -e 's/\.[0-9][0-9]*$//')

    #- -x: compare whole lines. A substring match would skip machine
    #  'node1' once 'node10' is in the list.
    if ! grep -F -x -q -- "$machine" "$TMPFILE" 2>/dev/null; then
        $ECHO "$machine" >> "$TMPFILE"
    fi
done
$ECHO "Found machines:"
cat -- "$TMPFILE"
$ECHO ""
#- Collect process info on all machines, one file per machine
#  ($MACHDIR/<machine>, written by foamProcessInfo; for remote machines the
#  file is written on the remote side, hence $MACHDIR has to be shared).

mkdir -p -- "$MACHDIR"
thisHost=$(hostname)
#- The machine list is read through file descriptor 3 so that standard input
#  stays untouched for the remote shell.
while IFS= read -r machine <&3
do
    #- an empty line ends the list
    if [ -z "$machine" ]; then
        break
    fi

    machFile=$MACHDIR/$machine
    rm -f -- "$machFile"
    $ECHO "Contacting $machine to collect process information:"
    if [ "$machine" = "$thisHost" ]; then
        $ECHO " foamProcessInfo $machFile"
        foamProcessInfo "$machFile" >/dev/null 2>&1
        status=$?
    else
        $ECHO " $RSH $machine foamProcessInfo $machFile"
        #- $RSH unquoted: it is a command line, not a file name
        $RSH "$machine" foamProcessInfo "$machFile" >/dev/null 2>&1
        status=$?
    fi

    #- failure: command failed, or it left no (or an empty) file behind
    if [ "$status" -ne 0 ] || [ ! -s "$machFile" ]; then
        $ECHO "** Failed collecting process information on $machine."
        $ECHO "Check $machFile and run foamProcessInfo by hand"
        rm -f -- "$machFile"
    else
        $ECHO "Successfully collected information in $machFile ..."
    fi
done 3< "$TMPFILE"
$ECHO ""
#- Construct state for runningJobs; move non running jobs to finishedJobs
#  Jobs on machines without process information (no readable
#  $MACHDIR/<machine>) are skipped: nothing is known about them.

releaseAll=''
rm -f -- "$STATEFILE"
#- $RUNJOBS unquoted on purpose: pattern plus job names, see getAllJobs
for f in $RUNJOBS
do
    #- job names are <machine>.<pid>
    machinePid=$(basename -- "$f")
    machine=$(echo "$machinePid" | sed -e 's/\.[0-9][0-9]*$//')
    pid=$(echo "$machinePid" | sed -e 's/.*\.\([0-9][0-9]*\)$/\1/')

    machFile=$MACHDIR/$machine
    if [ -r "$machFile" ]; then
        #- line of the process info starting with this pid, if any
        entry=$(grep "^$pid " "$machFile" 2>/dev/null)
        if [ $? -ne 0 ] || [ -z "$entry" ]; then
            #- pid not found on its machine: job is gone but still locked
            if [ -n "$releaseAll" ]; then
                releaseLock "$machinePid" "$f"
            else
                $ECHO "Job $machinePid seems to be no longer running. Release lock? (y/a)\c"
                read -r answ
                #- just <return> counts as 'y'
                case "${answ:-y}" in
                y)
                    releaseLock "$machinePid" "$f"
                    ;;
                a)
                    #- release this one and all following without asking
                    releaseAll='yes'
                    releaseLock "$machinePid" "$f"
                    ;;
                *)
                    #- lock kept; job recorded with state OTHR
                    state='OTHR'
                    $ECHO "$machinePid $state" >> "$STATEFILE"
                    ;;
                esac
            fi
        else
            #- second column of the process info is the state
            state=$(echo "$entry" | awk '{print $2}')
            $ECHO "$machinePid $state" >> "$STATEFILE"
        fi
    fi
done
#- Collect old jobs in finishedJobs
#  $OLDFILES holds one path per line.
#  -type f: without it the finishedJobs directory itself is listed as soon
#  as it has not been modified for $NDAYSLIMIT days.

OLDFILES=$(find "$FOAM_JOB_DIR/finishedJobs" -type f -mtime +"$NDAYSLIMIT" -print)

#- Construct state for finishedJobs and check on date of files.
#  Empty job files are ignored. A job without endDate entry is recorded as
#  ABRT, all others as FINI.

if notEmpty "$FOAM_JOB_DIR/finishedJobs"; then
    #- LC_ALL=C: English month names, whatever the user's locale, so that
    #  the string can be parsed again by dayDiff
    dateNow=$(LC_ALL=C date '+%b %d %Y')
    for f in "$FOAM_JOB_DIR"/finishedJobs/*
    do
        #- -s tests the file size itself; the block count printed by
        #  'ls -s' can be 0 for small non-empty files on some file systems
        if [ -s "$f" ]; then
            machinePid=$(basename -- "$f")

            end=$(getEntry "$f" endDate)
            if [ -z "$end" ]; then
                state='ABRT'
            else
                nDaysOld=$(dayDiff "$dateNow" "$end")
                if [ "$nDaysOld" -gt "$NDAYSLIMIT" ]; then
                    #- add only once: the file may already have been found
                    #  through its modification time
                    if ! printf '%s\n' "$OLDFILES" | grep -F -x -q -- "$f"
                    then
                        OLDFILES="$OLDFILES
$f"
                    fi
                fi

                state='FINI'
            fi

            $ECHO "$machinePid $state" >> "$STATEFILE"
        fi
    done
fi
#- Remove old locks (after confirmation; just <return> counts as 'y')

#- the arithmetic expansion strips the padding some 'wc' versions print
nOldFiles=$(( $(printf '%s\n' "$OLDFILES" | wc -w) ))
if [ "$nOldFiles" -gt 0 ]; then
    $ECHO "You seem to have $nOldFiles locks older than $NDAYSLIMIT days in finishedJobs/"
    $ECHO "Do you want to remove these? (y)\c"
    read -r answ
    if [ "${answ:-y}" = 'y' ]; then
        #- $OLDFILES unquoted on purpose: whitespace separated list of files
        rm -f -- $OLDFILES
    fi
fi

#- remove the work file and the per-machine process information.
#  ${MACHDIR:?} aborts instead of running 'rm -r' on an empty path.
rm -f -- "$TMPFILE"
rm -rf -- "${MACHDIR:?}"

$ECHO ""
$ECHO "Updated stateFile:"
$ECHO " $STATEFILE"
$ECHO ""
366 #------------------------------------------------------------------------------