# Translates +pN-style conv-host options into mpirun -np N options.
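# For example, an invocation such as "charmrun pgm +p4" ends up, in the
# simplest case, running "mpirun -np 4 pgm".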
pes=`echo $1 | awk '{print substr($1,3)}'`
args=" "$1" "$2" "$args
printf "\nRunning on $pes processors: $args\n"
if [ -n "$PBS_NODEFILE" ]
# we are in a job shell
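# The mpirun found on PATH is checked below; if it looks like MVAPICH2 and no
# machinefile was given, the PBS node file is passed along as the machinefile.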
mpirun_cmd=`which mpirun`
if echo $mpirun_cmd | grep 'mvapich2' > /dev/null 2>/dev/null
test -z "$machinefile" && args=-machinefile" "$PBS_NODEFILE" "$args
echo mpirun -np $pes $args
elif [ -n "$LSB_HOSTS" ]
echo cmpirun -lsf -poll -no_smp -gm_long 200000 $args
cmpirun -lsf -poll -no_smp -gm_long 200000 $args
elif [ -n "$PBS_QUEUE" -o -n "$LSF_QUEUE" ]
# Interactive mode: create and submit a batch job
script="charmrun_script.$$.sh"
output="$indir/charmrun_script.$$.stdout"
result="$indir/charmrun_script.$$.result"
# Some machine-specific settings
turing*.turing.uiuc.edu)
ppn='#PBS -l nodes='$pes':ppn=1'
extra='-machinefile $PBS_NODEFILE'
tg-login*|honest*.ncsa.uiuc.edu)
nodes=`expr \( $pes + 1 \) / 2`
test $pes -eq 1 && ppns=1 || ppns=2
ppn='#PBS -l nodes='$nodes':ppn='$ppns
extra='-machinefile $PBS_NODEFILE'
co-login*.ncsa.uiuc.edu)
mem='#PBS -l mem=500mb'
ncpus="#PBS -l ncpus=$pes"
nodes=`expr \( $pes + 1 \) / 2`
test $pes -eq 1 && ppns=1 || ppns=2
ppn='#PBS -l nodes='$nodes':ppn='$ppns
extra='-machinefile $PBS_NODEFILE'
ncpus="#PBS -l ncpus=$pes"
if test $USE_LSF -eq 0
mpirun=`which mpirun`
# This is a charmrun-generated PBS batch job script.
# The lines starting with #PBS are queuing system flags:
#PBS -l walltime=$walllimit:00
$mpirun -np $pes $extra $args
# Save mpirun exit status
echo \$status > $result
mpirun="cmpirun -lsf -poll -no_smp -gm_long 200000"
# This is a charmrun-generated LSF batch job script.
# The lines starting with #BSUB are queuing system flags:
#BSUB -W 0:$walllimit
echo \$LSB_MCPU_HOSTS
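# LSB_MCPU_HOSTS is set by LSF and lists the hosts (with slot counts)
# allocated to this job.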
# Save mpirun exit status
echo \$status > $result
echo "Charmrun> $queue_kill $jobid ..."
echo "Submitting batch job for> $mpirun -np $pes $args"
echo " using the command> $queue_qsub $script"
while [ -z "$jobid" ]
[ $USE_LSF = 0 ] && jobid=`$queue_qsub $script|tail -1`
[ $USE_LSF = 1 ] && jobid=`$queue_qsub < $script|tail -1|sed -e 's/[^0-9]*//g'`
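# With PBS the script is passed to $queue_qsub as an argument and the job ID
# is the last line of its output; with LSF it is fed on stdin and the job ID
# is extracted by stripping everything that is not a digit.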
182 echo "Job enqueued under job ID $jobid"
183 # kill job if interrupted
186 # Wait for the job to complete, by checking its status
189 $queue_stat $jobid > tmp.$$
193 # The job is done-- print its output
195 # When job hangs, result file does not exist
test -f $result && status=`cat $result` || status=1
test $status -eq 0 && status=`grep 'End of program' $output > /dev/null 2>&1; echo $?`
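# $status starts as the saved mpirun exit code; the run is treated as
# successful only if that code is 0 and the output contains "End of program".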
test $status -eq 0 && rm -f $script $output
# The job is still queued or running-- print status and wait
# Job ID may not exist now
if test $exitstatus -ne 0
# retry a few times when an error occurs
retry=`expr $retry + 1`
echo "Charmrun> too many errors, abort!"
[ -n "$MPI_MACHINEFILE" ] && args=" -machinefile $MPI_MACHINEFILE $args"
echo "charmrun> mpirun -np $pes $args"
mpirun -np $pes $args