# This wrapper script will invoke greylag in parallel on our SGE cluster.
# Parent directory for the per-run work directories created by this script.
# This directory is assumed to be shared across all cluster nodes.
SHAREDTMPDIR=/clusterfs/home/proteomics/tmp
# options passed to all greylag-grind invocations
11 # Divide this run into this many parts. This should probably be at least 5x
12 # the number of cluster nodes available. At the moment, this must be an exact
# Defaults to 3000 unless GREYLAG_PARTS is set to a non-empty value.
PARTS=${GREYLAG_PARTS:-3000}
21 usage: $p [-l] <greylag.conf> <ms2-file> [ <ms2-file>... ]
23 Process a set of ms2 files as specified by the given configuration file (which
24 must end in '.conf' and should probably be in the current directory).
26 If the '-l' flag is given, stdout and stderr will be redirected to a
27 corresponding '.log' file.
# Write a timestamped diagnostic to stderr, tagged with $p (the name also
# shown in the usage text).  "$*" is used rather than "$@" because the
# arguments are embedded in a single quoted string.
echo 1>&2 "$(date '+%b %e %H:%M:%S') $p: $*"
# Leave a "<jobname>.done-failed" marker file; report through err if the
# marker cannot be created.
touch "$jobname.done-failed" || err "touch failed"
43 if [ "$1" == "-l" ]; then
# Remove any "<config without .conf>.done-*" files (presumably markers left
# by an earlier run -- confirm).  The prefix is quoted so that only the
# trailing '*' is subject to globbing.
rm -f -- "${config%.conf}".done-*
58 *) die
"parameter filename must end in '*.conf'";;
# Job name: the configuration file's base name without its '.conf' suffix.
jobname=$(basename -- "$config" .conf)
# Reject any file given with a directory component.  The command
# substitution is quoted so names containing whitespace are handled.
if [ "$(dirname -- "$f")" != "." ]; then
    die "file '$f' must be in the current working directory"
fi
70 *) die
"argument file '$f' should end in '.ms2'";;
# The configuration file must exist.  "$config" is quoted: unquoted, an empty
# value would turn the test into `[ -e ]`, which is always true.
[ -e "$config" ] || die "'$config' not found"
# If the current directory is not already group rwx + setgid (mode string
# starting 'drwxrws'), try to make it so; a chmod failure is only reported
# through err.  `grep -E` replaces the obsolescent `egrep`.
if ! ls -ld . | grep -Eq '^drwxrws'; then
    chmod g+rwxs . || err "attempt to make this directory group writable failed"
fi
# Do some basic locking.  This tries to prevent simultaneous runs on the same
# parameter file, which would produce output to the same file, wasting
# resources and causing confusion.
#
# The lock is a symlink named "$jobname.lock" whose target is the PID of the
# process that created it.
lockfile="$jobname.lock"
# A failure here is deliberately ignored; the ownership check below decides.
ln -s "$$" "$lockfile" 2>/dev/null || true
# Read the link target directly instead of parsing `ls -l` output.  An
# unreadable or missing link yields an empty PID, which fails the check.
lockpid=$(readlink -- "$lockfile" 2>/dev/null) || lockpid=
if [ "$lockpid" != "$$" ]; then
    die "this directory locked by another process (pid = $lockpid)?
remove $lockfile if not"
fi
# Install the cleanup only once the lock is known to be ours, so that a
# failed attempt never removes another process's lock on exit.
trap 'rm -f -- "$lockfile"' EXIT
# When logging is enabled ($logging is "1"), detach stdin and send both
# stdout and stderr of the rest of the script to "<jobname>.log".
if [ "$logging" = "1" ]; then
    exec < /dev/null > "$jobname.log" 2>&1
fi
102 # Be very careful with quoting, as these names may eventually come from
# Work directory for this run under $SHAREDTMPDIR; the epoch time plus this
# shell's PID make the name unique.
shared_d="$SHAREDTMPDIR/greylag-$(date +%s)-$$"
# Could add removal of the shared directory to the EXIT trap, but probably we
# shouldn't because pdq (and maybe SGE) may react badly
# trap "rm -rf $lockfile $shared_d &" EXIT
# Create the shared work directory and copy the configuration file into it
# (-p preserves mode and timestamps).  Either failure is fatal.
mkdir -- "$shared_d" || die "'mkdir $shared_d' failed!"

cp -p -- "$config" "$shared_d/" || die "config cp failed!"
# Progress message, emitted through the err helper.
err "set up work directory, recreating indices if necessary"
# (Re)build the index unless "$idx" exists and is newer than "$f".  Two
# separate tests joined with && replace the obsolescent `-a` operator.
if ! { [ -e "$idx" ] && [ "$idx" -nt "$f" ]; }; then
    greylag-index-spectra "$f" \
        || die "greylag-index-spectra failed"
fi
# Copy the file and its index into the shared work directory.
cp -p -- "$f" "$idx" "$shared_d/" || die "ms2/idx cp failed"
# Last path component of the current working directory.
jobbasedir=$(basename -- "$PWD")
#########################################################################
# submit the job in this shared directory so that the nodes can see it
pushd "$shared_d" > /dev/null || die "pushd failed!"
135 #shared dir: $shared_d
144 # It's important that the same ms2 arguments be given (in the same order) each
147 qsub
-sync y
-r y
-b y
-S /bin
/bash
-cwd -V -hard -l virtual_free
=250M \
150 -N "greylag-$jobbasedir-$jobname-$$" \
151 -e 'grind.$TASK_ID.err' \
152 -o 'grind.$TASK_ID.out' \
153 greylag-grind
$greylag_options \
154 --work-slice '$(greylag-sge-slice ${SGE_TASK_ID} ${SGE_TASK_LAST})' \
# Replay the per-task output files.  The trailing /dev/null keeps cat from
# reading stdin when no file matches.
# NOTE(review): the qsub call names its outputs 'grind.$TASK_ID.out' and
# 'grind.$TASK_ID.err' (dot separator), which the original 'grind-*' pattern
# could never match.  Both separators are accepted here; confirm which one is
# intended.  The sort key is left as it was.
# shellcheck disable=SC2046 # word splitting of the file list is intended
cat $(find . -name 'grind[-.]*.out' -o -name 'grind[-.]*.err' |
    sort -t . -k 2n,3) /dev/null
# Fail if the recorded qsub status is anything other than 0.
if [ "$qsub_status" != 0 ]; then
    die "grind on nodes failed"
fi

# The number of '*.gwr' files found below the current directory must equal
# $PARTS.  Blanks are stripped because some `wc` implementations pad the
# count, which would defeat the string comparison.
if [ "$(find . -name '*.gwr' | wc -l | tr -d '[:space:]')" != "$PARTS" ]; then
    die "SGE/grind on nodes failed"
fi
# Return to the directory that was current before the pushd.
popd > /dev/null || die "popd failed!"
167 #########################################################################
# now merge results (on master, for now)
# The list of '*.gwr' files under the shared directory is fed to
# greylag-merge on stdin; "$jobname.gwr" is passed as its argument.
find "$shared_d" -name '*.gwr' \
    | greylag-merge --files-on-stdin "$jobname.gwr" \
    || die "merge failed"

# Run greylag-sqt on the merged file.
greylag-sqt "$jobname.gwr" || die "sqt write failed"
# for now, don't do this
#rm -fr "$shared_d" $lockfile
# Leave a "<jobname>.done-ok" marker file; report through err if the marker
# cannot be created.
touch "$jobname.done-ok" || err "touch failed"