flock_multi: narrow down exceptions reported as error conditions
[puppet-git.git] / flock_multi
blob2aa1d9fa862671d9fcd620f1a423c8e7968e19c4
1 #!/usr/bin/env python
3 # Author: Martin Langhoff <martin.langhoff@remote-learner.net>
4 # License: GPLv2
7 from __future__ import with_statement
9 import os, sys
10 import traceback
11 import getopt
12 import fcntl
13 import re
14 import random
15 import subprocess
16 import time
def help():
    """Return the flock_multi usage text as a single string.

    The text is only built here; callers decide which stream to write it to.
    """
    usage_lines = (
        "Usage:\n",
        " flock_multi [-h] [-v] [-E 200] [-T 201] [-s 20] [-w 60m ] heavy 4 heavyscript \n",
        "Notes: \n",
        " -w accepts m and h suffixes\n",
    )
    return "".join(usage_lines)
def arg_to_secs(a, exitcode=200):
    """Convert a duration argument into a whole number of seconds.

    Accepted forms:
      * an integer, or a string of digits -- taken as seconds
      * digits followed by ``m``          -- minutes
      * digits followed by ``h``          -- hours

    Parameters:
      a        -- the value to convert (integer or string).
      exitcode -- process exit status used when ``a`` is not recognised.
                  Defaults to 200, the script's default error exit code.

    Returns the duration in seconds as an int.

    If ``a`` matches none of the accepted forms, an error is written to
    stderr and the process exits with ``exitcode`` (SystemExit is raised).
    """
    # Local import: numbers.Integral covers int (and long on Python 2)
    # without naming ``long``, which does not exist on Python 3.
    import numbers

    if isinstance(a, numbers.Integral):
        return int(a)

    # (pattern, seconds-per-unit); tried in order, first match wins.
    units = (
        (r'(\d+)$', 1),
        (r'(\d+)m$', 60),
        (r'(\d+)h$', 60 * 60),
    )
    for pattern, multiplier in units:
        m = re.match(pattern, a)
        if m:
            return int(m.group(1)) * multiplier

    sys.stderr.write("ERROR: timeout parameter not an integer!\n")
    # The previous code exited with a name that only exists inside main(),
    # which raised NameError instead of exiting cleanly.
    sys.exit(exitcode)
def maybe_timeout(timeout, exitcode):
    """Exit the process if the deadline ``timeout`` has already passed.

    ``timeout`` is compared against ``time.time()``; a value of zero or
    less means "no deadline" and the function returns without doing
    anything. Once the deadline has passed, an error is written to stderr
    and the process exits with ``exitcode``.
    """
    # No deadline configured: nothing to check.
    if not timeout > 0:
        return
    if time.time() > timeout:
        sys.stderr.write("ERROR: flock_multi timeout\n")
        sys.exit(exitcode)
def main():
    """Run a command while holding one of a fixed number of flock slots.

    Command line: ``flock_multi [options] LOCKNAME MAXLOCKS COMMAND [ARG...]``

    Lock files are named ``<lockdir>/<LOCKNAME>.<n>`` for n in 1..MAXLOCKS.
    The slots are tried in random order with a non-blocking exclusive
    flock; the first one obtained is held while COMMAND runs, and the
    process then exits with COMMAND's exit status. If every slot is busy,
    the script sleeps (``sleeptime`` +/- 10% splay) and retries, until the
    optional ``-w`` deadline passes.

    Environment:
      FLOCK_MULTI_DIR      -- overrides the lock directory.
      FLOCK_MULTI_CONF_DIR -- overrides the conf directory; a file named
                              LOCKNAME in it overrides MAXLOCKS.

    Exit status:
      COMMAND's status on success; 200 on usage errors; the ``-E`` value
      (default 200) when COMMAND cannot be found; the ``-T`` value
      (default 201) on timeout, via maybe_timeout().
    """
    import errno

    # Defaults, overridden from the environment.
    lockdir = os.environ.get('FLOCK_MULTI_DIR', '/mnt/cluster/lock')
    confdir = os.environ.get('FLOCK_MULTI_CONF_DIR', '/mnt/cluster/conf/lock')

    try:
        long_opts = ["help", "verbose", "conflict-exit-code=",
                     "timeout-exit-code=", "sleeptime=", "wait=", "timeout="]
        opts, args = getopt.getopt(sys.argv[1:], "hvE:T:s:w:", long_opts)
    except getopt.GetoptError as e:
        sys.stderr.write("ERROR: Invalid parameter: %s\n" % e.msg)
        sys.stderr.write(help())
        sys.exit(200)

    verbose = False
    flockerr = 200
    timeouterr = 201
    sleeptime = 60
    timeout = 0  # absolute deadline (epoch seconds); 0 means wait forever

    # Options are handled before the positional-argument check so that
    # "-h" on its own prints the usage text instead of an error.
    for o, a in opts:
        if o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-h", "--help"):
            sys.stdout.write(help())
            sys.exit()
        elif o in ("-E", "--conflict-exit-code"):
            flockerr = int(a)
        elif o in ("-T", "--timeout-exit-code"):
            timeouterr = int(a)
        elif o in ("-s", "--sleeptime"):
            sleeptime = int(a)
        elif o in ("-w", "--wait", "--timeout"):
            timeout = float(arg_to_secs(a)) + time.time()
        else:
            raise AssertionError("unhandled option %s" % o)

    if len(args) < 3:
        sys.stderr.write("ERROR: At least 3 parameters needed.\n")
        sys.stderr.write(help())
        sys.exit(200)

    # Positional parameters.
    lockname = args.pop(0)
    maxlocks = int(args.pop(0))
    cmd = args

    # An optional per-lock conf file overrides the command-line maxlocks.
    conffile = os.path.join(confdir, lockname)
    if os.path.exists(conffile):
        try:
            with open(conffile) as conf_fh:
                maxlocks = int(conf_fh.read())
        except (IOError, OSError, ValueError):
            # Unreadable or non-numeric: keep the command-line value.
            sys.stderr.write("WARNING: Ignoring invalid value in %s\n" % conffile)

    if verbose:
        print("Using %s maxlocks" % maxlocks)

    mypid = os.getpid()
    hostname = os.uname()[1]

    # Float, so the splay below is not truncated by integer division.
    sleeptime = float(sleeptime)

    while True:
        locks = list(range(1, maxlocks + 1))
        random.shuffle(locks)
        for trylock in locks:
            trylockfn = os.path.join(lockdir, lockname + '.%s' % trylock)
            # We open for "append", and only truncate the file once we
            # have succeeded in getting the flock.
            with open(trylockfn, 'a') as fh:
                try:
                    fcntl.flock(fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
                except (IOError, OSError):
                    # Failed to get this lock: nonfatal, try the next one.
                    # Only the flock() call is guarded, so later I/O errors
                    # are not mistaken for a busy lock.
                    continue

                try:
                    # Record who holds the lock and what is being run.
                    fh.seek(0)
                    fh.truncate(0)
                    fh.flush()
                    if verbose:
                        print("Got %s" % trylockfn)
                    t = time.gmtime()
                    fh.write("%s PID: %s epoch: %s %s\n" %
                             (hostname, mypid, time.time(),
                              time.strftime("%a, %d %b %Y %H:%M:%S +0000", t)))
                    fh.write(' '.join(cmd))
                    fh.write("\n")
                    fh.flush()
                    # Execute the command requested.
                    try:
                        cmdexit = subprocess.call(cmd)
                    except OSError as e:
                        if e.errno != errno.ENOENT:
                            raise
                        sys.stderr.write("ERROR: No such file or directory: %s\n" % cmd[0])
                        sys.exit(flockerr)
                finally:
                    # Runs on every exit path, including SystemExit and
                    # KeyboardInterrupt: truncate the flock'd file again.
                    fh.seek(0)
                    fh.truncate(0)
                    fh.flush()

            # Only reached when the lock was held and the command ran.
            sys.exit(cmdexit)

        # All locks taken.
        maybe_timeout(timeout, timeouterr)

        splay = sleeptime / 10
        actual_sleep = sleeptime + random.uniform(0 - splay, splay)
        if verbose:
            print("Tried all locks - sleeping %s" % actual_sleep)
        time.sleep(actual_sleep)
        maybe_timeout(timeout, timeouterr)
# Script entry point: map uncaught conditions onto exit codes.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:  # user hit control-C
        sys.exit(130)
    except Exception:  # all "interesting" exceptions, but not SystemExit
        traceback.print_exc(file=sys.stdout)
        # sys.exit rather than the bare exit(): the latter is injected by
        # the site module and is absent when Python runs with -S.
        sys.exit(200)