compresslog: simplify code flow
[compresslog.git] / compresslog
blobed62fe9ee59b8f35b593a0e29e075836d3d4f979
1 #!/usr/bin/python
3 ## echo | compresslog /path/to/my/log
5 import os
6 import sys
7 import select
8 import signal
9 import gzip
10 import zlib
11 import subprocess
12 import fcntl
13 import errno
16 ## Setup logging to syslog bright and early
17 ##
18 # this will route stderr to syslog, including
19 # stacktraces from unhandled exceptions
# Debug mode ("-d" as the first argument) keeps stderr where it is and
# enables the trace messages guarded by `if debug:` further down.
# The flag is defined unconditionally: the signal handlers and the main
# loop read it in both modes, so leaving it unset outside debug mode
# raises NameError on first use.
debug = False
if len(sys.argv) > 1 and sys.argv[1] == '-d':
    debug = True
    sys.argv.pop(1)

# Report a missing log path while stderr is still the caller's stderr,
# instead of dying with an IndexError.
if len(sys.argv) < 2:
    sys.stderr.write("usage: compresslog [-d] /path/to/my/log\n")
    sys.exit(2)

if not debug:
    # Point our stderr file descriptor at the stdin of logger(1) so that
    # messages and stacktraces from unhandled exceptions reach syslog,
    # tagged with our pid.
    plogger = subprocess.Popen(['/usr/bin/logger', '-t',
                                'compresslog[%u]' % os.getpid()],
                               stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                               close_fds=True)
    os.dup2(plogger.stdin.fileno(), sys.stderr.fileno())

## init globals used below
# destination path; the open loop may rewrite it with a pid suffix
fname = sys.argv[1]
34 ## Work around several issues
36 ## - gzip files, due to their nature cannot sustain two simultaneous
37 ## writers. Pure text log files can, thanks to promises from POSIX
38 ## implemented in the kernel about the integrity of writes of
39 ## newline-delimited data.
## - gzip files have a header at the start and a checksum at the end,
##   so we cannot just append to an existing (old, closed) file.
##   Fedora 19/RHEL7 libs seem to handle concatenated gzip files
##   transparently, but our deployment platform is RHEL6.
46 ## - Apache 2.2 will sometimes open multiple piped loggers to the same
47 ## file. Apache 2.4 seems to only do it if the piped logger config
48 ## string is different. v2.2 is less predictable on this.
49 ## This means several compresslog processes are started at the same
50 ## time and race to create the file.
# Pick a destination file that this process owns exclusively.
#
# First pass: if the requested name already exists, switch to a
# pid-suffixed name.  Then open the file and take a non-blocking
# exclusive flock on it.  If another process holds the lock, go around
# once more (the file exists by now, so the pid-suffixed name is chosen);
# if even the pid-suffixed file is locked, give up.
renamedwithpid = False
while True:
    if os.path.exists(fname) and not renamedwithpid:
        # rename, using our pid for uniqueness
        pidstr = '_%u' % os.getpid()
        renamedwithpid = True
        if fname.endswith('_log'):
            # keep the conventional "_log" suffix at the very end
            fname = fname[0:-4] + pidstr + '_log'
        else:
            fname = fname + pidstr
        sys.stderr.write("Destination file exists, writing to "
                         + fname + " instead.\n")

    ## Try to open file exclusively
    # NOTE(review): mode 'wb' truncates the file before the lock is held,
    # so a process that loses the race below has already truncated the
    # winner's file.  Creating the file with O_EXCL (or locking before
    # truncating) would close that window -- confirm before changing.
    f = gzip.open(fname, 'wb')
    try:
        fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError as e:
        if e.errno not in (errno.EACCES, errno.EAGAIN):
            sys.stderr.write("Unexpected error in flock(): %u\n" % e.errno)
            raise
        sys.stderr.write("Cannot flock %s\n" % fname)
        if renamedwithpid:
            sys.stderr.write("Lost race with self? exiting\n")
            sys.exit(1)
        # Somebody else holds the lock on the plain name: retry instead
        # of falling through and writing to a file we do not own.
        continue
    # success - out of the error handling loop
    break
# Requests recorded by the signal handlers below; the main loop checks
# them once per iteration.
sig_wants_flush = False      # set by sig_flush
sig_wants_flushexit = False  # set by sig_flushexit
def sig_flush(signum, frame):
    """Signal handler: record that a flush of the output was requested.

    Only sets the module-level ``sig_wants_flush`` flag; the flush itself
    is carried out by the main loop.  ``signum`` and ``frame`` are the
    standard handler arguments and are not used.
    """
    global sig_wants_flush
    if debug:
        sys.stderr.write('sig_flush\n')
    sig_wants_flush = True
def sig_flushexit(signum, frame):
    """Signal handler: record that a final flush and exit was requested.

    Sets the module-level ``sig_wants_flushexit`` flag and drops the
    module-level ``timeout`` to 0, which the main loop passes to
    select().  ``signum`` and ``frame`` are the standard handler
    arguments and are not used.
    """
    global sig_wants_flushexit, timeout
    if debug:
        sys.stderr.write('sig_flushexit\n')
    timeout = 0
    sig_wants_flushexit = True
## ensure we flush on various signals
# USR1/USR2: user asked to flush out
# HUP: apache is no longer writing to us (hung up)
# TERM: handled the same way as HUP
for _signum, _handler in ((signal.SIGUSR1, sig_flush),
                          (signal.SIGUSR2, sig_flush),
                          (signal.SIGHUP, sig_flushexit),
                          (signal.SIGTERM, sig_flushexit)):
    signal.signal(_signum, _handler)
# while True / select / read(int) / O_NONBLOCK put
# input buffering explicitly in our hands
#
# F_SETFL replaces the whole set of file status flags, so OR O_NONBLOCK
# into the current flags instead of overwriting them.
stdin_flags = fcntl.fcntl(sys.stdin, fcntl.F_GETFL)
fcntl.fcntl(sys.stdin, fcntl.F_SETFL, stdin_flags | os.O_NONBLOCK)

at_eof = False  # becomes True once read() returns no data
timeout = 60    # seconds select() waits; sig_flushexit sets it to 0
# Main loop: wait for input on stdin, push whatever arrives through the
# gzip stream, and act on the flags set by the signal handlers.
#
# NOTE(review): a signal that interrupts select()/read()/write() restarts
# the loop via `continue`, i.e. before the flags below are looked at.
# sig_flushexit compensates by setting timeout to 0; a plain flush
# request is only served after the next wakeup (input or timeout).
while True:
    buf = False
    try:
        rfds = select.select([sys.stdin], [], [], timeout)[0]
        if debug:
            sys.stderr.write('select: %u\n' % len(rfds))
        if rfds:
            # only read() when select says it will not block
            if debug:
                sys.stderr.write('read()\n')
            buf = sys.stdin.read(4096)
            if debug:
                sys.stderr.write('read %u\n' % len(buf))
            if buf:
                # If we do get EINTR here, in Python < 2.7.2
                # the write may be screwed/incomplete, but we
                # have no way to know&retry.
                # http://bugs.python.org/issue10956
                f.write(buf)
                if debug:
                    sys.stderr.write('wrote\n')
            else:
                # readable but empty: the writer closed its end
                at_eof = True
                if debug:
                    sys.stderr.write('at_eof\n')
    except select.error as e:
        # args[0] carries the errno for select.error and IOError alike
        if debug:
            sys.stderr.write('E in select: %u\n' % e.args[0])
        if e.args[0] == errno.EINTR:
            continue  # on signal, restart at the top
        raise
    except IOError as e:
        if debug:
            sys.stderr.write('E in read() or write(): %u\n' % e.args[0])
        if e.args[0] == errno.EINTR:
            continue  # on signal, restart at the top
        raise

    if at_eof or sig_wants_flushexit:
        # closing the gzip stream writes out whatever is still pending
        f.close()
        sys.stdin.close()
        if debug:
            sys.stderr.write('fh closed, exiting\n')
        sys.exit(0)

    if sig_wants_flush:
        sig_wants_flush = False
        try:
            f.flush(zlib.Z_FULL_FLUSH)
            if debug:
                sys.stderr.write('flush\n')
        except Exception:
            # Best effort: ignore flush failures and try to keep rolling,
            # but let SystemExit/KeyboardInterrupt through.
            if debug:
                sys.stderr.write('E in flush\n')