3 ## echo | compresslog /path/to/my/log
14 from optparse
import OptionParser
16 import logging
.handlers
17 from datetime
import datetime
18 import apache_log_parser
    # Per-request service times in microseconds (Apache %D), fed to the
    # percentile/average stats in print_stats().
    # NOTE(review): the enclosing `class parse_stats` header and the sibling
    # counters it implies (ct_hits, ct_pviews, ls_users) fall outside this
    # chunk — verify against the full file. A class-level list is shared by
    # all instances; the script only ever builds one (`pstats`), so this is
    # harmless here.
    pf_pviews_time = list()
    def __init__(self, logfmt):
        """Compile an apache_log_parser line parser for the given Apache
        LogFormat string; parse_line() applies it to each log line."""
        self._parse_line = apache_log_parser.make_parser(logfmt)
32 def parse_line(self
, logline
):
34 log_parsed
= self
._parse
_line
(logline
)
37 if log_parsed
['status'] != '200':
39 self
.ct_hits
= self
.ct_hits
+ 1
40 if log_parsed
['response_header_content_type'] != 'text/html':
42 self
.ct_pviews
= self
.ct_pviews
+ 1
43 self
.pf_pviews_time
.append(int(log_parsed
['time_us']))
44 user
= log_parsed
['note_moodleuser']
46 self
.ls_users
.add(user
)
48 def print_stats(self
):
49 print "hits %s" % self
.ct_hits
50 print "pageviews %s" % self
.ct_pviews
51 print "avg pview time us %s" % np
.average(self
.pf_pviews_time
)
52 print "50th percentile %s" % np
.percentile(self
.pf_pviews_time
,50)
53 print "60th percentile %s" % np
.percentile(self
.pf_pviews_time
,60)
54 print "70th percentile %s" % np
.percentile(self
.pf_pviews_time
,70)
55 print "80th percentile %s" % np
.percentile(self
.pf_pviews_time
,80)
56 print "unique users %s" % len(self
.ls_users
)
# TODO: Find a reasonable way to make this configurable...
# Apache LogFormat this tool expects when --stats is used; %D (service time
# in microseconds) and %{MOODLEUSER}n are what parse_stats consumes.
pstats = parse_stats('%h %l %{MOODLEUSER}n %t %D \"%{Content-Type}o\" \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"')

parser = OptionParser()
parser.add_option('-d', '--debug', dest='debug', default=False, action='store_true',
                  help='Print debugging info to stderr, no output to syslog')
parser.add_option('--stats', dest='stats', default=False, action='store_true',
                  help='Parse and process stats assuming it is a specially-formatted apache access log')
(opts, args) = parser.parse_args()
## Setup logging to syslog bright and early

# when not in debug mode,
# this will route stderr to syslog, including
# stacktraces from unhandled exceptions
logger = logging.getLogger('compresslog')
if opts.debug: # "debug" mode disables logging to syslog
    logh = logging.StreamHandler()
    # docs tell us to set logging at the handler level
    # but that's not what works, setting it at the global
    # logger level does work
    #logh.setLevel(logging.DEBUG)
    logger.setLevel(logging.DEBUG)
else:
    # NOTE(review): this `else:` line (original line 83) fell in a gap of
    # the mangled chunk but is required by the surrounding code/comments.
    # writing to /dev/log makes it compat w systemd's journal
    logh = logging.handlers.SysLogHandler(address='/dev/log')
    # logger.setLevel(logging.DEBUG)
logf = logging.Formatter('compresslog: %(message)s')
logh.setFormatter(logf)
logger.addHandler(logh)
    # NOTE(review): lone surviving body line of an `errorprint(msg)` helper —
    # its `def` (original lines ~90-92) is missing from this chunk. It is
    # called below for fatal/unexpected conditions and echoes the message to
    # stderr with a trailing newline.
    sys.stderr.write(msg+'\n')
def alt_name(fname):
    """Return a variant of *fname* carrying a unique timestamp.

    Used to pick a fresh log file name when the preferred one is locked or
    unusable. For names ending in '_log' the timestamp goes before that
    suffix; otherwise it is simply appended.

    NOTE(review): the `def` line itself (original line ~95) is missing from
    this mangled chunk; the function name is taken from its call sites.
    """
    # using microsecond (%f) to sidestep races
    datestr = datetime.now().strftime('_%Y%m%d-%H%M%S-%f')
    if fname.endswith('_log'):
        return fname[0:-4] + datestr + '_log'
    return fname + datestr
103 ## Work around several issues
105 ## - gzip files, due to their nature cannot sustain two simultaneous
106 ## writers. Pure text log files can, thanks to promises from POSIX
107 ## implemented in the kernel about the integrity of writes of
108 ## newline-delimited data.
110 ## - Apache 2.2 will sometimes open multiple piped loggers to the same
111 ## file. Apache 2.4 seems to only do it if the piped logger config
112 ## string is different. v2.2 is less predictable on this.
113 ## This means several compresslog processes are started at the same
114 ## time and race to create the file.
# NOTE(review): this whole region is a retry loop — its `while` header, the
# `try:` / `except IOError` lines and the `continue`/`break`/`sys.exit`
# statements all fall in gaps of this mangled chunk. The surviving lines are
# kept verbatim and annotated in place; reconstruct control flow from the
# upstream source before editing.

## Try to open file exclusively
f = gzip.open(fname, 'a')
# (original line 120 missing — presumably `try:`)
fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
# (original line 122 missing — presumably `except IOError, e:`)
if e.errno == errno.EACCES or e.errno == errno.EAGAIN:
    # another compresslog holds the lock: pick a new name and retry
    sys.stderr.write("Cannot flock %s\n" % fname)
    # (original lines 125-127 missing — likely f.close() before renaming)
    fname = alt_name(fname)
    sys.stderr.write("Retrying with %s\n" % fname)
    # (original lines 130-132 missing — likely `continue` and an `else:`
    # introducing the errorprint branch below)
errorprint("Unexpected error in flock(): %u" % e.errno)
# (original lines 134-135 missing — likely exit/abort)

# make sure we are appending to an empty or a gzip file
# to avoid appending to a plaintext log
if os.stat(fname)[6] != 0: # index 6 == st_size
    mimetype = subprocess.Popen(['/usr/bin/file', '--mime-type', '-b', fname],
                                stdout=subprocess.PIPE).communicate()[0]
    mimetype = mimetype.rstrip()
    if mimetype != 'application/x-gzip':
        mvfname = alt_name(fname)
        # NOTE(review): the message concatenates mimetype and mvfname with no
        # separator or space — likely a missing "', renaming to '" fragment.
        errorprint("Destination file exists and is not compressed, renaming old file to "
                   + mimetype + mvfname)
        os.rename(fname, mvfname)
        # release the lock, restart the process
        # unfortunately this will append a gzip header
        # (original lines 149-151 missing — likely close + `continue`)
# success - out of the error handling loop
# Flags set asynchronously by the signal handlers below and polled by the
# main pump loop.
sig_wants_flush = False      # flush the gzip stream at the next opportunity
sig_wants_flushexit = False  # flush, then close up and exit
def sig_flush(signum, frame):
    """Signal handler (USR1/USR2): ask the main loop to flush its buffers."""
    global sig_wants_flush
    logger.debug('sig_flush')
    sig_wants_flush = True
def sig_flushexit(signum, frame):
    """Signal handler (HUP/TERM): ask the main loop to flush and exit."""
    logger.debug('sig_flushexit')
    global sig_wants_flushexit
    # NOTE(review): original line 166 is missing from this chunk — upstream
    # plausibly also declares/sets `global sig_wants_flush` here; verify.
    sig_wants_flushexit = True
## ensure we flush on various signals
# USR1/USR2: user asked to flush out
# HUP: apache is no longer writing to us (hung up)
for _signum, _handler in ((signal.SIGUSR1, sig_flush),
                          (signal.SIGUSR2, sig_flush),
                          (signal.SIGHUP, sig_flushexit),
                          (signal.SIGTERM, sig_flushexit)):
    signal.signal(_signum, _handler)
# while True / select / read(int) / O_NONBLOCK put
# input buffering explicitly in our hands
# (non-blocking stdin lets the loop interleave reads with signal handling)
fcntl.fcntl(sys.stdin, fcntl.F_SETFL, os.O_NONBLOCK)
# NOTE(review): main pump loop — its `while` header, the buffer/state
# initialisation (buf, buftail, linecount, at_eof, timeout) and the opening
# `try:` fall in gaps of this mangled chunk (original lines ~181-191).
# Surviving lines kept verbatim and annotated in place.
rfds = select.select([sys.stdin], [], [], timeout)[0]
logger.debug('select: %u' % len(rfds))
if len(rfds): # only read() when select says
    logger.debug('read()')
    # (original line 196 missing)
    buf = sys.stdin.read(4096)
    logger.debug('read %u' % len(buf))
    # (original line 199 missing)
    # If we do get EINTR during write, in Python < 2.7.2
    # the write may be screwed/incomplete, but we
    # have no way to know&retry.
    # http://bugs.python.org/issue10956
    # (original lines 204-207 missing — presumably the gzip write of the
    # pending data and the empty-read / at_eof detection)
    logger.debug('line tail length %u' % len(buftail))
    # (original line 209 missing)
    # split the chunk into whole lines for the stats parser
    loglines = buf.split('\n')
    # save the tail for later
    buftail = loglines.pop()
    for logline in loglines:
        linecount = linecount + 1
        pstats.parse_line(logline)
    # (original lines 216-217 missing — presumably the branch marking
    # at_eof when read() returns no data)
    logger.debug('at_eof')
except select.error, e:
    # EINTR means a signal arrived mid-select; loop so the flags get polled
    if e[0] == errno.EINTR:
        logger.debug('E in select: %u' % e[0])
        continue # on signal, restart at the top
    # (original lines 223-225 missing — presumably re-raise and a second
    # `except IOError` handler introducing the lines below)
    logger.debug('E in read() or write(): %u' % e[0])
    if e[0] == errno.EINTR:
        continue # on signal, restart at the top
    # (original lines 229-231 missing)

if at_eof or sig_wants_flushexit:
    # (original lines 233-234 missing)
    logger.debug('fh closed, exiting')
# (original lines 236-240 missing — presumably exit and the
# `if sig_wants_flush:` guard for the reset below)
sig_wants_flush = False
# (original line 242 missing — presumably `try:`)
# Z_FULL_FLUSH makes the gzip stream readable up to this point
f.flush(zlib.Z_FULL_FLUSH)
logger.debug('flush')
# (original line 245 missing — presumably a bare `except:`)
logger.debug('E in flush')
# ignore exceptions, try to keep rolling