3 ## echo | compresslog /path/to/my/log
14 from optparse
import OptionParser
15 from datetime
import datetime
16 import apache_log_parser
    # Per-pageview request durations (%D, microseconds), fed to np.percentile.
    # NOTE(review): this is a class-level *mutable* attribute, shared by every
    # instance.  Harmless while the script creates exactly one parse_stats
    # instance, but move it into __init__ if the class is ever reused.
    pf_pviews_time = list()
27 def __init__(self
, logfmt
):
28 self
._parse
_line
= apache_log_parser
.make_parser(logfmt
)
30 def parse_line(self
, logline
):
32 log_parsed
= self
._parse
_line
(logline
)
35 if log_parsed
['status'] != '200':
37 self
.ct_hits
= self
.ct_hits
+ 1
38 if log_parsed
['response_header_content_type'] != 'text/html':
40 self
.ct_pviews
= self
.ct_pviews
+ 1
41 self
.pf_pviews_time
.append(int(log_parsed
['time_us']))
42 user
= log_parsed
['note_moodleuser']
44 self
.ls_users
.add(user
)
46 def print_stats(self
):
47 print "hits %s" % self
.ct_hits
48 print "pageviews %s" % self
.ct_pviews
49 print "avg pview time us %s" % np
.average(self
.pf_pviews_time
)
50 print "50th percentile %s" % np
.percentile(self
.pf_pviews_time
,50)
51 print "60th percentile %s" % np
.percentile(self
.pf_pviews_time
,60)
52 print "70th percentile %s" % np
.percentile(self
.pf_pviews_time
,70)
53 print "80th percentile %s" % np
.percentile(self
.pf_pviews_time
,80)
54 print "unique users %s" % len(self
.ls_users
)
# TODO: Find a reasonable way to make this configurable...
# Global stats accumulator, fed one line at a time by the main loop when
# --stats is active.  The format string mirrors the Apache LogFormat this
# tool expects: it includes the MOODLEUSER note and %D (request duration
# in microseconds) that parse_line() reads back out.
pstats = parse_stats('%h %l %{MOODLEUSER}n %t %D \"%{Content-Type}o\" \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"')
# Command-line interface: -d/--debug (log to stderr instead of syslog)
# and --stats (treat input as the specially-formatted access log above).
parser = OptionParser()
parser.add_option('-d', '--debug', dest='debug', default=False, action='store_true',
                  help='Print debugging info to stderr, no output to syslog')
parser.add_option('--stats', dest='stats', default=False, action='store_true',
                  help='Parse and process stats assuming it is a specially-formatted apache access log')
(opts, args) = parser.parse_args()
## Setup logging to syslog bright and early

# this will route stderr to syslog, including
# stacktraces from unhandled exceptions
if not opts.debug: # "debug" mode disables logging to syslog
    plogger = subprocess.Popen(['/usr/bin/logger', '-t',
                                'compresslog[%u]' % os.getpid()],
                               stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                               # NOTE(review): original line 76 (the argument
                               # that closes this Popen call) is not visible in
                               # this extract; `stderr=subprocess.PIPE)` is a
                               # placeholder reconstruction -- confirm against
                               # the full source.
                               stderr=subprocess.PIPE)
    # From here on, anything written to stderr flows into /usr/bin/logger's
    # stdin and hence to syslog.
    os.dup2(plogger.stdin.fileno(), sys.stderr.fileno())
## init globals used in sig handler
# fh in append, binary
# NOTE(review): the `def errorprint(msg):` / `def debugprint(msg):` headers
# (original lines ~81-85) are not visible in this extract; only their bodies
# survive, and the names are inferred from later call sites.  debugprint()
# in particular may be guarded by `opts.debug` in the full source -- confirm
# before relying on this reconstruction.
def errorprint(msg):
    # Unconditional diagnostic to stderr (possibly rerouted to syslog above).
    sys.stderr.write(msg+'\n')

def debugprint(msg):
    sys.stderr.write(msg+'\n')
# NOTE(review): the `def alt_name(fname):` header (original line ~88) is not
# visible in this extract; it is reconstructed from the body and call sites.
def alt_name(fname):
    """Return *fname* with a unique timestamp spliced in.

    A trailing '_log' suffix is kept at the end so rotation tooling that
    matches on it keeps working.
    """
    # using microsecond (%f) to sidestep races
    stamp = datetime.now().strftime('_%Y%m%d-%H%M%S-%f')
    if fname.endswith('_log'):
        return fname[0:-4] + stamp + '_log'
    return fname + stamp
96 ## Work around several issues
98 ## - gzip files, due to their nature cannot sustain two simultaneous
99 ## writers. Pure text log files can, thanks to promises from POSIX
100 ## implemented in the kernel about the integrity of writes of
101 ## newline-delimited data.
103 ## - Apache 2.2 will sometimes open multiple piped loggers to the same
104 ## file. Apache 2.4 seems to only do it if the piped logger config
105 ## string is different. v2.2 is less predictable on this.
106 ## This means several compresslog processes are started at the same
107 ## time and race to create the file.
111 ## Try to open file exclusively
112 f
= gzip
.open(fname
, 'a')
114 fcntl
.flock(f
, fcntl
.LOCK_EX | fcntl
.LOCK_NB
)
116 if e
.errno
== errno
.EACCES
or e
.errno
== errno
.EAGAIN
:
117 sys
.stderr
.write("Cannot flock %s\n" % fname
)
121 fname
= alt_name(fname
)
122 sys
.stderr
.write("Retrying with %s\n" % fname
)
126 errorprint("Unexpected error in flock(): %u" % e
.errno
)
129 # make sure we are appending to an empty or a gzip file
130 # to avoid appending to a plaintext log
131 if os
.stat(fname
)[6] != 0:
132 mimetype
= subprocess
.Popen(['/usr/bin/file', '--mime-type', '-b', fname
],
133 stdout
=subprocess
.PIPE
).communicate()[0]
134 mimetype
= mimetype
.rstrip()
135 if mimetype
!= 'application/x-gzip':
136 mvfname
= alt_name(fname
)
137 errorprint("Destination file exists and is not compressed, renaming old file to "
138 + mimetype
+ mvfname
)
139 os
.rename(fname
, mvfname
)
140 # release the lock, restart the process
141 # unfortunately this will append a gzip header
145 # success - out of the error handling loop
# Flags set by the signal handlers below and polled by the main loop:
# sig_wants_flush requests a flush of buffered output, sig_wants_flushexit
# requests a flush followed by shutdown.
sig_wants_flush = False
sig_wants_flushexit = False
def sig_flush(signum, frame):
    """USR1/USR2 handler: ask the main loop to flush buffered output."""
    global sig_wants_flush
    debugprint('sig_flush')
    sig_wants_flush = True
def sig_flushexit(signum, frame):
    # HUP/TERM handler: ask the main loop to flush and then exit.
    debugprint('sig_flushexit')
    global sig_wants_flushexit
    # NOTE(review): original line 159 is not visible in this extract
    # (possibly a second `global`/assignment for sig_wants_flush) -- confirm
    # against the full source.
    sig_wants_flushexit = True
## ensure we flush on various signals
# USR1/USR2: user asked to flush out
# HUP: apache is no longer writing to us (hung up)
# TERM is treated like HUP: flush and shut down cleanly.
signal.signal(signal.SIGUSR1, sig_flush)
signal.signal(signal.SIGUSR2, sig_flush)
signal.signal(signal.SIGHUP, sig_flushexit)
signal.signal(signal.SIGTERM, sig_flushexit)
# while True / select / read(int) / O_NONBLOCK put
# input buffering explicitly in our hands
# Non-blocking stdin lets the loop interleave reads with signal handling
# instead of stalling inside a blocking read().
fcntl.fcntl(sys.stdin, fcntl.F_SETFL, os.O_NONBLOCK)
185 rfds
= select
.select([sys
.stdin
], [], [], timeout
)[0]
186 debugprint('select: %u' % len(rfds
))
187 if len(rfds
): # only read() when select says
190 buf
= sys
.stdin
.read(4096)
191 debugprint('read %u' % len(buf
))
193 # If we do get EINTR during write, in Python < 2.7.2
194 # the write may be screwed/incomplete, but we
195 # have no way to know&retry.
196 # http://bugs.python.org/issue10956
202 debugprint('line tail length %u' % len(buftail
))
204 loglines
= buf
.split('\n')
205 # save the tail for later
206 buftail
= loglines
.pop()
207 for logline
in loglines
:
208 linecount
= linecount
+ 1
209 pstats
.parse_line(logline
)
213 except select
.error
, e
:
214 if e
[0] == errno
.EINTR
:
215 debugprint('E in select: %u' % e
[0])
216 continue # on signal, restart at the top
220 debugprint('E in read() or write(): %u' % e
[0])
221 if e
[0] == errno
.EINTR
:
222 continue # on signal, restart at the top
226 if at_eof
or sig_wants_flushexit
:
229 debugprint('fh closed, exiting')
235 sig_wants_flush
= False
237 f
.flush(zlib
.Z_FULL_FLUSH
)
240 debugprint('E in flush')
241 # ignore exceptions, try to keep rolling