From 5f1c93e41b1c38ccc97638d41f505e5d0fbdab71 Mon Sep 17 00:00:00 2001 From: Martin Langhoff Date: Wed, 18 Mar 2015 15:39:44 -0400 Subject: [PATCH] compresslog: stats now functionally complete --- compresslog | 70 +++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/compresslog b/compresslog index 2c66a3b..7aeb84a 100755 --- a/compresslog +++ b/compresslog @@ -14,20 +14,25 @@ import errno from optparse import OptionParser import logging import logging.handlers +import time from datetime import datetime import apache_log_parser import numpy as np +import json class parse_stats: - ls_users = set() - ct_hits = 0 - ct_pviews = 0 - pf_pviews_time = list() def __init__(self, logfmt): self._parse_line = apache_log_parser.make_parser(logfmt) + self._reset() + + def _reset(self): + self.ls_users = set() + self.ct_hits = 0 + self.ct_pviews = 0 + self.pf_pviews_time = list() def parse_line(self, logline): try: @@ -45,21 +50,33 @@ class parse_stats: if user != '-': self.ls_users.add(user) - def print_stats(self): - print "hits %s" % self.ct_hits - print "pageviews %s" % self.ct_pviews - print "avg pview time us %s" % np.average(self.pf_pviews_time) - print "50th percentile %s" % np.percentile(self.pf_pviews_time,50) - print "60th percentile %s" % np.percentile(self.pf_pviews_time,60) - print "70th percentile %s" % np.percentile(self.pf_pviews_time,70) - print "80th percentile %s" % np.percentile(self.pf_pviews_time,80) - print "unique users %s" % len(self.ls_users) + def log_stats(self, boundary): + stats = { 'hits': 0, + 'pviews': 0, + 'pview_avg_us': 0, + 'pview_50th_us': 0, + 'pview_50th_us': 0, + 'unique_users': 0, + 'boundary_epoch': boundary + } + if self.ct_hits: + stats['hits'] = self.ct_hits + if self.ct_pviews: + stats['pviews'] = self.ct_pviews + stats['pview_avg_us'] = np.average(self.pf_pviews_time), + stats['pview_50th_us'] = np.percentile(self.pf_pviews_time,50), + stats['pview_50th_us'] = np.percentile(self.pf_pviews_time,80), + stats['unique_users'] = len(self.ls_users), + # passing separators explicitly gives us more compact encoding + logger.info("stats %s" % json.dumps(stats, separators=(',',':'))) + self._reset() + # TODO: Find a reasonable way to make this configurable... pstats = parse_stats('%h %l %{MOODLEUSER}n %t %D \"%{Content-Type}o\" \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"') parser = OptionParser() -parser.add_option('-d', '--debug', dest='debug', default=False, action='store_true', +parser.add_option('-d', '--debug', dest='debug', default=0, action='count', help='Print debugging info to stderr, no output to syslog') parser.add_option('--stats', dest='stats', default=False, action='store_true', help='Parse and process stats assuming it is a specially-formatted apache access log') @@ -73,13 +90,15 @@ fname = args[0] # this will route stderr to syslog, including # stacktraces from unhandled exceptions logger = logging.getLogger('compresslog') -if opts.debug: # "debug" mode disables logging to syslog +logger.setLevel(logging.INFO) +if opts.debug > 0: # "debug" mode disables logging to syslog logh = logging.StreamHandler() # docs tell us to set logging at the handler level # but that's not what works, setting it at the global # logger level does work #logh.setLevel(logging.DEBUG) - logger.setLevel(logging.DEBUG) + if opts.debug > 1: + logger.setLevel(logging.DEBUG) else: # writing to /dev/log makes it compat w systemd's journal logh = logging.handlers.SysLogHandler(address='/dev/log') @@ -181,13 +200,22 @@ fcntl.fcntl(sys.stdin, fcntl.F_SETFL, os.O_NONBLOCK) at_eof=False timeout=60 - +statsboundary=False buf = False buftail = False linecount = 0 +PERIOD = 60 # 1m while True: - + if opts.stats: + # calculate next period boundary + # and use that as timeout + t = time.time() + if (statsboundary == False) or (statsboundary < t) : + # next 5m boundary + statsboundary = t - (t % PERIOD ) + PERIOD + timeout = statsboundary - t + logger.debug('timeout in: %u s' % timeout) try: rfds = select.select([sys.stdin], [], [], timeout)[0] logger.debug('select: %u' % len(rfds)) @@ -216,6 +244,10 @@ while True: else: at_eof = True logger.debug('at_eof') + if opts.stats: + t = time.time() + if t > statsboundary: + pstats.log_stats(statsboundary) except select.error, e: if e[0] == errno.EINTR: logger.debug('E in select: %u' % e[0]) @@ -233,8 +265,6 @@ while True: f.close() sys.stdin.close() logger.debug('fh closed, exiting') - if opts.stats: - pstats.print_stats() sys.exit(0) if sig_wants_flush: -- 2.11.4.GIT