utils/analyzer/CmpRuns

   1 #!/usr/bin/env python
   2
   3 """
   4 CmpRuns - A simple tool for comparing two static analyzer runs to determine
   5 which reports have been added, removed, or changed.
   6
   7 This is designed to support automated testing using the static analyzer, from
   8 two perspectives:
   9   1. To monitor changes in the static analyzer's reports on real code bases, for
  10      regression testing.
  11
  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.
  14 """
  15
  16 import os
  17 import plistlib
  18
  19 #
  20
  21 class multidict:
  22     def __init__(self, elts=()):
  23         self.data = {}
  24         for key,value in elts:
  25             self[key] = value
  26
  27     def __getitem__(self, item):
  28         return self.data[item]
  29     def __setitem__(self, key, value):
  30         if key in self.data:
  31             self.data[key].append(value)
  32         else:
  33             self.data[key] = [value]
  34     def items(self):
  35         return self.data.items()
  36     def values(self):
  37         return self.data.values()
  38     def keys(self):
  39         return self.data.keys()
  40     def __len__(self):
  41         return len(self.data)
  42     def get(self, key, default=None):
  43         return self.data.get(key, default)
  44
  45 #
  46
  47 class AnalysisReport:
  48     def __init__(self, run, files):
  49         self.run = run
  50         self.files = files
  51
  52 class AnalysisDiagnostic:
  53     def __init__(self, data, report, htmlReport):
  54         self.data = data
  55         self.report = report
  56         self.htmlReport = htmlReport
  57
  58     def getReadableName(self):
  59         loc = self.data['location']
  60         filename = self.report.run.getSourceName(self.report.files[loc['file']])
  61         line = loc['line']
  62         column = loc['col']
  63
  64         # FIXME: Get a report number based on this key, to 'distinguish'
  65         # reports, or something.
  66
  67         return '%s:%d:%d' % (filename, line, column)
  68
  69     def getReportData(self):
  70         if self.htmlReport is None:
  71             return "This diagnostic does not have any report data."
  72
  73         return open(os.path.join(self.report.run.path,
  74                                  self.htmlReport), "rb").read()
  75
  76 class AnalysisRun:
  77     def __init__(self, path, opts):
  78         self.path = path
  79         self.reports = []
  80         self.diagnostics = []
  81         self.opts = opts
  82
  83     def getSourceName(self, path):
  84         if path.startswith(self.opts.root):
  85             return path[len(self.opts.root):]
  86         return path
  87
  88 def loadResults(path, opts):
  89     run = AnalysisRun(path, opts)
  90
  91     for f in os.listdir(path):
  92         if (not f.startswith('report') or
  93             not f.endswith('plist')):
  94             continue
  95
  96         p = os.path.join(path, f)
  97         data = plistlib.readPlist(p)
  98
  99         # Ignore empty reports.
 100         if not data['files']:
 101             continue
 102
 103         # Extract the HTML reports, if they exists.
 104         if 'HTMLDiagnostics_files' in data['diagnostics'][0]:
 105             htmlFiles = []
 106             for d in data['diagnostics']:
 107                 # FIXME: Why is this named files, when does it have multiple
 108                 # files?
 109                 assert len(d['HTMLDiagnostics_files']) == 1
 110                 htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
 111         else:
 112             htmlFiles = [None] * len(data['diagnostics'])
 113
 114         report = AnalysisReport(run, data.pop('files'))
 115         diagnostics = [AnalysisDiagnostic(d, report, h)
 116                        for d,h in zip(data.pop('diagnostics'),
 117                                       htmlFiles)]
 118
 119         assert not data
 120
 121         run.reports.append(report)
 122         run.diagnostics.extend(diagnostics)
 123
 124     return run
 125
 126 def compareResults(A, B):
 127     """
 128     compareResults - Generate a relation from diagnostics in run A to
 129     diagnostics in run B.
 130
 131     The result is the relation as a list of triples (a, b, confidence) where
 132     each element {a,b} is None or an element from the respective run, and
 133     confidence is a measure of the match quality (where 0 indicates equality,
 134     and None is used if either element is None).
 135     """
 136
 137     res = []
 138
 139     # Quickly eliminate equal elements.
 140     neqA = []
 141     neqB = []
 142     eltsA = list(A.diagnostics)
 143     eltsB = list(B.diagnostics)
 144     eltsA.sort(key = lambda d: d.data)
 145     eltsB.sort(key = lambda d: d.data)
 146     while eltsA and eltsB:
 147         a = eltsA.pop()
 148         b = eltsB.pop()
 149         if a.data == b.data:
 150             res.append((a, b, 0))
 151         elif a.data > b.data:
 152             neqA.append(a)
 153             eltsB.append(b)
 154         else:
 155             neqB.append(b)
 156             eltsA.append(a)
 157     neqA.extend(eltsA)
 158     neqB.extend(eltsB)
 159
 160     # FIXME: Add fuzzy matching. One simple and possible effective idea would be
 161     # to bin the diagnostics, print them in a normalized form (based solely on
 162     # the structure of the diagnostic), compute the diff, then use that as the
 163     # basis for matching. This has the nice property that we don't depend in any
 164     # way on the diagnostic format.
 165
 166     for a in neqA:
 167         res.append((a, None, None))
 168     for b in neqB:
 169         res.append((None, b, None))
 170
 171     return res
 172
 173 def main():
 174     from optparse import OptionParser
 175     parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
 176     parser.add_option("", "--root", dest="root",
 177                       help="Prefix to ignore on source files",
 178                       action="store", type=str, default="")
 179     parser.add_option("", "--verbose-log", dest="verboseLog",
 180                       help="Write additional information to LOG [default=None]",
 181                       action="store", type=str, default=None,
 182                       metavar="LOG")
 183     (opts, args) = parser.parse_args()
 184
 185     if len(args) != 2:
 186         parser.error("invalid number of arguments")
 187
 188     dirA,dirB = args
 189
 190     # Load the run results.
 191     resultsA = loadResults(dirA, opts)
 192     resultsB = loadResults(dirB, opts)
 193
 194     # Open the verbose log, if given.
 195     if opts.verboseLog:
 196         auxLog = open(opts.verboseLog, "wb")
 197     else:
 198         auxLog = None
 199
 200     diff = compareResults(resultsA, resultsB)
 201     for res in diff:
 202         a,b,confidence = res
 203         if a is None:
 204             print "ADDED: %r" % b.getReadableName()
 205             if auxLog:
 206                 print >>auxLog, ("('ADDED', %r, %r)" % (b.getReadableName(),
 207                                                         b.getReportData()))
 208         elif b is None:
 209             print "REMOVED: %r" % a.getReadableName()
 210             if auxLog:
 211                 print >>auxLog, ("('REMOVED', %r, %r)" % (a.getReadableName(),
 212                                                           a.getReportData()))
 213         elif confidence:
 214             print "CHANGED: %r to %r" % (a.getReadableName(),
 215                                          b.getReadableName())
 216             if auxLog:
 217                 print >>auxLog, ("('CHANGED', %r, %r, %r, %r)"
 218                                  % (a.getReadableName(),
 219                                     b.getReadableName(),
 220                                     a.getReportData(),
 221                                     b.getReportData()))
 222         else:
 223             pass
 224
 225     print "TOTAL REPORTS: %r" % len(resultsB.diagnostics)
 226     if auxLog:
 227         print >>auxLog, "('TOTAL', %r)" % len(resultsB.diagnostics)
 228
 229 if __name__ == '__main__':
 230     main()