config/find_OOM_errors.py

   1 #!/usr/bin/env python
   2 # This Source Code Form is subject to the terms of the Mozilla Public
   3 # License, v. 2.0. If a copy of the MPL was not distributed with this
   4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
   5
   6
   7 usage = """%prog: A test for OOM conditions in the shell.
   8
   9 %prog finds segfaults and other errors caused by incorrect handling of
  10 allocation during OOM (out-of-memory) conditions.
  11 """
  12
  13 help = """Check for regressions only. This runs a set of files with a known
  14 number of OOM errors (specified by REGRESSION_COUNT), and exits with a non-zero
  15 result if more or less errors are found. See js/src/Makefile.in for invocation.
  16 """
  17
  18
  19 import hashlib
  20 import re
  21 import shlex
  22 import subprocess
  23 import sys
  24 import threading
  25 import time
  26
  27 from optparse import OptionParser
  28
  29 #####################################################################
  30 # Utility functions
  31 #####################################################################
  32 def run(args, stdin=None):
  33   class ThreadWorker(threading.Thread):
  34     def __init__(self, pipe):
  35       super(ThreadWorker, self).__init__()
  36       self.all = ""
  37       self.pipe = pipe
  38       self.setDaemon(True)
  39
  40     def run(self):
  41       while True:
  42         line = self.pipe.readline()
  43         if line == '': break
  44         else:
  45           self.all += line
  46
  47   try:
  48     if type(args) == str:
  49       args = shlex.split(args)
  50
  51     args = [str(a) for a in args] # convert to strs
  52
  53     stdin_pipe = subprocess.PIPE if stdin else None
  54     proc = subprocess.Popen(args, stdin=stdin_pipe, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  55     if stdin_pipe:
  56       proc.stdin.write(stdin)
  57       proc.stdin.close()
  58
  59     stdout_worker = ThreadWorker(proc.stdout)
  60     stderr_worker = ThreadWorker(proc.stderr)
  61     stdout_worker.start()
  62     stderr_worker.start()
  63
  64     proc.wait()
  65     stdout_worker.join()
  66     stderr_worker.join()
  67
  68   except KeyboardInterrupt, e:
  69     sys.exit(-1)
  70
  71   stdout, stderr = stdout_worker.all, stderr_worker.all
  72   result = (stdout, stderr, proc.returncode)
  73   return result
  74
  75 def get_js_files():
  76   (out, err, exit) = run('find ../jit-test/tests -name "*.js"')
  77   if (err, exit) != ("", 0):
  78     sys.exit("Wrong directory, run from an objdir")
  79   return out.split()
  80
  81
  82
  83 #####################################################################
  84 # Blacklisting
  85 #####################################################################
  86 def in_blacklist(sig):
  87   return sig in blacklist
  88
  89 def add_to_blacklist(sig):
  90   blacklist[sig] = blacklist.get(sig, 0)
  91   blacklist[sig] += 1
  92
# How often does each individual line occur across the recorded signatures?
  94 def count_lines():
  95   """Keep track of the amount of times individual lines occur, in order to
  96      prioritize the errors which occur most frequently."""
  97   counts = {}
  98   for string,count in blacklist.items():
  99     for line in string.split("\n"):
 100       counts[line] = counts.get(line, 0) + count
 101
 102   lines = []
 103   for k,v in counts.items():
 104     lines.append("%6d: %s" % (v,k))
 105
 106   lines.sort()
 107
 108   countlog = file("../OOM_count_log", "w")
 109   countlog.write("\n".join(lines))
 110   countlog.flush()
 111   countlog.close()
 112
 113
 114 #####################################################################
 115 # Output cleaning
 116 #####################################################################
 117 def clean_voutput(err):
 118   # Skip what we can't reproduce
 119   err = re.sub(r"^--\d+-- run: /usr/bin/dsymutil \"shell/js\"$", "", err, flags=re.MULTILINE)
 120   err = re.sub(r"^==\d+==", "", err, flags=re.MULTILINE)
 121   err = re.sub(r"^\*\*\d+\*\*", "", err, flags=re.MULTILINE)
 122   err = re.sub(r"^\s+by 0x[0-9A-Fa-f]+: ", "by: ", err, flags=re.MULTILINE)
 123   err = re.sub(r"^\s+at 0x[0-9A-Fa-f]+: ", "at: ", err, flags=re.MULTILINE)
 124   err = re.sub(r"(^\s+Address 0x)[0-9A-Fa-f]+( is not stack'd)", r"\1\2", err, flags=re.MULTILINE)
 125   err = re.sub(r"(^\s+Invalid write of size )\d+", r"\1x", err, flags=re.MULTILINE)
 126   err = re.sub(r"(^\s+Invalid read of size )\d+", r"\1x", err, flags=re.MULTILINE)
 127   err = re.sub(r"(^\s+Address 0x)[0-9A-Fa-f]+( is )\d+( bytes inside a block of size )[0-9,]+( free'd)", r"\1\2\3\4", err, flags=re.MULTILINE)
 128
 129   # Skip the repeating bit due to the segfault
 130   lines = []
 131   for l in err.split('\n'):
 132     if l == " Process terminating with default action of signal 11 (SIGSEGV)":
 133       break
 134     lines.append(l)
 135   err = '\n'.join(lines)
 136
 137   return err
 138
 139 def remove_failed_allocation_backtraces(err):
 140   lines = []
 141
 142   add = True
 143   for l in err.split('\n'):
 144
 145     # Set start and end conditions for including text
 146     if l == " The site of the failed allocation is:":
 147       add = False
 148     elif l[:2] not in ['by: ', 'at:']:
 149       add = True
 150
 151     if add:
 152       lines.append(l)
 153
 154
 155   err = '\n'.join(lines)
 156
 157   return err
 158
 159
 160 def clean_output(err):
 161   err = re.sub(r"^js\(\d+,0x[0-9a-f]+\) malloc: \*\*\* error for object 0x[0-9a-f]+: pointer being freed was not allocated\n\*\*\* set a breakppoint in malloc_error_break to debug\n$", "pointer being freed was not allocated", err, flags=re.MULTILINE)
 162
 163   return err
 164
 165
 166 #####################################################################
 167 # Consts, etc
 168 #####################################################################
 169
 170 command_template = 'shell/js' \
 171                  + ' -m -j -p' \
 172                  + ' -e "const platform=\'darwin\'; const libdir=\'../jit-test/lib/\';"' \
 173                  + ' -f ../jit-test/lib/prolog.js' \
 174                  + ' -f %s'
 175
 176
 177 # Blacklists are things we don't want to see in our logs again (though we do
 178 # want to count them when they happen). Whitelists we do want to see in our
 179 # logs again, principally because the information we have isn't enough.
 180
 181 blacklist = {}
 182 add_to_blacklist(r"('', '', 1)") # 1 means OOM if the shell hasn't launched yet.
 183 add_to_blacklist(r"('', 'out of memory\n', 1)")
 184
 185 whitelist = set()
 186 whitelist.add(r"('', 'out of memory\n', -11)") # -11 means OOM
 187 whitelist.add(r"('', 'out of memory\nout of memory\n', -11)")
 188
 189
 190
 191 #####################################################################
 192 # Program
 193 #####################################################################
 194
 195 # Options
 196 parser = OptionParser(usage=usage)
 197 parser.add_option("-r", "--regression", action="store", metavar="REGRESSION_COUNT", help=help,
 198                   type="int", dest="regression", default=None)
 199
 200 (OPTIONS, args) = parser.parse_args()
 201
 202
 203 if OPTIONS.regression != None:
 204   # TODO: This should be expanded as we get a better hang of the OOM problems.
 205   # For now, we'll just check that the number of OOMs in one short file does not
 206   # increase.
 207   files = ["../jit-test/tests/arguments/args-createontrace.js"]
 208 else:
 209   files = get_js_files()
 210
 211   # Use a command-line arg to reduce the set of files
 212   if len (args):
 213     files = [f for f in files if f.find(args[0]) != -1]
 214
 215
 216 if OPTIONS.regression == None:
 217   # Don't use a logfile, this is automated for tinderbox.
 218   log = file("../OOM_log", "w")
 219
 220
 221 num_failures = 0
 222 for f in files:
 223
 224   # Run it once to establish boundaries
 225   command = (command_template + ' -O') % (f)
 226   out, err, exit = run(command)
 227   max = re.match(".*OOM max count: (\d+).*", out, flags=re.DOTALL).groups()[0]
 228   max = int(max)
 229
 230   # OOMs don't recover well for the first 20 allocations or so.
 231   # TODO: revisit this.
 232   for i in range(20, max):
 233
 234     if OPTIONS.regression == None:
 235       print "Testing allocation %d/%d in %s" % (i,max,f)
 236     else:
 237       sys.stdout.write('.') # something short for tinderbox, no space or \n
 238
 239     command = (command_template + ' -A %d') % (f, i)
 240     out, err, exit = run(command)
 241
 242     # Success (5 is SM's exit code for controlled errors)
 243     if exit == 5 and err.find("out of memory") != -1:
 244       continue
 245
 246     # Failure
 247     else:
 248
 249       if OPTIONS.regression != None:
 250         # Just count them
 251         num_failures += 1
 252         continue
 253
 254       #########################################################################
 255       # The regression tests ends above. The rest of this is for running  the
 256       # script manually.
 257       #########################################################################
 258
 259       problem = str((out, err, exit))
 260       if in_blacklist(problem) and problem not in whitelist:
 261         add_to_blacklist(problem)
 262         continue
 263
 264       add_to_blacklist(problem)
 265
 266
 267       # Get valgrind output for a good stack trace
 268       vcommand = "valgrind --dsymutil=yes -q --log-file=OOM_valgrind_log_file " + command
 269       run(vcommand)
 270       vout = file("OOM_valgrind_log_file").read()
 271       vout = clean_voutput(vout)
 272       sans_alloc_sites = remove_failed_allocation_backtraces(vout)
 273
 274       # Don't print duplicate information
 275       if in_blacklist(sans_alloc_sites):
 276         add_to_blacklist(sans_alloc_sites)
 277         continue
 278
 279       add_to_blacklist(sans_alloc_sites)
 280
 281       log.write ("\n")
 282       log.write ("\n")
 283       log.write ("=========================================================================")
 284       log.write ("\n")
 285       log.write ("An allocation failure at\n\tallocation %d/%d in %s\n\tcauses problems (detected using bug 624094)" % (i, max, f))
 286       log.write ("\n")
 287       log.write ("\n")
 288
 289       log.write ("Command (from obj directory, using patch from bug 624094):\n  " + command)
 290       log.write ("\n")
 291       log.write ("\n")
 292       log.write ("stdout, stderr, exitcode:\n  " + problem)
 293       log.write ("\n")
 294       log.write ("\n")
 295
 296       double_free = err.find("pointer being freed was not allocated") != -1
 297       oom_detected = err.find("out of memory") != -1
 298       multiple_oom_detected = err.find("out of memory\nout of memory") != -1
 299       segfault_detected = exit == -11
 300
 301       log.write ("Diagnosis: ")
 302       log.write ("\n")
 303       if multiple_oom_detected:
 304         log.write ("  - Multiple OOMs reported")
 305         log.write ("\n")
 306       if segfault_detected:
 307         log.write ("  - segfault")
 308         log.write ("\n")
 309       if not oom_detected:
 310         log.write ("  - No OOM checking")
 311         log.write ("\n")
 312       if double_free:
 313         log.write ("  - Double free")
 314         log.write ("\n")
 315
 316       log.write ("\n")
 317
 318       log.write ("Valgrind info:\n" + vout)
 319       log.write ("\n")
 320       log.write ("\n")
 321       log.flush()
 322
 323   if OPTIONS.regression == None:
 324     count_lines()
 325
 326 print '\n',
 327
 328 # Do the actual regression check
 329 if OPTIONS.regression != None:
 330   expected_num_failures = OPTIONS.regression
 331
 332   if num_failures != expected_num_failures:
 333
 334     print "TEST-UNEXPECTED-FAIL |",
 335     if num_failures > expected_num_failures:
 336       print "More out-of-memory errors were found (%s) than expected (%d). This probably means an allocation site has been added without a NULL-check. If this is unavoidable, you can account for it by updating Makefile.in." % (num_failures, expected_num_failures),
 337     else:
 338       print "Congratulations, you have removed %d out-of-memory error(s) (%d remain)! Please account for it by updating Makefile.in." % (expected_num_failures - num_failures, num_failures),
 339     sys.exit(-1)
 340   else:
 341     print 'TEST-PASS | find_OOM_errors | Found the expected number of OOM errors (%d)' % (expected_num_failures)
 342