scripts/ci/babysitter

   1 #!/usr/bin/env python3
   2
   3 # Mimics GNU timeout, but has special modes which gather test result data and retry failed tests.
   4
   5 ######################################### How this works ##########################################
   6 #
   7 # Because we have several different test harnesses and we don't invoke them directly, communication
   8 # between this script and the harness is done through the simplest means possible (environment
   9 # variables to communicate babysitter->harness, files in standard locations harness->babysitter).
  10 #
  11 # The script supports three different ways of extracting test data from the invoked test suite:
  12 #
  13 # 1. "The babysitter protocol": The babysitter sets five environment variables (see below):
  14 #        "Ran test file": A path to a file where the harness should write a line-delimited list of
  15 #            tests which ran to completion.
  16 #        "Failed test file": A path to a file where the harness should write a line-delimited list
  17 #            of tests that failed.
  18 #        "Current test file": A path to a file where the harness should write the currently running
  19 #            test before a test begins, then delete afterward (used to detect early termination).
  20 #        "Run test": A list of test names, used by:
  21 #        "Run mode": This is either RUN or EXCLUDE. If RUN, the test list is a whitelist; run only
  22 #            those tests. If EXCLUDE, the list is a blacklist; run all except those tests.
  23 #    This is the most featureful mode: It can report where we failed in the case of timeouts or
  24 #    crashes that take down the harness, and if the feature is enabled it can retry failed tests.
  25 #    However, it requires modification to the test harness.
  26 #
  27 # 2. NUnit XML: The babysitter also sets a sixth environment variable:
  28 #        "XML list file": A path to a file where the harness should write a line-delimited list of
  29 #        paths to NUnit-format XML result files it created.
  30 #    This also requires modification to the test harness, but less of it.
  31 #
  32 # 3. NUnit XML (manually specified): If the test harness can't be modified, but the caller of the
  33 #    babysitter script happens to know where the harness writes its result XML files, the caller
  34 #    can specify those paths in the "Extra XML" environment variable (see --help)
  35 #
  36 # A single babysitter invocation can currently handle either the babysitter protocol or the XML,
  37 # but never a mix of the two.
  38 #
  39 ###################################################################################################
  40
  41 import argparse
  42 import subprocess
  43 import re
  44 import signal
  45 import time
  46 import sys
  47 import os.path
  48 import copy
  49 import tempfile
  50 import calendar
  51 import json
  52 import platform
  53 from xml.dom.minidom import parse as xmlparse
  54
  55 ### Constants
  56
  57 # Filenames
  58
  59 CURRENT_TEST_FILE = "babysitter_report_current_test_file.txt"
  60 RAN_TEST_FILE     = "babysitter_report_ran_test_file.txt"
  61 FAILED_TEST_FILE  = "babysitter_report_failed_test_file.txt"
  62 XML_LIST_FILE     = "babysitter_report_xml_list_file.txt"
  63 LOGGING_FILE      = "babysitter_report.json_lines"
  64
  65 # Environment keys
  66
  67 # Keys used for Babysitter<->Nunit IPC
  68 CURRENT_TEST_KEY  = 'MONO_BABYSITTER_NUNIT_CURRENT_TEST_FILE' # Tell nunit where to leave files
  69 RAN_TEST_KEY      = 'MONO_BABYSITTER_NUNIT_RAN_TEST_FILE'
  70 FAILED_TEST_KEY   = 'MONO_BABYSITTER_NUNIT_FAILED_TEST_FILE'
  71 XML_LIST_KEY      = 'MONO_BABYSITTER_NUNIT_XML_LIST_FILE'
  72 RUN_KEY           = 'MONO_BABYSITTER_NUNIT_RUN_TEST' # Semicolon-separated list of test names
  73 RUN_MODE_KEY      = 'MONO_BABYSITTER_NUNIT_RUN_MODE' # Equal to either RUN or EXCLUDE
  74
  75 # Keys used for script configuration (see --help text)
  76 LOG_FILE_KEY      = 'MONO_BABYSITTER_LOG_FILE'       # Path
  77 EXTRA_XML_KEY     = 'MONO_BABYSITTER_EXTRA_XML'      # Semicolon-separated list of paths
  78 RETRY_KEY         = 'MONO_BABYSITTER_RETRY'          # Equal to an integer
  79 VERBOSE_KEY       = 'MONO_BABYSITTER_VERBOSE'        # "Undocumented"-- used for debugging babysitter
  80
  81 # JSON keys
  82
  83 DATE_JSON       = 'date'          # POSIX timestamp of test suite run
  84 INVOKE_JSON     = 'invocation'
  85 COUNT_JSON      = 'iteration'        # How many times was command executed?
  86 LIMIT_JSON      = 'failure_max'
  87 SUPPORT_JSON    = 'babysitter_protocol' # Was the test suite running with a babysitter-aware nunit?
  88 LOADED_XML_JSON = 'loaded_xml'          # True if we loaded result XML from the test suite
  89 FINAL_CODE_JSON = 'final_code'
  90 TESTS_JSON      = 'tests'         # Holds dictionary of (test case name)->(dict with TEST_ keys below)
  91 TEST_FAILURES         = 'normal_failures'
  92 TEST_CRASH_FAILURES   = 'crash_failures'
  93 TEST_TIMEOUT_FAILURES = 'timeout_failures'
  94
  95 ### Interpret arguments
  96
  97 scriptname = sys.argv[0]
  98
  99 # This is very silly: This program needs to switch from argparse to manual parsing
 100 # after the second positional argument, but argparse doesn't let you do this.
 101 # I work around this with a custom subclass which can selectively swallow errors:
 102 class Hesitate(Exception):
 103         pass
 104 class HesitantParser(argparse.ArgumentParser):
 105         def __init__(s, *args, **kwargs):
 106                 s.hesitating = True  # Errors will be swallowed until this is set false
 107                 argparse.ArgumentParser.__init__(s, *args, **kwargs)
 108         def error(s, *args, **kwargs):
 109                 if s.hesitating:
 110                         raise Hesitate() # Bail out before errors are printed.
 111                 argparse.ArgumentParser.error(s, *args, **kwargs)
 112
 113 # Define args
 114 argparser = HesitantParser(description="""\
 115 Run a test suite with a timeout.\n
 116 Durations are floating point numbers followed by an optional unit:\n
 117 's' for seconds (the default)
 118 'm' for minutes
 119 'h' for hours
 120 'd' for days\n
 121 supported environment variables:
 122   %s: File to write logs to (as line-delimited JSON)
 123   %s: If set to a number, failed test cases will be rerun this many times (NUnit test suites only)
 124   %s: Semicolon-separated list of additional NUnit XMLs to check for errors""" %
 125                 (LOG_FILE_KEY, RETRY_KEY, EXTRA_XML_KEY),
 126         formatter_class=argparse.RawTextHelpFormatter)
 127 argparser.add_argument('-s', '--signal', dest='signal', metavar='signal', default='TERM',
 128         help="Send this signal to the command on timeout, instead of TERM.")
 129 argparser.add_argument('-k', '--kill-after-duration', dest='kill_after', metavar='duration',
 130         help="If process continues running after signal, send KILL after this duration.")
 131 argparser.add_argument('duration',
 132         help="Time to run before sending signal.")
 133 argparser.add_argument('command', nargs="+", help="Command+args to run.")
 134
 135 # Repeatedly parse the given args until we find the shortest prefix for which parsing succeeds.
 136 argc = len(sys.argv)
 137 extra_args = []
 138 for limit in range(1,argc+1):
 139         try:
 140                 if limit == argc: # On the final pass, parse for real
 141                         argparser.hesitating = False
 142                 args = argparser.parse_args(sys.argv[1:limit])
 143                 # If we're still here, parse_args succeeded.
 144                 # The final parsed arg is the command; remaining argv items are its args.
 145                 extra_args = sys.argv[limit:]
 146                 break
 147         except Hesitate: # Before the final pass, if parse_args fails, skip
 148                 pass
 149
 150 argparser.hesitating = False # Just act like a normal argparser from here
 151
 152 durationre = re.compile(r'(\d+)([smhd]?)')
 153 def parse_duration(duration): # Accept units
 154         match = durationre.match(duration)
 155         if not match:
 156                 argparser.error("Could not understand duration %s" % duration)
 157         time, units = match.group(1), match.group(2)
 158         time = int(time)
 159         if units == 'm':
 160                 time *= 60
 161         elif units == 'h':
 162                 time *= 60*60
 163         elif units == 'd':
 164                 time *= 60*60*24
 165         return time
 166
 167 def parse_signal(sig):        # Accept names
 168         if sig.isdigit():
 169                 return int(sig)
 170         for k,v in signal.__dict__.items():
 171                 if k == ("SIG%s" % sig):
 172                         return v
 173         argparser.error("Could not understand signal name %s" % sig)
 174
 175 # Final interpretation of arguments
 176 duration = parse_duration(args.duration)
 177 kill_after = parse_duration(args.kill_after) if args.kill_after is not None else None
 178 timeout_signal = parse_signal(args.signal)
 179 command = args.command + extra_args
 180
 181 # If we are running in Cygwin, Python will believe it is a UNIX application but Mono will be Windows.
 182
 183 cygwin = platform.system().startswith("CYGWIN")
 184
 185 def outgoingPath(path):
 186         if cygwin: # Invoke cygpath and strip newline
 187                 return subprocess.check_output(["cygpath", "-w", path])[:-1].decode(sys.stdin.encoding)
 188         return path
 189
 190 def incomingPath(path):
 191         if cygwin:
 192                 return subprocess.check_output(["cygpath", path])[:-1].decode(sys.stdin.encoding)
 193         return path
 194
 195 # Some of the things we put in global_env are paths. If we're in cygwin, we have to keep separate
 196 # local-use and env (mono use) copies of these keys.
 197 env_source = {}
 198
 199 # Process environment
 200 global_env = copy.deepcopy( os.environ )
 201
 202 verbose = VERBOSE_KEY in global_env
 203 logging = LOG_FILE_KEY in global_env
 204 logfile = global_env[LOG_FILE_KEY] if logging else None
 205 xml_list = global_env[EXTRA_XML_KEY].split(";") if EXTRA_XML_KEY in global_env and global_env[EXTRA_XML_KEY] else []
 206 crash_resuming = True # TODO: Consider exposing this option, or adding a retry_on_crash option.
 207 failmax = int(global_env[RETRY_KEY]) if RETRY_KEY in global_env else 0
 208 babysitting = True # If false, babysitter becomes a timeout clone with no env manipulation or anything.
 209 if babysitting:
 210         babysitter_dir = tempfile.mkdtemp()
 211         env_source[CURRENT_TEST_KEY] = os.path.join(babysitter_dir, CURRENT_TEST_FILE)
 212         env_source[RAN_TEST_KEY]     = os.path.join(babysitter_dir, RAN_TEST_FILE)
 213         env_source[FAILED_TEST_KEY]  = os.path.join(babysitter_dir, FAILED_TEST_FILE)
 214         env_source[XML_LIST_KEY]     = os.path.join(babysitter_dir, XML_LIST_FILE)
 215
 216 env_source_keys = [CURRENT_TEST_KEY, RAN_TEST_KEY, FAILED_TEST_KEY, XML_LIST_KEY]
 217 for key in env_source_keys:
 218         global_env[key] = outgoingPath(env_source[key])
 219
 220 have_unix_process_groups = 'killpg' in os.__dict__
 221 have_windows_process_groups = 'CREATE_NEW_PROCESS_GROUP' in subprocess.__dict__
 222
 223 ### Timeout implementation
 224
 225 def wait(proc, duration):
 226         # TODO: If we detect Python 3.3, Popen objects have a wait(timeout) method we can use
 227         start = time.time()
 228         while True:
 229                 code = proc.poll()
 230                 if code is not None:
 231                         return code
 232                 if time.time()-start > duration:
 233                         return None
 234                 time.sleep(0.05)
 235
 236 # Popen and send_signal can't be called in their basic forms because we want to
 237 # send signals to all children, not just to the immediately spawned process.
 238 # Unfortunately the way to do this varies by operating system.
 239 def popen(*args, **kwargs):
 240         if have_unix_process_groups: # Call function on spawn to become process group leader
 241                 kwargs['preexec_fn'] = os.setsid
 242         elif have_windows_process_groups: # Set magic flag for Windows process groups
 243                 kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
 244         return subprocess.Popen(*args, **kwargs)
 245
 246 def send_signal(proc, sig):
 247         if have_unix_process_groups: # UNIX
 248                 # For compatibility with GNU timeout, pre-send the signal to just the monitored process
 249                 os.kill(proc.pid, sig)
 250                 # Send signal to entire group
 251                 try:
 252                         os.killpg(proc.pid, sig)
 253                 except OSError as e:
 254                         sys.stderr.write("%s: Warning, could not kill process group %s because %s\n" % (scriptname, proc.pid, e))
 255                 # For compatibility with GNU Timeout, send a SIGCONT after the signal
 256                 # (so delivery has a chance to occur even for stopped processes)
 257                 if sig != signal.SIGKILL and sig != signal.SIGCONT:
 258                         os.kill(proc.pid, signal.SIGCONT)
 259         elif have_windows_process_groups: # Windows with Python 2.7 or better
 260                 os.kill(proc.pid, sig) # Becuase CREATE_NEW_PROCESS_GROUP, will go to entire group
 261         else: # Windows with Python 2.6-- CREATE_NEW_PROCESS_GROUP not supported
 262                 proc.send_signal(sig) # No way to contact group, just kill process
 263
 264 ### Utility functions
 265
 266 def attemptDelete(path):
 267         try:
 268                 os.remove(path)
 269         except OSError:
 270                 pass
 271
 272 def attemptLines(path):
 273         try:
 274                 with open(path) as f:
 275                         return map(lambda s: s.strip('\r\n'), f.readlines())
 276         except (OSError, IOError):
 277                 return []
 278
 279 def attemptFirstLine(path):
 280         lines = attemptLines(path)
 281         if lines:
 282                 return lines[0]
 283         return None
 284
 285 def posixtime(): # Amazingly, time.time() does not guarantee an epoch in the docs. However this does:
 286         return calendar.timegm(time.gmtime())
 287
 288 failcount = {}
 289 def failure_may_retry(test):
 290         if test not in failcount:
 291                 failcount[test] = 0
 292         failcount[test] += 1
 293         return failcount[test] < failmax
 294
 295 def verbose_print(arg):
 296         if (verbose):
 297                 print(arg)
 298
 299 def failure_annotate(test):
 300         return "%s (failure #%d of %d allowed)" % (test, failcount[test], failmax)
 301
 302 def pluralize(lst):
 303         return "s" if len(lst) > 1 else ""
 304
 305 ### Run command
 306
 307 def run(): # Returns exit code
 308         resume_after = []
 309         retry_these = []
 310         ever_completed = False
 311         died_politely = False
 312         proc = None
 313         code = None
 314
 315         # Set up logging
 316         log = {DATE_JSON: posixtime(), COUNT_JSON:0, LIMIT_JSON:failmax, SUPPORT_JSON:False,
 317                 LOADED_XML_JSON:False, INVOKE_JSON: " ".join(command)}
 318
 319         def log_value(key, set=None, add=None, target=log): # Call to add toplevel value to log
 320                 if add is not None:
 321                         if key not in target:
 322                                 target[key] = 0
 323                         target[key] += add
 324                 else:
 325                         target[key] = set
 326
 327         def log_test(testname, key, set=None, add=None):   # Call to add test-case-level value to log
 328                 if TESTS_JSON not in log:
 329                         log[TESTS_JSON] = {}
 330                 if testname not in log[TESTS_JSON]:
 331                         log[TESTS_JSON][testname] = {}
 332                 log_value(key, set=set, add=add, target=log[TESTS_JSON][testname])
 333
 334         # Ready to run tests
 335         try:
 336                 while True:
 337                         env = copy.copy(global_env)
 338                         if ever_completed:
 339                                 retry_next = []
 340                         else: # Persist reported failures list until first non-crash run
 341                                 retry_next = retry_these
 342
 343                         log_value(COUNT_JSON, add=1)
 344
 345                         # Prepare environment/filesystem
 346                         if babysitting:
 347                                 for key in env_source_keys: # Clear all paths intended for use by mono
 348                                         attemptDelete(env_source[key])
 349                                 if resume_after:
 350                                         env[RUN_KEY] = ";".join(resume_after)
 351                                         env[RUN_MODE_KEY] = "EXCLUDE"
 352                                 elif retry_these:
 353                                         env[RUN_KEY] = ";".join(retry_these)
 354                                         env[RUN_MODE_KEY] = "RUN"
 355
 356                         # Run test suite
 357                         try:
 358                                 proc = popen(command, env=env)
 359                         except OSError:
 360                                 died_politely = True
 361                                 sys.stderr.write("%s: Could not execute command `%s`\n" % (scriptname, command[0]))
 362                                 sys.exit(127)
 363
 364                         code = wait(proc, duration)
 365                         timed_out = code is None
 366                         if timed_out:                  # Initial timeout
 367                                 sys.stderr.write("%s: Sending %s to `%s`\n" % (scriptname, "SIG" + args.signal, command[0]))
 368                                 send_signal(proc, timeout_signal)
 369                                 if kill_after is not None: # Kill-after timeout
 370                                         code = wait(proc, kill_after)
 371                                         if code is None:
 372                                                 sys.stderr.write("%s: Sending SIGKILL to `%s`\n" % (scriptname, command[0]))
 373                                                 send_signal(proc, signal.SIGKILL)
 374                                                 code = wait(proc, 1)
 375                                                 sys.stderr.write("%s: Command `%s` returned with %s after SIGKILL\n" % (scriptname, command[0], str(code)))
 376                                         else:
 377                                                 sys.stderr.write("%s: Command `%s` returned with %s after %s\n" % (scriptname, command[0], str(code), "SIG" + args.signal))
 378                                 code = proc.wait()         # Waits forever
 379                                 sys.stderr.write("%s: Command `%s` timed out with %s\n" % (scriptname, command[0], str(code)))
 380                         died_politely = True
 381
 382                         # The test suite has now run, and what happens next varies:
 383                         # 1. The suite either completed fully without failures, or timed out: Just quit.
 384                         # 2. The suite crashed (halted without completing):
 385                         #   Remember any failures for later and rerun, using a blacklist of testcases we have completed.
 386                         # 3. The suite completed, but there were failures reported:
 387                         #   Rerun, using a whitelist of only reported-failed testcases.
 388                         # 4. The suite crashed partway through a run with a whitelist:
 389                         #   Rerun, using a whitelist consisting of the previous whitelist minus successful testcases.
 390
 391                         crashed_at = attemptFirstLine(env_source[CURRENT_TEST_KEY])
 392                         failed_tests = attemptLines(env_source[FAILED_TEST_KEY])
 393                         ran_tests = attemptLines(env_source[RAN_TEST_KEY])
 394                         wrote_xml = attemptLines(env_source[XML_LIST_KEY])
 395                         bailout = False
 396
 397                         if crashed_at or failed_tests or ran_tests: # Test suite follows the babysitter protocol
 398                                 log_value(SUPPORT_JSON, True)
 399
 400                         if not crashed_at and not ever_completed:  # The resume_after whitelist is only
 401                                 resume_after = []                      # used before the first noncrashing run
 402                                 ever_completed = True
 403
 404                         if timed_out:       # Currently no retries after timeout
 405                                 bailout = True
 406                                 code = 124      # See GNU timeout manpage
 407
 408                         if code or crashed_at: # Process failures
 409                                 # Handle crash failures
 410                                 if crashed_at and not timed_out:
 411                                         log_test(crashed_at, TEST_CRASH_FAILURES, add=1)
 412                                         if not crash_resuming:
 413                                                 bailout = True
 414
 415                                         if failure_may_retry(crashed_at):
 416                                                 if ever_completed:           # Rerun with whitelist next time
 417                                                         for test in retry_these: # Prepopulate with last whitelist minus run testcases
 418                                                                 if test == crashed_at or test not in ran_tests: # (plus crashed testcase)
 419                                                                         retry_next.append(test)
 420                                                 else:                            # Rerun with blacklist next time
 421                                                         for test in ran_tests:       # Add testcases we just ran to blacklist
 422                                                                 if test != crashed_at:   # (except for the crashed testcase)
 423                                                                         resume_after.append(test)
 424                                         else:
 425                                                 bailout = True
 426
 427                                 # Handle reported failures
 428                                 for test in failed_tests:
 429                                         log_test(test, TEST_FAILURES, add=1)
 430                                         if failure_may_retry(test):
 431                                                 retry_next.append(test)
 432                                         else:
 433                                                 bailout = True
 434
 435                                 # Report human-readable failures for stdout log.
 436                                 message = "%s:" % (scriptname)
 437                                 if timed_out:
 438                                         message += " Saw timeout in test case %s (never allowed)." % (crashed_at)
 439                                 elif crashed_at:
 440                                         message += " Saw crash in test case %s." % (failure_annotate(crashed_at))
 441                                 if failed_tests:
 442                                         message += " Saw test failure in test case%s %s." % (pluralize(failed_tests), "; ".join(map(failure_annotate, failed_tests)))
 443                                 if not (timed_out or crashed_at or failed_tests):
 444                                         message += " Test suite terminated with code %d, " % (code)
 445                                         if log[SUPPORT_JSON]:
 446                                                 message += "but failure did not occur during a test case. Halting."
 447                                         elif xml_list or wrote_xml:
 448                                                 message += "will extract test results from XML. Halting."
 449                                         else:
 450                                                 message += "and suite cannot report test case data. Halting."
 451                                 elif bailout:
 452                                         message += " Will halt testing."
 453                                 print(message)
 454
 455                         if not log[SUPPORT_JSON]:
 456                                 for xml in (xml_list + [incomingPath(xml) for xml in wrote_xml]):
 457                                         verbose_print("Will attempt to load XML from %s" % (xml))
 458                                         try:
 459                                                 data = xmlparse(xml).documentElement
 460                                                 if data.nodeName != 'test-results':
 461                                                         raise ValueError("Toplevel element was not <test-results />")
 462
 463                                                 log_value(LOADED_XML_JSON, True)
 464
 465                                                 search = [data]
 466                                                 while search:
 467                                                         nextsearch = []
 468                                                         for node in search:
 469                                                                 for child in node.childNodes:
 470                                                                         if child.nodeName == 'test-suite' or child.nodeName == 'results':
 471                                                                                 nextsearch.append(child) # Descend
 472                                                                         elif child.nodeName == 'test-case':
 473                                                                                 name = child.getAttribute("name")
 474                                                                                 if child.getAttribute("executed") == "True" and child.getAttribute("success") != "True":
 475                                                                                         log_test(name, TEST_FAILURES, add=1)
 476
 477                                                         search = nextsearch
 478                                         except Exception as e:
 479                                                 print("Could not load XML file %s. Reason: %s" % (xml, e))
 480                                         data
 481
 482                         if bailout or not (resume_after or retry_next): # If not retrying
 483                                 return code
 484
 485                         # If we got here, a retry is occurring
 486                         retry_these = retry_next
 487
 488                         # Report human-readable retry notice for stdout log.
 489                         message = "%s: Will rerun test suite" % (scriptname)
 490                         if log[COUNT_JSON] > 1:
 491                                 message += " (iteration #%d)" % (log[COUNT_JSON])
 492                         if resume_after:
 493                                 message += ", resuming at crashed testcase %s." % (crashed_at)
 494                         else:
 495                                 message += ", running previously failed testcase%s: %s." % (pluralize(retry_these), "; ".join(retry_these))
 496                         print(message)
 497         finally:
 498                 # Emergency: Ensure command does not outlive this script
 499                 if proc is not None and not died_politely:
 500                         send_signal(proc, signal.SIGKILL)
 501
 502                 # Write out logs
 503                 log_value(FINAL_CODE_JSON, "EXCEPTION" if code is None else code)
 504                 if logging:
 505                         with open(logfile, "a") as f:
 506                                 f.write(json.dumps(log) + os.linesep)
 507
 508 sys.exit( run() )