3 # Mimics GNU timeout, but has special modes which gather test result data and retry failed tests.
5 ######################################### How this works ##########################################
7 # Because we have several different test harnesses and we don't invoke them directly, communication
8 # between this script and the harness is done through the simplest means possible (environment
9 # variables to communicate babysitter->harness, files in standard locations harness->babysitter).
11 # The script supports three different ways of extracting test data from the invoked test suite:
13 # 1. "The babysitter protocol": The babysitter sets five environment variables (see below):
14 # "Ran test file": A path to a file where the harness should write a line-delimited list of
15 # tests which ran to completion.
16 # "Failed test file": A path to a file where the harness should write a line-delimited list
17 # of tests that failed.
18 # "Current test file": A path to a file where the harness should write the currently running
19 # test before a test begins, then delete afterward (used to detect early termination).
20 # "Run test": A list of test names, used by:
21 # "Run mode": This is either RUN or EXCLUDE. If RUN, the test list is a whitelist; run only
22 # those tests. If EXCLUDE, the list is a blacklist; run all except those tests.
23 # This is the most featureful mode: It can report where we failed in the case of timeouts or
24 # crashes that take down the harness, and if the feature is enabled it can retry failed tests.
25 # However, it requires modification to the test harness.
27 # 2. NUnit XML: The babysitter also sets a sixth environment variable:
28 # "XML list file": A path to a file where the harness should write a line-delimited list of
29 # paths to NUnit-format XML result files it created.
30 # This also requires modification to the test harness, but less of it.
32 # 3. NUnit XML (manually specified): If the test harness can't be modified, but the caller of the
33 # babysitter script happens to know where the harness writes its result XML files, the caller
34 # can specify those paths in the "Extra XML" environment variable (see --help)
36 # A single babysitter invocation can currently handle either the babysitter protocol or the XML,
37 # but never a mix of the two.
39 ###################################################################################################
53 from xml.dom.minidom import parse as xmlparse
59 CURRENT_TEST_FILE = "babysitter_report_current_test_file.txt"
60 RAN_TEST_FILE = "babysitter_report_ran_test_file.txt"
61 FAILED_TEST_FILE = "babysitter_report_failed_test_file.txt"
62 XML_LIST_FILE = "babysitter_report_xml_list_file.txt"
63 LOGGING_FILE = "babysitter_report.json_lines"
67 # Keys used for Babysitter<->Nunit IPC
68 CURRENT_TEST_KEY = 'MONO_BABYSITTER_NUNIT_CURRENT_TEST_FILE' # Tell nunit where to leave files
69 RAN_TEST_KEY = 'MONO_BABYSITTER_NUNIT_RAN_TEST_FILE'
70 FAILED_TEST_KEY = 'MONO_BABYSITTER_NUNIT_FAILED_TEST_FILE'
71 XML_LIST_KEY = 'MONO_BABYSITTER_NUNIT_XML_LIST_FILE'
72 RUN_KEY = 'MONO_BABYSITTER_NUNIT_RUN_TEST' # Semicolon-separated list of test names
73 RUN_MODE_KEY = 'MONO_BABYSITTER_NUNIT_RUN_MODE' # Equal to either RUN or EXCLUDE
75 # Keys used for script configuration (see --help text)
76 LOG_FILE_KEY = 'MONO_BABYSITTER_LOG_FILE' # Path
77 EXTRA_XML_KEY = 'MONO_BABYSITTER_EXTRA_XML' # Semicolon-separated list of paths
78 RETRY_KEY = 'MONO_BABYSITTER_RETRY' # Equal to an integer
79 VERBOSE_KEY = 'MONO_BABYSITTER_VERBOSE' # "Undocumented"-- used for debugging babysitter
83 DATE_JSON = 'date' # POSIX timestamp of test suite run
84 INVOKE_JSON = 'invocation'
85 COUNT_JSON = 'iteration' # How many times was command executed?
86 LIMIT_JSON = 'failure_max'
87 SUPPORT_JSON = 'babysitter_protocol' # Was the test suite running with a babysitter-aware nunit?
88 LOADED_XML_JSON = 'loaded_xml' # True if we loaded result XML from the test suite
89 FINAL_CODE_JSON = 'final_code'
90 TESTS_JSON = 'tests' # Holds dictionary of (test case name)->(dict with TEST_ keys below)
91 TEST_FAILURES = 'normal_failures'
92 TEST_CRASH_FAILURES = 'crash_failures'
93 TEST_TIMEOUT_FAILURES = 'timeout_failures'
95 ### Interpret arguments
97 scriptname = sys.argv[0]
99 # This is very silly: This program needs to switch from argparse to manual parsing
100 # after the second positional argument, but argparse doesn't let you do this.
101 # I work around this with a custom subclass which can selectively swallow errors:
102 class Hesitate(Exception):
104 class HesitantParser(argparse.ArgumentParser):
105 def __init__(s, *args, **kwargs):
106 s.hesitating = True # Errors will be swallowed until this is set false
107 argparse.ArgumentParser.__init__(s, *args, **kwargs)
108 def error(s, *args, **kwargs):
110 raise Hesitate() # Bail out before errors are printed.
111 argparse.ArgumentParser.error(s, *args, **kwargs)
114 argparser = HesitantParser(description="""\
115 Run a test suite with a timeout.\n
116 Durations are floating point numbers followed by an optional unit:\n
117 's' for seconds (the default)
121 supported environment variables:
122 %s: File to write logs to (as line-delimited JSON)
123 %s: If set to a number, failed test cases will be rerun this many times (NUnit test suites only)
124 %s: Semicolon-separated list of additional NUnit XMLs to check for errors""" %
125 (LOG_FILE_KEY, RETRY_KEY, EXTRA_XML_KEY),
126 formatter_class=argparse.RawTextHelpFormatter)
127 argparser.add_argument('-s', '--signal', dest='signal', metavar='signal', default='TERM',
128 help="Send this signal to the command on timeout, instead of TERM.")
129 argparser.add_argument('-k', '--kill-after-duration', dest='kill_after', metavar='duration',
130 help="If process continues running after signal, send KILL after this duration.")
131 argparser.add_argument('duration',
132 help="Time to run before sending signal.")
133 argparser.add_argument('command', nargs="+", help="Command+args to run.")
135 # Repeatedly parse the given args until we find the shortest prefix for which parsing succeeds.
138 for limit in range(1,argc+1):
140 if limit == argc: # On the final pass, parse for real
141 argparser.hesitating = False
142 args = argparser.parse_args(sys.argv[1:limit])
143 # If we're still here, parse_args succeeded.
144 # The final parsed arg is the command; remaining argv items are its args.
145 extra_args = sys.argv[limit:]
147 except Hesitate: # Before the final pass, if parse_args fails, skip
150 argparser.hesitating = False # Just act like a normal argparser from here
152 durationre = re.compile(r'(\d+)([smhd]?)')
153 def parse_duration(duration): # Accept units
154 match = durationre.match(duration)
156 argparser.error("Could not understand duration %s" % duration)
157 time, units = match.group(1), match.group(2)
167 def parse_signal(sig): # Accept names
170 for k,v in signal.__dict__.items():
171 if k == ("SIG%s" % sig):
173 argparser.error("Could not understand signal name %s" % sig)
175 # Final interpretation of arguments
176 duration = parse_duration(args.duration)
177 kill_after = parse_duration(args.kill_after) if args.kill_after is not None else None
178 timeout_signal = parse_signal(args.signal)
179 command = args.command + extra_args
181 # If we are running in Cygwin, Python will believe it is a UNIX application but Mono will be Windows.
183 cygwin = platform.system().startswith("CYGWIN")
185 def outgoingPath(path):
186 if cygwin: # Invoke cygpath and strip newline
187 return subprocess.check_output(["cygpath", "-w", path])[:-1].decode(sys.stdin.encoding)
190 def incomingPath(path):
192 return subprocess.check_output(["cygpath", path])[:-1].decode(sys.stdin.encoding)
195 # Some of the things we put in global_env are paths. If we're in cygwin, we have to keep separate
196 # local-use and env (mono use) copies of these keys.
199 # Process environment
200 global_env = copy.deepcopy( os.environ )
202 verbose = VERBOSE_KEY in global_env
203 logging = LOG_FILE_KEY in global_env
204 logfile = global_env[LOG_FILE_KEY] if logging else None
205 xml_list = global_env[EXTRA_XML_KEY].split(";") if EXTRA_XML_KEY in global_env and global_env[EXTRA_XML_KEY] else []
206 crash_resuming = True # TODO: Consider exposing this option, or adding a retry_on_crash option.
207 failmax = int(global_env[RETRY_KEY]) if RETRY_KEY in global_env else 0
208 babysitting = True # If false, babysitter becomes a timeout clone with no env manipulation or anything.
210 babysitter_dir = tempfile.mkdtemp()
211 env_source[CURRENT_TEST_KEY] = os.path.join(babysitter_dir, CURRENT_TEST_FILE)
212 env_source[RAN_TEST_KEY] = os.path.join(babysitter_dir, RAN_TEST_FILE)
213 env_source[FAILED_TEST_KEY] = os.path.join(babysitter_dir, FAILED_TEST_FILE)
214 env_source[XML_LIST_KEY] = os.path.join(babysitter_dir, XML_LIST_FILE)
216 env_source_keys = [CURRENT_TEST_KEY, RAN_TEST_KEY, FAILED_TEST_KEY, XML_LIST_KEY]
217 for key in env_source_keys:
218 global_env[key] = outgoingPath(env_source[key])
220 have_unix_process_groups = 'killpg' in os.__dict__
221 have_windows_process_groups = 'CREATE_NEW_PROCESS_GROUP' in subprocess.__dict__
223 ### Timeout implementation
225 def wait(proc, duration):
226 # TODO: If we detect Python 3.3, Popen objects have a wait(timeout) method we can use
232 if time.time()-start > duration:
236 # Popen and send_signal can't be called in their basic forms because we want to
237 # send signals to all children, not just to the immediately spawned process.
238 # Unfortunately the way to do this varies by operating system.
239 def popen(*args, **kwargs):
240 if have_unix_process_groups: # Call function on spawn to become process group leader
241 kwargs['preexec_fn'] = os.setsid
242 elif have_windows_process_groups: # Set magic flag for Windows process groups
243 kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
244 return subprocess.Popen(*args, **kwargs)
246 def send_signal(proc, sig):
247 if have_unix_process_groups: # UNIX
248 # For compatibility with GNU timeout, pre-send the signal to just the monitored process
249 os.kill(proc.pid, sig)
250 # Send signal to entire group
252 os.killpg(proc.pid, sig)
254 sys.stderr.write("%s: Warning, could not kill process group %s because %s\n" % (scriptname, proc.pid, e))
255 # For compatibility with GNU Timeout, send a SIGCONT after the signal
256 # (so delivery has a chance to occur even for stopped processes)
257 if sig != signal.SIGKILL and sig != signal.SIGCONT:
258 os.kill(proc.pid, signal.SIGCONT)
259 elif have_windows_process_groups: # Windows with Python 2.7 or better
260 os.kill(proc.pid, sig) # Becuase CREATE_NEW_PROCESS_GROUP, will go to entire group
261 else: # Windows with Python 2.6-- CREATE_NEW_PROCESS_GROUP not supported
262 proc.send_signal(sig) # No way to contact group, just kill process
264 ### Utility functions
266 def attemptDelete(path):
272 def attemptLines(path):
274 with open(path) as f:
275 return map(lambda s: s.strip('\r\n'), f.readlines())
276 except (OSError, IOError):
279 def attemptFirstLine(path):
280 lines = attemptLines(path)
285 def posixtime(): # Amazingly, time.time() does not guarantee an epoch in the docs. However this does:
286 return calendar.timegm(time.gmtime())
289 def failure_may_retry(test):
290 if test not in failcount:
293 return failcount[test] < failmax
295 def verbose_print(arg):
299 def failure_annotate(test):
300 return "%s (failure #%d of %d allowed)" % (test, failcount[test], failmax)
303 return "s" if len(lst) > 1 else ""
307 def run(): # Returns exit code
310 ever_completed = False
311 died_politely = False
316 log = {DATE_JSON: posixtime(), COUNT_JSON:0, LIMIT_JSON:failmax, SUPPORT_JSON:False,
317 LOADED_XML_JSON:False, INVOKE_JSON: " ".join(command)}
319 def log_value(key, set=None, add=None, target=log): # Call to add toplevel value to log
321 if key not in target:
327 def log_test(testname, key, set=None, add=None): # Call to add test-case-level value to log
328 if TESTS_JSON not in log:
330 if testname not in log[TESTS_JSON]:
331 log[TESTS_JSON][testname] = {}
332 log_value(key, set=set, add=add, target=log[TESTS_JSON][testname])
337 env = copy.copy(global_env)
340 else: # Persist reported failures list until first non-crash run
341 retry_next = retry_these
343 log_value(COUNT_JSON, add=1)
345 # Prepare environment/filesystem
347 for key in env_source_keys: # Clear all paths intended for use by mono
348 attemptDelete(env_source[key])
350 env[RUN_KEY] = ";".join(resume_after)
351 env[RUN_MODE_KEY] = "EXCLUDE"
353 env[RUN_KEY] = ";".join(retry_these)
354 env[RUN_MODE_KEY] = "RUN"
358 proc = popen(command, env=env)
361 sys.stderr.write("%s: Could not execute command `%s`\n" % (scriptname, command[0]))
364 code = wait(proc, duration)
365 timed_out = code is None
366 if timed_out: # Initial timeout
367 sys.stderr.write("%s: Sending %s to `%s`\n" % (scriptname, "SIG" + args.signal, command[0]))
368 send_signal(proc, timeout_signal)
369 if kill_after is not None: # Kill-after timeout
370 code = wait(proc, kill_after)
372 sys.stderr.write("%s: Sending SIGKILL to `%s`\n" % (scriptname, command[0]))
373 send_signal(proc, signal.SIGKILL)
375 sys.stderr.write("%s: Command `%s` returned with %s after SIGKILL\n" % (scriptname, command[0], str(code)))
377 sys.stderr.write("%s: Command `%s` returned with %s after %s\n" % (scriptname, command[0], str(code), "SIG" + args.signal))
378 code = proc.wait() # Waits forever
379 sys.stderr.write("%s: Command `%s` timed out with %s\n" % (scriptname, command[0], str(code)))
382 # The test suite has now run, and what happens next varies:
383 # 1. The suite either completed fully without failures, or timed out: Just quit.
384 # 2. The suite crashed (halted without completing):
385 # Remember any failures for later and rerun, using a blacklist of testcases we have completed.
386 # 3. The suite completed, but there were failures reported:
387 # Rerun, using a whitelist of only reported-failed testcases.
388 # 4. The suite crashed partway through a run with a whitelist:
389 # Rerun, using a whitelist consisting of the previous whitelist minus successful testcases.
391 crashed_at = attemptFirstLine(env_source[CURRENT_TEST_KEY])
392 failed_tests = attemptLines(env_source[FAILED_TEST_KEY])
393 ran_tests = attemptLines(env_source[RAN_TEST_KEY])
394 wrote_xml = attemptLines(env_source[XML_LIST_KEY])
397 if crashed_at or failed_tests or ran_tests: # Test suite follows the babysitter protocol
398 log_value(SUPPORT_JSON, True)
400 if not crashed_at and not ever_completed: # The resume_after whitelist is only
401 resume_after = [] # used before the first noncrashing run
402 ever_completed = True
404 if timed_out: # Currently no retries after timeout
406 code = 124 # See GNU timeout manpage
408 if code or crashed_at: # Process failures
409 # Handle crash failures
410 if crashed_at and not timed_out:
411 log_test(crashed_at, TEST_CRASH_FAILURES, add=1)
412 if not crash_resuming:
415 if failure_may_retry(crashed_at):
416 if ever_completed: # Rerun with whitelist next time
417 for test in retry_these: # Prepopulate with last whitelist minus run testcases
418 if test == crashed_at or test not in ran_tests: # (plus crashed testcase)
419 retry_next.append(test)
420 else: # Rerun with blacklist next time
421 for test in ran_tests: # Add testcases we just ran to blacklist
422 if test != crashed_at: # (except for the crashed testcase)
423 resume_after.append(test)
427 # Handle reported failures
428 for test in failed_tests:
429 log_test(test, TEST_FAILURES, add=1)
430 if failure_may_retry(test):
431 retry_next.append(test)
435 # Report human-readable failures for stdout log.
436 message = "%s:" % (scriptname)
438 message += " Saw timeout in test case %s (never allowed)." % (crashed_at)
440 message += " Saw crash in test case %s." % (failure_annotate(crashed_at))
442 message += " Saw test failure in test case%s %s." % (pluralize(failed_tests), "; ".join(map(failure_annotate, failed_tests)))
443 if not (timed_out or crashed_at or failed_tests):
444 message += " Test suite terminated with code %d, " % (code)
445 if log[SUPPORT_JSON]:
446 message += "but failure did not occur during a test case. Halting."
447 elif xml_list or wrote_xml:
448 message += "will extract test results from XML. Halting."
450 message += "and suite cannot report test case data. Halting."
452 message += " Will halt testing."
455 if not log[SUPPORT_JSON]:
456 for xml in (xml_list + [incomingPath(xml) for xml in wrote_xml]):
457 verbose_print("Will attempt to load XML from %s" % (xml))
459 data = xmlparse(xml).documentElement
460 if data.nodeName != 'test-results':
461 raise ValueError("Toplevel element was not <test-results />")
463 log_value(LOADED_XML_JSON, True)
469 for child in node.childNodes:
470 if child.nodeName == 'test-suite' or child.nodeName == 'results':
471 nextsearch.append(child) # Descend
472 elif child.nodeName == 'test-case':
473 name = child.getAttribute("name")
474 if child.getAttribute("executed") == "True" and child.getAttribute("success") != "True":
475 log_test(name, TEST_FAILURES, add=1)
478 except Exception as e:
479 print("Could not load XML file %s. Reason: %s" % (xml, e))
482 if bailout or not (resume_after or retry_next): # If not retrying
485 # If we got here, a retry is occurring
486 retry_these = retry_next
488 # Report human-readable retry notice for stdout log.
489 message = "%s: Will rerun test suite" % (scriptname)
490 if log[COUNT_JSON] > 1:
491 message += " (iteration #%d)" % (log[COUNT_JSON])
493 message += ", resuming at crashed testcase %s." % (crashed_at)
495 message += ", running previously failed testcase%s: %s." % (pluralize(retry_these), "; ".join(retry_these))
498 # Emergency: Ensure command does not outlive this script
499 if proc is not None and not died_politely:
500 send_signal(proc, signal.SIGKILL)
503 log_value(FINAL_CODE_JSON, "EXCEPTION" if code is None else code)
505 with open(logfile, "a") as f:
506 f.write(json.dumps(log) + os.linesep)