#!/usr/bin/env python

# Mimics GNU timeout, but has special modes which gather test result data and retry failed tests.

######################################### How this works ##########################################

# Because we have several different test harnesses and we don't invoke them directly, communication
# between this script and the harness is done through the simplest means possible (environment
# variables to communicate babysitter->harness, files in standard locations harness->babysitter).

# The script supports three different ways of extracting test data from the invoked test suite:

# 1. "The babysitter protocol": The babysitter sets five environment variables (see below):
#       "Ran test file": A path to a file where the harness should write a line-delimited list of
#           tests which ran to completion.
#       "Failed test file": A path to a file where the harness should write a line-delimited list
#           of tests that failed.
#       "Current test file": A path to a file where the harness should write the currently running
#           test before a test begins, then delete afterward (used to detect early termination).
#       "Run test": A list of test names, used by:
#       "Run mode": This is either RUN or EXCLUDE. If RUN, the test list is a whitelist; run only
#           those tests. If EXCLUDE, the list is a blacklist; run all except those tests.
#    This is the most featureful mode: It can report where we failed in the case of timeouts or
#    crashes that take down the harness, and if the feature is enabled it can retry failed tests.
#    However, it requires modification to the test harness.

# 2. NUnit XML: The babysitter also sets a sixth environment variable:
#       "XML list file": A path to a file where the harness should write a line-delimited list of
#           paths to NUnit-format XML result files it created.
#    This also requires modification to the test harness, but less of it.

# 3. NUnit XML (manually specified): If the test harness can't be modified, but the caller of the
#    babysitter script happens to know where the harness writes its result XML files, the caller
#    can specify those paths in the "Extra XML" environment variable (see --help)

# A single babysitter invocation can currently handle either the babysitter protocol or the XML,
# but never a mix of the two.

###################################################################################################
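
# As an illustration only (not part of this script), a babysitter-protocol-aware harness could
# honor the five variables with logic roughly like the sketch below; every name here is
# hypothetical except the environment variable keys themselves:
#
#     current_file = os.environ['MONO_BABYSITTER_NUNIT_CURRENT_TEST_FILE']
#     ran_file     = os.environ['MONO_BABYSITTER_NUNIT_RAN_TEST_FILE']
#     failed_file  = os.environ['MONO_BABYSITTER_NUNIT_FAILED_TEST_FILE']
#     run_list     = os.environ.get('MONO_BABYSITTER_NUNIT_RUN_TEST', '').split(';')
#     run_mode     = os.environ.get('MONO_BABYSITTER_NUNIT_RUN_MODE', '')
#     for test in all_tests:                 # all_tests: however the harness enumerates its cases
#         if run_mode == 'RUN' and test not in run_list:
#             continue                       # whitelist: run only the listed tests
#         if run_mode == 'EXCLUDE' and test in run_list:
#             continue                       # blacklist: skip the listed tests
#         with open(current_file, 'w') as f:
#             f.write(test)                  # announce the test before it begins
#         passed = run_one(test)             # run_one: hypothetical single-test runner
#         with open(ran_file, 'a') as f:
#             f.write(test + '\n')           # record that the test ran to completion
#         if not passed:
#             with open(failed_file, 'a') as f:
#                 f.write(test + '\n')       # record the failure
#         os.remove(current_file)            # delete the marker afterward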

import argparse
import subprocess
import re
import signal
import time
import sys
import os.path
import copy
import tempfile
import calendar
import json
import platform
from xml.dom.minidom import parse as xmlparse

### Constants

# Filenames

CURRENT_TEST_FILE = "babysitter_report_current_test_file.txt"
RAN_TEST_FILE = "babysitter_report_ran_test_file.txt"
FAILED_TEST_FILE = "babysitter_report_failed_test_file.txt"
XML_LIST_FILE = "babysitter_report_xml_list_file.txt"
LOGGING_FILE = "babysitter_report.json_lines"

# Environment keys

# Keys used for Babysitter<->Nunit IPC
CURRENT_TEST_KEY = 'MONO_BABYSITTER_NUNIT_CURRENT_TEST_FILE' # Tell nunit where to leave files
RAN_TEST_KEY = 'MONO_BABYSITTER_NUNIT_RAN_TEST_FILE'
FAILED_TEST_KEY = 'MONO_BABYSITTER_NUNIT_FAILED_TEST_FILE'
XML_LIST_KEY = 'MONO_BABYSITTER_NUNIT_XML_LIST_FILE'
RUN_KEY = 'MONO_BABYSITTER_NUNIT_RUN_TEST' # Semicolon-separated list of test names
RUN_MODE_KEY = 'MONO_BABYSITTER_NUNIT_RUN_MODE' # Equal to either RUN or EXCLUDE

# Keys used for script configuration (see --help text)
LOG_FILE_KEY = 'MONO_BABYSITTER_LOG_FILE' # Path
EXTRA_XML_KEY = 'MONO_BABYSITTER_EXTRA_XML' # Semicolon-separated list of paths
RETRY_KEY = 'MONO_BABYSITTER_RETRY' # Equal to an integer
VERBOSE_KEY = 'MONO_BABYSITTER_VERBOSE' # "Undocumented"-- used for debugging babysitter

# JSON keys

DATE_JSON = 'date' # POSIX timestamp of test suite run
INVOKE_JSON = 'invocation'
COUNT_JSON = 'iteration' # How many times was command executed?
LIMIT_JSON = 'failure_max'
SUPPORT_JSON = 'babysitter_protocol' # Was the test suite running with a babysitter-aware nunit?
LOADED_XML_JSON = 'loaded_xml' # True if we loaded result XML from the test suite
FINAL_CODE_JSON = 'final_code'
TESTS_JSON = 'tests' # Holds dictionary of (test case name)->(dict with TEST_ keys below)
TEST_FAILURES = 'normal_failures'
TEST_CRASH_FAILURES = 'crash_failures'
TEST_TIMEOUT_FAILURES = 'timeout_failures'
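
# For reference, a single (hypothetical) log line assembled from the keys above could look like:
#   {"date": 1446510000, "invocation": "make -w check", "iteration": 2, "failure_max": 3,
#    "babysitter_protocol": true, "loaded_xml": false, "final_code": 0,
#    "tests": {"MonoTests.Some.Case": {"normal_failures": 1}}}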

### Interpret arguments

scriptname = sys.argv[0]

# This is very silly: This program needs to switch from argparse to manual parsing
# after the second positional argument, but argparse doesn't let you do this.
# I work around this with a custom subclass which can selectively swallow errors:
class Hesitate(Exception):
    pass
class HesitantParser(argparse.ArgumentParser):
    def __init__(s, *args, **kwargs):
        s.hesitating = True # Errors will be swallowed until this is set false
        argparse.ArgumentParser.__init__(s, *args, **kwargs)
    def error(s, *args, **kwargs):
        if s.hesitating:
            raise Hesitate() # Bail out before errors are printed.
        argparse.ArgumentParser.error(s, *args, **kwargs)

# Define args
argparser = HesitantParser(description="""\
Run a test suite with a timeout.\n
Durations are floating point numbers followed by an optional unit:\n
's' for seconds (the default)
'm' for minutes
'h' for hours
'd' for days\n
supported environment variables:
%s: File to write logs to (as line-delimited JSON)
%s: If set to a number, failed test cases will be rerun this many times (NUnit test suites only)
%s: Semicolon-separated list of additional NUnit XMLs to check for errors""" %
        (LOG_FILE_KEY, RETRY_KEY, EXTRA_XML_KEY),
    formatter_class=argparse.RawTextHelpFormatter)
argparser.add_argument('-s', '--signal', dest='signal', metavar='signal', default='TERM',
    help="Send this signal to the command on timeout, instead of TERM.")
argparser.add_argument('-k', '--kill-after-duration', dest='kill_after', metavar='duration',
    help="If process continues running after signal, send KILL after this duration.")
argparser.add_argument('duration',
    help="Time to run before sending signal.")
argparser.add_argument('command', nargs="+", help="Command+args to run.")
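
# Example invocation (hypothetical, for illustration only):
#   MONO_BABYSITTER_RETRY=3 ./babysitter -s ABRT -k 60s 2h make -w check
# This would give the suite two hours, send SIGABRT on timeout, follow up with SIGKILL after a
# further 60 seconds, and (for babysitter-protocol suites) rerun failed test cases up to the
# configured retry limit.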

# Repeatedly parse the given args until we find the shortest prefix for which parsing succeeds.
argc = len(sys.argv)
extra_args = []
for limit in range(1,argc+1):
    try:
        if limit == argc: # On the final pass, parse for real
            argparser.hesitating = False
        args = argparser.parse_args(sys.argv[1:limit])
        # If we're still here, parse_args succeeded.
        # The final parsed arg is the command; remaining argv items are its args.
        extra_args = sys.argv[limit:]
        break
    except Hesitate: # Before the final pass, if parse_args fails, skip
        pass
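
# For example, given the (hypothetical) command line `babysitter 300 nunit-console -labels tests.dll`,
# parsing fails for the prefix `300` alone but succeeds for `300 nunit-console`, so args.duration is
# "300", args.command is ["nunit-console"], and extra_args becomes ["-labels", "tests.dll"] -- flags
# meant for the harness are passed through untouched.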

argparser.hesitating = False # Just act like a normal argparser from here

durationre = re.compile(r'(\d+(?:\.\d+)?)([smhd]?)')
def parse_duration(duration): # Accept units
    match = durationre.match(duration)
    if not match:
        argparser.error("Could not understand duration %s" % duration)
    time, units = match.group(1), match.group(2)
    time = float(time) # Durations may be fractional, as documented in the --help text above
    if units == 'm':
        time *= 60
    elif units == 'h':
        time *= 60*60
    elif units == 'd':
        time *= 60*60*24
    return time
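
# For example, parse_duration("300") yields 300 seconds, "90m" yields 5400 and "2h" yields 7200.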

def parse_signal(sig): # Accept names
    if sig.isdigit():
        return int(sig)
    for k,v in signal.__dict__.items():
        if k == ("SIG%s" % sig):
            return v
    argparser.error("Could not understand signal name %s" % sig)

# Final interpretation of arguments
duration = parse_duration(args.duration)
kill_after = parse_duration(args.kill_after) if args.kill_after is not None else None
timeout_signal = parse_signal(args.signal)
command = args.command + extra_args

# If we are running in Cygwin, Python will believe it is a UNIX application but Mono will be Windows.

cygwin = platform.system().startswith("CYGWIN")

def outgoingPath(path):
    if cygwin: # Invoke cygpath and strip newline
        return subprocess.check_output(["cygpath", "-w", path])[:-1]
    return path

def incomingPath(path):
    if cygwin:
        return subprocess.check_output(["cygpath", path])[:-1]
    return path
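
# For example, under Cygwin outgoingPath("/tmp/babysitter_xyz/ran.txt") returns something like
# "C:\cygwin\tmp\babysitter_xyz\ran.txt" (the exact prefix depends on the local Cygwin install),
# while outside Cygwin both helpers return the path unchanged.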

# Some of the things we put in global_env are paths. If we're in cygwin, we have to keep separate
# local-use and env (mono use) copies of these keys.
env_source = {}

# Process environment
global_env = copy.deepcopy( os.environ )

verbose = VERBOSE_KEY in global_env
logging = LOG_FILE_KEY in global_env
logfile = global_env[LOG_FILE_KEY] if logging else None
xml_list = global_env[EXTRA_XML_KEY].split(";") if EXTRA_XML_KEY in global_env and global_env[EXTRA_XML_KEY] else []
crash_resuming = True # TODO: Consider exposing this option, or adding a retry_on_crash option.
failmax = int(global_env[RETRY_KEY]) if RETRY_KEY in global_env else 0
babysitting = True # If false, babysitter becomes a timeout clone with no env manipulation or anything.
if babysitting:
    babysitter_dir = tempfile.mkdtemp()
    env_source[CURRENT_TEST_KEY] = os.path.join(babysitter_dir, CURRENT_TEST_FILE)
    env_source[RAN_TEST_KEY] = os.path.join(babysitter_dir, RAN_TEST_FILE)
    env_source[FAILED_TEST_KEY] = os.path.join(babysitter_dir, FAILED_TEST_FILE)
    env_source[XML_LIST_KEY] = os.path.join(babysitter_dir, XML_LIST_FILE)

    env_source_keys = [CURRENT_TEST_KEY, RAN_TEST_KEY, FAILED_TEST_KEY, XML_LIST_KEY]
    for key in env_source_keys:
        global_env[key] = outgoingPath(env_source[key])

have_unix_process_groups = 'killpg' in os.__dict__
have_windows_process_groups = 'CREATE_NEW_PROCESS_GROUP' in subprocess.__dict__

### Timeout implementation

def wait(proc, duration):
    # TODO: If we detect Python 3.3, Popen objects have a wait(timeout) method we can use
    start = time.time()
    while True:
        code = proc.poll()
        if code is not None:
            return code
        if time.time()-start > duration:
            return None
        time.sleep(0.05)

# Popen and send_signal can't be called in their basic forms because we want to
# send signals to all children, not just to the immediately spawned process.
# Unfortunately the way to do this varies by operating system.
def popen(*args, **kwargs):
    if have_unix_process_groups: # Call function on spawn to become process group leader
        kwargs['preexec_fn'] = os.setsid
    elif have_windows_process_groups: # Set magic flag for Windows process groups
        kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
    return subprocess.Popen(*args, **kwargs)

def send_signal(proc, sig):
    if have_unix_process_groups: # UNIX
        # For compatibility with GNU timeout, pre-send the signal to just the monitored process
        os.kill(proc.pid, sig)
        # Send signal to entire group
        try:
            os.killpg(proc.pid, sig)
        except OSError as e:
            sys.stderr.write("%s: Warning, could not kill process group %s because %s\n" % (scriptname, proc.pid, e))
        # For compatibility with GNU Timeout, send a SIGCONT after the signal
        # (so delivery has a chance to occur even for stopped processes)
        if sig != signal.SIGKILL and sig != signal.SIGCONT:
            os.kill(proc.pid, signal.SIGCONT)
    elif have_windows_process_groups: # Windows with Python 2.7 or better
        os.kill(proc.pid, sig) # Because of CREATE_NEW_PROCESS_GROUP, the signal will go to the entire group
    else: # Windows with Python 2.6-- CREATE_NEW_PROCESS_GROUP not supported
        proc.send_signal(sig) # No way to contact group, just kill process
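
# For reference, on UNIX the combination above behaves roughly like `kill -TERM <pid>` followed by
# `kill -TERM -<pid>` (the whole process group) and a SIGCONT, which is approximately what GNU
# timeout does; the shell commands shown are just an analogy, not something this script runs.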

### Utility functions

def attemptDelete(path):
    try:
        os.remove(path)
    except OSError:
        pass

def attemptLines(path):
    try:
        with open(path) as f:
            return map(lambda s: s.strip('\r\n'), f.readlines())
    except (OSError, IOError):
        return []

def attemptFirstLine(path):
    lines = attemptLines(path)
    if lines:
        return lines[0]
    return None

def posixtime(): # Amazingly, time.time() does not guarantee an epoch in the docs. However this does:
    return calendar.timegm(time.gmtime())

failcount = {}
def failure_may_retry(test):
    if test not in failcount:
        failcount[test] = 0
    failcount[test] += 1
    return failcount[test] < failmax

def verbose_print(arg):
    if (verbose):
        print(arg)

def failure_annotate(test):
    return "%s (failure #%d of %d allowed)" % (test, failcount[test], failmax)

def pluralize(lst):
    return "s" if len(lst) > 1 else ""

### Run command

def run(): # Returns exit code
    resume_after = []
    retry_these = []
    ever_completed = False
    died_politely = False
    proc = None
    code = None

    # Set up logging
    log = {DATE_JSON: posixtime(), COUNT_JSON:0, LIMIT_JSON:failmax, SUPPORT_JSON:False,
        LOADED_XML_JSON:False, INVOKE_JSON: " ".join(command)}

    def log_value(key, set=None, add=None, target=log): # Call to add toplevel value to log
        if add is not None:
            if key not in target:
                target[key] = 0
            target[key] += add
        else:
            target[key] = set

    def log_test(testname, key, set=None, add=None): # Call to add test-case-level value to log
        if TESTS_JSON not in log:
            log[TESTS_JSON] = {}
        if testname not in log[TESTS_JSON]:
            log[TESTS_JSON][testname] = {}
        log_value(key, set=set, add=add, target=log[TESTS_JSON][testname])

    # Ready to run tests
    try:
        while True:
            env = copy.copy(global_env)
            if ever_completed:
                retry_next = []
            else: # Persist reported failures list until first non-crash run
                retry_next = retry_these

            log_value(COUNT_JSON, add=1)

            # Prepare environment/filesystem
            if babysitting:
                for key in env_source_keys: # Clear all paths intended for use by mono
                    attemptDelete(env_source[key])
            if resume_after:
                env[RUN_KEY] = ";".join(resume_after)
                env[RUN_MODE_KEY] = "EXCLUDE"
            elif retry_these:
                env[RUN_KEY] = ";".join(retry_these)
                env[RUN_MODE_KEY] = "RUN"

            # Run test suite
            try:
                proc = popen(command, env=env)
            except OSError:
                died_politely = True
                sys.stderr.write("%s: Could not execute command `%s`\n" % (scriptname, command[0]))
                sys.exit(127)

            code = wait(proc, duration)
            timed_out = code is None
            if timed_out: # Initial timeout
                sys.stderr.write("%s: Sending %s to `%s`\n" % (scriptname, "SIG" + args.signal, command[0]))
                send_signal(proc, timeout_signal)
                if kill_after is not None: # Kill-after timeout
                    code = wait(proc, kill_after)
                    if code is None:
                        sys.stderr.write("%s: Sending SIGKILL to `%s`\n" % (scriptname, command[0]))
                        send_signal(proc, signal.SIGKILL)
                        code = wait(proc, 1)
                        sys.stderr.write("%s: Command `%s` returned with %s after SIGKILL\n" % (scriptname, command[0], str(code)))
                    else:
                        sys.stderr.write("%s: Command `%s` returned with %s after %s\n" % (scriptname, command[0], str(code), "SIG" + args.signal))
                code = proc.wait() # Waits forever
                sys.stderr.write("%s: Command `%s` timed out with %s\n" % (scriptname, command[0], str(code)))
            died_politely = True

            # The test suite has now run, and what happens next varies:
            # 1. The suite either completed fully without failures, or timed out: Just quit.
            # 2. The suite crashed (halted without completing):
            #    Remember any failures for later and rerun, using a blacklist of testcases we have completed.
            # 3. The suite completed, but there were failures reported:
            #    Rerun, using a whitelist of only reported-failed testcases.
            # 4. The suite crashed partway through a run with a whitelist:
            #    Rerun, using a whitelist consisting of the previous whitelist minus successful testcases.
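            #
            # Worked example (hypothetical test names, assuming MONO_BABYSITTER_RETRY allows enough
            # failures): suppose the suite runs tests A, B, C, D and crashes inside C on the first
            # run. C is recorded as a crash failure and the next run gets "Run mode" EXCLUDE with
            # the blacklist A;B (case 2). If that rerun completes but reports D as failed, the
            # following run gets "Run mode" RUN with the whitelist D (case 3), and this repeats
            # until D passes or exhausts its allowed retries.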

            crashed_at = attemptFirstLine(env_source[CURRENT_TEST_KEY])
            failed_tests = attemptLines(env_source[FAILED_TEST_KEY])
            ran_tests = attemptLines(env_source[RAN_TEST_KEY])
            wrote_xml = attemptLines(env_source[XML_LIST_KEY])
            bailout = False

            if crashed_at or failed_tests or ran_tests: # Test suite follows the babysitter protocol
                log_value(SUPPORT_JSON, True)

            if not crashed_at and not ever_completed: # The resume_after whitelist is only
                resume_after = []                     # used before the first noncrashing run
                ever_completed = True

            if timed_out: # Currently no retries after timeout
                bailout = True
                code = 124 # See GNU timeout manpage

            if code or crashed_at: # Process failures
                # Handle crash failures
                if crashed_at and not timed_out:
                    log_test(crashed_at, TEST_CRASH_FAILURES, add=1)
                    if not crash_resuming:
                        bailout = True

                    if failure_may_retry(crashed_at):
                        if ever_completed: # Rerun with whitelist next time
                            for test in retry_these: # Prepopulate with last whitelist minus run testcases
                                if test == crashed_at or test not in ran_tests: # (plus crashed testcase)
                                    retry_next.append(test)
                        else: # Rerun with blacklist next time
                            for test in ran_tests: # Add testcases we just ran to blacklist
                                if test != crashed_at: # (except for the crashed testcase)
                                    resume_after.append(test)
                    else:
                        bailout = True

                # Handle reported failures
                for test in failed_tests:
                    log_test(test, TEST_FAILURES, add=1)
                    if failure_may_retry(test):
                        retry_next.append(test)
                    else:
                        bailout = True

                # Report human-readable failures for stdout log.
                message = "%s:" % (scriptname)
                if timed_out:
                    message += " Saw timeout in test case %s (never allowed)." % (crashed_at)
                elif crashed_at:
                    message += " Saw crash in test case %s." % (failure_annotate(crashed_at))
                if failed_tests:
                    message += " Saw test failure in test case%s %s." % (pluralize(failed_tests), "; ".join(map(failure_annotate, failed_tests)))
                if not (timed_out or crashed_at or failed_tests):
                    message += " Test suite terminated with code %d, " % (code)
                    if log[SUPPORT_JSON]:
                        message += "but failure did not occur during a test case. Halting."
                    elif xml_list or wrote_xml:
                        message += "will extract test results from XML. Halting."
                    else:
                        message += "and suite cannot report test case data. Halting."
                elif bailout:
                    message += " Will halt testing."
                print(message)

            if not log[SUPPORT_JSON]:
                for xml in (xml_list + [incomingPath(xml) for xml in wrote_xml]):
                    verbose_print("Will attempt to load XML from %s" % (xml))
                    try:
                        data = xmlparse(xml).documentElement
                        if data.nodeName != 'test-results':
                            raise ValueError("Toplevel element was not <test-results />")

                        log_value(LOADED_XML_JSON, True)

                        search = [data]
                        while search:
                            nextsearch = []
                            for node in search:
                                for child in node.childNodes:
                                    if child.nodeName == 'test-suite' or child.nodeName == 'results':
                                        nextsearch.append(child) # Descend
                                    elif child.nodeName == 'test-case':
                                        name = child.getAttribute("name")
                                        if child.getAttribute("executed") == "True" and child.getAttribute("success") != "True":
                                            log_test(name, TEST_FAILURES, add=1)

                            search = nextsearch
                    except Exception as e:
                        print("Could not load XML file %s. Reason: %s" % (xml, e))
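
            # The walk above expects NUnit 2.x-style result documents, which look roughly like the
            # outline below (only the attributes this script reads are shown; the test name is made up):
            #   <test-results>
            #     <test-suite><results>
            #       <test-case name="MonoTests.Some.Case" executed="True" success="False" />
            #     </results></test-suite>
            #   </test-results>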

            if bailout or not (resume_after or retry_next): # If not retrying
                return code

            # If we got here, a retry is occurring
            retry_these = retry_next

            # Report human-readable retry notice for stdout log.
            message = "%s: Will rerun test suite" % (scriptname)
            if log[COUNT_JSON] > 1:
                message += " (iteration #%d)" % (log[COUNT_JSON])
            if resume_after:
                message += ", resuming at crashed testcase %s." % (crashed_at)
            else:
                message += ", running previously failed testcase%s: %s." % (pluralize(retry_these), "; ".join(retry_these))
            print(message)
    finally:
        # Emergency: Ensure command does not outlive this script
        if proc is not None and not died_politely:
            send_signal(proc, signal.SIGKILL)

        # Write out logs
        log_value(FINAL_CODE_JSON, "EXCEPTION" if code is None else code)
        if logging:
            with open(logfile, "a") as f:
                f.write(json.dumps(log) + os.linesep)

sys.exit( run() )