# Mimics GNU timeout, but has special modes which gather test result data and retry failed tests.

######################################### How this works ##########################################

# Because we have several different test harnesses and we don't invoke them directly, communication
# between this script and the harness is done through the simplest means possible (environment
# variables to communicate babysitter->harness, files in standard locations harness->babysitter).

# The script supports three different ways of extracting test data from the invoked test suite:

# 1. "The babysitter protocol": The babysitter sets five environment variables (see below):
#      "Ran test file": A path to a file where the harness should write a line-delimited list of
#          tests which ran to completion.
#      "Failed test file": A path to a file where the harness should write a line-delimited list
#          of tests that failed.
#      "Current test file": A path to a file where the harness should write the currently running
#          test before a test begins, then delete afterward (used to detect early termination).
#      "Run test": A list of test names, used by:
#      "Run mode": This is either RUN or EXCLUDE. If RUN, the test list is a whitelist; run only
#          those tests. If EXCLUDE, the list is a blacklist; run all except those tests.
#    This is the most featureful mode: It can report where we failed in the case of timeouts or
#    crashes that take down the harness, and if the feature is enabled it can retry failed tests.
#    However, it requires modification to the test harness.

# 2. NUnit XML: The babysitter also sets a sixth environment variable:
#      "XML list file": A path to a file where the harness should write a line-delimited list of
#          paths to NUnit-format XML result files it created.
#    This also requires modification to the test harness, but less of it.

# 3. NUnit XML (manually specified): If the test harness can't be modified, but the caller of the
#    babysitter script happens to know where the harness writes its result XML files, the caller
#    can specify those paths in the "Extra XML" environment variable (see --help).

# A single babysitter invocation can currently handle either the babysitter protocol or the XML,
# but never a mix of the two.

####################################################################################################
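
# For illustration only: a minimal sketch of the harness side of the babysitter protocol.
# This is hypothetical harness code (run_test() and tests_to_run are assumptions, not part of
# this script); it only shows how the report files named by the environment variables are used.
#
#   current = os.environ.get('MONO_BABYSITTER_NUNIT_CURRENT_TEST_FILE')
#   ran     = os.environ.get('MONO_BABYSITTER_NUNIT_RAN_TEST_FILE')
#   failed  = os.environ.get('MONO_BABYSITTER_NUNIT_FAILED_TEST_FILE')
#   for test in tests_to_run:
#       if current:
#           with open(current, "w") as f:   # Record the test about to run
#               f.write(test)
#       ok = run_test(test)                 # (hypothetical) run one test case
#       if ran:
#           with open(ran, "a") as f:       # Append to the line-delimited "ran" list
#               f.write(test + "\n")
#       if not ok and failed:
#           with open(failed, "a") as f:    # Append to the line-delimited "failed" list
#               f.write(test + "\n")
#       if current:
#           os.remove(current)              # Delete so a crash mid-test leaves it behind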

import argparse, calendar, copy, json, os, platform, re, signal, subprocess, sys, tempfile, time
from xml.dom.minidom import parse as xmlparse

CURRENT_TEST_FILE = "babysitter_report_current_test_file.txt"
RAN_TEST_FILE = "babysitter_report_ran_test_file.txt"
FAILED_TEST_FILE = "babysitter_report_failed_test_file.txt"
XML_LIST_FILE = "babysitter_report_xml_list_file.txt"
LOGGING_FILE = "babysitter_report.json_lines"

# Keys used for Babysitter<->Nunit IPC
CURRENT_TEST_KEY = 'MONO_BABYSITTER_NUNIT_CURRENT_TEST_FILE' # Tell nunit where to leave files
RAN_TEST_KEY = 'MONO_BABYSITTER_NUNIT_RAN_TEST_FILE'
FAILED_TEST_KEY = 'MONO_BABYSITTER_NUNIT_FAILED_TEST_FILE'
XML_LIST_KEY = 'MONO_BABYSITTER_NUNIT_XML_LIST_FILE'
RUN_KEY = 'MONO_BABYSITTER_NUNIT_RUN_TEST' # Semicolon-separated list of test names
RUN_MODE_KEY = 'MONO_BABYSITTER_NUNIT_RUN_MODE' # Equal to either RUN or EXCLUDE

# Keys used for script configuration (see --help text)
LOG_FILE_KEY = 'MONO_BABYSITTER_LOG_FILE' # Path
EXTRA_XML_KEY = 'MONO_BABYSITTER_EXTRA_XML' # Semicolon-separated list of paths
RETRY_KEY = 'MONO_BABYSITTER_RETRY' # Equal to an integer
VERBOSE_KEY = 'MONO_BABYSITTER_VERBOSE' # "Undocumented"-- used for debugging babysitter

# Keys used in the JSON log records
DATE_JSON = 'date' # POSIX timestamp of test suite run
INVOKE_JSON = 'invocation'
COUNT_JSON = 'iteration' # How many times was command executed?
LIMIT_JSON = 'failure_max'
SUPPORT_JSON = 'babysitter_protocol' # Was the test suite running with a babysitter-aware nunit?
LOADED_XML_JSON = 'loaded_xml' # True if we loaded result XML from the test suite
FINAL_CODE_JSON = 'final_code'
TESTS_JSON = 'tests' # Holds dictionary of (test case name)->(dict with TEST_ keys below)
TEST_FAILURES = 'normal_failures'
TEST_CRASH_FAILURES = 'crash_failures'
TEST_TIMEOUT_FAILURES = 'timeout_failures'
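
# For illustration, a single (hypothetical) log line assembled from the keys above might look like:
#   {"date": 1450000000, "invocation": "make -C mcs check", "iteration": 2, "failure_max": 3,
#    "babysitter_protocol": true, "loaded_xml": false, "final_code": 0,
#    "tests": {"MonoTests.Example": {"normal_failures": 1}}}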

### Interpret arguments

scriptname = sys.argv[0]

# This is very silly: This program needs to switch from argparse to manual parsing
# after the second positional argument, but argparse doesn't let you do this.
# I work around this with a custom subclass which can selectively swallow errors:
class Hesitate(Exception):
    pass

class HesitantParser(argparse.ArgumentParser):
    def __init__(s, *args, **kwargs):
        s.hesitating = True # Errors will be swallowed until this is set false
        argparse.ArgumentParser.__init__(s, *args, **kwargs)
    def error(s, *args, **kwargs):
        if s.hesitating:
            raise Hesitate() # Bail out before errors are printed.
        argparse.ArgumentParser.error(s, *args, **kwargs)

argparser = HesitantParser(description="""\
Run a test suite with a timeout.\n
Durations are floating point numbers followed by an optional unit:\n
's' for seconds (the default)
'm' for minutes
'h' for hours
'd' for days
supported environment variables:
%s: File to write logs to (as line-delimited JSON)
%s: If set to a number, failed test cases will be rerun this many times (NUnit test suites only)
%s: Semicolon-separated list of additional NUnit XMLs to check for errors""" %
        (LOG_FILE_KEY, RETRY_KEY, EXTRA_XML_KEY),
    formatter_class=argparse.RawTextHelpFormatter)
argparser.add_argument('-s', '--signal', dest='signal', metavar='signal', default='TERM',
    help="Send this signal to the command on timeout, instead of TERM.")
argparser.add_argument('-k', '--kill-after-duration', dest='kill_after', metavar='duration',
    help="If process continues running after signal, send KILL after this duration.")
argparser.add_argument('duration',
    help="Time to run before sending signal.")
argparser.add_argument('command', nargs="+", help="Command+args to run.")

# Repeatedly parse the given args until we find the shortest prefix for which parsing succeeds.
argc = len(sys.argv)
for limit in range(1,argc+1):
    try:
        if limit == argc: # On the final pass, parse for real
            argparser.hesitating = False
        args = argparser.parse_args(sys.argv[1:limit])
        # If we're still here, parse_args succeeded.
        # The final parsed arg is the command; remaining argv items are its args.
        extra_args = sys.argv[limit:]
        break
    except Hesitate: # Before the final pass, if parse_args fails, skip
        continue

argparser.hesitating = False # Just act like a normal argparser from here
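
# For illustration (hypothetical invocation): given `babysitter 300 make check -j4`, the scan above
# stops at the shortest parsable prefix (`300 make`), so `check -j4` is passed through to make as
# part of the command rather than being interpreted as babysitter options.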

durationre = re.compile(r'(\d+)([smhd]?)')
def parse_duration(duration): # Accept units
    match = durationre.match(duration)
    if not match:
        argparser.error("Could not understand duration %s" % duration)
    time, units = match.group(1), match.group(2)
    time = int(time)
    if units == 'm':
        time *= 60
    elif units == 'h':
        time *= 60*60
    elif units == 'd':
        time *= 60*60*24
    return time
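
# Hypothetical examples (assuming the unit conversion sketched above):
#   parse_duration("90") -> 90 seconds; parse_duration("2m") -> 120; parse_duration("1h") -> 3600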

def parse_signal(sig): # Accept names
    for k,v in signal.__dict__.items():
        if k == ("SIG%s" % sig):
            return v
    argparser.error("Could not understand signal name %s" % sig)

# Final interpretation of arguments
duration = parse_duration(args.duration)
kill_after = parse_duration(args.kill_after) if args.kill_after is not None else None
timeout_signal = parse_signal(args.signal)
command = args.command + extra_args

# If we are running in Cygwin, Python will believe it is a UNIX application but Mono will be Windows.
# Paths handed to Mono therefore need converting with cygpath.
cygwin = platform.system().startswith("CYGWIN")

def outgoingPath(path):
    if cygwin: # Invoke cygpath and strip newline
        return subprocess.check_output(["cygpath", "-w", path])[:-1]
    return path

def incomingPath(path):
    if cygwin:
        return subprocess.check_output(["cygpath", path])[:-1]
    return path
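
# For example, under Cygwin outgoingPath("/tmp/x") typically yields something like "C:\cygwin\tmp\x"
# (the exact prefix depends on the Cygwin install root); incomingPath performs the reverse mapping.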

# Some of the things we put in global_env are paths. If we're in cygwin, we have to keep separate
# local-use and env (mono use) copies of these keys.
env_source = {}

# Process environment
global_env = copy.deepcopy(os.environ)

verbose = VERBOSE_KEY in global_env
logging = LOG_FILE_KEY in global_env
logfile = global_env[LOG_FILE_KEY] if logging else None
xml_list = global_env[EXTRA_XML_KEY].split(";") if EXTRA_XML_KEY in global_env and global_env[EXTRA_XML_KEY] else []
crash_resuming = True # TODO: Consider exposing this option, or adding a retry_on_crash option.
failmax = int(global_env[RETRY_KEY]) if RETRY_KEY in global_env else 0
babysitting = True # If false, babysitter becomes a timeout clone with no env manipulation or anything.

babysitter_dir = tempfile.mkdtemp()
env_source[CURRENT_TEST_KEY] = os.path.join(babysitter_dir, CURRENT_TEST_FILE)
env_source[RAN_TEST_KEY] = os.path.join(babysitter_dir, RAN_TEST_FILE)
env_source[FAILED_TEST_KEY] = os.path.join(babysitter_dir, FAILED_TEST_FILE)
env_source[XML_LIST_KEY] = os.path.join(babysitter_dir, XML_LIST_FILE)

env_source_keys = [CURRENT_TEST_KEY, RAN_TEST_KEY, FAILED_TEST_KEY, XML_LIST_KEY]
for key in env_source_keys:
    global_env[key] = outgoingPath(env_source[key])

have_unix_process_groups = 'killpg' in os.__dict__
have_windows_process_groups = 'CREATE_NEW_PROCESS_GROUP' in subprocess.__dict__

### Timeout implementation

def wait(proc, duration):
    # TODO: If we detect Python 3.3, Popen objects have a wait(timeout) method we can use
    start = time.time()
    while True:
        code = proc.poll() # Poll until the process exits or the duration elapses
        if code is not None:
            return code
        if time.time()-start > duration:
            return None
        time.sleep(0.05) # Brief sleep to avoid busy-waiting

# Popen and send_signal can't be called in their basic forms because we want to
# send signals to all children, not just to the immediately spawned process.
# Unfortunately the way to do this varies by operating system.
def popen(*args, **kwargs):
    if have_unix_process_groups: # Call function on spawn to become process group leader
        kwargs['preexec_fn'] = os.setsid
    elif have_windows_process_groups: # Set magic flag for Windows process groups
        kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
    return subprocess.Popen(*args, **kwargs)

def send_signal(proc, sig):
    if have_unix_process_groups: # UNIX
        # For compatibility with GNU timeout, pre-send the signal to just the monitored process
        os.kill(proc.pid, sig)
        # Send signal to entire group
        try:
            os.killpg(proc.pid, sig)
        except OSError as e:
            sys.stderr.write("%s: Warning, could not kill process group %s because %s\n" % (scriptname, proc.pid, e))
        # For compatibility with GNU timeout, send a SIGCONT after the signal
        # (so delivery has a chance to occur even for stopped processes)
        if sig != signal.SIGKILL and sig != signal.SIGCONT:
            os.kill(proc.pid, signal.SIGCONT)
    elif have_windows_process_groups: # Windows with Python 2.7 or better
        os.kill(proc.pid, sig) # Because of CREATE_NEW_PROCESS_GROUP, this will go to the entire group
    else: # Windows with Python 2.6-- CREATE_NEW_PROCESS_GROUP not supported
        proc.send_signal(sig) # No way to contact group, just kill process

### Utility functions

def attemptDelete(path):
    try:
        os.remove(path)
    except OSError:
        pass

def attemptLines(path):
    try:
        with open(path) as f:
            return map(lambda s: s.strip('\r\n'), f.readlines())
    except (OSError, IOError):
        return []

def attemptFirstLine(path):
    lines = attemptLines(path)
    if lines:
        return lines[0]
    return None

def posixtime(): # Amazingly, time.time() does not guarantee an epoch in the docs. However this does:
    return calendar.timegm(time.gmtime())

failcount = {} # (test case name)->(number of observed failures)
def failure_may_retry(test):
    if test not in failcount:
        failcount[test] = 0
    failcount[test] += 1
    return failcount[test] < failmax

def verbose_print(arg):
    if verbose:
        print(arg)

def failure_annotate(test):
    return "%s (failure #%d of %d allowed)" % (test, failcount[test], failmax)

def pluralize(lst):
    return "s" if len(lst) > 1 else ""

def run(): # Returns exit code
    resume_after = []
    retry_these = []
    ever_completed = False
    died_politely = False
    proc = None
    code = None

    log = {DATE_JSON: posixtime(), COUNT_JSON:0, LIMIT_JSON:failmax, SUPPORT_JSON:False,
           LOADED_XML_JSON:False, INVOKE_JSON: " ".join(command)}

    def log_value(key, set=None, add=None, target=log): # Call to add toplevel value to log
        if add is not None:
            if key not in target:
                target[key] = 0
            target[key] += add
        else:
            target[key] = set

    def log_test(testname, key, set=None, add=None): # Call to add test-case-level value to log
        if TESTS_JSON not in log:
            log[TESTS_JSON] = {}
        if testname not in log[TESTS_JSON]:
            log[TESTS_JSON][testname] = {}
        log_value(key, set=set, add=add, target=log[TESTS_JSON][testname])

    try:
        while True:
            env = copy.copy(global_env)

            if ever_completed:
                retry_next = []
            else: # Persist reported failures list until first non-crash run
                retry_next = retry_these

            log_value(COUNT_JSON, add=1)

            # Prepare environment/filesystem
            for key in env_source_keys: # Clear all paths intended for use by mono
                attemptDelete(env_source[key])
            if resume_after:
                env[RUN_KEY] = ";".join(resume_after)
                env[RUN_MODE_KEY] = "EXCLUDE"
            elif retry_these:
                env[RUN_KEY] = ";".join(retry_these)
                env[RUN_MODE_KEY] = "RUN"

            # Run test suite
            try:
                proc = popen(command, env=env)
            except OSError:
                sys.stderr.write("%s: Could not execute command `%s`\n" % (scriptname, command[0]))
                sys.exit(127) # Could not find or run the command (mirrors GNU timeout)

            code = wait(proc, duration)
            timed_out = code is None
            if timed_out: # Initial timeout
                sys.stderr.write("%s: Sending %s to `%s`\n" % (scriptname, "SIG" + args.signal, command[0]))
                send_signal(proc, timeout_signal)
                if kill_after is not None: # Kill-after timeout
                    code = wait(proc, kill_after)
                    if code is None: # Initial signal was not enough; escalate
                        sys.stderr.write("%s: Sending SIGKILL to `%s`\n" % (scriptname, command[0]))
                        send_signal(proc, signal.SIGKILL)
                        code = proc.wait()
                        sys.stderr.write("%s: Command `%s` returned with %s after SIGKILL\n" % (scriptname, command[0], str(code)))
                    else:
                        sys.stderr.write("%s: Command `%s` returned with %s after %s\n" % (scriptname, command[0], str(code), "SIG" + args.signal))
                else:
                    code = proc.wait() # Waits forever
                    sys.stderr.write("%s: Command `%s` timed out with %s\n" % (scriptname, command[0], str(code)))
            died_politely = True

            # The test suite has now run, and what happens next varies:
            # 1. The suite either completed fully without failures, or timed out: Just quit.
            # 2. The suite crashed (halted without completing):
            #    Remember any failures for later and rerun, using a blacklist of testcases we have completed.
            # 3. The suite completed, but there were failures reported:
            #    Rerun, using a whitelist of only reported-failed testcases.
            # 4. The suite crashed partway through a run with a whitelist:
            #    Rerun, using a whitelist consisting of the previous whitelist minus successful testcases.

            crashed_at = attemptFirstLine(env_source[CURRENT_TEST_KEY])
            failed_tests = attemptLines(env_source[FAILED_TEST_KEY])
            ran_tests = attemptLines(env_source[RAN_TEST_KEY])
            wrote_xml = attemptLines(env_source[XML_LIST_KEY])
            bailout = False

            if crashed_at or failed_tests or ran_tests: # Test suite follows the babysitter protocol
                log_value(SUPPORT_JSON, True)

            if not crashed_at and not ever_completed: # The resume_after whitelist is only
                resume_after = []                     # used before the first noncrashing run
                ever_completed = True

            if timed_out: # Currently no retries after timeout
                bailout = True
                code = 124 # See GNU timeout manpage

            if code or crashed_at: # Process failures
                # Handle crash failures
                if crashed_at and not timed_out:
                    log_test(crashed_at, TEST_CRASH_FAILURES, add=1)
                    if not crash_resuming:
                        bailout = True

                    if failure_may_retry(crashed_at):
                        if ever_completed: # Rerun with whitelist next time
                            for test in retry_these: # Prepopulate with last whitelist minus run testcases
                                if test == crashed_at or test not in ran_tests: # (plus crashed testcase)
                                    retry_next.append(test)
                        else: # Rerun with blacklist next time
                            for test in ran_tests: # Add testcases we just ran to blacklist
                                if test != crashed_at: # (except for the crashed testcase)
                                    resume_after.append(test)
                    else:
                        bailout = True

                # Handle reported failures
                for test in failed_tests:
                    log_test(test, TEST_FAILURES, add=1)
                    if failure_may_retry(test):
                        retry_next.append(test)
                    else:
                        bailout = True

                # Report human-readable failures for stdout log.
                message = "%s:" % (scriptname)
                if timed_out:
                    message += " Saw timeout in test case %s (never allowed)." % (crashed_at)
                elif crashed_at:
                    message += " Saw crash in test case %s." % (failure_annotate(crashed_at))
                if failed_tests:
                    message += " Saw test failure in test case%s %s." % (pluralize(failed_tests), "; ".join(map(failure_annotate, failed_tests)))
                if not (timed_out or crashed_at or failed_tests):
                    message += " Test suite terminated with code %d, " % (code)
                    if log[SUPPORT_JSON]:
                        message += "but failure did not occur during a test case. Halting."
                    elif xml_list or wrote_xml:
                        message += "will extract test results from XML. Halting."
                    else:
                        message += "and suite cannot report test case data. Halting."
                elif bailout:
                    message += " Will halt testing."
                print(message)

            if not log[SUPPORT_JSON]:
                for xml in (xml_list + [incomingPath(xml) for xml in wrote_xml]):
                    verbose_print("Will attempt to load XML from %s" % (xml))
                    try:
                        data = xmlparse(xml).documentElement
                        if data.nodeName != 'test-results':
                            raise ValueError("Toplevel element was not <test-results />")

                        log_value(LOADED_XML_JSON, True)

                        searchme = [data] # Walk the tree level by level looking for test-case nodes
                        while searchme:
                            nextsearch = []
                            for node in searchme:
                                for child in node.childNodes:
                                    if child.nodeName == 'test-suite' or child.nodeName == 'results':
                                        nextsearch.append(child) # Descend
                                    elif child.nodeName == 'test-case':
                                        name = child.getAttribute("name")
                                        if child.getAttribute("executed") == "True" and child.getAttribute("success") != "True":
                                            log_test(name, TEST_FAILURES, add=1)
                            searchme = nextsearch
                    except Exception as e:
                        print("Could not load XML file %s. Reason: %s" % (xml, e))

            if bailout or not (resume_after or retry_next): # If not retrying
                return code

            # If we got here, a retry is occurring
            retry_these = retry_next

            # Report human-readable retry notice for stdout log.
            message = "%s: Will rerun test suite" % (scriptname)
            if log[COUNT_JSON] > 1:
                message += " (iteration #%d)" % (log[COUNT_JSON])
            if crashed_at:
                message += ", resuming at crashed testcase %s." % (crashed_at)
            else:
                message += ", running previously failed testcase%s: %s." % (pluralize(retry_these), "; ".join(retry_these))
            print(message)
    finally:
        # Emergency: Ensure command does not outlive this script
        if proc is not None and not died_politely:
            send_signal(proc, signal.SIGKILL)

        # Write out logs
        log_value(FINAL_CODE_JSON, "EXCEPTION" if code is None else code)
        if logging:
            with open(logfile, "a") as f:
                f.write(json.dumps(log) + os.linesep)