2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Snapshot Build Bisect Tool
8 This script bisects a snapshot archive using binary search. It starts at
9 a bad revision (it will try to guess HEAD) and asks for a last known-good
10 revision. It will then binary search across this revision range by downloading,
11 unzipping, and opening Chromium for you. After testing the specific revision,
12 it will ask you whether it is good or bad before continuing the search.
# The root URL for storage.
BASE_URL = 'http://commondatastorage.googleapis.com/chromium-browser-snapshots'

# The root URL for official builds.
OFFICIAL_BASE_URL = 'http://master.chrome.corp.google.com/official_builds'

# Changelogs URL.
CHANGELOG_URL = 'http://build.chromium.org/f/chromium/' \
                'perf/dashboard/ui/changelog.html?url=/trunk/src&range=%d%%3A%d'

# Official Changelogs URL.
OFFICIAL_CHANGELOG_URL = 'http://omahaproxy.appspot.com/'\
                         'changelog?old_version=%s&new_version=%s'

# DEPS file URL.
DEPS_FILE = 'http://src.chromium.org/viewvc/chrome/trunk/src/DEPS?revision=%d'

# WebKit Changelogs URL.
WEBKIT_CHANGELOG_URL = 'http://trac.webkit.org/log/' \
                       'trunk/?rev=%d&stop_rev=%d&verbose=on&limit=10000'

# Message printed once the bisect narrows the range to adjacent builds.
DONE_MESSAGE = 'You are probably looking for a change made after ' \
               '%s (known good), but no later than %s (first known bad).'
38 ###############################################################################
import optparse
import os
import re
import shutil
import subprocess
import sys
import tempfile
import threading
import urllib
import zipfile
from distutils.version import LooseVersion
from xml.etree import ElementTree
class PathContext(object):
  """A PathContext is used to carry the information used to construct URLs and
  paths when dealing with the storage server and archives."""

  def __init__(self, platform, good_revision, bad_revision, is_official):
    super(PathContext, self).__init__()
    # Store off the input parameters.
    self.platform = platform  # What's passed in to the '-a/--archive' option.
    self.good_revision = good_revision
    self.bad_revision = bad_revision
    self.is_official = is_official

    # The name of the ZIP file in a revision directory on the server.
    self.archive_name = None

    # Set some internal members:
    #   _listing_platform_dir = Directory that holds revisions. Ends with a '/'.
    #   _archive_extract_dir = Uncompressed directory in the archive_name file.
    #   _binary_name = The name of the executable to run.
    if self.platform == 'linux' or self.platform == 'linux64':
      self._binary_name = 'chrome'
    elif self.platform == 'mac':
      self.archive_name = 'chrome-mac.zip'
      self._archive_extract_dir = 'chrome-mac'
    elif self.platform == 'win':
      self.archive_name = 'chrome-win32.zip'
      self._archive_extract_dir = 'chrome-win32'
      self._binary_name = 'chrome.exe'
    else:
      raise Exception('Invalid platform: %s' % self.platform)

    # Official and Chromium snapshot servers lay out their directories
    # differently, so the listing dir and archive names diverge here.
    if is_official:
      if self.platform == 'linux':
        self._listing_platform_dir = 'lucid32bit/'
        self.archive_name = 'chrome-lucid32bit.zip'
        self._archive_extract_dir = 'chrome-lucid32bit'
      elif self.platform == 'linux64':
        self._listing_platform_dir = 'lucid64bit/'
        self.archive_name = 'chrome-lucid64bit.zip'
        self._archive_extract_dir = 'chrome-lucid64bit'
      elif self.platform == 'mac':
        self._listing_platform_dir = 'mac/'
        self._binary_name = 'Google Chrome.app/Contents/MacOS/Google Chrome'
      elif self.platform == 'win':
        self._listing_platform_dir = 'win/'
    else:
      if self.platform == 'linux' or self.platform == 'linux64':
        self.archive_name = 'chrome-linux.zip'
        self._archive_extract_dir = 'chrome-linux'
        if self.platform == 'linux':
          self._listing_platform_dir = 'Linux/'
        elif self.platform == 'linux64':
          self._listing_platform_dir = 'Linux_x64/'
      elif self.platform == 'mac':
        self._listing_platform_dir = 'Mac/'
        self._binary_name = 'Chromium.app/Contents/MacOS/Chromium'
      elif self.platform == 'win':
        self._listing_platform_dir = 'Win/'

  def GetListingURL(self, marker=None):
    """Returns the URL for a directory listing, with an optional marker."""
    marker_param = ''
    if marker:
      marker_param = '&marker=' + str(marker)
    return BASE_URL + '/?delimiter=/&prefix=' + self._listing_platform_dir + \
        marker_param

  def GetDownloadURL(self, revision):
    """Gets the download URL for a build archive of a specific revision."""
    if self.is_official:
      return "%s/%s/%s%s" % (
          OFFICIAL_BASE_URL, revision, self._listing_platform_dir,
          self.archive_name)
    else:
      return "%s/%s%s/%s" % (
          BASE_URL, self._listing_platform_dir, revision, self.archive_name)

  def GetLastChangeURL(self):
    """Returns a URL to the LAST_CHANGE file."""
    return BASE_URL + '/' + self._listing_platform_dir + 'LAST_CHANGE'

  def GetLaunchPath(self):
    """Returns a relative path (presumably from the archive extraction location)
    that is used to run the executable."""
    return os.path.join(self._archive_extract_dir, self._binary_name)

  def ParseDirectoryIndex(self):
    """Parses the Google Storage directory listing into a list of revision
    numbers. The range starts with self.good_revision and goes until
    self.bad_revision."""

    def _FetchAndParse(url):
      """Fetches a URL and returns a 2-Tuple of ([revisions], next-marker). If
      next-marker is not None, then the listing is a partial listing and another
      fetch should be performed with next-marker being the marker= GET
      parameter."""
      handle = urllib.urlopen(url)
      document = ElementTree.parse(handle)

      # All nodes in the tree are namespaced. Get the root's tag name to extract
      # the namespace. Etree does namespaces as |{namespace}tag|.
      root_tag = document.getroot().tag
      end_ns_pos = root_tag.find('}')
      if end_ns_pos == -1:
        raise Exception("Could not locate end namespace for directory index")
      namespace = root_tag[:end_ns_pos + 1]

      # Find the prefix (_listing_platform_dir) and whether or not the list is
      # truncated.
      prefix_len = len(document.find(namespace + 'Prefix').text)
      next_marker = None
      is_truncated = document.find(namespace + 'IsTruncated')
      if is_truncated is not None and is_truncated.text.lower() == 'true':
        next_marker = document.find(namespace + 'NextMarker').text

      # Get a list of all the revisions.
      all_prefixes = document.findall(namespace + 'CommonPrefixes/' +
                                      namespace + 'Prefix')
      # The <Prefix> nodes have content of the form of
      # |_listing_platform_dir/revision/|. Strip off the platform dir and the
      # trailing slash to just have a number.
      revisions = []
      for prefix in all_prefixes:
        revnum = prefix.text[prefix_len:-1]
        try:
          revnum = int(revnum)
          revisions.append(revnum)
        except ValueError:
          # Non-numeric entries (e.g. stray files) are skipped.
          pass
      return (revisions, next_marker)

    # Fetch the first list of revisions.
    (revisions, next_marker) = _FetchAndParse(self.GetListingURL())

    # If the result list was truncated, refetch with the next marker. Do this
    # until an entire directory listing is done.
    while next_marker:
      next_url = self.GetListingURL(next_marker)
      (new_revisions, next_marker) = _FetchAndParse(next_url)
      revisions.extend(new_revisions)
    return revisions

  def GetRevList(self):
    """Gets the list of revision numbers between self.good_revision and
    self.bad_revision."""
    # Download the revlist and filter for just the range between good and bad.
    minrev = self.good_revision
    maxrev = self.bad_revision
    revlist = map(int, self.ParseDirectoryIndex())
    revlist = [x for x in revlist if x >= int(minrev) and x <= int(maxrev)]
    revlist.sort()
    return revlist

  def GetOfficialBuildsList(self):
    """Gets the list of official build numbers between self.good_revision and
    self.bad_revision."""
    # Download the revlist and filter for just the range between good and bad.
    minrev = self.good_revision
    maxrev = self.bad_revision
    handle = urllib.urlopen(OFFICIAL_BASE_URL)
    dirindex = handle.read()
    handle.close()
    build_numbers = re.findall(r'<a href="([0-9][0-9].*)/">', dirindex)
    final_list = []
    parsed_build_numbers = [LooseVersion(x) for x in build_numbers]
    for build_number in sorted(parsed_build_numbers):
      # Only keep versions whose archive actually exists on the server.
      path = OFFICIAL_BASE_URL + '/' + str(build_number) + '/' + \
             self._listing_platform_dir + self.archive_name
      try:
        connection = urllib.urlopen(path)
        connection.close()
        if build_number > maxrev:
          break
        if build_number >= minrev:
          final_list.append(str(build_number))
      except urllib.HTTPError:
        # Missing archive for this version; skip it.
        pass
    return final_list
def UnzipFilenameToDir(filename, dir):
  """Unzip |filename| to directory |dir|.

  Restores the caller's working directory before returning, and applies any
  Unix permission bits stored in the archive entries."""
  cwd = os.getcwd()
  if not os.path.isabs(filename):
    filename = os.path.join(cwd, filename)
  zf = zipfile.ZipFile(filename)
  # Make base.
  if not os.path.isdir(dir):
    os.mkdir(dir)
  os.chdir(dir)
  # Extract files.
  for info in zf.infolist():
    name = info.filename
    if name.endswith('/'):  # dir
      if not os.path.isdir(name):
        os.makedirs(name)
    else:  # file
      dir = os.path.dirname(name)
      # Guard against dirname() == '' for entries at the archive root;
      # os.makedirs('') raises OSError.
      if dir and not os.path.isdir(dir):
        os.makedirs(dir)
      out = open(name, 'wb')
      out.write(zf.read(name))
      out.close()
    # Set permissions. Permission info in external_attr is shifted 16 bits.
    # Skip entries written by tools that store no Unix mode at all, since
    # chmod(name, 0) would make the file unusable.
    mode = info.external_attr >> 16
    if mode:
      os.chmod(name, mode)
  os.chdir(cwd)
def FetchRevision(context, rev, filename, quit_event=None, progress_event=None):
  """Downloads and unzips revision |rev|.

  @param context A PathContext instance.
  @param rev The Chromium revision number/tag to download.
  @param filename The destination for the downloaded file.
  @param quit_event A threading.Event which will be set by the master thread to
                    indicate that the download should be aborted.
  @param progress_event A threading.Event which will be set by the master thread
                        to indicate that the progress of the download should be
                        displayed.
  """
  def ReportHook(blocknum, blocksize, totalsize):
    # Invoked by urlretrieve for every block received; gives the master thread
    # a chance to abort the transfer and optionally reports progress.
    if quit_event and quit_event.is_set():
      raise RuntimeError("Aborting download of revision %s" % str(rev))
    if progress_event and progress_event.is_set():
      size = blocknum * blocksize
      if totalsize == -1:  # Total size not known.
        progress = "Received %d bytes" % size
      else:
        size = min(totalsize, size)
        progress = "Received %d of %d bytes, %.2f%%" % (
            size, totalsize, 100.0 * size / totalsize)
      # Send a \r to let all progress messages use just one line of output.
      sys.stdout.write("\r" + progress)
      sys.stdout.flush()

  download_url = context.GetDownloadURL(rev)
  try:
    urllib.urlretrieve(download_url, filename, ReportHook)
    if progress_event and progress_event.is_set():
      sys.stdout.write('\n')  # Terminate the in-place progress line.
  except RuntimeError:
    # Raised by ReportHook when quit_event was set; the partially downloaded
    # file is removed by DownloadJob.Stop().
    pass
def RunRevision(context, revision, zipfile, profile, num_runs, args):
  """Given a zipped revision, unzip it and run the test.

  Returns a (returncode, stdout, stderr) tuple from the last run."""
  print('Trying revision %s...' % str(revision))

  # Create a temp directory and unzip the revision into it.
  cwd = os.getcwd()
  tempdir = tempfile.mkdtemp(prefix='bisect_tmp')
  UnzipFilenameToDir(zipfile, tempdir)
  os.chdir(tempdir)

  # Run the build as many times as specified.
  testargs = [context.GetLaunchPath(), '--user-data-dir=%s' % profile] + args
  # The sandbox must be run as root on Official Chrome, so bypass it.
  if context.is_official and (context.platform == 'linux' or
                              context.platform == 'linux64'):
    testargs.append('--no-sandbox')

  for i in range(0, num_runs):
    subproc = subprocess.Popen(testargs,
                               bufsize=-1,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    (stdout, stderr) = subproc.communicate()

  # Restore the working directory before deleting the build's temp dir; the
  # deletion is best-effort (Windows can keep files locked briefly).
  os.chdir(cwd)
  try:
    shutil.rmtree(tempdir, True)
  except Exception:
    pass

  return (subproc.returncode, stdout, stderr)
def AskIsGoodBuild(rev, official_builds, status, stdout, stderr):
  """Ask the user whether build |rev| is good or bad.

  Returns 'g', 'b' or 'u'; raises SystemExit when the user enters 'q'."""
  # Loop until we get a response that we can parse.
  while True:
    response = raw_input('Revision %s is [(g)ood/(b)ad/(u)nknown/(q)uit]: ' %
                         str(rev))
    if response and response in ('g', 'b', 'u'):
      return response
    if response and response == 'q':
      raise SystemExit()
class DownloadJob(object):
  """DownloadJob represents a task to download a given Chromium revision."""

  def __init__(self, context, name, rev, zipfile):
    super(DownloadJob, self).__init__()
    # Store off the input parameters.
    self.context = context
    self.name = name
    self.rev = rev
    self.zipfile = zipfile
    # Events used to signal the download thread: quit_event aborts the
    # transfer, progress_event turns on progress reporting.
    self.quit_event = threading.Event()
    self.progress_event = threading.Event()

  def Start(self):
    """Starts the download."""
    fetchargs = (self.context,
                 self.rev,
                 self.zipfile,
                 self.quit_event,
                 self.progress_event)
    self.thread = threading.Thread(target=FetchRevision,
                                   name=self.name,
                                   args=fetchargs)
    self.thread.start()

  def Stop(self):
    """Stops the download which must have been started previously."""
    self.quit_event.set()
    self.thread.join()
    os.unlink(self.zipfile)

  def WaitFor(self):
    """Prints a message and waits for the download to complete. The download
    must have been started previously."""
    print('Downloading revision %s...' % str(self.rev))
    self.progress_event.set()  # Display progress of download.
    self.thread.join()
def Bisect(platform,
           official_builds,
           good_rev=0,
           bad_rev=0,
           num_runs=1,
           try_args=(),
           profile=None,
           evaluate=AskIsGoodBuild):
  """Given known good and known bad revisions, run a binary search on all
  archived revisions to determine the last known good revision.

  @param platform Which build to download/run ('mac', 'win', 'linux64', etc.).
  @param official_builds Specify build type (Chromium or Official build).
  @param good_rev Number/tag of the last known good revision.
  @param bad_rev Number/tag of the first known bad revision.
  @param num_runs Number of times to run each build for asking good/bad.
  @param try_args A tuple of arguments to pass to the test application.
  @param profile The name of the user profile to run with.
  @param evaluate A function which returns 'g' if the argument build is good,
                  'b' if it's bad or 'u' if unknown.

  Threading is used to fetch Chromium revisions in the background, speeding up
  the user's experience. For example, suppose the bounds of the search are
  good_rev=0, bad_rev=100. The first revision to be checked is 50. Depending on
  whether revision 50 is good or bad, the next revision to check will be either
  25 or 75. So, while revision 50 is being checked, the script will download
  revisions 25 and 75 in the background. Once the good/bad verdict on rev 50 is
  known:

    - If rev 50 is good, the download of rev 25 is cancelled, and the next test
      is run on rev 75.
    - If rev 50 is bad, the download of rev 75 is cancelled, and the next test
      is run on rev 25.
  """
  if not profile:
    profile = 'profile'

  context = PathContext(platform, good_rev, bad_rev, official_builds)
  cwd = os.getcwd()

  print("Downloading list of known revisions...")
  _GetDownloadPath = lambda rev: os.path.join(cwd,
      '%s-%s' % (str(rev), context.archive_name))
  if official_builds:
    revlist = context.GetOfficialBuildsList()
  else:
    revlist = context.GetRevList()

  # Get a list of revisions to bisect across.
  if len(revlist) < 2:  # Don't have enough builds to bisect.
    msg = 'We don\'t have enough builds to bisect. revlist: %s' % revlist
    raise RuntimeError(msg)

  print('Bisecting range [%s, %s].' % (revlist[0], revlist[-1]))

  # Figure out our bookends and first pivot point; fetch the pivot revision.
  good = 0
  bad = len(revlist) - 1
  pivot = bad // 2
  rev = revlist[pivot]
  zipfile = _GetDownloadPath(rev)
  initial_fetch = DownloadJob(context, 'initial_fetch', rev, zipfile)
  initial_fetch.Start()
  initial_fetch.WaitFor()

  # Binary search time!
  while zipfile and bad - good > 1:
    # Pre-fetch next two possible pivots
    #   - down_pivot is the next revision to check if the current revision turns
    #     out to be bad.
    #   - up_pivot is the next revision to check if the current revision turns
    #     out to be good.
    down_pivot = int((pivot - good) / 2) + good
    down_fetch = None
    if down_pivot != pivot and down_pivot != good:
      down_rev = revlist[down_pivot]
      down_fetch = DownloadJob(context, 'down_fetch', down_rev,
                               _GetDownloadPath(down_rev))
      down_fetch.Start()

    up_pivot = int((bad - pivot) / 2) + pivot
    up_fetch = None
    if up_pivot != pivot and up_pivot != bad:
      up_rev = revlist[up_pivot]
      up_fetch = DownloadJob(context, 'up_fetch', up_rev,
                             _GetDownloadPath(up_rev))
      up_fetch.Start()

    # Run test on the pivot revision.
    status = None
    stdout = None
    stderr = None
    try:
      (status, stdout, stderr) = RunRevision(context,
                                             rev,
                                             zipfile,
                                             profile,
                                             num_runs,
                                             try_args)
    except Exception as e:
      sys.stderr.write(str(e) + '\n')
    os.unlink(zipfile)
    zipfile = None

    # Call the evaluate function to see if the current revision is good or bad.
    # On that basis, kill one of the background downloads and complete the
    # other, as described in the comments above.
    try:
      answer = evaluate(rev, official_builds, status, stdout, stderr)
      if answer == 'g':
        good = pivot
        if down_fetch:
          down_fetch.Stop()  # Kill the download of the older revision.
        if up_fetch:
          up_fetch.WaitFor()
          pivot = up_pivot
          zipfile = up_fetch.zipfile
      elif answer == 'b':
        bad = pivot
        if up_fetch:
          up_fetch.Stop()  # Kill the download of the newer revision.
        if down_fetch:
          down_fetch.WaitFor()
          pivot = down_pivot
          zipfile = down_fetch.zipfile
      elif answer == 'u':
        # Nuke the revision from the revlist and choose a new pivot.
        revlist.pop(pivot)
        bad -= 1  # Assumes bad >= pivot.

        fetch = None
        if bad - good > 1:
          # Alternate between using down_pivot or up_pivot for the new pivot
          # point, without affecting the range. Do this instead of setting the
          # pivot to the midpoint of the new range because adjacent revisions
          # are likely affected by the same issue that caused the (u)nknown
          # response.
          if up_fetch and down_fetch:
            fetch = [up_fetch, down_fetch][len(revlist) % 2]
          elif up_fetch:
            fetch = up_fetch
          elif down_fetch:
            fetch = down_fetch

          if fetch:
            if fetch == up_fetch:
              pivot = up_pivot - 1  # Subtracts 1 because revlist was resized.
            else:
              pivot = down_pivot
            zipfile = fetch.zipfile
            fetch.WaitFor()

        # Stop whichever prefetch we are not going to use.
        if down_fetch and fetch != down_fetch:
          down_fetch.Stop()
        if up_fetch and fetch != up_fetch:
          up_fetch.Stop()
      else:
        assert False, "Unexpected return value from evaluate(): " + answer
    except SystemExit:
      print("Cleaning up...")
      for f in [_GetDownloadPath(revlist[down_pivot]),
                _GetDownloadPath(revlist[up_pivot])]:
        try:
          os.unlink(f)
        except OSError:
          pass
      sys.exit(0)

    rev = revlist[pivot]

  return (revlist[good], revlist[bad])
def GetWebKitRevisionForChromiumRevision(rev):
  """Returns the webkit revision that was in chromium's DEPS file at
  chromium revision |rev|.

  Raises an Exception when the revision cannot be extracted."""
  # . doesn't match newlines without re.DOTALL, so this is safe.
  webkit_re = re.compile(r'webkit_revision.:\D*(\d+)')
  url = urllib.urlopen(DEPS_FILE % rev)
  m = webkit_re.search(url.read())
  url.close()
  if m:
    return int(m.group(1))
  else:
    raise Exception('Could not get webkit revision for cr rev %d' % rev)
def GetChromiumRevision(url):
  """Returns the chromium revision read from given URL.

  Falls back to a huge sentinel revision on any failure so the bisect
  starts from the newest available build."""
  try:
    # Location of the latest build revision number
    return int(urllib.urlopen(url).read())
  except Exception:
    print('Could not determine latest revision. This could be bad...')
    return 999999999
def main():
  """Parses the command line and drives the bisect; returns a process
  exit code (0 on success, 1 on usage errors)."""
  usage = ('%prog [options] [-- chromium-options]\n'
           'Perform binary search on the snapshot builds.\n'
           '\n'
           'Tip: add "-- --no-first-run" to bypass the first run prompts.')
  parser = optparse.OptionParser(usage=usage)
  # Strangely, the default help output doesn't include the choice list.
  choices = ['mac', 'win', 'linux', 'linux64']
  # linux-chromiumos lacks a continuous archive http://crbug.com/78158
  parser.add_option('-a', '--archive',
                    choices = choices,
                    help = 'The buildbot archive to bisect [%s].' %
                           '|'.join(choices))
  parser.add_option('-o', action="store_true", dest='official_builds',
                    help = 'Bisect across official ' +
                    'Chrome builds (internal only) instead of ' +
                    'Chromium archives.')
  parser.add_option('-b', '--bad', type = 'str',
                    help = 'The bad revision to bisect to. Default is HEAD.')
  parser.add_option('-g', '--good', type = 'str',
                    help = 'The last known good revision to bisect from. ' +
                           'Default is 0.')
  parser.add_option('-p', '--profile', '--user-data-dir', type = 'str',
                    help = 'Profile to use; this will not reset every run. ' +
                    'Defaults to a clean profile.', default = 'profile')
  parser.add_option('-t', '--times', type = 'int',
                    help = 'Number of times to run each build before asking ' +
                           'if it\'s good or bad. Temporary profiles are reused.',
                    default = 1)
  (opts, args) = parser.parse_args()

  if opts.archive is None:
    print('Error: missing required parameter: --archive')
    print('')
    parser.print_help()
    return 1

  # Create the context. Initialize 0 for the revisions as they are set below.
  context = PathContext(opts.archive, 0, 0, opts.official_builds)

  # Pick a starting point, try to get HEAD for this.
  if opts.bad:
    bad_rev = opts.bad
  else:
    bad_rev = '999.0.0.0'
    if not opts.official_builds:
      bad_rev = GetChromiumRevision(context.GetLastChangeURL())

  # Find out when we were good.
  if opts.good:
    good_rev = opts.good
  else:
    good_rev = '0.0.0.0' if opts.official_builds else 0

  # Official builds are version strings; snapshots are integer revisions.
  if opts.official_builds:
    good_rev = LooseVersion(good_rev)
    bad_rev = LooseVersion(bad_rev)
  else:
    good_rev = int(good_rev)
    bad_rev = int(bad_rev)

  if good_rev > bad_rev:
    print ('The good revision (%s) must precede the bad revision (%s).\n' %
           (good_rev, bad_rev))
    parser.print_help()
    return 1

  if opts.times < 1:
    print('Number of times to run (%d) must be greater than or equal to 1.' %
          opts.times)
    parser.print_help()
    return 1

  (last_known_good_rev, first_known_bad_rev) = Bisect(
      opts.archive, opts.official_builds, good_rev, bad_rev, opts.times, args,
      opts.profile)

  # Get corresponding webkit revisions.
  try:
    last_known_good_webkit_rev = GetWebKitRevisionForChromiumRevision(
        last_known_good_rev)
    first_known_bad_webkit_rev = GetWebKitRevisionForChromiumRevision(
        first_known_bad_rev)
  except Exception:
    # Silently ignore the failure.
    last_known_good_webkit_rev, first_known_bad_webkit_rev = 0, 0

  # We're done. Let the user know the results in an official manner.
  print(DONE_MESSAGE % (str(last_known_good_rev), str(first_known_bad_rev)))
  if last_known_good_webkit_rev != first_known_bad_webkit_rev:
    print('WEBKIT CHANGELOG URL:')
    print('  ' + WEBKIT_CHANGELOG_URL % (first_known_bad_webkit_rev,
                                         last_known_good_webkit_rev))
  print('CHANGELOG URL:')
  if opts.official_builds:
    print(OFFICIAL_CHANGELOG_URL % (last_known_good_rev, first_known_bad_rev))
  else:
    print('  ' + CHANGELOG_URL % (last_known_good_rev, first_known_bad_rev))


if __name__ == '__main__':
  sys.exit(main())