From b6e07d160a29b1da442d0ada33859fa21ed11912 Mon Sep 17 00:00:00 2001 From: "maruel@chromium.org" Date: Fri, 11 Apr 2014 14:35:35 +0000 Subject: [PATCH] Make isolate_driver.py process build.ninja and extract dependencies. (bis) This uses a few assumption: - This basically breaks non-ninja build for component builds. This never worked anyway. - This assumes the file format of .ninja files. This will likely be quite obvious when this breaks. - It makes some assumptions about the build steps, for example '.so.TOC' -> '.so'. On the other hand, it creates a deterministic dependency tree, which is awesome. Technically it would work as well for non-component builds but I don't want to go this far yet. But in the end, that's the goal that nobody has to enter the binary dependencies in the .isolate files. Was reverted in r263072, second try. Added a check to ensure the files added exist and have the +x bit set. TBR=vadimsh@chromium.org BUG=360223,333473 Review URL: https://codereview.chromium.org/234433002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@263240 0039d316-1c4b-4281-b951-d872f2087c98 --- tools/isolate_driver.py | 226 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 166 insertions(+), 60 deletions(-) diff --git a/tools/isolate_driver.py b/tools/isolate_driver.py index 00fe5f730c5d..a779820bd2e8 100755 --- a/tools/isolate_driver.py +++ b/tools/isolate_driver.py @@ -8,16 +8,16 @@ Creates a wrapping .isolate which 'includes' the original one, that can be consumed by tools/swarming_client/isolate.py. Path variables are determined based on the current working directory. The relative_cwd in the .isolated file -is determined based on *the .isolate file that declare the 'command' variable to -be used* so the wrapping .isolate doesn't affect this value. +is determined based on the .isolate file that declare the 'command' variable to +be used so the wrapping .isolate doesn't affect this value. -It packages all the dynamic libraries found in this wrapping .isolate. This is -inefficient and non-deterministic. In the very near future, it will parse -build.ninja, find back the root target and find all the dynamic libraries that -are marked as a dependency to this target. +This script loads build.ninja and processes it to determine all the executables +referenced by the isolated target. It adds them in the wrapping .isolate file. """ +import StringIO import glob +import logging import os import posixpath import subprocess @@ -33,54 +33,153 @@ sys.path.insert(0, SWARMING_CLIENT_DIR) import isolate_format +def load_ninja_recursively(build_dir, ninja_path, build_steps): + """Crudely extracts all the subninja and build referenced in ninja_path. -# Location to grab binaries based on the OS. -DYNAMIC_LIBRARIES = { - 'darwin': '*.dylib', - 'linux2': 'lib/*.so', - 'win32': '*.dll', -} + In particular, it ignores rule and variable declarations. The goal is to be + performant (well, as much as python can be performant) which is currently in + the <200ms range for a complete chromium tree. As such the code is laid out + for performance instead of readability. + """ + logging.debug('Loading %s', ninja_path) + try: + with open(os.path.join(build_dir, ninja_path), 'rb') as f: + line = None + merge_line = '' + subninja = [] + for line in f: + line = line.rstrip() + if not line: + continue + if line[-1] == '$': + # The next line needs to be merged in. + merge_line += line[:-1] + continue -def get_dynamic_libs(build_dir): - """Finds all the dynamic libs to map. + if merge_line: + line = merge_line + line + merge_line = '' - Returns: - list of relative path, e.g. [../out/Debug/lib/libuser_prefs.so]. - """ - items = set() - root = os.path.join(build_dir, DYNAMIC_LIBRARIES[sys.platform]) - for i in glob.iglob(root): - try: - # Will throw on Windows if another process is writing to this file. - open(i).close() - items.add((i, os.stat(i).st_size)) - except IOError: - continue - - # The following sleep value was carefully selected via random guessing. The - # goal is to detect files that are being linked by checking their file size - # after a small delay. - # - # This happens as other build targets can be built simultaneously. For - # example, base_unittests.isolated is being processed but dynamic libraries - # for chrome are currently being linked. - # - # TODO(maruel): Obviously, this must go away and be replaced with a proper - # ninja parser but we need something now. http://crbug.com/333473 - time.sleep(10) - - for item in sorted(items): - file_name, file_size = item + statement = line[:line.find(' ')] + if statement == 'build': + # Save the dependency list as a raw string. Only the lines needed will + # be processed with raw_build_to_deps(). This saves a good 70ms of + # processing time. + build_target, dependencies = line[6:].split(': ', 1) + # Interestingly, trying to be smart and only saving the build steps + # with the intended extensions ('', '.stamp', '.so') slows down + # parsing even if 90% of the build rules can be skipped. + # On Windows, a single step may generate two target, so split items + # accordingly. It has only been seen for .exe/.exe.pdb combos. + for i in build_target.strip().split(): + build_steps[i] = dependencies + elif statement == 'subninja': + subninja.append(line[9:]) + except IOError: + print >> sys.stderr, 'Failed to open %s' % ninja_path + raise + + total = 1 + for rel_path in subninja: try: - open(file_name).close() - if os.stat(file_name).st_size != file_size: - items.remove(item) + # Load each of the files referenced. + # TODO(maruel): Skip the files known to not be needed. It saves an aweful + # lot of processing time. + total += load_ninja_recursively(build_dir, rel_path, build_steps) except IOError: - items.remove(item) - continue + print >> sys.stderr, '... as referenced by %s' % ninja_path + raise + return total + + +def load_ninja(build_dir): + """Loads the tree of .ninja files in build_dir.""" + build_steps = {} + total = load_ninja_recursively(build_dir, 'build.ninja', build_steps) + logging.info('Loaded %d ninja files, %d build steps', total, len(build_steps)) + return build_steps + + +def using_blacklist(item): + """Returns True if an item should be analyzed. + + Ignores many rules that are assumed to not depend on a dynamic library. If + the assumption doesn't hold true anymore for a file format, remove it from + this list. This is simply an optimization. + """ + IGNORED = ( + '.a', '.cc', '.css', '.def', '.h', '.html', '.js', '.json', '.manifest', + '.o', '.obj', '.pak', '.png', '.pdb', '.strings', '.txt', + ) + # ninja files use native path format. + ext = os.path.splitext(item)[1] + if ext in IGNORED: + return False + # Special case Windows, keep .dll.lib but discard .lib. + if item.endswith('.dll.lib'): + return True + if ext == '.lib': + return False + return item not in ('', '|', '||') + + +def raw_build_to_deps(item): + """Converts a raw ninja build statement into the list of interesting + dependencies. + """ + # TODO(maruel): Use a whitelist instead? .stamp, .so.TOC, .dylib.TOC, + # .dll.lib, .exe and empty. + # The first item is the build rule, e.g. 'link', 'cxx', 'phony', etc. + return filter(using_blacklist, item.split(' ')[1:]) + - return [i[0].replace(os.path.sep, '/') for i in items] +def recurse(target, build_steps, rules_seen): + """Recursively returns all the interesting dependencies for root_item.""" + out = [] + if rules_seen is None: + rules_seen = set() + if target in rules_seen: + # TODO(maruel): Figure out how it happens. + logging.warning('Circular dependency for %s!', target) + return [] + rules_seen.add(target) + try: + dependencies = raw_build_to_deps(build_steps[target]) + except KeyError: + logging.info('Failed to find a build step to generate: %s', target) + return [] + logging.debug('recurse(%s) -> %s', target, dependencies) + for dependency in dependencies: + out.append(dependency) + dependency_raw_dependencies = build_steps.get(dependency) + if dependency_raw_dependencies: + for i in raw_build_to_deps(dependency_raw_dependencies): + out.extend(recurse(i, build_steps, rules_seen)) + else: + logging.info('Failed to find a build step to generate: %s', dependency) + return out + + +def post_process_deps(build_dir, dependencies): + """Processes the dependency list with OS specific rules.""" + def filter_item(i): + if i.endswith('.so.TOC'): + # Remove only the suffix .TOC, not the .so! + return i[:-4] + if i.endswith('.dylib.TOC'): + # Remove only the suffix .TOC, not the .dylib! + return i[:-4] + if i.endswith('.dll.lib'): + # Remove only the suffix .lib, not the .dll! + return i[:-4] + return i + + # Check for execute access. This gets rid of all the phony rules. + return [ + i for i in map(filter_item, dependencies) + if os.access(os.path.join(build_dir, i), os.X_OK) + ] def create_wrapper(args, isolate_index, isolated_index): @@ -111,19 +210,24 @@ def create_wrapper(args, isolate_index, isolated_index): isolate_relpath = os.path.relpath( '.', temp_isolate_dir).replace(os.path.sep, '/') - # Will look like ['<(PRODUCT_DIR)/lib/flibuser_prefs.so']. - rebased_libs = [ - '<(PRODUCT_DIR)/%s' % i[len(build_dir)+1:] - for i in get_dynamic_libs(build_dir) - ] + # It's a big assumption here that the name of the isolate file matches the + # primary target. Fix accordingly if this doesn't hold true. + target = isolate[:-len('.isolate')] + build_steps = load_ninja(build_dir) + binary_deps = post_process_deps(build_dir, recurse(target, build_steps, None)) + logging.debug( + 'Binary dependencies:%s', ''.join('\n ' + i for i in binary_deps)) # Now do actual wrapping .isolate. - out = { + isolate_dict = { 'includes': [ posixpath.join(isolate_relpath, isolate), ], 'variables': { - isolate_format.KEY_TRACKED: rebased_libs, + # Will look like ['<(PRODUCT_DIR)/lib/flibuser_prefs.so']. + isolate_format.KEY_TRACKED: sorted( + '<(PRODUCT_DIR)/%s' % i.replace(os.path.sep, '/') + for i in binary_deps), }, } if not os.path.isdir(temp_isolate_dir): @@ -131,14 +235,18 @@ def create_wrapper(args, isolate_index, isolated_index): comment = ( '# Warning: this file was AUTOGENERATED.\n' '# DO NO EDIT.\n') + out = StringIO.StringIO() + isolate_format.print_all(comment, isolate_dict, out) + isolate_content = out.getvalue() with open(temp_isolate, 'wb') as f: - isolate_format.print_all(comment, out, f) - if '--verbose' in args: - print('Added %d dynamic libs' % len(dynamic_libs)) + f.write(isolate_content) + logging.info('Added %d dynamic libs', len(binary_deps)) + logging.debug('%s', isolate_content) args[isolate_index] = temp_isolate def main(): + logging.basicConfig(level=logging.ERROR, format='%(levelname)7s %(message)s') args = sys.argv[1:] isolate = None isolated = None @@ -154,9 +262,7 @@ def main(): print >> sys.stderr, 'Internal failure' return 1 - # Implement a ninja parser. - # http://crbug.com/360223 - if is_component and False: + if is_component: create_wrapper(args, isolate, isolated) swarming_client = os.path.join(SRC_DIR, 'tools', 'swarming_client') -- 2.11.4.GIT