Add maruel, remove cmp&darin from //testing/buildbot/OWNERS.
[chromium-blink-merge.git] / android_webview / tools / webview_licenses.py
blob6b87fc7d97eee1c1cfbe5a844809c190a5354703
1 #!/usr/bin/python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Checks third-party licenses for the purposes of the Android WebView build.
8 The Android tree includes a snapshot of Chromium in order to power the system
9 WebView. This tool checks that all code uses open-source licenses compatible
10 with Android, and that we meet the requirements of those licenses. It can also
11 be used to generate an Android NOTICE file for the third-party code.
13 It makes use of src/tools/licenses.py and the README.chromium files on which
14 it depends. It also makes use of a data file, third_party_files_whitelist.txt,
15 which whitelists individual files which contain third-party code but which
16 aren't in a third-party directory with a README.chromium file.
17 """
19 import imp
20 import json
21 import multiprocessing
22 import optparse
23 import os
24 import re
25 import sys
26 import textwrap
# Absolute path two levels above the directory that contains this script.
REPOSITORY_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..'))

# Load third_party/PRESUBMIT.py by explicit path through imp so that a random
# PRESUBMIT.py reachable via $PATH is never imported instead. Bytecode writing
# is switched off first so that no .pyc file gets generated.
sys.dont_write_bytecode = True
third_party = imp.load_source(
    'PRESUBMIT',
    os.path.join(REPOSITORY_ROOT, 'third_party', 'PRESUBMIT.py'))

# Extend the module search path with <root>/third_party ahead of the jinja2
# import, and with <root>/tools ahead of the licenses import.
sys.path.append(os.path.join(REPOSITORY_ROOT, 'third_party'))
import jinja2
sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
import licenses

import copyright_scanner
class InputApi(object):
  """Minimal stand-in for the presubmit InputApi object.

  Bundles the attributes that this script hands to copyright_scanner: the
  os.path and re modules, os.walk, a file reader and a change object.
  """

  def __init__(self):
    self.change = InputApiChange()
    self.ReadFile = _ReadFile
    self.re = re
    self.os_walk = os.walk
    self.os_path = os.path
class InputApiChange(object):
  """Object stored in InputApi.change; it only knows the repository root."""

  def __init__(self):
    def _repository_root():
      return REPOSITORY_ROOT

    # Kept as an instance attribute holding a zero-argument callable, so that
    # change.RepositoryRoot() returns REPOSITORY_ROOT.
    self.RepositoryRoot = _repository_root
class ScanResult(object):
  """Integer result codes of a scan.

  main() returns one of these and the script passes it to sys.exit(), so the
  values double as the process exit status.
  """
  Ok = 0
  Warnings = 1
  Errors = 2
def _FindCopyrightViolations(files_to_scan_as_string):
  """Runs copyright_scanner.FindCopyrightViolations on one batch of files.

  This needs to be a top-level function so that multiprocessing can use it
  as the worker function of a pool.

  Args:
    files_to_scan_as_string: The batch of files to scan; _CheckLicenseHeaders
      passes one shard produced by _ShardList per call.
  Returns:
    Whatever copyright_scanner.FindCopyrightViolations returns for the batch.
  """
  # A fresh InputApi is built on every call.
  input_api = InputApi()
  return copyright_scanner.FindCopyrightViolations(
      input_api, REPOSITORY_ROOT, files_to_scan_as_string)
def _ShardList(l, shard_len):
  """Splits a list into consecutive shards of at most shard_len items.

  Args:
    l: The list (or any other sliceable sequence with a length) to split.
    shard_len: The maximum number of items per shard. Must be positive.
  Returns:
    A list of slices of l, in the original order. Every shard except possibly
    the last one holds exactly shard_len items; an empty input gives [].
  Raises:
    ValueError: if shard_len is not positive.
  """
  if shard_len <= 0:
    # range() with a negative step would be empty, which would silently drop
    # every item instead of reporting the bad argument.
    raise ValueError('shard_len must be positive, got %r' % (shard_len,))
  return [l[start:start + shard_len]
          for start in range(0, len(l), shard_len)]
def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
  """Checks that all files which are not in a listed third-party directory,
  and which do not use the standard Chromium license, are whitelisted.

  Args:
    excluded_dirs_list: The list of directories to exclude from scanning.
    whitelisted_files: The whitelist of files.
  Returns:
    A (code, problem_paths) tuple. code is
      ScanResult.Ok if all files with non-standard license headers are
      whitelisted and the whitelist contains no stale or missing entries;
      ScanResult.Warnings if there are stale or missing entries;
      ScanResult.Errors if new non-whitelisted entries are found.
    problem_paths is the sorted, de-duplicated list of every path that was
    reported as unknown, missing or stale.
  """
  input_api = InputApi()
  files_to_scan = copyright_scanner.FindFiles(
      input_api, REPOSITORY_ROOT, ['.'], excluded_dirs_list)
  # Scan in shards of 2000 files, spread over a pool of worker processes.
  sharded_files_to_scan = _ShardList(files_to_scan, 2000)
  pool = multiprocessing.Pool()
  try:
    # NOTE(review): map_async(...).get(<huge timeout>) rather than map() is
    # presumably used to keep the parent interruptible while it waits --
    # confirm before simplifying.
    offending_files_chunks = pool.map_async(
        _FindCopyrightViolations, sharded_files_to_scan).get(999999)
    pool.close()
  except BaseException:
    # Do not leave worker processes behind when the scan fails or is aborted.
    pool.terminate()
    raise
  finally:
    pool.join()
  # Flatten out the result
  offending_files = [
      item for sublist in offending_files_chunks for item in sublist]

  (unknown, missing, stale) = copyright_scanner.AnalyzeScanResults(
      input_api, whitelisted_files, offending_files)

  # Each message is passed to print as one parenthesised string, which behaves
  # the same as the print statement.
  if unknown:
    print('The following files contain a third-party license but are not in '
          'a listed third-party directory and are not whitelisted. You must '
          'add the following files to the whitelist.\n%s' %
          '\n'.join(sorted(unknown)))
  if missing:
    print('The following files are whitelisted, but do not exist.\n%s' %
          '\n'.join(sorted(missing)))
  if stale:
    print('The following files are whitelisted unnecessarily. You must '
          'remove the following files from the whitelist.\n%s' %
          '\n'.join(sorted(stale)))

  # Unknown files are fatal; stale or missing whitelist entries only warn.
  if unknown:
    code = ScanResult.Errors
  elif stale or missing:
    code = ScanResult.Warnings
  else:
    code = ScanResult.Ok

  problem_paths = sorted(set(unknown + missing + stale))
  return (code, problem_paths)
def _ReadFile(full_path, mode='rU'):
  """Reads a file from disk. This emulates presubmit InputApi.ReadFile func.

  Args:
    full_path: The path of the file to read.
    mode: The mode string passed to open(). Defaults to 'rU'.
  Returns:
    The contents of the file as a string.
  """
  # The with-block closes the file even if read() raises.
  with open(full_path, mode) as f:
    return f.read()
def _Scan():
  """Checks that license meta-data is present for all third-party code and
  that all non third-party code doesn't contain external copyrighted code.

  Returns:
    A (code, problem_paths) tuple. code is
      ScanResult.Ok if everything is in order;
      ScanResult.Warnings if there are non-fatal problems (e.g. stale
      whitelist entries);
      ScanResult.Errors otherwise.
    problem_paths lists the third-party directories whose license meta-data
    failed to parse, followed by the paths reported by _CheckLicenseHeaders.
  """
  third_party_dirs = licenses.FindThirdPartyDirsWithFiles(REPOSITORY_ROOT)

  problem_paths = []

  # First, check designated third-party directories using src/tools/licenses.py.
  all_licenses_valid = True
  for path in sorted(third_party_dirs):
    try:
      licenses.ParseDir(path, REPOSITORY_ROOT)
    except licenses.LicenseError as e:
      print('Got LicenseError "%s" while scanning %s' % (e, path))
      problem_paths.append(path)
      all_licenses_valid = False

  # Second, check for non-standard license text.
  whitelisted_files = copyright_scanner.LoadWhitelistedFilesList(InputApi())
  licenses_check, more_problem_paths = _CheckLicenseHeaders(
      third_party_dirs, whitelisted_files)

  problem_paths.extend(more_problem_paths)

  # Any LicenseError from the first step makes the whole scan an error,
  # whatever the header check returned.
  return (licenses_check if all_licenses_valid else ScanResult.Errors,
          problem_paths)
class TemplateEntryGenerator(object):
  """Turns license metadata dicts into entries for the NOTICE template.

  Entries are numbered in the order they are generated; the number is used
  to build each entry's 'toc_href' value.
  """

  def __init__(self):
    # When True, license files are not read and 'license' is left empty.
    self._generate_licenses_file_list_only = False
    # Number of entries generated so far.
    self._toc_index = 0

  def SetGenerateLicensesFileListOnly(self, generate_licenses_file_list_only):
    """Switches reading of license file contents off (True) or on (False)."""
    self._generate_licenses_file_list_only = generate_licenses_file_list_only

  def _ReadFileGuessEncoding(self, name):
    """Returns the decoded text of the file |name|.

    Args:
      name: The path of the file to read.
    Returns:
      An empty string in list-only mode (the file is not opened). Otherwise
      the file contents decoded as UTF-8, or as CP-1252 if they are not valid
      UTF-8.
    Raises:
      UnicodeDecodeError: if the contents are valid in neither encoding.
    """
    if self._generate_licenses_file_list_only:
      return ''
    with open(name, 'rb') as input_file:
      contents = input_file.read()
    try:
      return contents.decode('utf8')
    except UnicodeDecodeError:
      pass
    # If it's not UTF-8, it must be CP-1252. Fail otherwise.
    return contents.decode('cp1252')

  def MetadataToTemplateEntry(self, metadata):
    """Builds the template entry for one piece of license metadata.

    Args:
      metadata: A dict with the keys 'Name', 'URL' and 'License File'.
    Returns:
      A dict with the keys 'name', 'url', 'license_file', 'license' (the text
      of the license file, or '' in list-only mode) and 'toc_href' ('entry'
      followed by the 1-based index of this entry).
    """
    self._toc_index += 1
    return {
        'name': metadata['Name'],
        'url': metadata['URL'],
        'license_file': metadata['License File'],
        'license': self._ReadFileGuessEncoding(metadata['License File']),
        'toc_href': 'entry' + str(self._toc_index),
    }
def GenerateNoticeFile(generate_licenses_file_list_only=False):
  """Generates the contents of an Android NOTICE file for the third-party code.
  This is used by the snapshot tool.

  Args:
    generate_licenses_file_list_only: If True, license files are not read and
      only the list of their paths is produced.
  Returns:
    The contents of the NOTICE file, rendered from licenses_notice.tmpl and
    encoded as UTF-8; or, if generate_licenses_file_list_only is True, the
    list of license file paths (one per entry, so it may contain duplicates).
  """
  generator = TemplateEntryGenerator()
  generator.SetGenerateLicensesFileListOnly(generate_licenses_file_list_only)
  # Start from Chromium's LICENSE file
  entries = [generator.MetadataToTemplateEntry({
      'Name': 'The Chromium Project',
      'URL': 'http://www.chromium.org',
      'License File': os.path.join(REPOSITORY_ROOT, 'LICENSE'),
  })]

  third_party_dirs = licenses.FindThirdPartyDirsWithFiles(REPOSITORY_ROOT)
  # We provide attribution for all third-party directories.
  # TODO(mnaganov): Limit this to only code used by the WebView binary.
  for directory in sorted(third_party_dirs):
    try:
      metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
                                   require_license_file=False)
    except licenses.LicenseError:
      # Since this code is called during project files generation,
      # we don't want to break it. But we assume that release
      # WebView apks are built using checkouts that pass
      # 'webview_licenses.py scan' check, thus they don't contain
      # projects with non-compatible licenses.
      continue
    license_file = metadata['License File']
    # Skip directories with no license file or marked as not shipped.
    if license_file and license_file != licenses.NOT_SHIPPED:
      entries.append(generator.MetadataToTemplateEntry(metadata))

  if generate_licenses_file_list_only:
    return [entry['license_file'] for entry in entries]

  # The template is looked up in the directory that contains this script.
  env = jinja2.Environment(
      loader=jinja2.FileSystemLoader(os.path.dirname(__file__)),
      extensions=['jinja2.ext.autoescape'])
  template = env.get_template('licenses_notice.tmpl')
  return template.render({ 'entries': entries }).encode('utf8')
def main():
  """Parses the command line and runs the requested command.

  The first positional argument selects the command: 'scan', 'notice_deps',
  'notice' or 'display_copyrights'. With no command or an unknown one the
  help text is printed.

  Returns:
    A ScanResult code. 'scan' returns the code of the scan; the other
    commands return ScanResult.Ok; a missing or unknown command returns
    ScanResult.Errors.
  """
  class FormatterWithNewLines(optparse.IndentedHelpFormatter):
    """Help formatter that keeps the newlines of the description."""

    def format_description(self, description):
      # Wrap every line on its own so that the line breaks of the original
      # text are preserved.
      paras = description.split('\n')
      formatted_paras = [textwrap.fill(para, self.width) for para in paras]
      return '\n'.join(formatted_paras) + '\n'

  parser = optparse.OptionParser(formatter=FormatterWithNewLines(),
                                 usage='%prog [options]')
  parser.add_option('--json', help='Path to JSON output file')
  parser.description = (__doc__ +
                        '\nCommands:\n'
                        ' scan Check licenses.\n'
                        ' notice_deps Generate the list of dependencies for '
                        'Android NOTICE file.\n'
                        ' notice [file] Generate Android NOTICE file on '
                        'stdout or into |file|.\n'
                        ' display_copyrights Display authorship on the files'
                        ' using names provided via stdin.\n')
  (options, args) = parser.parse_args()
  if not args:
    parser.print_help()
    return ScanResult.Errors

  command = args[0]
  if command == 'scan':
    scan_result, problem_paths = _Scan()
    if scan_result == ScanResult.Ok:
      print('OK!')
    # The list of problem paths is written whenever --json is given, even if
    # it is empty.
    if options.json:
      with open(options.json, 'w') as f:
        json.dump(problem_paths, f)
    return scan_result
  elif command == 'notice_deps':
    # 'set' is used to eliminate duplicate references to the same license file.
    print(' '.join(
        sorted(set(GenerateNoticeFile(generate_licenses_file_list_only=True)))))
    return ScanResult.Ok
  elif command == 'notice':
    notice_file_contents = GenerateNoticeFile()
    if len(args) == 1:
      print(notice_file_contents)
    else:
      with open(args[1], 'w') as output_file:
        output_file.write(notice_file_contents)
    return ScanResult.Ok
  elif command == 'display_copyrights':
    files = sys.stdin.read().splitlines()
    copyrights = copyright_scanner.FindCopyrights(InputApi(), '.', files)
    for f, c in zip(files, copyrights):
      # One line per file: "<file> <TAB><copyright holders joined by ' / '>".
      print('%s \t%s' % (f, ' / '.join(sorted(c))))
    return ScanResult.Ok
  # Unknown command.
  parser.print_help()
  return ScanResult.Errors
# Script entry point: the ScanResult code returned by main() becomes the
# process exit status.
if __name__ == '__main__':
  exit_code = main()
  sys.exit(exit_code)