Merge #12062: Increment MIT Licence copyright header year on files modified in 2017
[bitcoinplatinum.git] / contrib / devtools / copyright_header.py
blob445acb6fe84594ea8d31a5e715747d4742f13e00
1 #!/usr/bin/env python3
2 # Copyright (c) 2016-2017 The Bitcoin Core developers
3 # Distributed under the MIT software license, see the accompanying
4 # file COPYING or http://www.opensource.org/licenses/mit-license.php.
6 import re
7 import fnmatch
8 import sys
9 import subprocess
10 import datetime
11 import os
13 ################################################################################
14 # file filtering
15 ################################################################################
# fnmatch-style patterns for tracked files that are never examined, even when
# their extension is listed in INCLUDE.
EXCLUDE = [
    # libsecp256k1:
    'src/secp256k1/include/secp256k1.h',
    'src/secp256k1/include/secp256k1_ecdh.h',
    'src/secp256k1/include/secp256k1_recovery.h',
    'src/secp256k1/include/secp256k1_schnorr.h',
    'src/secp256k1/src/java/org_bitcoin_NativeSecp256k1.c',
    'src/secp256k1/src/java/org_bitcoin_NativeSecp256k1.h',
    'src/secp256k1/src/java/org_bitcoin_Secp256k1Context.c',
    'src/secp256k1/src/java/org_bitcoin_Secp256k1Context.h',
    # auto generated:
    'src/univalue/lib/univalue_escapes.h',
    'src/qt/bitcoinstrings.cpp',
    'src/chainparamsseeds.h',
    # other external copyrights:
    'src/tinyformat.h',
    'src/leveldb/util/env_win.cc',
    'src/crypto/ctaes/bench.c',
    'test/functional/test_framework/bignum.py',
    # python init:
    '*__init__.py',
]
# One alternation regex built from every EXCLUDE pattern.
EXCLUDE_COMPILED = re.compile('|'.join(map(fnmatch.translate, EXCLUDE)))

# fnmatch-style patterns for the file types that are examined.
INCLUDE = ['*.h', '*.cpp', '*.cc', '*.c', '*.py']
# One alternation regex built from every INCLUDE pattern.
INCLUDE_COMPILED = re.compile('|'.join(map(fnmatch.translate, INCLUDE)))


def applies_to_file(filename):
    """Return True when `filename` matches INCLUDE and does not match EXCLUDE."""
    if EXCLUDE_COMPILED.match(filename) is not None:
        return False
    return INCLUDE_COMPILED.match(filename) is not None
48 ################################################################################
49 # obtain list of files in repo according to INCLUDE and EXCLUDE
50 ################################################################################
GIT_LS_CMD = 'git ls-files'


def call_git_ls():
    """Run `git ls-files` in the current directory and return its non-empty
    output lines, decoded as UTF-8."""
    listing = subprocess.check_output(GIT_LS_CMD.split(' ')).decode("utf-8")
    return [name for name in listing.split('\n') if name]
def get_filenames_to_examine():
    """Return the sorted list of git-tracked files accepted by
    applies_to_file()."""
    return sorted(filter(applies_to_file, call_git_ls()))
63 ################################################################################
64 # define and compile regexes for the patterns we are looking for
65 ################################################################################
# Regex fragments for the copyright notice styles the report recognises.
# The "(c)" fragment is a raw string: "\(" inside a normal string literal is an
# invalid escape sequence that newer Python versions warn about.
COPYRIGHT_WITH_C = r'Copyright \(c\)'
COPYRIGHT_WITHOUT_C = 'Copyright'
ANY_COPYRIGHT_STYLE = '(%s|%s)' % (COPYRIGHT_WITH_C, COPYRIGHT_WITHOUT_C)

# A single year, "<year>" or "<start>-<end>", and "<year1>, <year2>, ...".
YEAR = "20[0-9][0-9]"
YEAR_RANGE = '(%s)(-%s)?' % (YEAR, YEAR)
YEAR_LIST = '(%s)(, %s)+' % (YEAR, YEAR)
ANY_YEAR_STYLE = '(%s|%s)' % (YEAR_RANGE, YEAR_LIST)
# Despite the name, this is a copyright prefix followed by a space and a year
# expression (both are required for a match).
ANY_COPYRIGHT_STYLE_OR_YEAR_STYLE = ("%s %s" % (ANY_COPYRIGHT_STYLE,
                                                ANY_YEAR_STYLE))

ANY_COPYRIGHT_COMPILED = re.compile(ANY_COPYRIGHT_STYLE_OR_YEAR_STYLE)
def compile_copyright_regex(copyright_style, year_style, name):
    """Compile the regex "<copyright_style> <year_style> <name>" (the three
    fragments joined by single spaces)."""
    pattern = '{} {} {}'.format(copyright_style, year_style, name)
    return re.compile(pattern)
# Regex fragments for the copyright holder names the report expects to find.
# Each fragment ends with a newline, so the holder name must be the last thing
# on the copyright line.  Regex escapes are written as "\\." and "\\*" because
# "\." and "\*" are invalid escape sequences in a normal string literal; the
# resulting string values are unchanged.
EXPECTED_HOLDER_NAMES = [
    "Satoshi Nakamoto\n",
    "The Bitcoin Core developers\n",
    "The Bitcoin Core developers \n",
    "Bitcoin Core Developers\n",
    "the Bitcoin Core developers\n",
    "The Bitcoin developers\n",
    "The LevelDB Authors\\. All rights reserved\\.\n",
    "BitPay Inc\\.\n",
    "BitPay, Inc\\.\n",
    "University of Illinois at Urbana-Champaign\\.\n",
    "MarcoFalke\n",
    "Pieter Wuille\n",
    "Pieter Wuille +\\*\n",
    "Pieter Wuille, Gregory Maxwell +\\*\n",
    "Pieter Wuille, Andrew Poelstra +\\*\n",
    "Andrew Poelstra +\\*\n",
    "Wladimir J. van der Laan\n",
    "Jeff Garzik\n",
    "Diederik Huys, Pieter Wuille +\\*\n",
    "Thomas Daede, Cory Fields +\\*\n",
    "Jan-Klaas Kollhof\n",
    "Sam Rushing\n",
    "ArtForz -- public domain half-a-node\n",
]
# Per-holder compiled regexes, keyed by the entries of EXPECTED_HOLDER_NAMES:
#   DOMINANT_STYLE_COMPILED  - "Copyright (c)" with a year or year range
#   YEAR_LIST_STYLE_COMPILED - "Copyright (c)" with a comma separated year list
#   WITHOUT_C_STYLE_COMPILED - "Copyright" (no "(c)") with any year style
DOMINANT_STYLE_COMPILED = {}
YEAR_LIST_STYLE_COMPILED = {}
WITHOUT_C_STYLE_COMPILED = {}

for holder_name in EXPECTED_HOLDER_NAMES:
    for _table, _copyright_style, _year_style in (
            (DOMINANT_STYLE_COMPILED, COPYRIGHT_WITH_C, YEAR_RANGE),
            (YEAR_LIST_STYLE_COMPILED, COPYRIGHT_WITH_C, YEAR_LIST),
            (WITHOUT_C_STYLE_COMPILED, COPYRIGHT_WITHOUT_C, ANY_YEAR_STYLE)):
        _table[holder_name] = compile_copyright_regex(
            _copyright_style, _year_style, holder_name)
123 ################################################################################
124 # search file contents for copyright message of particular category
125 ################################################################################
def get_count_of_copyrights_of_any_style_any_holder(contents):
    """Count the non-overlapping ANY_COPYRIGHT_COMPILED matches in `contents`."""
    return sum(1 for _ in ANY_COPYRIGHT_COMPILED.finditer(contents))
def file_has_dominant_style_copyright_for_holder(contents, holder_name):
    """Return True if `contents` holds a dominant-style copyright for
    `holder_name` (a key of DOMINANT_STYLE_COMPILED)."""
    return DOMINANT_STYLE_COMPILED[holder_name].search(contents) is not None
def file_has_year_list_style_copyright_for_holder(contents, holder_name):
    """Return True if `contents` holds a year-list-style copyright for
    `holder_name` (a key of YEAR_LIST_STYLE_COMPILED)."""
    return YEAR_LIST_STYLE_COMPILED[holder_name].search(contents) is not None
def file_has_without_c_style_copyright_for_holder(contents, holder_name):
    """Return True if `contents` holds a copyright without "(c)" for
    `holder_name` (a key of WITHOUT_C_STYLE_COMPILED)."""
    return WITHOUT_C_STYLE_COMPILED[holder_name].search(contents) is not None
142 ################################################################################
143 # get file info
144 ################################################################################
def read_file(filename):
    """Return the whole text of `filename`.

    The file is opened in a `with` block so the handle is closed even if the
    read fails, and it is decoded as UTF-8 explicitly so the result does not
    depend on the locale's default encoding.
    """
    with open(os.path.abspath(filename), 'r', encoding='utf8') as f:
        return f.read()
def gather_file_info(filename):
    """Read `filename` and classify the copyright notices found in it.

    Returns a dict with the keys:
      'filename'              - the name passed in
      'contents'              - the file text
      'all_copyrights'        - count of copyright notices of any style/holder
      'classified_copyrights' - number of expected holders matched in at least
                                one of the three styles
      'dominant_style', 'year_list_style', 'without_c_style'
                              - dicts mapping each expected holder name to a
                                bool for that style
    """
    contents = read_file(filename)
    dominant, year_list, without_c = {}, {}, {}
    classified = 0
    for holder_name in EXPECTED_HOLDER_NAMES:
        dominant[holder_name] = (
            file_has_dominant_style_copyright_for_holder(contents, holder_name))
        year_list[holder_name] = (
            file_has_year_list_style_copyright_for_holder(contents, holder_name))
        without_c[holder_name] = (
            file_has_without_c_style_copyright_for_holder(contents, holder_name))
        if (dominant[holder_name] or year_list[holder_name] or
                without_c[holder_name]):
            classified += 1
    return {
        'filename': filename,
        'contents': contents,
        'all_copyrights':
            get_count_of_copyrights_of_any_style_any_holder(contents),
        'classified_copyrights': classified,
        'dominant_style': dominant,
        'year_list_style': year_list,
        'without_c_style': without_c,
    }
175 ################################################################################
176 # report execution
177 ################################################################################
# Horizontal rule printed between report sections (79 dashes).
SEPARATOR = '-' * 79
def print_filenames(filenames, verbose):
    """Print each filename on its own tab-indented line; print nothing unless
    `verbose` is truthy."""
    if verbose:
        for name in filenames:
            print("\t%s" % name)
def _filenames_where(file_infos, predicate):
    """Return the 'filename' of every file info for which predicate(info) is
    truthy, in input order."""
    return [info['filename'] for info in file_infos if predicate(info)]


def _print_holder_section(file_infos, verbose, style_key):
    """For each expected holder with at least one file flagged under
    `style_key`, print the file count and (when verbose) the filenames."""
    for holder_name in EXPECTED_HOLDER_NAMES:
        matching = _filenames_where(
            file_infos, lambda info: info[style_key][holder_name])
        if matching:
            # Show the trailing newline of the holder pattern as a literal \n.
            print("%4d with '%s'" % (len(matching),
                                     holder_name.replace('\n', '\\n')))
            print_filenames(matching, verbose)


def print_report(file_infos, verbose):
    """Print the full copyright report for `file_infos` (dicts as produced by
    gather_file_info); `verbose` additionally lists the files in each group."""
    print(SEPARATOR)
    examined = [info['filename'] for info in file_infos]
    print("%d files examined according to INCLUDE and EXCLUDE fnmatch rules" %
          len(examined))
    print_filenames(examined, verbose)

    # Files grouped by how many copyright notices of any kind they contain.
    print(SEPARATOR)
    print('')
    count_buckets = (
        ("%4d with zero copyrights", lambda n: n == 0),
        ("%4d with one copyright", lambda n: n == 1),
        ("%4d with two copyrights", lambda n: n == 2),
        ("%4d with three copyrights", lambda n: n == 3),
        ("%4d with four or more copyrights", lambda n: n >= 4),
    )
    for template, in_bucket in count_buckets:
        names = _filenames_where(
            file_infos, lambda info: in_bucket(info['all_copyrights']))
        print(template % len(names))
        print_filenames(names, verbose)
    print('')

    # One section per copyright style, broken down by expected holder.
    style_sections = (
        ('dominant_style',
         'Copyrights with dominant style:\ne.g. "Copyright (c)" and '
         '"<year>" or "<startYear>-<endYear>":\n'),
        ('year_list_style',
         'Copyrights with year list style:\ne.g. "Copyright (c)" and '
         '"<year1>, <year2>, ...":\n'),
        ('without_c_style',
         'Copyrights with no "(c)" style:\ne.g. "Copyright" and "<year>" or '
         '"<startYear>-<endYear>":\n'),
    )
    for style_key, heading in style_sections:
        print(SEPARATOR)
        print(heading)
        _print_holder_section(file_infos, verbose, style_key)
        print('')

    # Files where fewer holders were classified than copyrights were found.
    print(SEPARATOR)
    unclassified = _filenames_where(
        file_infos,
        lambda info: info['classified_copyrights'] < info['all_copyrights'])
    print("%d with unexpected copyright holder names" % len(unclassified))
    print_filenames(unclassified, verbose)
    print(SEPARATOR)
def exec_report(base_directory, verbose):
    """Print the copyright report for the repository at `base_directory`.

    The process temporarily changes into `base_directory` so that git and the
    relative filenames it reports resolve correctly.  The original working
    directory is restored in a `finally` block, so it is put back even when
    listing, reading or reporting raises.
    """
    original_cwd = os.getcwd()
    os.chdir(base_directory)
    try:
        filenames = get_filenames_to_examine()
        file_infos = [gather_file_info(f) for f in filenames]
        print_report(file_infos, verbose)
    finally:
        os.chdir(original_cwd)
268 ################################################################################
269 # report cmd
270 ################################################################################
REPORT_USAGE = """
Produces a report of all copyright header notices found inside the source files
of a repository.

Usage:
$ ./copyright_header.py report <base_directory> [verbose]

Arguments:
<base_directory> - The base directory of a bitcoin source code repository.
[verbose] - Includes a list of every file of each subcategory in the report.
"""


def report_cmd(argv):
    """Handle the `report` subcommand.

    `argv` is the full command line: argv[1] is the subcommand, argv[2] the
    base directory and argv[3] the optional literal 'verbose'.  Exits with the
    usage text when no base directory is given, and with an error message when
    the directory does not exist or the optional argument is not recognised.
    """
    if len(argv) == 2:
        sys.exit(REPORT_USAGE)

    base_directory = argv[2]
    if not os.path.exists(base_directory):
        sys.exit("*** bad <base_directory>: %s" % base_directory)

    if len(argv) == 3:
        verbose = False
    elif argv[3] == 'verbose':
        verbose = True
    else:
        # Report the unrecognised argument itself (argv[3]), not the base
        # directory in argv[2].
        sys.exit("*** unknown argument: %s" % argv[3])

    exec_report(base_directory, verbose)
301 ################################################################################
302 # query git for year of last change
303 ################################################################################
# Kept for backward compatibility; call_git_log() no longer formats and splits
# this template because that breaks on filenames containing spaces.
GIT_LOG_CMD = "git log --pretty=format:%%ai %s"
# Fixed leading arguments of the git log invocation (author date, ISO-like).
GIT_LOG_ARGS = ['git', 'log', '--pretty=format:%ai']


def call_git_log(filename):
    """Return the author-date lines that `git log` prints for `filename`.

    The command is built as an argument list, so a filename containing spaces
    is passed to git as a single argument.  "--" separates the path from
    revisions, so a file whose name looks like a revision is still treated as
    a path.  The output is decoded as UTF-8 and split on newlines; empty
    output therefore yields [''].
    """
    out = subprocess.check_output(GIT_LOG_ARGS + ['--', filename])
    return out.decode("utf-8").split('\n')
def get_git_change_years(filename):
    """Return the year (as a 4-character string) of every commit git reports
    for `filename`.

    call_git_log() returns [''] when git prints nothing (e.g. for a file with
    no history), so blank lines are dropped before checking for "no history".
    In that case the current year is returned, as a string so that both paths
    give the same element type.
    """
    git_log_lines = [line for line in call_git_log(filename) if line]
    if not git_log_lines:
        return [str(datetime.date.today().year)]
    # timestamp is in ISO 8601 format. e.g. "2016-09-05 14:25:32 -0600"
    return [line.split(' ')[0].split('-')[0] for line in git_log_lines]
def get_most_recent_git_change_year(filename):
    """Return the largest year reported by get_git_change_years(filename)."""
    change_years = get_git_change_years(filename)
    return max(change_years)
321 ################################################################################
322 # read and write to file
323 ################################################################################
def read_file_lines(filename):
    """Return the lines of `filename` as a list, line endings included.

    The file is opened in a `with` block so the handle is closed even if
    reading raises.
    """
    with open(os.path.abspath(filename), 'r') as f:
        return f.readlines()
def write_file_lines(filename, file_lines):
    """Overwrite `filename` with the concatenation of `file_lines`.

    Lines are written as given (no separators are added).  The file is opened
    in a `with` block so the handle is closed, and buffered data flushed, even
    if writing raises.
    """
    with open(os.path.abspath(filename), 'w') as f:
        f.write(''.join(file_lines))
336 ################################################################################
337 # update header years execution
338 ################################################################################
# Pattern for the only notice the `update` subcommand rewrites:
# "Copyright (c) <year>[-<year>] The Bitcoin Core developers".
# COPYRIGHT is a raw string because "\(" is an invalid escape sequence in a
# normal string literal.
COPYRIGHT = r'Copyright \(c\)'
YEAR = "20[0-9][0-9]"
YEAR_RANGE = '(%s)(-%s)?' % (YEAR, YEAR)
HOLDER = 'The Bitcoin Core developers'
UPDATEABLE_LINE_COMPILED = re.compile(' '.join([COPYRIGHT, YEAR_RANGE, HOLDER]))


def get_updatable_copyright_line(file_lines):
    """Find the first line containing an updatable Bitcoin Core copyright.

    Returns (index, line) for the first match in `file_lines`, or
    (None, None) when no line matches.
    """
    for index, line in enumerate(file_lines):
        if UPDATEABLE_LINE_COMPILED.search(line) is not None:
            return index, line
    return None, None
def parse_year_range(year_range):
    """Split "<start>-<end>" into (start, end); a single year "<y>" gives
    (y, y).  Years are returned as strings."""
    pieces = year_range.split('-')
    start_year = pieces[0]
    end_year = pieces[1] if len(pieces) > 1 else start_year
    return start_year, end_year


def year_range_to_str(start_year, end_year):
    """Return `start_year` unchanged when both years are equal, otherwise
    the string "<start_year>-<end_year>"."""
    if start_year == end_year:
        return start_year
    return "%s-%s" % (start_year, end_year)


def create_updated_copyright_line(line, last_git_change_year):
    """Return `line` with the end of its copyright year range moved forward
    to `last_git_change_year`.

    `line` is expected to contain "Copyright (c) <year or range> ...".  The
    line is returned unchanged when it has no such notice, or when
    `last_git_change_year` is not later than the year already listed: the
    header is only ever extended, never moved backwards or blanked by an
    empty year.
    """
    copyright_splitter = 'Copyright (c) '
    # Preserve characters on line that are ahead of the start of the copyright
    # notice - they are part of the comment block and vary from file-to-file.
    # partition() also keeps everything after the first notice intact, even if
    # the splitter text occurs again later on the line.
    before_copyright, found, after_copyright = line.partition(copyright_splitter)
    if not found:
        return line

    year_range, _, rest_of_line = after_copyright.partition(' ')
    start_year, end_year = parse_year_range(year_range)
    new_end_year = str(last_git_change_year)
    # Years are 4-digit strings, so string comparison orders them correctly;
    # an empty year (no git history) compares lowest and is ignored.
    if new_end_year <= end_year:
        return line
    return (before_copyright + copyright_splitter +
            year_range_to_str(start_year, new_end_year) + ' ' +
            rest_of_line)
def update_updatable_copyright(filename):
    """Bring the Bitcoin Core copyright line of `filename` up to date with its
    most recent git change year, and print what was done.

    The file is rewritten only when the copyright line actually changes.
    """
    lines = read_file_lines(filename)
    position, current = get_updatable_copyright_line(lines)
    if not current:
        print_file_action_message(filename, "No updatable copyright.")
        return

    change_year = get_most_recent_git_change_year(filename)
    replacement = create_updated_copyright_line(current, change_year)
    if replacement == current:
        print_file_action_message(filename, "Copyright up-to-date.")
        return

    lines[position] = replacement
    write_file_lines(filename, lines)
    print_file_action_message(filename,
                              "Copyright updated! -> %s" % change_year)
def exec_update_header_year(base_directory):
    """Update the copyright year of every examined file in the repository at
    `base_directory`.

    The process temporarily changes into `base_directory`; the original
    working directory is restored in a `finally` block, so it is put back even
    when processing a file raises.
    """
    original_cwd = os.getcwd()
    os.chdir(base_directory)
    try:
        for filename in get_filenames_to_examine():
            update_updatable_copyright(filename)
    finally:
        os.chdir(original_cwd)
406 ################################################################################
407 # update cmd
408 ################################################################################
UPDATE_USAGE = """
Updates all the copyright headers of "The Bitcoin Core developers" which were
changed in a year more recent than is listed. For example:

// Copyright (c) <firstYear>-<lastYear> The Bitcoin Core developers

will be updated to:

// Copyright (c) <firstYear>-<lastModifiedYear> The Bitcoin Core developers

where <lastModifiedYear> is obtained from the 'git log' history.

This subcommand also handles copyright headers that have only a single year. In those cases:

// Copyright (c) <year> The Bitcoin Core developers

will be updated to:

// Copyright (c) <year>-<lastModifiedYear> The Bitcoin Core developers

where the update is appropriate.

Usage:
$ ./copyright_header.py update <base_directory>

Arguments:
<base_directory> - The base directory of a bitcoin source code repository.
"""


def print_file_action_message(filename, action):
    """Print `filename` left-aligned in a 52-character column, followed by a
    space and `action`."""
    print('{:<52} {}'.format(filename, action))


def update_cmd(argv):
    """Handle the `update` subcommand.

    `argv` is the full command line and must have exactly three entries, with
    argv[2] naming an existing base directory; otherwise the process exits
    with the usage text or an error message.
    """
    if len(argv) != 3:
        sys.exit(UPDATE_USAGE)

    base_directory = argv[2]
    if os.path.exists(base_directory):
        exec_update_header_year(base_directory)
    else:
        sys.exit("*** bad base_directory: %s" % base_directory)
451 ################################################################################
452 # inserted copyright header format
453 ################################################################################
def get_header_lines(header, start_year, end_year):
    """Turn a header template into a list of newline-terminated lines.

    The first and last pieces of `header` split on newlines are discarded (the
    templates start and end with a newline), and the "%s" on the first
    remaining line is filled with the year range.
    """
    template_lines = header.split('\n')[1:-1]
    years = year_range_to_str(start_year, end_year)
    template_lines[0] = template_lines[0] % years
    return [text + '\n' for text in template_lines]
# Header template for C/C++ files; "%s" receives the year range.
CPP_HEADER = '''
// Copyright (c) %s The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
'''


def get_cpp_header_lines_to_insert(start_year, end_year):
    """Return an iterator over the C++ header lines in reverse order, ready to
    be inserted one at a time at a fixed index."""
    header_lines = get_header_lines(CPP_HEADER, start_year, end_year)
    return reversed(header_lines)
# Header template for Python files; "%s" receives the year range.
PYTHON_HEADER = '''
# Copyright (c) %s The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
'''


def get_python_header_lines_to_insert(start_year, end_year):
    """Return an iterator over the Python header lines in reverse order, ready
    to be inserted one at a time at a fixed index."""
    header_lines = get_header_lines(PYTHON_HEADER, start_year, end_year)
    return reversed(header_lines)
478 ################################################################################
479 # query git for year of last change
480 ################################################################################
def get_git_change_year_range(filename):
    """Return (earliest, latest) of the years reported by
    get_git_change_years(filename)."""
    change_years = get_git_change_years(filename)
    earliest = min(change_years)
    latest = max(change_years)
    return earliest, latest
486 ################################################################################
487 # check for existing core copyright
488 ################################################################################
def file_already_has_core_copyright(file_lines):
    """Return True when `file_lines` already contains an updatable Bitcoin
    Core copyright line."""
    index, _ = get_updatable_copyright_line(file_lines)
    # Identity comparison: index 0 is a valid hit and None means "not found".
    return index is not None
494 ################################################################################
495 # insert header execution
496 ################################################################################
def file_has_hashbang(file_lines):
    """Return True when the first line is longer than two characters and
    starts with '#!'; False for an empty file or a shorter first line."""
    if not len(file_lines) >= 1:
        return False
    first_line = file_lines[0]
    return len(first_line) > 2 and first_line[:2] == '#!'
def insert_python_header(filename, file_lines, start_year, end_year):
    """Insert the Python copyright header into `file_lines` (below a '#!'
    first line if there is one, otherwise at the top) and write the result to
    `filename`.  `file_lines` is modified in place."""
    insert_idx = 1 if file_has_hashbang(file_lines) else 0
    # The header lines arrive reversed, so inserting each at the same index
    # leaves them in their original order.
    for header_line in get_python_header_lines_to_insert(start_year, end_year):
        file_lines.insert(insert_idx, header_line)
    write_file_lines(filename, file_lines)
def insert_cpp_header(filename, file_lines, start_year, end_year):
    """Insert the C++ copyright header at the top of `file_lines` and write
    the result to `filename`.  `file_lines` is modified in place."""
    # The header lines arrive reversed, so inserting each at index 0 leaves
    # them in their original order.
    for header_line in get_cpp_header_lines_to_insert(start_year, end_year):
        file_lines.insert(0, header_line)
    write_file_lines(filename, file_lines)
def exec_insert_header(filename, style):
    """Insert a Bitcoin Core copyright header into `filename`.

    Exits with an error if the file already has such a copyright.  The year
    range comes from the file's git history; `style` 'python' selects the
    Python header and any other value selects the C++ header.
    """
    file_lines = read_file_lines(filename)
    if file_already_has_core_copyright(file_lines):
        sys.exit('*** %s already has a copyright by The Bitcoin Core developers'
                 % (filename))
    start_year, end_year = get_git_change_year_range(filename)
    inserter = insert_python_header if style == 'python' else insert_cpp_header
    inserter(filename, file_lines, start_year, end_year)
532 ################################################################################
533 # insert cmd
534 ################################################################################
INSERT_USAGE = """
Inserts a copyright header for "The Bitcoin Core developers" at the top of the
file in either Python or C++ style as determined by the file extension. If the
file is a Python file and it has a '#!' starting the first line, the header is
inserted in the line below it.

The copyright dates will be set to be:

"<year_introduced>-<current_year>"

where <year_introduced> is according to the 'git log' history. If
<year_introduced> is equal to <current_year>, the date will be set to be:

"<current_year>"

If the file already has a copyright for "The Bitcoin Core developers", the
script will exit.

Usage:
$ ./copyright_header.py insert <file>

Arguments:
<file> - A source file in the bitcoin repository.
"""


def insert_cmd(argv):
    """Handle the `insert` subcommand.

    `argv` is the full command line and must have exactly three entries, with
    argv[2] naming an existing file whose extension is one of .h, .cpp, .cc,
    .c or .py; otherwise the process exits with the usage text or an error
    message.  '.py' files get the Python header, all others the C++ header.
    """
    if len(argv) != 3:
        sys.exit(INSERT_USAGE)

    filename = argv[2]
    if not os.path.isfile(filename):
        sys.exit("*** bad filename: %s" % filename)

    extension = os.path.splitext(filename)[1]
    if extension not in ('.h', '.cpp', '.cc', '.c', '.py'):
        sys.exit("*** cannot insert for file extension %s" % extension)

    style = 'python' if extension == '.py' else 'cpp'
    exec_insert_header(filename, style)
578 ################################################################################
579 # UI
580 ################################################################################
USAGE = """
copyright_header.py - utilities for managing copyright headers of 'The Bitcoin
Core developers' in repository source files.

Usage:
$ ./copyright_header <subcommand>

Subcommands:
report
update
insert

To see subcommand usage, run them without arguments.
"""

SUBCOMMANDS = ['report', 'update', 'insert']

if __name__ == "__main__":
    # A missing or unknown subcommand prints the usage text and exits.
    if len(sys.argv) == 1 or sys.argv[1] not in SUBCOMMANDS:
        sys.exit(USAGE)
    # Each handler receives the complete argv and does its own validation.
    handlers = {
        'report': report_cmd,
        'update': update_cmd,
        'insert': insert_cmd,
    }
    handlers[sys.argv[1]](sys.argv)