[scripts] Add missing univalue file to copyright_header.py
[bitcoinplatinum.git] / contrib / devtools / copyright_header.py
blob6ee19eae1603419acff3eb5385b9f12ea9c75321
1 #!/usr/bin/env python3
2 # Copyright (c) 2016 The Bitcoin Core developers
3 # Distributed under the MIT software license, see the accompanying
4 # file COPYING or http://www.opensource.org/licenses/mit-license.php.
6 import re
7 import fnmatch
8 import sys
9 import subprocess
10 import datetime
11 import os
13 ################################################################################
14 # file filtering
15 ################################################################################
# fnmatch patterns for files that are never examined, grouped by the reason
# they are skipped.
EXCLUDE = [
    # libsecp256k1:
    'src/secp256k1/include/secp256k1.h',
    'src/secp256k1/include/secp256k1_ecdh.h',
    'src/secp256k1/include/secp256k1_recovery.h',
    'src/secp256k1/include/secp256k1_schnorr.h',
    'src/secp256k1/src/java/org_bitcoin_NativeSecp256k1.c',
    'src/secp256k1/src/java/org_bitcoin_NativeSecp256k1.h',
    'src/secp256k1/src/java/org_bitcoin_Secp256k1Context.c',
    'src/secp256k1/src/java/org_bitcoin_Secp256k1Context.h',
    # univalue:
    'src/univalue/test/object.cpp',
    'src/univalue/lib/univalue_escapes.h',
    # auto generated:
    'src/qt/bitcoinstrings.cpp',
    'src/chainparamsseeds.h',
    # other external copyrights:
    'src/tinyformat.h',
    'src/leveldb/util/env_win.cc',
    'src/crypto/ctaes/bench.c',
    'test/functional/test_framework/bignum.py',
    # python init:
    '*__init__.py',
]
# One alternation over every translated pattern, so a single match() call
# decides whether a path is excluded.
EXCLUDE_COMPILED = re.compile(
    '|'.join(fnmatch.translate(pattern) for pattern in EXCLUDE))
# fnmatch patterns selecting the source file types that are examined.
INCLUDE = ['*.h', '*.cpp', '*.cc', '*.c', '*.py']
# One alternation over every translated INCLUDE pattern.
INCLUDE_COMPILED = re.compile('|'.join(map(fnmatch.translate, INCLUDE)))
def applies_to_file(filename):
    """Return True when `filename` matches INCLUDE and does not match EXCLUDE."""
    if EXCLUDE_COMPILED.match(filename) is not None:
        return False
    return INCLUDE_COMPILED.match(filename) is not None
50 ################################################################################
51 # obtain list of files in repo according to INCLUDE and EXCLUDE
52 ################################################################################
GIT_LS_CMD = 'git ls-files'


def call_git_ls():
    """Run GIT_LS_CMD in the current directory and return its non-empty output lines."""
    raw_listing = subprocess.check_output(GIT_LS_CMD.split(' '))
    listing = raw_listing.decode("utf-8")
    # The output ends with a newline, so split() yields a trailing empty entry.
    return [entry for entry in listing.split('\n') if entry != '']
def get_filenames_to_examine():
    """Return the sorted git-listed filenames that pass applies_to_file()."""
    candidates = filter(applies_to_file, call_git_ls())
    return sorted(candidates)
65 ################################################################################
66 # define and compile regexes for the patterns we are looking for
67 ################################################################################
# Regex fragments describing the copyright notice styles the report looks for.
# Raw string: '\(' is a regex escape, not a Python string escape.
COPYRIGHT_WITH_C = r'Copyright \(c\)'
COPYRIGHT_WITHOUT_C = 'Copyright'
ANY_COPYRIGHT_STYLE = '(%s|%s)' % (COPYRIGHT_WITH_C, COPYRIGHT_WITHOUT_C)

YEAR = "20[0-9][0-9]"
# A single year, or "<start>-<end>".
YEAR_RANGE = '(%s)(-%s)?' % (YEAR, YEAR)
# Two or more years separated by ", ".
YEAR_LIST = '(%s)(, %s)+' % (YEAR, YEAR)
ANY_YEAR_STYLE = '(%s|%s)' % (YEAR_RANGE, YEAR_LIST)
ANY_COPYRIGHT_STYLE_OR_YEAR_STYLE = ("%s %s" % (ANY_COPYRIGHT_STYLE,
                                                ANY_YEAR_STYLE))

# Matches any copyright notice regardless of the holder name that follows.
ANY_COPYRIGHT_COMPILED = re.compile(ANY_COPYRIGHT_STYLE_OR_YEAR_STYLE)
def compile_copyright_regex(copyright_style, year_style, name):
    """Compile the regex "<copyright_style> <year_style> <name>" (space separated)."""
    pattern = '{} {} {}'.format(copyright_style, year_style, name)
    return re.compile(pattern)
# Regex fragments for the copyright holders the report knows about. Each entry
# ends with a real newline so that it only matches at the end of a line; the
# same strings are used as dictionary keys and printed in the report.
# Regex escapes are written as '\\.' and '\\*' because these are not raw
# strings (the trailing '\n' must stay a real newline).
# NOTE(review): the '.' in "Wladimir J. van der Laan" is unescaped and so
# matches any character; left as-is to keep the key unchanged.
EXPECTED_HOLDER_NAMES = [
    "Satoshi Nakamoto\n",
    "The Bitcoin Core developers\n",
    "The Bitcoin Core developers \n",
    "Bitcoin Core Developers\n",
    "the Bitcoin Core developers\n",
    "The Bitcoin developers\n",
    "The LevelDB Authors\\. All rights reserved\\.\n",
    "BitPay Inc\\.\n",
    "BitPay, Inc\\.\n",
    "University of Illinois at Urbana-Champaign\\.\n",
    "MarcoFalke\n",
    "Pieter Wuille\n",
    "Pieter Wuille +\\*\n",
    "Pieter Wuille, Gregory Maxwell +\\*\n",
    "Pieter Wuille, Andrew Poelstra +\\*\n",
    "Andrew Poelstra +\\*\n",
    "Wladimir J. van der Laan\n",
    "Jeff Garzik\n",
    "Diederik Huys, Pieter Wuille +\\*\n",
    "Thomas Daede, Cory Fields +\\*\n",
    "Jan-Klaas Kollhof\n",
    "Sam Rushing\n",
    "ArtForz -- public domain half-a-node\n",
]
# Per-holder compiled regexes, keyed by the entry from EXPECTED_HOLDER_NAMES.

# "Copyright (c)" followed by a single year or a year range.
DOMINANT_STYLE_COMPILED = {
    holder: compile_copyright_regex(COPYRIGHT_WITH_C, YEAR_RANGE, holder)
    for holder in EXPECTED_HOLDER_NAMES
}
# "Copyright (c)" followed by a comma-separated list of years.
YEAR_LIST_STYLE_COMPILED = {
    holder: compile_copyright_regex(COPYRIGHT_WITH_C, YEAR_LIST, holder)
    for holder in EXPECTED_HOLDER_NAMES
}
# "Copyright" without "(c)", followed by any year style.
WITHOUT_C_STYLE_COMPILED = {
    holder: compile_copyright_regex(COPYRIGHT_WITHOUT_C, ANY_YEAR_STYLE,
                                    holder)
    for holder in EXPECTED_HOLDER_NAMES
}
125 ################################################################################
126 # search file contents for copyright message of particular category
127 ################################################################################
def get_count_of_copyrights_of_any_style_any_holder(contents):
    """Count the non-overlapping matches of ANY_COPYRIGHT_COMPILED in `contents`."""
    return sum(1 for _ in ANY_COPYRIGHT_COMPILED.finditer(contents))
def file_has_dominant_style_copyright_for_holder(contents, holder_name):
    """Return True if `contents` has a dominant-style notice for `holder_name`."""
    holder_regex = DOMINANT_STYLE_COMPILED[holder_name]
    return holder_regex.search(contents) is not None
def file_has_year_list_style_copyright_for_holder(contents, holder_name):
    """Return True if `contents` has a year-list-style notice for `holder_name`."""
    holder_regex = YEAR_LIST_STYLE_COMPILED[holder_name]
    return holder_regex.search(contents) is not None
def file_has_without_c_style_copyright_for_holder(contents, holder_name):
    """Return True if `contents` has a notice without "(c)" for `holder_name`."""
    holder_regex = WITHOUT_C_STYLE_COMPILED[holder_name]
    return holder_regex.search(contents) is not None
144 ################################################################################
145 # get file info
146 ################################################################################
def read_file(filename):
    """Return the full text contents of `filename`.

    The file is opened in text mode and closed before returning.
    """
    with open(os.path.abspath(filename), 'r') as f:
        return f.read()
def gather_file_info(filename):
    """Read `filename` and classify the copyright notices it contains.

    Returns a dict with the keys 'filename', 'contents', 'all_copyrights'
    (count of notices of any style), 'classified_copyrights' (number of
    expected holders matched in at least one style) and, per style
    ('dominant_style', 'year_list_style', 'without_c_style'), a dict mapping
    each expected holder name to a bool.
    """
    contents = read_file(filename)
    total_copyrights = get_count_of_copyrights_of_any_style_any_holder(contents)

    dominant = {}
    year_list = {}
    without_c = {}
    classified = 0
    for holder_name in EXPECTED_HOLDER_NAMES:
        dominant[holder_name] = (
            file_has_dominant_style_copyright_for_holder(contents, holder_name))
        year_list[holder_name] = (
            file_has_year_list_style_copyright_for_holder(contents, holder_name))
        without_c[holder_name] = (
            file_has_without_c_style_copyright_for_holder(contents, holder_name))
        # A holder counts once, however many styles matched.
        if (dominant[holder_name] or year_list[holder_name] or
                without_c[holder_name]):
            classified += 1

    return {
        'filename': filename,
        'contents': contents,
        'all_copyrights': total_copyrights,
        'classified_copyrights': classified,
        'dominant_style': dominant,
        'year_list_style': year_list,
        'without_c_style': without_c,
    }
177 ################################################################################
178 # report execution
179 ################################################################################
# Horizontal rule printed between report sections (79 dashes).
SEPARATOR = '-' * 79
def print_filenames(filenames, verbose):
    """Print each filename on its own tab-indented line, but only when `verbose` is truthy."""
    if verbose:
        for name in filenames:
            print("\t%s" % name)
def print_report(file_infos, verbose):
    """Print the copyright report for `file_infos` to stdout.

    `file_infos` is a list of dicts as produced by gather_file_info(). The
    report lists, in order: the examined files, files bucketed by number of
    copyright notices, per-holder counts for each notice style, and files
    with more notices than classified holders. When `verbose` is truthy each
    count is followed by the filenames it covers.
    """
    def filenames_where(keep):
        # Filenames of the infos for which keep(info) is truthy.
        return [info['filename'] for info in file_infos if keep(info)]

    def report_count(fmt, keep):
        # Print one "<count> ..." line followed by the (verbose) file list.
        selected = filenames_where(keep)
        print(fmt % len(selected))
        print_filenames(selected, verbose)

    def report_holders(style_key):
        # One line per expected holder that has at least one matching file.
        for holder_name in EXPECTED_HOLDER_NAMES:
            selected = filenames_where(
                lambda info: info[style_key][holder_name])
            if selected:
                print("%4d with '%s'" % (len(selected),
                                         holder_name.replace('\n', '\\n')))
                print_filenames(selected, verbose)

    print(SEPARATOR)
    report_count(
        "%d files examined according to INCLUDE and EXCLUDE fnmatch rules",
        lambda info: True)

    print(SEPARATOR)
    print('')
    report_count("%4d with zero copyrights",
                 lambda info: info['all_copyrights'] == 0)
    report_count("%4d with one copyright",
                 lambda info: info['all_copyrights'] == 1)
    report_count("%4d with two copyrights",
                 lambda info: info['all_copyrights'] == 2)
    report_count("%4d with three copyrights",
                 lambda info: info['all_copyrights'] == 3)
    report_count("%4d with four or more copyrights",
                 lambda info: info['all_copyrights'] >= 4)
    print('')
    print(SEPARATOR)
    print('Copyrights with dominant style:\ne.g. "Copyright (c)" and '
          '"<year>" or "<startYear>-<endYear>":\n')
    report_holders('dominant_style')
    print('')
    print(SEPARATOR)
    print('Copyrights with year list style:\ne.g. "Copyright (c)" and '
          '"<year1>, <year2>, ...":\n')
    report_holders('year_list_style')
    print('')
    print(SEPARATOR)
    print('Copyrights with no "(c)" style:\ne.g. "Copyright" and "<year>" or '
          '"<startYear>-<endYear>":\n')
    report_holders('without_c_style')

    print('')
    print(SEPARATOR)

    report_count(
        "%d with unexpected copyright holder names",
        lambda info: info['classified_copyrights'] < info['all_copyrights'])
    print(SEPARATOR)
def exec_report(base_directory, verbose):
    """Generate and print the copyright report for the repository at `base_directory`.

    The process working directory is switched to `base_directory` while the
    report runs and is restored afterwards, even if an error occurs.
    """
    original_cwd = os.getcwd()
    os.chdir(base_directory)
    try:
        filenames = get_filenames_to_examine()
        file_infos = [gather_file_info(f) for f in filenames]
        print_report(file_infos, verbose)
    finally:
        os.chdir(original_cwd)
270 ################################################################################
271 # report cmd
272 ################################################################################
# Help text for the 'report' subcommand.
REPORT_USAGE = (
    "\n"
    "Produces a report of all copyright header notices found inside the source files\n"
    "of a repository.\n"
    "\n"
    "Usage:\n"
    "    $ ./copyright_header.py report <base_directory> [verbose]\n"
    "\n"
    "Arguments:\n"
    "    <base_directory> - The base directory of a bitcoin source code repository.\n"
    "    [verbose] - Includes a list of every file of each subcategory in the report.\n"
)
def report_cmd(argv):
    """Handle the 'report' subcommand.

    `argv` is the full command line: argv[2] is the base directory and the
    optional argv[3] must be the literal 'verbose'. Exits with the usage text
    when no directory is given, and with an error message for a non-existent
    directory or an unrecognised optional argument.
    """
    if len(argv) == 2:
        sys.exit(REPORT_USAGE)

    base_directory = argv[2]
    if not os.path.exists(base_directory):
        sys.exit("*** bad <base_directory>: %s" % base_directory)

    if len(argv) == 3:
        verbose = False
    elif argv[3] == 'verbose':
        verbose = True
    else:
        # Name the offending optional argument (argv[3]), not the directory.
        sys.exit("*** unknown argument: %s" % argv[3])

    exec_report(base_directory, verbose)
303 ################################################################################
304 # query git for year of last change
305 ################################################################################
# Command template; '%%ai' becomes git's '%ai' (author date, ISO 8601-like)
# and the lone '%s' token is replaced by the filename.
GIT_LOG_CMD = "git log --pretty=format:%%ai %s"


def call_git_log(filename):
    """Run 'git log' for `filename` and return its output split on newlines.

    Each element is one author-date line; empty output yields [''].
    """
    # Split the template before substituting so that a filename containing
    # spaces is still passed to git as a single argument.
    args = [filename if token == '%s' else token % ()
            for token in GIT_LOG_CMD.split(' ')]
    out = subprocess.check_output(args)
    return out.decode("utf-8").split('\n')
def get_git_change_years(filename):
    """Return the year (as a string) of every git commit that touched `filename`.

    If git reports no history for the file, a one-element list holding the
    current year is returned instead.
    """
    # ''.split('\n') is [''], so blank lines must be dropped before testing
    # for "no history"; otherwise the fallback below is unreachable.
    git_log_lines = [line for line in call_git_log(filename) if line.strip()]
    if not git_log_lines:
        # Years are strings everywhere else, so keep the fallback a string.
        return [str(datetime.date.today().year)]
    # timestamp is in ISO 8601 format. e.g. "2016-09-05 14:25:32 -0600"
    return [line.split(' ')[0].split('-')[0] for line in git_log_lines]
def get_most_recent_git_change_year(filename):
    """Return the largest year reported by get_git_change_years() for `filename`."""
    change_years = get_git_change_years(filename)
    return max(change_years)
323 ################################################################################
324 # read and write to file
325 ################################################################################
def read_file_lines(filename):
    """Return the lines of `filename` as a list, each keeping its line ending.

    The file is closed before returning, including when reading fails.
    """
    with open(os.path.abspath(filename), 'r') as f:
        return f.readlines()
def write_file_lines(filename, file_lines):
    """Overwrite `filename` with the concatenation of `file_lines`.

    No separators are added, so each element must carry its own line ending.
    The file is closed before returning, including when writing fails.
    """
    with open(os.path.abspath(filename), 'w') as f:
        f.write(''.join(file_lines))
338 ################################################################################
339 # update header years execution
340 ################################################################################
# Pattern for the only notice the 'update' subcommand rewrites:
# "Copyright (c) <year>[-<year>] The Bitcoin Core developers".
# Raw string: '\(' is a regex escape, not a Python string escape.
COPYRIGHT = r'Copyright \(c\)'
YEAR = "20[0-9][0-9]"
YEAR_RANGE = '(%s)(-%s)?' % (YEAR, YEAR)
HOLDER = 'The Bitcoin Core developers'
UPDATEABLE_LINE_COMPILED = re.compile(' '.join([COPYRIGHT, YEAR_RANGE, HOLDER]))
def get_updatable_copyright_line(file_lines):
    """Find the first line matching UPDATEABLE_LINE_COMPILED.

    Returns (index, line) for the first match, or (None, None) if no line
    matches.
    """
    for position, text in enumerate(file_lines):
        if UPDATEABLE_LINE_COMPILED.search(text) is not None:
            return position, text
    return None, None
def parse_year_range(year_range):
    """Split "<start>-<end>" into (start, end); a lone year yields (year, year)."""
    parts = year_range.split('-')
    first = parts[0]
    last = first if len(parts) == 1 else parts[1]
    return first, last
def year_range_to_str(start_year, end_year):
    """Return `start_year` itself when both years are equal, else "<start>-<end>"."""
    return start_year if start_year == end_year else "%s-%s" % (start_year, end_year)
def create_updated_copyright_line(line, last_git_change_year):
    """Return `line` with the end year of its notice set to `last_git_change_year`.

    `line` must contain 'Copyright (c) ' followed by a year or year range.
    The line is returned unchanged when its end year already equals
    `last_git_change_year`.
    """
    copyright_splitter = 'Copyright (c) '
    pieces = line.split(copyright_splitter)
    # Whatever precedes the notice is comment syntax that differs per file
    # and must be kept as-is.
    before_copyright, after_copyright = pieces[0], pieces[1]

    words = after_copyright.split(' ')
    start_year, end_year = parse_year_range(words[0])
    if end_year == last_git_change_year:
        return line
    new_range = year_range_to_str(start_year, last_git_change_year)
    return ''.join([before_copyright, copyright_splitter, new_range, ' ',
                    ' '.join(words[1:])])
def update_updatable_copyright(filename):
    """Bring the updatable copyright line of `filename` up to its latest git change year.

    Prints one status line per file; the file is rewritten only when the
    copyright line actually changes.
    """
    lines = read_file_lines(filename)
    position, current = get_updatable_copyright_line(lines)
    if not current:
        print_file_action_message(filename, "No updatable copyright.")
        return

    latest_year = get_most_recent_git_change_year(filename)
    replacement = create_updated_copyright_line(current, latest_year)
    if replacement == current:
        print_file_action_message(filename, "Copyright up-to-date.")
        return

    lines[position] = replacement
    write_file_lines(filename, lines)
    print_file_action_message(filename,
                              "Copyright updated! -> %s" % latest_year)
def exec_update_header_year(base_directory):
    """Update the copyright year of every examined file under `base_directory`.

    The process working directory is switched to `base_directory` while the
    update runs and is restored afterwards, even if an error occurs.
    """
    original_cwd = os.getcwd()
    os.chdir(base_directory)
    try:
        for filename in get_filenames_to_examine():
            update_updatable_copyright(filename)
    finally:
        os.chdir(original_cwd)
408 ################################################################################
409 # update cmd
410 ################################################################################
# Help text for the 'update' subcommand.
UPDATE_USAGE = (
    "\n"
    'Updates all the copyright headers of "The Bitcoin Core developers" which were\n'
    "changed in a year more recent than is listed. For example:\n"
    "\n"
    "// Copyright (c) <firstYear>-<lastYear> The Bitcoin Core developers\n"
    "\n"
    "will be updated to:\n"
    "\n"
    "// Copyright (c) <firstYear>-<lastModifiedYear> The Bitcoin Core developers\n"
    "\n"
    "where <lastModifiedYear> is obtained from the 'git log' history.\n"
    "\n"
    "This subcommand also handles copyright headers that have only a single year. In those cases:\n"
    "\n"
    "// Copyright (c) <year> The Bitcoin Core developers\n"
    "\n"
    "will be updated to:\n"
    "\n"
    "// Copyright (c) <year>-<lastModifiedYear> The Bitcoin Core developers\n"
    "\n"
    "where the update is appropriate.\n"
    "\n"
    "Usage:\n"
    "    $ ./copyright_header.py update <base_directory>\n"
    "\n"
    "Arguments:\n"
    "    <base_directory> - The base directory of a bitcoin source code repository.\n"
)
def print_file_action_message(filename, action):
    """Print `filename` left-justified in a 52-character column, followed by `action`."""
    message = "%-52s %s" % (filename, action)
    print(message)
def update_cmd(argv):
    """Handle the 'update' subcommand.

    `argv` is the full command line and must have exactly three elements,
    with argv[2] naming the base directory. Exits with the usage text for
    any other argument count, and with an error for a non-existent directory.
    """
    if len(argv) != 3:
        sys.exit(UPDATE_USAGE)

    base_directory = argv[2]
    if os.path.exists(base_directory):
        exec_update_header_year(base_directory)
    else:
        sys.exit("*** bad base_directory: %s" % base_directory)
453 ################################################################################
454 # inserted copyright header format
455 ################################################################################
def get_header_lines(header, start_year, end_year):
    """Render a header template into a list of newline-terminated lines.

    The first and last pieces of `header` split on newlines are discarded;
    the first remaining line must contain one '%s', which is filled with the
    year or year range.
    """
    body = header.split('\n')[1:-1]
    first, rest = body[0], body[1:]
    stamped = [first % year_range_to_str(start_year, end_year)] + rest
    return [text + '\n' for text in stamped]
# Header template for C/C++ sources; '%s' receives the year or year range.
CPP_HEADER = (
    "\n"
    "// Copyright (c) %s The Bitcoin Core developers\n"
    "// Distributed under the MIT software license, see the accompanying\n"
    "// file COPYING or http://www.opensource.org/licenses/mit-license.php.\n"
)


def get_cpp_header_lines_to_insert(start_year, end_year):
    """Return the rendered C++ header lines as a reversed (last-line-first) iterator."""
    rendered = get_header_lines(CPP_HEADER, start_year, end_year)
    return reversed(rendered)
# Header template for Python sources; '%s' receives the year or year range.
PYTHON_HEADER = (
    "\n"
    "# Copyright (c) %s The Bitcoin Core developers\n"
    "# Distributed under the MIT software license, see the accompanying\n"
    "# file COPYING or http://www.opensource.org/licenses/mit-license.php.\n"
)


def get_python_header_lines_to_insert(start_year, end_year):
    """Return the rendered Python header lines as a reversed (last-line-first) iterator."""
    rendered = get_header_lines(PYTHON_HEADER, start_year, end_year)
    return reversed(rendered)
################################################################################
# query git for the first and last year in which a file changed
################################################################################
def get_git_change_year_range(filename):
    """Return (smallest, largest) of the years reported by get_git_change_years()."""
    change_years = get_git_change_years(filename)
    earliest = min(change_years)
    latest = max(change_years)
    return earliest, latest
488 ################################################################################
489 # check for existing core copyright
490 ################################################################################
def file_already_has_core_copyright(file_lines):
    """Return True if any line in `file_lines` is an updatable Bitcoin Core copyright line."""
    index, _ = get_updatable_copyright_line(file_lines)
    # Identity test: index 0 is a valid hit and must not be confused with None.
    return index is not None
496 ################################################################################
497 # insert header execution
498 ################################################################################
def file_has_hashbang(file_lines):
    """Return True if the first line is longer than two characters and starts with '#!'."""
    if not len(file_lines):
        return False
    first_line = file_lines[0]
    return len(first_line) > 2 and first_line[:2] == '#!'
def insert_python_header(filename, file_lines, start_year, end_year):
    """Insert the Python copyright header into `file_lines` and write them to `filename`.

    The header goes below the first line when that line is a hashbang,
    otherwise at the very top. `file_lines` is modified in place.
    """
    insert_idx = 1 if file_has_hashbang(file_lines) else 0
    # The header arrives last-line-first; putting each line at the same
    # index restores the natural order.
    for header_line in get_python_header_lines_to_insert(start_year, end_year):
        file_lines.insert(insert_idx, header_line)
    write_file_lines(filename, file_lines)
def insert_cpp_header(filename, file_lines, start_year, end_year):
    """Insert the C++ copyright header at the top of `file_lines` and write them to `filename`.

    `file_lines` is modified in place.
    """
    reversed_header = list(get_cpp_header_lines_to_insert(start_year, end_year))
    # The header arrives last-line-first; flip it back before splicing it in
    # front of the existing lines.
    file_lines[0:0] = reversed_header[::-1]
    write_file_lines(filename, file_lines)
def exec_insert_header(filename, style):
    """Insert a Bitcoin Core copyright header into `filename`.

    `style` equal to 'python' selects the Python header; any other value
    selects the C++ header. Exits with an error if the file already has an
    updatable Bitcoin Core copyright line.
    """
    file_lines = read_file_lines(filename)
    if file_already_has_core_copyright(file_lines):
        sys.exit('*** %s already has a copyright by The Bitcoin Core developers'
                 % (filename))
    start_year, end_year = get_git_change_year_range(filename)
    inserter = insert_python_header if style == 'python' else insert_cpp_header
    inserter(filename, file_lines, start_year, end_year)
534 ################################################################################
535 # insert cmd
536 ################################################################################
# Help text for the 'insert' subcommand.
INSERT_USAGE = (
    "\n"
    'Inserts a copyright header for "The Bitcoin Core developers" at the top of the\n'
    "file in either Python or C++ style as determined by the file extension. If the\n"
    "file is a Python file and it has a '#!' starting the first line, the header is\n"
    "inserted in the line below it.\n"
    "\n"
    "The copyright dates will be set to be:\n"
    "\n"
    '"<year_introduced>-<current_year>"\n'
    "\n"
    "where <year_introduced> is according to the 'git log' history. If\n"
    "<year_introduced> is equal to <current_year>, the date will be set to be:\n"
    "\n"
    '"<current_year>"\n'
    "\n"
    'If the file already has a copyright for "The Bitcoin Core developers", the\n'
    "script will exit.\n"
    "\n"
    "Usage:\n"
    "    $ ./copyright_header.py insert <file>\n"
    "\n"
    "Arguments:\n"
    "    <file> - A source file in the bitcoin repository.\n"
)
def insert_cmd(argv):
    """Handle the 'insert' subcommand.

    `argv` is the full command line and must have exactly three elements,
    with argv[2] naming an existing file whose extension is .h, .cpp, .cc,
    .c or .py. Exits with the usage text or an error message otherwise.
    """
    if len(argv) != 3:
        sys.exit(INSERT_USAGE)

    filename = argv[2]
    if not os.path.isfile(filename):
        sys.exit("*** bad filename: %s" % filename)

    extension = os.path.splitext(filename)[1]
    if extension not in ['.h', '.cpp', '.cc', '.c', '.py']:
        sys.exit("*** cannot insert for file extension %s" % extension)

    style = 'python' if extension == '.py' else 'cpp'
    exec_insert_header(filename, style)
580 ################################################################################
581 # UI
582 ################################################################################
# Top-level help text, shown when the subcommand is missing or unknown.
USAGE = (
    "\n"
    "copyright_header.py - utilities for managing copyright headers of 'The Bitcoin\n"
    "Core developers' in repository source files.\n"
    "\n"
    "Usage:\n"
    "    $ ./copyright_header <subcommand>\n"
    "\n"
    "Subcommands:\n"
    "    report\n"
    "    update\n"
    "    insert\n"
    "\n"
    "To see subcommand usage, run them without arguments.\n"
)
SUBCOMMANDS = ['report', 'update', 'insert']

if __name__ == "__main__":
    # With no subcommand, or one that is not recognised, show the top-level help.
    if len(sys.argv) == 1:
        sys.exit(USAGE)
    subcommand = sys.argv[1]
    if subcommand not in SUBCOMMANDS:
        sys.exit(USAGE)
    # Every handler receives the complete argv and validates its own arguments.
    handlers = {
        'report': report_cmd,
        'update': update_cmd,
        'insert': insert_cmd,
    }
    handlers[subcommand](sys.argv)