Script to fix license headers and copyrights in Java sources
[jgit.git] / tools / fix-headers.pl
blob62141b34bd7ee925083456fda8ec803acfcf1cf5
1 #!/usr/bin/perl
2 # ------------------------------------------------------------
3 # This script fixes the license headers of all Java sources
4 # to use the Eclipse EDL license template and updates the
5 # copyright statements using author information from git blame
7 # To fix this in all revisions, rewrite the history:
8 # git filter-branch --tree-filter 'fix-headers.pl' HEAD
9 # ------------------------------------------------------------
10 use strict;
# Table of author names, start date, end date, actual copyright owner.
#
# Each row is:
#   [ pattern, start year, start month, end year, end month, owner ]
# translate_author() returns the owner of the first row whose pattern
# matches the author string and whose start/end dates enclose the commit
# date (both ends inclusive).
#
# The dots in the addresses are escaped so they only match a literal '.',
# not an arbitrary character.
my @author_employers = (
    [ qr/spearce\@spearce\.org/, 2008, 8, 9999, 12, 'Google Inc.' ],

    [ qr/\@(?:.*\.)?google\.com/, 0, 0, 9999, 12, 'Google Inc.' ],
);
20 # License text itself.
# This is a single-quoted heredoc, so nothing inside it is interpolated.
# update_file() prints it right after the generated copyright lines, which
# is why the text starts mid-sentence ("and other copyright owners ...").
# Before printing, the text is run through the per-file-type formatter
# returned by java_file/pom_file/sh_file, which adds the comment prefix
# (or indentation) to each line.
22 my $license_text = <<'EOF';
23 and other copyright owners as documented in the project's IP log.
25 This program and the accompanying materials are made available
26 under the terms of the Eclipse Distribution License v1.0 which
27 accompanies this distribution, is reproduced below, and is
28 available at http://www.eclipse.org/org/documents/edl-v10.php
30 All rights reserved.
32 Redistribution and use in source and binary forms, with or
33 without modification, are permitted provided that the following
34 conditions are met:
36 - Redistributions of source code must retain the above copyright
37 notice, this list of conditions and the following disclaimer.
39 - Redistributions in binary form must reproduce the above
40 copyright notice, this list of conditions and the following
41 disclaimer in the documentation and/or other materials provided
42 with the distribution.
44 - Neither the name of the Eclipse Foundation, Inc. nor the
45 names of its contributors may be used to endorse or promote
46 products derived from this software without specific prior
47 written permission.
49 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
50 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
51 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
52 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
54 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
56 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
57 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
58 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
59 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
60 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
61 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62 EOF
# Main driver: process the files named on the command line, or every file
# listed by `git ls-files` when no arguments are given.
my @files = @ARGV;
unless (@files) {
    # List-form pipe open: no shell is involved, and a failure to start
    # git is reported instead of silently producing an empty file list.
    open(my $ls, '-|', 'git', 'ls-files')
        or die "cannot run git ls-files: $!\n";
    @files = <$ls>;
    # chomp only removes a trailing newline; chop would eat the last
    # character of a name that arrives without one.
    chomp @files;
    close($ls)
        or die "git ls-files failed: " . ($! || 'exit status ' . ($? >> 8)) . "\n";
}

# A named loop variable is used instead of $_: the readers called from
# update_file() assign to the global $_, which would otherwise overwrite
# the aliased element of @files.
foreach my $file (@files) {
    if ($file =~ /\.java$/ || $file eq 'LICENSE') {
        # Base64.java is deliberately left alone.
        next if $file eq 'org.eclipse.jgit/src/org/eclipse/jgit/util/Base64.java';
        update_file(\&java_file, $file);

    } elsif ($file =~ /pom\.xml$/) {
        update_file(\&pom_file, $file);

    } elsif ($file =~ /\.sh$/) {
        update_file(\&sh_file, $file);
    }
}
# Split a Java source file (also used for the LICENSE file) into its old
# header and its package/import preamble.
#
# Arguments:
#   $fd - readable filehandle positioned at the start of the file
#
# Returns the list ($header, $preamble, $lineno, $top, $fmt, $btm):
#   $header   - every line before the first line starting with "package "
#   $preamble - the package line, the import/blank lines that follow it,
#               and the first line that is neither (already consumed)
#   $lineno   - number of the last line read from $fd, minus one
#   $top      - text that opens the new comment block
#   $fmt      - code ref that edits $_ in place, prefixing every line
#   $btm      - text that closes the new comment block
#
# The handle is left positioned after the preamble so the caller can copy
# the remainder of the file.
sub java_file
{
    my $fd = shift;
    my $header = '';
    my $preamble = '';

    # Lines are read into a lexical: a bare while (<$fd>) assigns to the
    # global $_ without localizing it and clobbers whatever the caller has
    # aliased to $_ (for instance a foreach loop variable).

    # header is everything before package statement
    while (defined(my $line = <$fd>)) {
        if ($line =~ /^package /) {
            $preamble = $line;
            last;
        }
        $header .= $line;
    }

    # preamble is everything with blanks or imports
    while (defined(my $line = <$fd>)) {
        $preamble .= $line;
        last unless ($line =~ /^import / || $line =~ /^$/);
    }
    my $lineno = $. - 1;

    return ($header, $preamble, $lineno,
        "/*\n", sub { s/^/ */mg }, " */\n");
}
# Split a pom.xml into its old header and the line that opens <project.
#
# Arguments:
#   $fd - readable filehandle positioned at the start of the file
#
# Returns the list ($header, $preamble, $lineno, $top, $fmt, $btm):
#   $header   - every line before the first line containing "<project"
#   $preamble - that "<project" line ('' if there is none)
#   $lineno   - number of the last line read from $fd, minus one
#   $top      - XML declaration plus the opening of an XML comment
#   $fmt      - code ref that edits $_ in place, indenting every
#               non-empty line
#   $btm      - text that closes the XML comment
sub pom_file
{
    my $fd = shift;
    my $header = '';
    my $preamble = '';

    # Read into a lexical so the global $_ (possibly aliased by the
    # caller) is left untouched.

    # header is everything before project
    while (defined(my $line = <$fd>)) {
        if ($line =~ /<project/) {
            $preamble = $line;
            last;
        }
        $header .= $line;
    }
    my $lineno = $. - 1;

    return ($header, $preamble, $lineno,
        qq{<?xml version="1.0" encoding="UTF-8"?>\n<!--\n},
        sub { s/^(.)/ $1/mg },
        qq{-->\n});
}
# Split a shell script into its first line, the comment header that
# follows it, and the first non-comment line.
#
# Arguments:
#   $fd - readable filehandle positioned at the start of the file
#
# Returns the list ($header, $preamble, $lineno, $top, $fmt, $btm):
#   $header   - the run of lines starting with '#' after the first line
#   $preamble - the first line after that run ('' if there is none)
#   $lineno   - number of the last line read from $fd, minus one
#   $top      - the file's first line, kept verbatim (undef for an empty
#               file)
#   $fmt      - code ref that edits $_ in place, prefixing every line
#               with '#'
#   $btm      - empty string; nothing has to be closed
sub sh_file
{
    my $fd = shift;
    my $top = <$fd>;
    my $header = '';
    my $preamble = '';

    # Read into a lexical so the global $_ (possibly aliased by the
    # caller) is left untouched.
    while (defined(my $line = <$fd>)) {
        if ($line =~ /^#/) {
            $header .= $line;
            next;
        }
        $preamble = $line;
        last;
    }
    my $lineno = $. - 1;

    return ($header, $preamble, $lineno, $top, sub { s/^/#/mg }, "");
}
# Rewrite the license header of one file in place.
#
# Arguments:
#   $func     - code ref (java_file, pom_file or sh_file) that is given a
#               filehandle and returns
#               ($header, $preamble, $lineno, $top, $fmt, $btm)
#   $old_file - path of the file to rewrite
#
# Steps:
#   1. Collect copyright holders from explicit "Copyright (c)" lines in
#      the old header.
#   2. Unless the file is LICENSE, add authors found by `git blame`
#      (starting at line $lineno, optionally at revision $ENV{GIT_COMMIT}).
#      If at least one author owns 100 or more blamed lines, authors
#      below that threshold are dropped.
#   3. Write "$old_file.license.<pid>" with the new header, the preamble
#      and the rest of the file, then rename it over $old_file.
#
# Returns true on success. On an I/O failure a warning is printed, the
# original file is left untouched and nothing (false) is returned.
sub update_file
{
    my $func = shift;
    my $old_file = shift;
    my $new_file = "$old_file.license.$$";

    my $in;
    unless (open($in, '<', $old_file)) {
        # Without this check an unreadable file would be replaced by a
        # file containing only the generated header.
        warn "cannot read $old_file: $!\n";
        return;
    }
    my ($header, $preamble, $lineno,
        $top, $fmt, $btm) = $func->($in);

    my %all_years;
    my %author_years;
    my %minyear;
    my %maxyear;

    # find explicit copyright statements in sources
    foreach my $line (split(/\n/, $header)) {
        # * Copyright (c) 2008, Example Company Inc.
        # * Copyright (c) 2008, Joe Developer <joe.dev@example.org>
        # * Copyright (c) 2008, 2009 Joe Developer <joe.dev@example.org>
        # * Copyright (c) 2005-2009 Joe Developer <joe.dev@example.org>
        # * Copyright (c) 2008, 2009 Other Examples Inc.
        # * Copyright (c) 2008-2010 Example Company Inc.
        # * Copyright (C) 2009-2010, Yet More Examples Ltd.
        next unless $line =~ m/Copyright \(c\) (\d{4})(?:\s*[,-]\s*(\d{4}))?,?\s*([^<>]+)\s*(<.*?>)?/i;
        my ($y, $y2, $n, $e) = ($1, $2, $3, $4);
        my $year = trim($y);
        my $author_name = trim($n);
        my $author_email = trim($e);
        my $who = $author_name;
        $who .= " $author_email" if $author_email;
        update_author_info(\%minyear, \%maxyear, \%all_years, \%author_years, $who, $year);
        if (my $year2 = $y2) {
            update_author_info(\%minyear, \%maxyear, \%all_years, \%author_years, $who, $year2);
        }
    }

    if ($old_file ne 'LICENSE') {
        # add implicit copyright statements from authors found in git blame
        my (%line_counts, %line_authors);
        my ($last_commit, $author_name, $author_email);

        # $lineno is "last line read - 1" and can be 0 or negative for a
        # very short or empty file; git counts lines from 1.
        my $first_line = $lineno < 1 ? 1 : $lineno;
        my @blame_args = ('git', 'blame', "-L$first_line,", '-C', '-w', '-p');
        push(@blame_args, $ENV{'GIT_COMMIT'}) if $ENV{'GIT_COMMIT'};
        push(@blame_args, '--', $old_file);

        my $blame;
        if (open($blame, '-|', @blame_args)) {
            while (defined(my $line = <$blame>)) {
                chomp $line;
                # group header: <sha> <orig line> <final line> <line count>
                if ($line =~ /^([0-9a-f]{40}) \d+ \d+ (\d+)$/) {
                    $last_commit = $1;
                    $line_counts{$last_commit} += $2;
                    next;
                }
                if ($line =~ /^author (.*)$/) {
                    $author_name = trim($1);
                    next;
                }
                if ($line =~ /^author-mail (<.*>)$/) {
                    $author_email = trim($1);
                    next;
                }
                if ($line =~ /^author-time (\d+)$/) {
                    my $when = $1;
                    # skip uncommitted changes
                    my $who = "$author_name $author_email";
                    next if $who eq 'Not Committed Yet <not.committed.yet>';
                    # NOTE(review): localtime makes year/month depend on
                    # the time zone of the machine running the script -
                    # confirm that this is intended.
                    my @tm = localtime($when);
                    my $year = $tm[5] + 1900;
                    my $mon = $tm[4] + 1;
                    $who = translate_author($who, $year, $mon);
                    $line_authors{$last_commit} = [$who, $year, $mon];
                }
            }
            close($blame)
                or warn "git blame failed for $old_file\n";
        } else {
            # Best effort, as before: carry on with the explicit
            # copyright statements only, but say so.
            warn "cannot run git blame for $old_file: $!\n";
        }

        # total blamed lines per (translated) author
        my %author_linecounts;
        foreach my $commit (keys %line_counts) {
            my $who = $line_authors{$commit}[0];
            next unless $who;
            $author_linecounts{$who} += $line_counts{$commit};
        }

        my $sz = 100;
        my $count_big = 0;
        foreach my $count (values %author_linecounts) {
            $count_big++ if $count >= $sz;
        }

        foreach my $info (values %line_authors) {
            my ($who, $year, $mon) = @$info;
            # once somebody owns $sz or more lines, minor authors are dropped
            next if ($count_big && $author_linecounts{$who} < $sz);
            update_author_info(\%minyear, \%maxyear, \%all_years, \%author_years, $who, $year, $mon);
        }
    }

    # rewrite file
    my $out;
    unless (open($out, '>', $new_file)) {
        warn "cannot write $new_file: $!\n";
        close $in;
        return;
    }
    print {$out} $top;

    # One copyright line per author, sorted by name. (Every author is
    # printed exactly once, so no outer loop over the years is needed.)
    foreach my $who (sort keys %author_years) {
        # the formatter edits $_ in place
        local $_ = format_copyright($minyear{$who}, $maxyear{$who}, $who);
        $fmt->();
        print {$out} $_;
    }

    {
        local $_ = $license_text;
        $fmt->();
        print {$out} $_;
    }
    print {$out} $btm;
    print {$out} "\n";
    print {$out} $preamble;

    # copy the rest of the file unchanged
    while (defined(my $line = <$in>)) {
        print {$out} $line;
    }
    close $in;

    # buffered write errors only show up at close time
    unless (close $out) {
        warn "cannot finish writing $new_file: $!\n";
        unlink $new_file;
        return;
    }

    unless (rename($new_file, $old_file)) {
        warn "cannot replace $old_file: $!\n";
        unlink $new_file;
        return;
    }
    return 1;
}
# Return a copy of the argument with leading and trailing whitespace
# removed. An undefined argument is returned as undef.
#
# The former ($) prototype is gone: the sub is defined after its call
# sites, so the prototype was never applied to them, and prototypes do
# not validate arguments anyway.
sub trim
{
    my $string = shift;
    return $string unless defined $string;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}
# Record that $who holds copyright for $year.
#
# Arguments:
#   $minyear_ref      - hash ref: owner => earliest year seen
#   $maxyear_ref      - hash ref: owner => latest year seen
#   $all_years_ref    - hash ref: set of all years seen
#   $author_years_ref - hash ref: owner => { year => 1 }
#   $who, $year       - author string and year to record
#   $mon              - optional month; when given, $who is first mapped
#                       to an employer by translate_author()
sub update_author_info
{
    my ($minyear_ref, $maxyear_ref, $all_years_ref, $author_years_ref,
        $who, $year, $mon) = @_;

    $who = translate_author($who, $year, $mon);

    $author_years_ref->{$who}{$year} = 1;
    $all_years_ref->{$year} = 1;

    # A missing entry (numerically 0) or anything below 1900 counts as
    # "no minimum recorded yet".
    my $earliest = $minyear_ref->{$who};
    $earliest = 9999 if $earliest < 1900;
    $minyear_ref->{$who} = $year if $year < $earliest;

    my $latest = $maxyear_ref->{$who};
    $maxyear_ref->{$who} = $year if $year > $latest;
}
# Compare two (year, month) dates.
#
# Arguments: ($a_year, $a_mon, $b_year, $b_mon)
# Returns -1, 0 or 1 when the first date is earlier than, equal to, or
# later than the second one.
sub date_cmp
{
    my ($left_year, $left_mon, $right_year, $right_mon) = @_;

    # the year decides unless both years are equal
    my $by_year = $left_year <=> $right_year;
    return $by_year if $by_year;

    return $left_mon <=> $right_mon;
}
# Map an author to the employer that owns the copyright, if any.
#
# Arguments:
#   $who        - author string to look up
#   $year, $mon - date of the contribution
#
# Returns the owner from the first @author_employers row whose pattern
# matches $who and whose start/end dates enclose ($year, $mon), both ends
# inclusive. Returns $who unchanged when no row applies or when $mon is
# undefined.
sub translate_author
{
    my ($who, $year, $mon) = @_;

    # no month, no lookup
    return $who unless defined $mon;

    foreach my $rule (@author_employers) {
        my ($pattern, $from_year, $from_mon, $to_year, $to_mon, $owner) = @$rule;
        if ($who =~ $pattern
            && date_cmp($year, $mon, $from_year, $from_mon) >= 0
            && date_cmp($year, $mon, $to_year, $to_mon) <= 0) {
            return $owner;
        }
    }

    return $who;
}
# Build one copyright line for the new header.
#
# Arguments: ($minyear, $maxyear, $who)
# Returns " Copyright (C) <min>-<max>, <who>\n" when the years span a
# range, otherwise " Copyright (C) <min>, <who>\n".
sub format_copyright {
    my ($minyear, $maxyear, $who) = @_;

    # single year (or no real range): print just the first year
    return " Copyright (C) $minyear, $who\n" unless $minyear < $maxyear;

    return " Copyright (C) $minyear-$maxyear, $who\n";
}