utils/git2cl

   1 #!/bin/sh
   2 exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
   3 #!perl -w
   4
   5 # Copyright (C) 2007, 2008 Simon Josefsson <simon@josefsson.org>
   6 # Copyright (C) 2007 Luis Mondesi <lemsx1@gmail.com>
   7 # * calls git directly. To use it just:
   8 #   cd ~/Project/my_git_repo; git2cl > ChangeLog
   9 # * implements strptime()
  10 # * fixes bugs in $comment parsing
  11 #   - copy input before we remove leading spaces
  12 #   - skip "merge branch" statements as they don't
  13 #     have information about files (i.e. we never
  14 #     go into $state 2)
  15 #   - behaves like a pipe/filter if input is given from the CLI
  16 #     else it calls git log by itself
  17 #
  18 # The functions mywrap, last_line_len, wrap_log_entry are derived from
  19 # the cvs2cl tool, see <http://www.red-bean.com/cvs2cl/>:
  20 # Copyright (C) 2001,2002,2003,2004 Martyn J. Pearce <fluffy@cpan.org>
  21 # Copyright (C) 1999 Karl Fogel <kfogel@red-bean.com>
  22 #
  23 # git2cl is free software; you can redistribute it and/or modify it
  24 # under the terms of the GNU General Public License as published by
  25 # the Free Software Foundation; either version 2, or (at your option)
  26 # any later version.
  27 #
  28 # git2cl is distributed in the hope that it will be useful, but
  29 # WITHOUT ANY WARRANTY; without even the implied warranty of
  30 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  31 # General Public License for more details.
  32 #
  33 # You should have received a copy of the GNU General Public License
  34 # along with git2cl; see the file COPYING.  If not, write to the Free
  35 # Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  36 # 02111-1307, USA.
  37
  38 use strict;
  39 use POSIX qw(strftime);
  40 use Getopt::Long   qw( GetOptions );
  41 use Text::Wrap qw(wrap);
  42 use FileHandle;
  43
  44 use constant EMPTY_LOG_MESSAGE => '*** empty log message ***';
  45
  46 # Print debugging messages?
  47 my $Debug = 0;
  48
  49 # Expand usernames to email addresses based on a map file?
  50 my $User_Map_File = '';
  51 my $User_Passwd_File;
  52 my $Mail_Domain;
  53 my $Obfuscate_Emails = 0;
  54
  55 # Just print usage message and exit?
  56 my $Print_Usage = 0;
  57
  58 GetOptions('help|usage|h'   => \$Print_Usage,
  59            'debug'          => \$Debug,         # unadvertised option, heh
  60            'obfuscate|o'      => \$Obfuscate_Emails,
  61            'usermap|U=s'    => \$User_Map_File, # not implemented yet
  62            )
  63     or die "options parsing failed\n";
  64
  65 if ($Print_Usage) {
  66   &usage ();
  67   exit (0);
  68 }
  69
  70 # -------------------------------------
  71
  72 sub usage {
  73
  74   eval "use Pod::Usage qw( pod2usage )";
  75
  76    if ( $@ ) {
  77     print <<'END';
  78
  79 * Pod::Usage was not found.  The formatting may be suboptimal.  Consider
  80   upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and
  81   versions of perl prior to 5.6 are getting rather rusty, now.  Alternatively,
  82   install Pod::Usage direct from CPAN.
  83 END
  84
  85     local $/ = undef;
  86     my $message = <DATA>;
  87     $message =~ s/^=(head1|item) //gm;
  88     $message =~ s/^=(over|back).*\n//gm;
  89     $message =~ s/\n{3,}/\n\n/g;
  90     print $message;
  91   } else {
  92     print "\n";
  93     pod2usage( -exitval => 'NOEXIT',
  94                -verbose => 1,
  95                -output  => \*STDOUT,
  96              );
  97   }
  98
  99   return;
 100 }
 101
 102 # this is a helper hash for stptime.
 103 # Assumes you are calling 'git log ...' with LC_ALL=C
 104 my %month = (
 105     'Jan'=>0,
 106     'Feb'=>1,
 107     'Mar'=>2,
 108     'Apr'=>3,
 109     'May'=>4,
 110     'Jun'=>5,
 111     'Jul'=>6,
 112     'Aug'=>7,
 113     'Sep'=>8,
 114     'Oct'=>9,
 115     'Nov'=>10,
 116     'Dec'=>11,
 117 );
 118
 119 my $fh = new FileHandle;
 120
 121 sub key_ready
 122 {
 123     my ($rin, $nfd);
 124     $rin = '';
 125     vec($rin, fileno(STDIN), 1) = 1;
 126     return $nfd = select($rin, undef, undef, 0);
 127 }
 128
 129 sub strptime {
 130     my $str = shift;
 131     return undef if not defined $str;
 132
 133     # we are parsing this format
 134     # Fri Oct 26 00:42:56 2007 -0400
 135     # to these fields
 136     # sec, min, hour, mday, mon, year, wday = -1, yday = -1, isdst = -1
 137     # Luis Mondesi <lemsx1@gmail.com>
 138     my @date;
 139     if ($str =~ /([[:alpha:]]{3})\s+([[:alpha:]]{3})\s+([[:digit:]]{1,2})\s+([[:digit:]]{1,2}):([[:digit:]]{1,2}):([[:digit:]]{1,2})\s+([[:digit:]]{4})/){
 140         push(@date,$6,$5,$4,$3,$month{$2},($7 - 1900),-1,-1,-1);
 141     } else {
 142         die ("Cannot parse date '$str'\n'");
 143     }
 144     return @date;
 145 }
 146
 147 sub mywrap {
 148     my ($indent1, $indent2, @text) = @_;
 149     # If incoming text looks preformatted, don't get clever
 150     my $text = Text::Wrap::wrap($indent1, $indent2, @text);
 151     if ( grep /^\s+/m, @text ) {
 152         return $text;
 153     }
 154     my @lines = split /\n/, $text;
 155     $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e;
 156     $lines[0] =~ s/^$indent1\s+/$indent1/;
 157     s/^$indent2\s+/$indent2/
 158         for @lines[1..$#lines];
 159     my $newtext = join "\n", @lines;
 160     $newtext .= "\n"
 161         if substr($text, -1) eq "\n";
 162     return $newtext;
 163 }
 164
 165 sub last_line_len {
 166     my $files_list = shift;
 167     my @lines = split (/\n/, $files_list);
 168     my $last_line = pop (@lines);
 169     return length ($last_line);
 170 }
 171
 172 # Obfuscate email addresses in the author information at the
 173 # beginning of each log.
 174 sub obfuscate_email {
 175     my $text = shift;                  # The text to wrap.
 176     my $text_out = '';
 177     my @words = split (/ /, $text);
 178     while (@words)   # Don't use `foreach' here, it won't work.
 179     {
 180         my $this_word = shift (@words);
 181         if ($this_word =~ /[a-zA-Z_\.0-9]@[a-zA-Z_\.0-9]/) {
 182             $this_word =~ s/^[<\[]/* /;
 183             $this_word =~ s/\./ dot /g;
 184             $this_word =~ s/@/ AT /g;
 185             $this_word =~ s/[>\]]/ */;
 186         }
 187         $text_out = $text_out . " " . $this_word;
 188       }
 189
 190     return $text_out;
 191   }
 192
 193 # A custom wrap function, sensitive to some common constructs used in
 194 # log entries.
 195 sub wrap_log_entry {
 196     my $text = shift;                  # The text to wrap.
 197     my $left_pad_str = shift;          # String to pad with on the left.
 198
 199     # These do NOT take left_pad_str into account:
 200     my $length_remaining = shift;      # Amount left on current line.
 201     my $max_line_length  = shift;      # Amount left for a blank line.
 202
 203     my $wrapped_text = '';             # The accumulating wrapped entry.
 204     my $user_indent = '';              # Inherited user_indent from prev line.
 205
 206     my $first_time = 1;                # First iteration of the loop?
 207     my $suppress_line_start_match = 0; # Set to disable line start checks.
 208
 209     my @lines = split (/\n/, $text);
 210     while (@lines)   # Don't use `foreach' here, it won't work.
 211     {
 212         my $this_line = shift (@lines);
 213         chomp $this_line;
 214
 215         if ($this_line =~ /^(\s+)/) {
 216             $user_indent = $1;
 217         }
 218         else {
 219             $user_indent = '';
 220         }
 221
 222         # If it matches any of the line-start regexps, print a newline now...
 223         if ($suppress_line_start_match)
 224         {
 225             $suppress_line_start_match = 0;
 226         }
 227         elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
 228                || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
 229                || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
 230                || ($this_line =~ /^(\s+)(\S+)/)
 231                || ($this_line =~ /^(\s*)- +/)
 232                || ($this_line =~ /^()\s*$/)
 233                || ($this_line =~ /^(\s*)\*\) +/)
 234                || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
 235         {
 236             $length_remaining = $max_line_length - (length ($user_indent));
 237         }
 238
 239         # Now that any user_indent has been preserved, strip off leading
 240         # whitespace, so up-folding has no ugly side-effects.
 241         $this_line =~ s/^\s*//;
 242
 243         # obfuscate any emails found inside the log
 244         if ($Obfuscate_Emails && $this_line =~ /@/ ) {
 245             $this_line = &obfuscate_email ($this_line);
 246         }
 247
 248         # Accumulate the line, and adjust parameters for next line.
 249         my $this_len = length ($this_line);
 250         if ($this_len == 0)
 251         {
 252             # Blank lines should cancel any user_indent level.
 253             $user_indent = '';
 254             $length_remaining = $max_line_length;
 255         }
 256         elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
 257         {
 258             # Walk backwards from the end.  At first acceptable spot, break
 259             # a new line.
 260             my $idx = $length_remaining - 1;
 261             if ($idx < 0) { $idx = 0 };
 262             while ($idx > 0)
 263             {
 264                 if (substr ($this_line, $idx, 1) =~ /\s/)
 265                 {
 266                     my $line_now = substr ($this_line, 0, $idx);
 267                     my $next_line = substr ($this_line, $idx);
 268                     $this_line = $line_now;
 269
 270                     # Clean whitespace off the end.
 271                     chomp $this_line;
 272
 273                     # The current line is ready to be printed.
 274                     $this_line .= "\n${left_pad_str}";
 275
 276                     # Make sure the next line is allowed full room.
 277                     $length_remaining = $max_line_length - (length ($user_indent));
 278
 279                     # Strip next_line, but then preserve any user_indent.
 280                     $next_line =~ s/^\s*//;
 281
 282                     # Sneak a peek at the user_indent of the upcoming line, so
 283                     # $next_line (which will now precede it) can inherit that
 284                     # indent level.  Otherwise, use whatever user_indent level
 285                     # we currently have, which might be none.
 286                     my $next_next_line = shift (@lines);
 287                     if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
 288                         $next_line = $1 . $next_line if (defined ($1));
 289                         # $length_remaining = $max_line_length - (length ($1));
 290                         $next_next_line =~ s/^\s*//;
 291                     }
 292                     else {
 293                         $next_line = $user_indent . $next_line;
 294                     }
 295                     if (defined ($next_next_line)) {
 296                         unshift (@lines, $next_next_line);
 297                     }
 298                     unshift (@lines, $next_line);
 299
 300                     # Our new next line might, coincidentally, begin with one of
 301                     # the line-start regexps, so we temporarily turn off
 302                     # sensitivity to that until we're past the line.
 303                     $suppress_line_start_match = 1;
 304
 305                     last;
 306                 }
 307                 else
 308                 {
 309                     $idx--;
 310                 }
 311             }
 312
 313             if ($idx == 0)
 314             {
 315                 # We bottomed out because the line is longer than the
 316                 # available space.  But that could be because the space is
 317                 # small, or because the line is longer than even the maximum
 318                 # possible space.  Handle both cases below.
 319
 320                 if ($length_remaining == ($max_line_length - (length ($user_indent))))
 321                 {
 322                     # The line is simply too long -- there is no hope of ever
 323                     # breaking it nicely, so just insert it verbatim, with
 324                     # appropriate padding.
 325                     $this_line = "\n${left_pad_str}${this_line}";
 326                 }
 327                 else
 328                 {
 329                     # Can't break it here, but may be able to on the next round...
 330                     unshift (@lines, $this_line);
 331                     $length_remaining = $max_line_length - (length ($user_indent));
 332                     $this_line = "\n${left_pad_str}";
 333                 }
 334             }
 335         }
 336         else  # $this_len < $length_remaining, so tack on what we can.
 337         {
 338             # Leave a note for the next iteration.
 339             $length_remaining = $length_remaining - $this_len;
 340
 341             if ($this_line =~ /\.$/)
 342             {
 343                 $this_line .= "  ";
 344                 $length_remaining -= 2;
 345             }
 346             else  # not a sentence end
 347             {
 348                 $this_line .= " ";
 349                 $length_remaining -= 1;
 350             }
 351         }
 352
 353         # Unconditionally indicate that loop has run at least once.
 354         $first_time = 0;
 355
 356         $wrapped_text .= "${user_indent}${this_line}";
 357     }
 358
 359     # One last bit of padding.
 360     $wrapped_text .= "\n";
 361
 362     return $wrapped_text;
 363 }
 364
 365 # main
 366
 367
 368 my @date;
 369 my $author;
 370 my @files;
 371 my $comment;
 372
 373 my $state; # 0-header 1-comment 2-files
 374 my $done = 0;
 375
 376 $state = 0;
 377
 378 # if reading from STDIN, we assume that we are
 379 # getting git log as input
 380 if (key_ready())
 381 {
 382
 383     #my $dummyfh; # don't care about writing
 384     #($fh,$dummyfh) = FileHandle::pipe;
 385     $fh->fdopen(*STDIN, 'r');
 386 }
 387 else
 388 {
 389     $fh->open("LC_ALL=C git log --pretty --numstat --summary|")
 390         or die("Cannot execute git log...$!\n");
 391 }
 392
 393 print "This ChangeLog is automatically generated from the git commit messages
 394 during the snapshot process.  See README.snapshots and utils/git2cl.
 395
 396 ";
 397
 398 while (my $_l = <$fh>) {
 399     #print STDERR "debug ($state, " . (@date ? (strftime "%Y-%m-%d", @date) : "") . "): `$_'\n";
 400     if ($state == 0) {
 401         if ($_l =~ m,^Author: (.*),) {
 402             $author = $1;
 403             if ($Obfuscate_Emails) {
 404               $author = &obfuscate_email ($author);
 405             }
 406         }
 407         if ($_l =~ m,^Date: (.*),) {
 408             @date = strptime($1);
 409         }
 410         $state = 1 if ($_l =~ m,^$, and $author and (@date+0>0));
 411     } elsif ($state == 1) {
 412         # * modifying our input text is a bad choice
 413         #   let's make a copy of it first, then we remove spaces
 414         # * if we meet a "merge branch" statement, we need to start
 415         #   over and find a real entry
 416         # Luis Mondesi <lemsx1@gmail.com>
 417         my $_s = $_l;
 418         $_s =~ s/^    //g;
 419         if ($_s =~ m/^Merge branch/)
 420         {
 421             $state=0;
 422             next;
 423         }
 424         $comment = $comment . $_s;
 425         $state = 2 if ($_l =~ m,^$,);
 426     } elsif ($state == 2) {
 427         if ($_l =~ m,^([0-9]+)\t([0-9]+)\t(.*)$,) {
 428             push @files, $3;
 429         }
 430         $done = 1 if ($_l =~ m,^$,);
 431     }
 432
 433     if ($done) {
 434         print (strftime "%Y-%m-%d  $author\n\n", @date);
 435
 436         my $files = join (", ", @files);
 437         $files = mywrap ("\t", "\t", "* $files");
 438
 439         if (index($comment, EMPTY_LOG_MESSAGE) > -1 ) {
 440             $comment = "[no log message]\n";
 441         }
 442
 443         my $files_last_line_len = 0;
 444         $files_last_line_len = last_line_len($files) + 1;
 445         my $msg = wrap_log_entry($comment, "\t", 69-$files_last_line_len, 69);
 446
 447         $msg =~ s/[ \t]+\n/\n/g;
 448
 449         print "$files: $msg\n";
 450
 451         @date = ();
 452         $author = "";
 453         @files = ();
 454         $comment = "";
 455
 456         $state = 0;
 457         $done = 0;
 458     }
 459 }
 460
 461 if (@date + 0)
 462 {
 463     print (strftime "%Y-%m-%d  $author\n\n", @date);
 464     my $msg = wrap_log_entry($comment, "\t", 69, 69);
 465     $msg =~ s/[ \t]+\n/\n/g;
 466     print "\t* $msg\n";
 467 }
 468
 469 __DATA__
 470
 471 =head1 NAME
 472
 473 cvs2cl.pl - convert cvs log messages to changelogs
 474
 475 =head1 SYNOPSIS
 476
 477 B<cvs2cl> [I<options>] [I<FILE1> [I<FILE2> ...]]
 478
 479 =head1 DESCRIPTION
 480
 481 cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by
 482 running "cvs log" and parsing the output. Duplicate log messages get
 483 unified in the Right Way.
 484
 485 The default output of cvs2cl is designed to be compact, formally unambiguous,
 486 but still easy for humans to read.  It should be largely self-explanatory; the
 487 one abbreviation that might not be obvious is "utags".  That stands for
 488 "universal tags" -- a universal tag is one held by all the files in a given
 489 change entry.
 490
 491 If you need output that's easy for a program to parse, use the B<--xml> option.
 492 Note that with XML output, just about all available information is included
 493 with each change entry, whether you asked for it or not, on the theory that
 494 your parser can ignore anything it's not looking for.
 495
 496 If filenames are given as arguments cvs2cl only shows log information for the
 497 named files.
 498
 499 =head1 OPTIONS
 500
 501 =over 4
 502
 503 =item B<-h>, B<-help>, B<--help>, B<-?>
 504
 505 Show a short help and exit.
 506
 507 =item B<--obfuscate>, B<-O>
 508
 509 Obfuscate email addresses in the logs.
 510
 511 =item -B<U> I<UFILE>, B<--usermap> I<UFILE>
 512
 513 Expand usernames to email addresses from I<UFILE>.
 514 NOT IMPLEMENTED YET
 515
 516 =head1 SEE ALSO
 517 git(1)
 518 cvs2cl(1)