maintenance/cvs2cl_by_file.pl

   1 #!/bin/sh
   2 exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
   3 #!perl -w
   4
   5 ###########################################################################
   6 # This is a quick hack version of cvs2cl.pl (see below)
   7 # that simply outputs in a file-centric mannor.
   8 # only CVS::Utils::ChangeLog::EntrySet::Output::output_changelog
   9 # was altered
  10 #
  11 # Usage:
  12 # cvs2cl_by_file.pl --delta bioperl-release-1-5-1:bioperl-release-1-5-2
  13 # Generates a file called ChangeLog showing, per file, all the commit
  14 # messages since tag bioperl-release-1-5-1 up to tag bioperl-release-1-5-2
  15 # It generates in wikipedia format suitable for immediate pasting into the
  16 # a wiki page for the delta
  17 #
  18 # Sendu Bala <bix@sendu.me.uk>
  19 ###########################################################################
  20
  21
  22 ##############################################################
  23 ###                                                        ###
  24 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
  25 ###                                                        ###
  26 ##############################################################
  27
  28 ## $Revision$
  29 ## $Date$
  30 ## $Author$
  31 ##
  32
  33 use strict;
  34
  35 use File::Basename qw( fileparse );
  36 use Getopt::Long   qw( GetOptions );
  37 use Text::Wrap     qw( );
  38 use Time::Local    qw( timegm );
  39 use User::pwent    qw( getpwnam );
  40 use File::Spec;
  41
  42 # The Plan:
  43 #
  44 # Read in the logs for multiple files, spit out a nice ChangeLog that
  45 # mirrors the information entered during `cvs commit'.
  46 #
  47 # The problem presents some challenges. In an ideal world, we could
  48 # detect files with the same author, log message, and checkin time --
  49 # each <filelist, author, time, logmessage> would be a changelog entry.
  50 # We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic*
  51 # so checkins can span a range of times.  Also, the directory structure
  52 # could be hierarchical.
  53 #
  54 # Another question is whether we really want to have the ChangeLog
  55 # exactly reflect commits. An author could issue two related commits,
  56 # with different log entries, reflecting a single logical change to the
  57 # source. GNU style ChangeLogs group these under a single author/date.
  58 # We try to do the same.
  59 #
  60 # So, we parse the output of `cvs log', storing log messages in a
  61 # multilevel hash that stores the mapping:
  62 #   directory => author => time => message => filelist
  63 # As we go, we notice "nearby" commit times and store them together
  64 # (i.e., under the same timestamp), so they appear in the same log
  65 # entry.
  66 #
  67 # When we've read all the logs, we twist this mapping into
  68 # a time => author => message => filelist mapping for each directory.
  69 #
  70 # If we're not using the `--distributed' flag, the directory is always
  71 # considered to be `./', even as descend into subdirectories.
  72
  73 # Call Tree
  74
  75 # name                         number of lines (10.xii.03)
  76 # parse_options                         192
  77 # derive_changelog                       13
  78 # +-maybe_grab_accumulation_date         38
  79 # +-read_changelog                      277
  80 #   +-maybe_read_user_map_file           94
  81 #     +-run_ext                           9
  82 #   +-read_file_path                     29
  83 #   +-read_symbolic_name                 43
  84 #   +-read_revision                      49
  85 #   +-read_date_author_and_state         25
  86 #     +-parse_date_author_and_state      20
  87 #   +-read_branches                      36
  88 # +-output_changelog                    424
  89 #   +-pretty_file_list                  290
  90 #     +-common_path_prefix               35
  91 #   +-preprocess_msg_text                30
  92 #     +-min                               1
  93 #   +-mywrap                             16
  94 #   +-last_line_len                       5
  95 #   +-wrap_log_entry                    177
  96 #
  97 # Utilities
  98 #
  99 # xml_escape                              6
 100 # slurp_file                             11
 101 # debug                                   5
 102 # version                                 2
 103 # usage                                 142
 104
 105 # -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
 106 #
 107 # Note about a bug-slash-opportunity:
 108 # -----------------------------------
 109 #
 110 # There's a bug in Text::Wrap, which affects cvs2cl.  This script
 111 # reveals it:
 112 #
 113 #   #!/usr/bin/perl -w
 114 #
 115 #   use Text::Wrap;
 116 #
 117 #   my $test_text =
 118 #   "This script demonstrates a bug in Text::Wrap.  The very long line
 119 #   following this paragraph will be relocated relative to the surrounding
 120 #   text:
 121 #
 122 #   ====================================================================
 123 #
 124 #   See?  When the bug happens, we'll get the line of equal signs below
 125 #   this paragraph, even though it should be above.";
 126 #
 127 #
 128 #   # Print out the test text with no wrapping:
 129 #   print "$test_text";
 130 #   print "\n";
 131 #   print "\n";
 132 #
 133 #   # Now print it out wrapped, and see the bug:
 134 #   print wrap ("\t", "        ", "$test_text");
 135 #   print "\n";
 136 #   print "\n";
 137 #
 138 # If the line of equal signs were one shorter, then the bug doesn't
 139 # happen.  Interesting.
 140 #
 141 # Anyway, rather than fix this in Text::Wrap, we might as well write a
 142 # new wrap() which has the following much-needed features:
 143 #
 144 # * initial indentation, like current Text::Wrap()
 145 # * subsequent line indentation, like current Text::Wrap()
 146 # * user chooses among: force-break long words, leave them alone, or die()?
 147 # * preserve existing indentation: chopped chunks from an indented line
 148 #   are indented by same (like this line, not counting the asterisk!)
 149 # * optional list of things to preserve on line starts, default ">"
 150 #
 151 # Note that the last two are essentially the same concept, so unify in
 152 # implementation and give a good interface to controlling them.
 153 #
 154 # And how about:
 155 #
 156 # Optionally, when encounter a line pre-indented by same as previous
 157 # line, then strip the newline and refill, but indent by the same.
 158 # Yeah...
 159
 160 # Globals --------------------------------------------------------------------
 161
 162 # In case we have to print it out:
 163 my $VERSION = '$Revision$';
 164 $VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
 165
 166 ## Vars set by options:
 167
 168 # Print debugging messages?
 169 my $Debug = 0;
 170
 171 # Just show version and exit?
 172 my $Print_Version = 0;
 173
 174 # Just print usage message and exit?
 175 my $Print_Usage = 0;
 176
 177 # What file should we generate (defaults to "ChangeLog")?
 178 my $Log_File_Name = "ChangeLog";
 179
 180 # Grab most recent entry date from existing ChangeLog file, just add
 181 # to that ChangeLog.
 182 my $Cumulative = 0;
 183
 184 # `cvs log -d`, this will repeat the last entry in the old log.  This is OK,
 185 # as it guarantees at least one entry in the update changelog, which means
 186 # that there will always be a date to extract for the next update.  The repeat
 187 # entry can be removed in postprocessing, if necessary.
 188
 189 # MJP 2003-08-02
 190 # I don't think this actually does anything useful
 191 my $Update = 0;
 192
 193 # Expand usernames to email addresses based on a map file?
 194 my $User_Map_File = '';
 195 my $User_Passwd_File;
 196 my $Mail_Domain;
 197
 198 # Output log in chronological order? [default is reverse chronological order]
 199 my $Chronological_Order = 0;
 200
 201 # Grab user details via gecos
 202 my $Gecos = 0;
 203
 204 # User domain for gecos email addresses
 205 my $Domain;
 206
 207 # Output to a file or to stdout?
 208 my $Output_To_Stdout = 0;
 209
 210 # Eliminate empty log messages?
 211 my $Prune_Empty_Msgs = 0;
 212
 213 # Tags of which not to output
 214 my %ignore_tags;
 215
 216 # Show only revisions with Tags
 217 my %show_tags;
 218
 219 # Don't call Text::Wrap on the body of the message
 220 my $No_Wrap = 0;
 221
 222 # Indentation of log messages
 223 my $Indent = "\t";
 224
 225 # Don't do any pretty print processing
 226 my $Summary = 0;
 227
 228 # Separates header from log message.  Code assumes it is either " " or
 229 # "\n\n", so if there's ever an option to set it to something else,
 230 # make sure to go through all conditionals that use this var.
 231 my $After_Header = " ";
 232
 233 # XML Encoding
 234 my $XML_Encoding = '';
 235
 236 # Format more for programs than for humans.
 237 my $XML_Output = 0;
 238 my $No_XML_Namespace = 0;
 239 my $No_XML_ISO_Date = 0;
 240
 241 # Do some special tweaks for log data that was written in FSF
 242 # ChangeLog style.
 243 my $FSF_Style = 0;
 244
 245 # Show times in UTC instead of local time
 246 my $UTC_Times = 0;
 247
 248 # Show times in output?
 249 my $Show_Times = 1;
 250
 251 # Show day of week in output?
 252 my $Show_Day_Of_Week = 0;
 253
 254 # Show revision numbers in output?
 255 my $Show_Revisions = 0;
 256
 257 # Show dead files in output?
 258 my $Show_Dead = 0;
 259
 260 # Hide dead trunk files which were created as a result of additions on a
 261 # branch?
 262 my $Hide_Branch_Additions = 1;
 263
 264 # Show tags (symbolic names) in output?
 265 my $Show_Tags = 0;
 266
 267 # Show tags separately in output?
 268 my $Show_Tag_Dates = 0;
 269
 270 # Show branches by symbolic name in output?
 271 my $Show_Branches = 0;
 272
 273 # Show only revisions on these branches or their ancestors.
 274 my @Follow_Branches;
 275 # Show only revisions on these branches or their ancestors; ignore descendent
 276 # branches.
 277 my @Follow_Only;
 278
 279 # Don't bother with files matching this regexp.
 280 my @Ignore_Files;
 281
 282 # How exactly we match entries.  We definitely want "o",
 283 # and user might add "i" by using --case-insensitive option.
 284 my $Case_Insensitive = 0;
 285
 286 # Maybe only show log messages matching a certain regular expression.
 287 my $Regexp_Gate = '';
 288
 289 # Pass this global option string along to cvs, to the left of `log':
 290 my $Global_Opts = '';
 291
 292 # Pass this option string along to the cvs log subcommand:
 293 my $Command_Opts = '';
 294
 295 # Read log output from stdin instead of invoking cvs log?
 296 my $Input_From_Stdin = 0;
 297
 298 # Don't show filenames in output.
 299 my $Hide_Filenames = 0;
 300
 301 # Don't shorten directory names from filenames.
 302 my $Common_Dir = 1;
 303
 304 # Max checkin duration. CVS checkin is not atomic, so we may have checkin
 305 # times that span a range of time. We assume that checkins will last no
 306 # longer than $Max_Checkin_Duration seconds, and that similarly, no
 307 # checkins will happen from the same users with the same message less
 308 # than $Max_Checkin_Duration seconds apart.
 309 my $Max_Checkin_Duration = 180;
 310
 311 # What to put at the front of [each] ChangeLog.
 312 my $ChangeLog_Header = '';
 313
 314 # Whether to enable 'delta' mode, and for what start/end tags.
 315 my $Delta_Mode = 0;
 316 my $Delta_From = '';
 317 my $Delta_To = '';
 318
 319 my $TestCode;
 320
 321 # Whether to parse filenames from the RCS filename, and if so what
 322 # prefix to strip.
 323 my $RCS_Root;
 324
 325 # Whether to output information on the # of lines added and removed
 326 # by each file modification.
 327 my $Show_Lines_Modified = 0;
 328
 329 ## end vars set by options.
 330
 331 # latest observed times for the start/end tags in delta mode
 332 my $Delta_StartTime = 0;
 333 my $Delta_EndTime = 0;
 334
 335 my $No_Ancestors = 0;
 336
 337 my $No_Extra_Indent = 0;
 338
 339 my $GroupWithinDate = 0;
 340
 341 # ----------------------------------------------------------------------------
 342
 343 package CVS::Utils::ChangeLog::EntrySet;
 344
 345 sub new {
 346   my $class = shift;
 347   my %self;
 348   bless \%self, $class;
 349 }
 350
 351 # -------------------------------------
 352
 353 sub output_changelog {
 354   my $output_type = $XML_Output ? 'XML' : 'Text';
 355   my $output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}";
 356   my $output = $output_class->new(follow_branches => \@Follow_Branches,
 357                                   follow_only     => \@Follow_Only,
 358                                   ignore_tags     => \%ignore_tags,
 359                                   show_tags       => \%show_tags,
 360                                  );
 361   $output->output_changelog(@_);
 362 }
 363
 364 # -------------------------------------
 365
 366 sub add_fileentry {
 367   my ($self, $file_full_path, $time, $revision, $state, $lines,
 368       $branch_names, $branch_roots, $branch_numbers,
 369       $symbolic_names, $author, $msg_txt) = @_;
 370
 371       my $qunk =
 372         CVS::Utils::ChangeLog::FileEntry->new($file_full_path, $time, $revision,
 373                                               $state, $lines,
 374                                               $branch_names, $branch_roots,
 375                                               $branch_numbers,
 376                                               $symbolic_names);
 377
 378       # We might be including revision numbers and/or tags and/or
 379       # branch names in the output.  Most of the code from here to
 380       # loop-end deals with organizing these in qunk.
 381
 382       unless ( $Hide_Branch_Additions
 383                and
 384                $msg_txt =~ /file .+ was initially added on branch \S+./ ) {
 385         # Add this file to the list
 386         # (We use many spoonfuls of autovivication magic. Hashes and arrays
 387         # will spring into existence if they aren't there already.)
 388
 389         &main::debug ("(pushing log msg for ". $qunk->dir_key . $qunk->filename . ")\n");
 390
 391         # Store with the files in this commit.  Later we'll loop through
 392         # again, making sure that revisions with the same log message
 393         # and nearby commit times are grouped together as one commit.
 394         $self->{$qunk->dir_key}{$author}{$time}{$msg_txt} =
 395           CVS::Utils::ChangeLog::Message->new($msg_txt)
 396               unless exists $self->{$qunk->dir_key}{$author}{$time}{$msg_txt};
 397         $self->{$qunk->dir_key}{$author}{$time}{$msg_txt}->add_fileentry($qunk);
 398       }
 399
 400 }
 401
 402 # ----------------------------------------------------------------------------
 403
 404 package CVS::Utils::ChangeLog::EntrySet::Output::Text;
 405
 406 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
 407
 408 use File::Basename qw( fileparse );
 409
 410 sub new {
 411   my $class = shift;
 412   my $self = $class->SUPER::new(@_);
 413 }
 414
 415 # -------------------------------------
 416
 417 sub wday {
 418   my $self = shift; my $class = ref $self;
 419   my ($wday) = @_;
 420
 421   return $Show_Day_Of_Week ? ' ' . $class->weekday_en($wday) : '';
 422 }
 423
 424 # -------------------------------------
 425
 426 sub header_line {
 427   my $self = shift;
 428   my ($time, $author, $lastdate) = @_;
 429
 430   my $header_line = '';
 431
 432   my (undef,$min,$hour,$mday,$mon,$year,$wday)
 433     = $UTC_Times ? gmtime($time) : localtime($time);
 434
 435   my $date = $self->fdatetime($time);
 436
 437   if ($Show_Times) {
 438     $header_line =
 439       sprintf "%s  %s\n\n", $date, $author;
 440   } else {
 441     if ( ! defined $lastdate or $date ne $lastdate or ! $GroupWithinDate ) {
 442       if ( $GroupWithinDate ) {
 443         $header_line = "$date\n\n";
 444       } else {
 445         $header_line = "$date  $author\n\n";
 446       }
 447     } else {
 448       $header_line = '';
 449     }
 450   }
 451 }
 452
 453 # -------------------------------------
 454
 455 sub preprocess_msg_text {
 456   my $self = shift;
 457   my ($text) = @_;
 458
 459   $text = $self->SUPER::preprocess_msg_text($text);
 460
 461   unless ( $No_Wrap ) {
 462     # Strip off lone newlines, but only for lines that don't begin with
 463     # whitespace or a mail-quoting character, since we want to preserve
 464     # that kind of formatting.  Also don't strip newlines that follow a
 465     # period; we handle those specially next.  And don't strip
 466     # newlines that precede an open paren.
 467     1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g;
 468
 469     # If a newline follows a period, make sure that when we bring up the
 470     # bottom sentence, it begins with two spaces.
 471     1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g;
 472   }
 473
 474   return $text;
 475 }
 476
 477 # -------------------------------------
 478
 479 # Here we take a bunch of qunks and convert them into printed
 480 # summary that will include all the information the user asked for.
 481 sub pretty_file_list {
 482   my $self = shift;
 483
 484   return ''
 485     if $Hide_Filenames;
 486
 487   my $qunksref = shift;
 488
 489   my @filenames;
 490   my $beauty = '';          # The accumulating header string for this entry.
 491   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
 492   my %unanimous_tags;       # Tags found in all qunks
 493   my %all_branches;         # Branches found in any qunk
 494   my $fbegun = 0;           # Did we begin printing filenames yet?
 495
 496   my ($common_dir, $qunkrefs) =
 497     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref);
 498
 499   my @qunkrefs = @$qunkrefs;
 500
 501   # Not XML output, so complexly compactify for chordate consumption.  At this
 502   # point we have enough global information about all the qunks to organize
 503   # them non-redundantly for output.
 504
 505   if ($common_dir) {
 506     # Note that $common_dir still has its trailing slash
 507     $beauty .= "$common_dir: ";
 508   }
 509
 510   if ($Show_Branches)
 511   {
 512     # For trailing revision numbers.
 513     my @brevisions;
 514
 515     foreach my $branch (keys (%all_branches))
 516     {
 517       foreach my $qunkref (@qunkrefs)
 518       {
 519         if ((defined ($qunkref->branch))
 520             and ($qunkref->branch eq $branch))
 521         {
 522           if ($fbegun) {
 523             # kff todo: comma-delimited in XML too?  Sure.
 524             $beauty .= ", ";
 525           }
 526           else {
 527             $fbegun = 1;
 528           }
 529           my $fname = substr ($qunkref->filename, length ($common_dir));
 530           $beauty .= $fname;
 531           $qunkref->{'printed'} = 1;  # Just setting a mark bit, basically
 532
 533           if ( $Show_Tags and defined $qunkref->tags ) {
 534             my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
 535
 536             if (@tags) {
 537               $beauty .= " (tags: ";
 538               $beauty .= join (', ', @tags);
 539               $beauty .= ")";
 540             }
 541           }
 542
 543           if ($Show_Revisions) {
 544             # Collect the revision numbers' last components, but don't
 545             # print them -- they'll get printed with the branch name
 546             # later.
 547             $qunkref->revision =~ /.+\.([\d]+)$/;
 548             push (@brevisions, $1);
 549
 550             # todo: we're still collecting branch roots, but we're not
 551             # showing them anywhere.  If we do show them, it would be
 552             # nifty to just call them revision "0" on a the branch.
 553             # Yeah, that's the ticket.
 554           }
 555         }
 556       }
 557       $beauty .= " ($branch";
 558       if (@brevisions) {
 559         if ((scalar (@brevisions)) > 1) {
 560           $beauty .= ".[";
 561           $beauty .= (join (',', @brevisions));
 562           $beauty .= "]";
 563         }
 564         else {
 565           # Square brackets are spurious here, since there's no range to
 566           # encapsulate
 567           $beauty .= ".$brevisions[0]";
 568         }
 569       }
 570       $beauty .= ")";
 571     }
 572   }
 573
 574   # Okay; any qunks that were done according to branch are taken care
 575   # of, and marked as printed.  Now print everyone else.
 576
 577   my %fileinfo_printed;
 578   foreach my $qunkref (@qunkrefs)
 579   {
 580     next if (defined ($qunkref->{'printed'}));   # skip if already printed
 581
 582     my $b = substr ($qunkref->filename, length ($common_dir));
 583     # todo: Shlomo's change was this:
 584     # $beauty .= substr ($qunkref->filename,
 585     #              (($common_dir eq "./") ? '' : length ($common_dir)));
 586     $qunkref->{'printed'} = 1;  # Set a mark bit.
 587
 588     if ($Show_Revisions || $Show_Tags || $Show_Dead)
 589     {
 590       my $started_addendum = 0;
 591
 592       if ($Show_Revisions) {
 593         $started_addendum = 1;
 594         $b .= " (";
 595         $b .= $qunkref->revision;
 596       }
 597       if ($Show_Dead && $qunkref->state =~ /dead/)
 598       {
 599         # Deliberately not using $started_addendum. Keeping it simple.
 600         $b .= "[DEAD]";
 601       }
 602       if ($Show_Tags && (defined $qunkref->tags)) {
 603         my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
 604         if ((scalar (@tags)) > 0) {
 605           if ($started_addendum) {
 606             $b .= ", ";
 607           }
 608           else {
 609             $b .= " (tags: ";
 610           }
 611           $b .= join (', ', @tags);
 612           $started_addendum = 1;
 613         }
 614       }
 615       if ($started_addendum) {
 616         $b .= ")";
 617       }
 618     }
 619
 620     unless ( exists $fileinfo_printed{$b} ) {
 621       if ($fbegun) {
 622         $beauty .= ", ";
 623       } else {
 624         $fbegun = 1;
 625       }
 626       $beauty .= $b, $fileinfo_printed{$b} = 1;
 627     }
 628   }
 629
 630   # Unanimous tags always come last.
 631   if ($Show_Tags && %unanimous_tags)
 632   {
 633     $beauty .= " (utags: ";
 634     $beauty .= join (', ', sort keys (%unanimous_tags));
 635     $beauty .= ")";
 636   }
 637
 638   # todo: still have to take care of branch_roots?
 639
 640   $beauty = "$beauty:";
 641
 642   return $beauty;
 643 }
 644
 645 # -------------------------------------
 646
 647 sub output_tagdate {
 648   my $self = shift;
 649   my ($fh, $time, $tag) = @_;
 650
 651   my $fdatetime = $self->fdatetime($time);
 652   print $fh "$fdatetime  tag $tag\n\n";
 653   return;
 654 }
 655
 656 # -------------------------------------
 657
 658 sub format_body {
 659   my $self = shift;
 660   my ($msg, $files, $qunklist) = @_;
 661
 662   my $body;
 663
 664   if ( $No_Wrap and ! $Summary ) {
 665     $msg = $self->preprocess_msg_text($msg);
 666     $files = $self->mywrap("\t", "\t  ", "* $files");
 667     $msg =~ s/\n(.+)/\n$Indent$1/g;
 668     unless ($After_Header eq " ") {
 669       $msg =~ s/^(.+)/$Indent$1/g;
 670     }
 671     if ( $Hide_Filenames ) {
 672       $body = $After_Header . $msg;
 673     } else {
 674       $body = $files . $After_Header . $msg;
 675     }
 676   } elsif ( $Summary ) {
 677     my ($filelist, $qunk);
 678     my (@DeletedQunks, @AddedQunks, @ChangedQunks);
 679
 680     $msg = $self->preprocess_msg_text($msg);
 681     #
 682     #     Sort the files (qunks) according to the operation that was
 683     # performed.  Files which were added have no line change
 684     # indicator, whereas deleted files have state dead.
 685     #
 686     foreach $qunk ( @$qunklist ) {
 687       if ( "dead" eq $qunk->state) {
 688         push @DeletedQunks, $qunk;
 689       } elsif ( ! defined $qunk->lines ) {
 690         push @AddedQunks, $qunk;
 691       } else {
 692         push @ChangedQunks, $qunk;
 693       }
 694     }
 695     #
 696     #     The qunks list was  originally in tree search order.  Let's
 697     # get that back.  The lists, if they exist, will be reversed upon
 698     # processing.
 699     #
 700
 701     #
 702     #     Now write the three sections onto $filelist
 703     #
 704     if ( @DeletedQunks ) {
 705       $filelist .= "\tDeleted:\n";
 706       foreach $qunk ( @DeletedQunks ) {
 707         $filelist .= "\t\t" . $qunk->filename;
 708         $filelist .= " (" . $qunk->revision . ")";
 709         $filelist .= "\n";
 710       }
 711       undef @DeletedQunks;
 712     }
 713
 714     if ( @AddedQunks ) {
 715       $filelist .= "\tAdded:\n";
 716       foreach $qunk (@AddedQunks) {
 717         $filelist .= "\t\t" . $qunk->filename;
 718         $filelist .= " (" . $qunk->revision . ")";
 719         $filelist .= "\n";
 720       }
 721       undef @AddedQunks ;
 722     }
 723
 724     if ( @ChangedQunks ) {
 725       $filelist .= "\tChanged:\n";
 726       foreach $qunk (@ChangedQunks) {
 727         $filelist .= "\t\t" . $qunk->filename;
 728         $filelist .= " (" . $qunk->revision . ")";
 729         $filelist .= ", \"" . $qunk->state . "\"";
 730         $filelist .= ", lines: " . $qunk->lines;
 731         $filelist .= "\n";
 732       }
 733       undef @ChangedQunks;
 734     }
 735
 736     chomp $filelist;
 737
 738     if ( $Hide_Filenames ) {
 739       $filelist = '';
 740     }
 741
 742     $msg =~ s/\n(.*)/\n$Indent$1/g;
 743     unless ( $After_Header eq " " or $FSF_Style ) {
 744       $msg =~ s/^(.*)/$Indent$1/g;
 745     }
 746
 747     unless ( $No_Wrap ) {
 748       if ( $FSF_Style ) {
 749         $msg = $self->wrap_log_entry($msg, '', 69, 69);
 750         chomp($msg);
 751         chomp($msg);
 752       } else {
 753         $msg = $self->mywrap('', $Indent, "$msg");
 754         $msg =~ s/[ \t]+\n/\n/g;
 755       }
 756     }
 757
 758     $body = $filelist . $After_Header . $msg;
 759   } else {  # do wrapping, either FSF-style or regular
 760     my $latter_wrap = $No_Extra_Indent ? $Indent : "$Indent  ";
 761
 762     if ( $FSF_Style ) {
 763       $files = $self->mywrap($Indent, $latter_wrap, "* $files");
 764
 765       my $files_last_line_len = 0;
 766       if ( $After_Header eq " " ) {
 767         $files_last_line_len = $self->last_line_len($files);
 768         $files_last_line_len += 1;  # for $After_Header
 769       }
 770
 771       $msg = $self->wrap_log_entry($msg, $latter_wrap, 69-$files_last_line_len, 69);
 772       $body = $files . $After_Header . $msg;
 773     } else {  # not FSF-style
 774       $msg = $self->preprocess_msg_text($msg);
 775       $body = $files . $After_Header . $msg;
 776       $body = $self->mywrap($Indent, $latter_wrap, "* $body");
 777       $body =~ s/[ \t]+\n/\n/g;
 778     }
 779   }
 780
 781   return $body;
 782 }
 783
 784 # ----------------------------------------------------------------------------
 785
 786 package CVS::Utils::ChangeLog::EntrySet::Output::XML;
 787
 788 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
 789
 790 use File::Basename qw( fileparse );
 791
 792 sub new {
 793   my $class = shift;
 794   my $self = $class->SUPER::new(@_);
 795 }
 796
 797 # -------------------------------------
 798
 799 sub header_line {
 800   my $self = shift;
 801   my ($time, $author, $lastdate) = @_;
 802
 803   my $header_line = '';
 804
 805   my $isoDate;
 806
 807   my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
 808
 809   # Ideally, this would honor $UTC_Times and use +HH:MM syntax
 810   $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
 811                      $y + 1900, $m + 1, $d, $H, $M, $S);
 812
 813   my (undef,$min,$hour,$mday,$mon,$year,$wday)
 814     = $UTC_Times ? gmtime($time) : localtime($time);
 815
 816   my $date = $self->fdatetime($time);
 817   $wday = $self->wday($wday);
 818
 819   $header_line =
 820     sprintf ("<date>%4u-%02u-%02u</date>\n${wday}<time>%02u:%02u</time>\n",
 821              $year+1900, $mon+1, $mday, $hour, $min);
 822   $header_line .= "<isoDate>$isoDate</isoDate>\n"
 823     unless $No_XML_ISO_Date;
 824   $header_line .= sprintf("<author>%s</author>\n" , $author);
 825 }
 826
 827 # -------------------------------------
 828
 829 sub wday {
 830   my $self = shift; my $class = ref $self;
 831   my ($wday) = @_;
 832
 833   return '<weekday>' . $class->weekday_en($wday) . "</weekday>\n";
 834 }
 835
 836 # -------------------------------------
 837
 838 sub escape {
 839   my $self = shift;
 840
 841   my $txt = shift;
 842   $txt =~ s/&/&amp;/g;
 843   $txt =~ s/</&lt;/g;
 844   $txt =~ s/>/&gt;/g;
 845   return $txt;
 846 }
 847
 848 # -------------------------------------
 849
 850 sub output_header {
 851   my $self = shift;
 852   my ($fh) = @_;
 853
 854   my $encoding    =
 855     length $XML_Encoding ? qq'encoding="$XML_Encoding"' : '';
 856   my $version     = 'version="1.0"';
 857   my $declaration =
 858     sprintf '<?xml %s?>', join ' ', grep length, $version, $encoding;
 859   my $root        =
 860     $No_XML_Namespace ?
 861       '<changelog>'     :
 862         '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
 863   print $fh "$declaration\n\n$root\n\n";
 864 }
 865
 866 # -------------------------------------
 867
 868 sub output_footer {
 869   my $self = shift;
 870   my ($fh) = @_;
 871
 872   print $fh "</changelog>\n";
 873 }
 874
 875 # -------------------------------------
 876
 877 sub preprocess_msg_text {
 878   my $self = shift;
 879   my ($text) = @_;
 880
 881   $text = $self->SUPER::preprocess_msg_text($text);
 882
 883   $text = $self->escape($text);
 884   chomp $text;
 885   $text = "<msg>${text}</msg>\n";
 886
 887   return $text;
 888 }
 889
 890 # -------------------------------------
 891
 892 # Here we take a bunch of qunks and convert them into a printed
 893 # summary that will include all the information the user asked for.
 894 sub pretty_file_list {
 895   my $self = shift;
 896   my ($qunksref) = @_;
 897
 898   my $beauty = '';          # The accumulating header string for this entry.
 899   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
 900   my %unanimous_tags;       # Tags found in all qunks
 901   my %all_branches;         # Branches found in any qunk
 902   my $fbegun = 0;           # Did we begin printing filenames yet?
 903
 904   my ($common_dir, $qunkrefs) =
 905     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches),
 906       $qunksref);
 907
 908   my @qunkrefs = @$qunkrefs;
 909
 910   # If outputting XML, then our task is pretty simple, because we
 911   # don't have to detect common dir, common tags, branch prefixing,
 912   # etc.  We just output exactly what we have, and don't worry about
 913   # redundancy or readability.
 914
 915   foreach my $qunkref (@qunkrefs)
 916   {
 917     my $filename    = $qunkref->filename;
 918     my $state       = $qunkref->state;
 919     my $revision    = $qunkref->revision;
 920     my $tags        = $qunkref->tags;
 921     my $branch      = $qunkref->branch;
 922     my $branchroots = $qunkref->roots;
 923     my $lines       = $qunkref->lines;
 924
 925     $filename = $self->escape($filename);   # probably paranoia
 926     $revision = $self->escape($revision);   # definitely paranoia
 927
 928     $beauty .= "<file>\n";
 929     $beauty .= "<name>${filename}</name>\n";
 930     $beauty .= "<cvsstate>${state}</cvsstate>\n";
 931     $beauty .= "<revision>${revision}</revision>\n";
 932
 933     if ($Show_Lines_Modified
 934         && $lines && $lines =~ m/\+(\d+)\s+-(\d+)/) {
 935         $beauty .= "<linesadded>$1</linesadded>\n";
 936         $beauty .= "<linesremoved>$2</linesremoved>\n";
 937     }
 938
 939     if ($branch) {
 940       $branch   = $self->escape($branch);     # more paranoia
 941       $beauty .= "<branch>${branch}</branch>\n";
 942     }
 943     foreach my $tag (@$tags) {
 944       $tag = $self->escape($tag);  # by now you're used to the paranoia
 945       $beauty .= "<tag>${tag}</tag>\n";
 946     }
 947     foreach my $root (@$branchroots) {
 948       $root = $self->escape($root);  # which is good, because it will continue
 949       $beauty .= "<branchroot>${root}</branchroot>\n";
 950     }
 951     $beauty .= "</file>\n";
 952   }
 953
 954   # Theoretically, we could go home now.  But as long as we're here,
 955   # let's print out the common_dir and utags, as a convenience to
 956   # the receiver (after all, earlier code calculated that stuff
 957   # anyway, so we might as well take advantage of it).
 958
 959   if ((scalar (keys (%unanimous_tags))) > 1) {
 960     foreach my $utag ((keys (%unanimous_tags))) {
 961       $utag = $self->escape($utag);   # the usual paranoia
 962       $beauty .= "<utag>${utag}</utag>\n";
 963     }
 964   }
 965   if ($common_dir) {
 966     $common_dir = $self->escape($common_dir);
 967     $beauty .= "<commondir>${common_dir}</commondir>\n";
 968   }
 969
 970   # That's enough for XML, time to go home:
 971   return $beauty;
 972 }
 973
 974 # -------------------------------------
 975
 976 sub output_tagdate {
 977   my $self = shift;
 978   my ($fh, $time, $tag) = @_;
 979
 980   my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
 981
 982   # Ideally, this would honor $UTC_Times and use +HH:MM syntax
 983   my $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
 984                        $y + 1900, $m + 1, $d, $H, $M, $S);
 985
 986   print $fh "<tagdate>\n";
 987   print $fh "<tagisodate>$isoDate</tagisodate>\n";
 988   print $fh "<tagdatetag>$tag</tagdatetag>\n";
 989   print $fh "</tagdate>\n\n";
 990   return;
 991 }
 992
 993 # -------------------------------------
 994
 995 sub output_entry {
 996   my $self = shift;
 997   my ($fh, $entry) = @_;
 998   print $fh "<entry>\n$entry</entry>\n\n";
 999 }
1000
1001 # -------------------------------------
1002
1003 sub format_body {
1004   my $self = shift;
1005   my ($msg, $files, $qunklist) = @_;
1006
1007   $msg = $self->preprocess_msg_text($msg);
1008   return $files . $msg;
1009 }
1010
1011 # ----------------------------------------------------------------------------
1012
1013 package CVS::Utils::ChangeLog::EntrySet::Output;
1014
1015 use Carp           qw( croak );
1016 use File::Basename qw( fileparse );
1017
1018 # Class Utility Functions -------------
1019
1020 { # form closure
1021
1022 my @weekdays = (qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday));
1023 sub weekday_en {
1024   my $class = shift;
1025   return $weekdays[$_[0]];
1026 }
1027
1028 }
1029
1030 # -------------------------------------
1031
1032 sub new {
1033   my ($proto, %args) = @_;
1034   my $class = ref $proto || $proto;
1035
1036   my $follow_branches = delete $args{follow_branches};
1037   my $follow_only     = delete $args{follow_only};
1038   my $ignore_tags     = delete $args{ignore_tags};
1039   my $show_tags       = delete $args{show_tags};
1040   die "Unrecognized arg to EntrySet::Output::new: '$_'\n"
1041     for keys %args;
1042
1043   bless +{follow_branches => $follow_branches,
1044           follow_only     => $follow_only,
1045           show_tags       => $show_tags,
1046           ignore_tags     => $ignore_tags,
1047          }, $class;
1048 }
1049
1050 # Abstract Subrs ----------------------
1051
1052 sub wday               { croak "Whoops.  Abtract method call (wday).\n" }
1053 sub pretty_file_list   { croak "Whoops.  Abtract method call (pretty_file_list).\n" }
1054 sub output_tagdate     { croak "Whoops.  Abtract method call (output_tagdate).\n" }
1055 sub header_line        { croak "Whoops.  Abtract method call (header_line).\n" }
1056
1057 # Instance Subrs ----------------------
1058
1059 sub output_header { }
1060
1061 # -------------------------------------
1062
1063 sub output_entry {
1064   my $self = shift;
1065   my ($fh, $entry) = @_;
1066   print $fh "$entry\n";
1067 }
1068
1069 # -------------------------------------
1070
1071 sub output_footer { }
1072
1073 # -------------------------------------
1074
1075 sub escape { return $_[1] }
1076
1077 # -------------------------------------
1078
1079 sub _revision_is_wanted {
1080   my ($self, $qunk) = @_;
1081
1082   my ($revision, $branch_numbers) = @{$qunk}{qw( revision branch_numbers )};
1083   my $follow_branches = $self->{follow_branches};
1084   my $follow_only     = $self->{follow_only};
1085
1086   for my $ignore_tag (keys %{$self->{ignore_tags}}) {
1087     return
1088       if defined $qunk->{tags} and grep $_ eq $ignore_tag, @{$qunk->{tags}};
1089   }
1090
1091   if ( keys %{$self->{show_tags}} ) {
1092     for my $show_tag (keys %{$self->{show_tags}}) {
1093       return
1094         if ! defined $qunk->{tags} or ! grep $_ eq $show_tag, @{$qunk->{tags}};
1095     }
1096   }
1097
1098   return 1
1099     unless @$follow_branches + @$follow_only; # no follow is follow all
1100
1101   for my $x (map([$_, 1], @$follow_branches),
1102              map([$_, 0], @$follow_only    )) {
1103     my ($branch, $followsub) = @$x;
1104
1105     # Special case for following trunk revisions
1106     return 1
1107       if $branch =~ /^trunk$/i and $revision =~ /^[0-9]+\.[0-9]+$/;
1108
1109     if ( my $branch_number = $branch_numbers->{$branch} ) {
1110       # Are we on one of the follow branches or an ancestor of same?
1111
1112       # If this revision is a prefix of the branch number, or possibly is less
1113       # in the minormost number, OR if this branch number is a prefix of the
1114       # revision, then yes.  Otherwise, no.
1115
1116       # So below, we determine if any of those conditions are met.
1117
1118       # Trivial case: is this revision on the branch?  (Compare this way to
1119       # avoid regexps that screw up Emacs indentation, argh.)
1120       if ( substr($revision, 0, (length($branch_number) + 1))
1121            eq
1122            ($branch_number . ".") ) {
1123         if ( $followsub ) {
1124           return 1;
1125 #        } elsif ( length($revision) == length($branch_number)+2 ) {
1126         } elsif ( substr($revision, length($branch_number)+1) =~ /^\d+$/ ) {
1127           return 1;
1128         }
1129       } elsif ( length($branch_number) > length($revision)
1130                 and
1131                 ! $No_Ancestors ) {
1132         # Non-trivial case: check if rev is ancestral to branch
1133
1134         # r_left still has the trailing "."
1135         my ($r_left, $r_end) = ($revision =~ /^((?:\d+\.)+)(\d+)$/);
1136
1137         # b_left still has trailing "."
1138         # b_mid has no trailing "."
1139         my ($b_left, $b_mid) = ($branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/);
1140         return 1
1141           if $r_left eq $b_left and $r_end <= $b_mid;
1142       }
1143     }
1144   }
1145
1146   return;
1147 }
1148
1149 # -------------------------------------
1150
1151 sub output_changelog {
1152 my $self = shift; my $class = ref $self;
1153     my ($grand_poobah) = @_;
1154     ### Process each ChangeLog
1155
1156     while (my ($dir,$authorhash) = each %$grand_poobah) {
1157         &main::debug ("DOING DIR: $dir\n");
1158
1159         # Here we twist our hash around, from being
1160         #   author => time => message => filelist
1161         # in %$authorhash to
1162         #   time => author => message => filelist
1163         # in %changelog.
1164         #
1165         # This is also where we merge entries.  The algorithm proceeds
1166         # through the timeline of the changelog with a sliding window of
1167         # $Max_Checkin_Duration seconds; within that window, entries that
1168         # have the same log message are merged.
1169         #
1170         # (To save space, we zap %$authorhash after we've copied
1171         # everything out of it.)
1172
1173         # commit messages matching these will be ignored
1174         # should probably read these in from a file since they
1175         # will mostly change each release, but this is a quick hack!
1176         my @skip = ("Updating mailing lists URL",
1177                     "Updated bug reporting",
1178                     "use base, return true",
1179                     "Removed spurious ",
1180                     "cleaning unnecessary ",
1181                     "Updating emails",
1182                     "Improved POD markup",
1183                     "Fixed spaces",
1184                     "I'll be using bioperl.org mail address",
1185                     "Switched vanilla throw",
1186                     "regexp madness",
1187                     "minor edit",
1188                     "pod",
1189                     "pdoc",
1190                     "email address",
1191                     "typo",
1192                     "be explicit",
1193                     "using 'our'",
1194                     "silly email",
1195                     "regex clarity",
1196                     "polishing",
1197                     "Removed unused \"use vars",
1198                     "return, not return undef",
1199                     "lexically scoped file handles",
1200                     "No setting of own \$VERSION",
1201                     "do not return directly from sort",
1202                     "have NAME match module",
1203                     "Updating URLs",
1204                     "Changing emails",
1205                     "Updated doc",
1206                     "No setting own version",
1207                     "no log message");
1208
1209         # not interested in these files
1210         my %files_to_skip = ( AUTHORS => 1,
1211                               Changes => 1,
1212                               'INSTALL.PROGRAMS' => 1,
1213                               README => 1,
1214                               BUGS => 1,
1215                               INSTALL => 1,
1216                               LICENSE => 1,
1217                               DEPENDENCIES => 1,
1218                               DEPRECATED => 1,
1219                               'INSTALL.WIN' => 1,
1220                               MANIFEST => 1,
1221                               'MANIFEST.SKIP' => 1,
1222                               PLATFORMS => 1);
1223
1224         my %changelog;
1225         while (my ($author,$timehash) = each %$authorhash) {
1226             foreach my $time (sort {$a <=> $b} (keys %$timehash)) {
1227                 next if ($Delta_Mode && (($time <= $Delta_StartTime) || ($time > $Delta_EndTime && $Delta_EndTime)));
1228
1229                 my $msghash = $timehash->{$time};
1230                 MSG: while (my ($msg, $qunklist) = each %$msghash) {
1231                     foreach my $skip (@skip) {
1232                         if ($msg =~ /$skip/i) {
1233                             next MSG;
1234                         }
1235                     }
1236                     if ($msg =~ /merge/i && $msg =~ /head/i) {
1237                         next MSG;
1238                     }
1239
1240                     $msg =~ s/\n/ /g;
1241
1242                     foreach my $file (@{$qunklist->files}) {
1243                         $changelog{$file->filename}{$time} = $msg;
1244                     }
1245                 }
1246             }
1247         }
1248         undef (%$authorhash);
1249
1250         ### Now we can write out the ChangeLog!
1251
1252         my ($logfile_here, $logfile_bak, $tmpfile);
1253         my $lastdate;
1254
1255         if (! $Output_To_Stdout) {
1256             $logfile_here =  $dir . $Log_File_Name;
1257             $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem
1258             $tmpfile      = "${logfile_here}.cvs2cl$$.tmp";
1259             $logfile_bak  = "${logfile_here}.bak";
1260
1261             open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
1262         }
1263         else {
1264             open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
1265         }
1266
1267         print LOG_OUT $ChangeLog_Header;
1268
1269         print LOG_OUT "These are detailed notes on changes made between $Delta_From and $Delta_To.\n\n";
1270
1271         my %tag_date_printed;
1272
1273         $self->output_header(\*LOG_OUT);
1274
1275         my @file_list = sort {$a cmp $b} (keys %changelog);
1276         foreach my $file (@file_list) {
1277             # skip files we don't need to see changes for
1278             next if exists $files_to_skip{$file};
1279             next if $file =~ /^t\//;
1280
1281             # convert module filenames to module name
1282             my $module = $file;
1283             if ($module =~ /^Bio/) {
1284                 $module = '<nowiki>'.join("::", File::Spec->splitdir($file)).'</nowiki>';
1285                 $module =~ s/\.pm//;
1286             }
1287             print LOG_OUT "; $module\n";
1288             foreach my $time (sort {$a <=> $b} keys %{$changelog{$file}}) {
1289                 my $msg = $changelog{$file}{$time};
1290
1291                 # uppercase first letter
1292                 $msg =~ s/^(\w)/\U$1/;
1293
1294                 # link bugs to bugzilla
1295                 $msg =~ s/bug.*(\d{4})/{{Bugzilla|$1}}/i;
1296
1297                 print LOG_OUT ": $msg\n";
1298             }
1299             print LOG_OUT "\n";
1300         }
1301
1302         $self->output_footer(\*LOG_OUT);
1303
1304         close (LOG_OUT);
1305
1306         if ( ! $Output_To_Stdout ) {
1307             # If accumulating, append old data to new before renaming.  But
1308             # don't append the most recent entry, since it's already in the
1309             # new log due to CVS's idiosyncratic interpretation of "log -d".
1310             if ($Cumulative && -f $logfile_here) {
1311               open NEW_LOG, ">>$tmpfile"
1312                 or die "trouble appending to $tmpfile ($!)";
1313
1314               open OLD_LOG, "<$logfile_here"
1315                 or die "trouble reading from $logfile_here ($!)";
1316
1317               my $started_first_entry = 0;
1318               my $passed_first_entry = 0;
1319               while (<OLD_LOG>) {
1320                 if ( ! $passed_first_entry ) {
1321                   if ( ( ! $started_first_entry )
1322                       and /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
1323                     $started_first_entry = 1;
1324                   } elsif ( /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
1325                     $passed_first_entry = 1;
1326                     print NEW_LOG $_;
1327                   }
1328                 } else {
1329                   print NEW_LOG $_;
1330                 }
1331               }
1332
1333               close NEW_LOG;
1334               close OLD_LOG;
1335             }
1336
1337             if ( -f $logfile_here ) {
1338               rename $logfile_here, $logfile_bak;
1339             }
1340             rename $tmpfile, $logfile_here;
1341         }
1342     }
1343 }
1344
1345 # -------------------------------------
1346
1347 # Don't call this wrap, because with 5.5.3, that clashes with the
1348 # (unconditional :-( ) export of wrap() from Text::Wrap
1349 sub mywrap {
1350   my $self = shift;
1351   my ($indent1, $indent2, @text) = @_;
1352   # If incoming text looks preformatted, don't get clever
1353   my $text = Text::Wrap::wrap($indent1, $indent2, @text);
1354   if ( grep /^\s+/m, @text ) {
1355     return $text;
1356   }
1357   my @lines = split /\n/, $text;
1358   $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e;
1359   $lines[0] =~ s/^$indent1\s+/$indent1/;
1360   s/^$indent2\s+/$indent2/
1361     for @lines[1..$#lines];
1362   my $newtext = join "\n", @lines;
1363   $newtext .= "\n"
1364     if substr($text, -1) eq "\n";
1365   return $newtext;
1366 }
1367
1368 # -------------------------------------
1369
1370 sub preprocess_msg_text {
1371   my $self = shift;
1372   my ($text) = @_;
1373
1374   # Strip out carriage returns (as they probably result from DOSsy editors).
1375   $text =~ s/\r\n/\n/g;
1376   # If it *looks* like two newlines, make it *be* two newlines:
1377   $text =~ s/\n\s*\n/\n\n/g;
1378
1379   return $text;
1380 }
1381
1382 # -------------------------------------
1383
1384 sub last_line_len {
1385   my $self = shift;
1386
1387   my $files_list = shift;
1388   my @lines = split (/\n/, $files_list);
1389   my $last_line = pop (@lines);
1390   return length ($last_line);
1391 }
1392
1393 # -------------------------------------
1394
1395 # A custom wrap function, sensitive to some common constructs used in
1396 # log entries.
1397 sub wrap_log_entry {
1398   my $self = shift;
1399
1400   my $text = shift;                  # The text to wrap.
1401   my $left_pad_str = shift;          # String to pad with on the left.
1402
1403   # These do NOT take left_pad_str into account:
1404   my $length_remaining = shift;      # Amount left on current line.
1405   my $max_line_length  = shift;      # Amount left for a blank line.
1406
1407   my $wrapped_text = '';             # The accumulating wrapped entry.
1408   my $user_indent = '';              # Inherited user_indent from prev line.
1409
1410   my $first_time = 1;                # First iteration of the loop?
1411   my $suppress_line_start_match = 0; # Set to disable line start checks.
1412
1413   my @lines = split (/\n/, $text);
1414   while (@lines)   # Don't use `foreach' here, it won't work.
1415   {
1416     my $this_line = shift (@lines);
1417     chomp $this_line;
1418
1419     if ($this_line =~ /^(\s+)/) {
1420       $user_indent = $1;
1421     }
1422     else {
1423       $user_indent = '';
1424     }
1425
1426     # If it matches any of the line-start regexps, print a newline now...
1427     if ($suppress_line_start_match)
1428     {
1429       $suppress_line_start_match = 0;
1430     }
1431     elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
1432            || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
1433            || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
1434            || ($this_line =~ /^(\s+)(\S+)/)
1435            || ($this_line =~ /^(\s*)- +/)
1436            || ($this_line =~ /^()\s*$/)
1437            || ($this_line =~ /^(\s*)\*\) +/)
1438            || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
1439     {
1440       # Make a line break immediately, unless header separator is set
1441       # and this line is the first line in the entry, in which case
1442       # we're getting the blank line for free already and shouldn't
1443       # add an extra one.
1444       unless (($After_Header ne " ") and ($first_time))
1445       {
1446         if ($this_line =~ /^()\s*$/) {
1447           $suppress_line_start_match = 1;
1448           $wrapped_text .= "\n${left_pad_str}";
1449         }
1450
1451         $wrapped_text .= "\n${left_pad_str}";
1452       }
1453
1454       $length_remaining = $max_line_length - (length ($user_indent));
1455     }
1456
1457     # Now that any user_indent has been preserved, strip off leading
1458     # whitespace, so up-folding has no ugly side-effects.
1459     $this_line =~ s/^\s*//;
1460
1461     # Accumulate the line, and adjust parameters for next line.
1462     my $this_len = length ($this_line);
1463     if ($this_len == 0)
1464     {
1465       # Blank lines should cancel any user_indent level.
1466       $user_indent = '';
1467       $length_remaining = $max_line_length;
1468     }
1469     elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
1470     {
1471       # Walk backwards from the end.  At first acceptable spot, break
1472       # a new line.
1473       my $idx = $length_remaining - 1;
1474       if ($idx < 0) { $idx = 0 };
1475       while ($idx > 0)
1476       {
1477         if (substr ($this_line, $idx, 1) =~ /\s/)
1478         {
1479           my $line_now = substr ($this_line, 0, $idx);
1480           my $next_line = substr ($this_line, $idx);
1481           $this_line = $line_now;
1482
1483           # Clean whitespace off the end.
1484           chomp $this_line;
1485
1486           # The current line is ready to be printed.
1487           $this_line .= "\n${left_pad_str}";
1488
1489           # Make sure the next line is allowed full room.
1490           $length_remaining = $max_line_length - (length ($user_indent));
1491
1492           # Strip next_line, but then preserve any user_indent.
1493           $next_line =~ s/^\s*//;
1494
1495           # Sneak a peek at the user_indent of the upcoming line, so
1496           # $next_line (which will now precede it) can inherit that
1497           # indent level.  Otherwise, use whatever user_indent level
1498           # we currently have, which might be none.
1499           my $next_next_line = shift (@lines);
1500           if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
1501             $next_line = $1 . $next_line if (defined ($1));
1502             # $length_remaining = $max_line_length - (length ($1));
1503             $next_next_line =~ s/^\s*//;
1504           }
1505           else {
1506             $next_line = $user_indent . $next_line;
1507           }
1508           if (defined ($next_next_line)) {
1509             unshift (@lines, $next_next_line);
1510           }
1511           unshift (@lines, $next_line);
1512
1513           # Our new next line might, coincidentally, begin with one of
1514           # the line-start regexps, so we temporarily turn off
1515           # sensitivity to that until we're past the line.
1516           $suppress_line_start_match = 1;
1517
1518           last;
1519         }
1520         else
1521         {
1522           $idx--;
1523         }
1524       }
1525
1526       if ($idx == 0)
1527       {
1528         # We bottomed out because the line is longer than the
1529         # available space.  But that could be because the space is
1530         # small, or because the line is longer than even the maximum
1531         # possible space.  Handle both cases below.
1532
1533         if ($length_remaining == ($max_line_length - (length ($user_indent))))
1534         {
1535           # The line is simply too long -- there is no hope of ever
1536           # breaking it nicely, so just insert it verbatim, with
1537           # appropriate padding.
1538           $this_line = "\n${left_pad_str}${this_line}";
1539         }
1540         else
1541         {
1542           # Can't break it here, but may be able to on the next round...
1543           unshift (@lines, $this_line);
1544           $length_remaining = $max_line_length - (length ($user_indent));
1545           $this_line = "\n${left_pad_str}";
1546         }
1547       }
1548     }
1549     else  # $this_len < $length_remaining, so tack on what we can.
1550     {
1551       # Leave a note for the next iteration.
1552       $length_remaining = $length_remaining - $this_len;
1553
1554       if ($this_line =~ /\.$/)
1555       {
1556         $this_line .= "  ";
1557         $length_remaining -= 2;
1558       }
1559       else  # not a sentence end
1560       {
1561         $this_line .= " ";
1562         $length_remaining -= 1;
1563       }
1564     }
1565
1566     # Unconditionally indicate that loop has run at least once.
1567     $first_time = 0;
1568
1569     $wrapped_text .= "${user_indent}${this_line}";
1570   }
1571
1572   # One last bit of padding.
1573   $wrapped_text .= "\n";
1574
1575   return $wrapped_text;
1576 }
1577
1578 # -------------------------------------
1579
1580 sub _pretty_file_list {
1581   my $self = shift;
1582
1583   my ($unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref) = @_;
1584
1585   my @qunkrefs =
1586     grep +( ( ! $_->tags_exists
1587               or
1588               ! grep exists $ignore_tags{$_}, @{$_->tags})
1589             and
1590             ( ! keys %show_tags
1591               or
1592               ( $_->tags_exists
1593                 and
1594                 grep exists $show_tags{$_}, @{$_->tags} )
1595             )
1596           ),
1597     @$qunksref;
1598
1599   my $common_dir;           # Dir prefix common to all files ('' if none)
1600
1601   # First, loop over the qunks gathering all the tag/branch names.
1602   # We'll put them all in non_unanimous_tags, and take out the
1603   # unanimous ones later.
1604  QUNKREF:
1605   foreach my $qunkref (@qunkrefs)
1606   {
1607     # Keep track of whether all the files in this commit were in the
1608     # same directory, and memorize it if so.  We can make the output a
1609     # little more compact by mentioning the directory only once.
1610     if ($Common_Dir && (scalar (@qunkrefs)) > 1)
1611     {
1612       if (! (defined ($common_dir)))
1613       {
1614         my ($base, $dir);
1615         ($base, $dir, undef) = fileparse ($qunkref->filename);
1616
1617         if ((! (defined ($dir)))  # this first case is sheer paranoia
1618             or ($dir eq '')
1619             or ($dir eq "./")
1620             or ($dir eq ".\\"))
1621         {
1622           $common_dir = '';
1623         }
1624         else
1625         {
1626           $common_dir = $dir;
1627         }
1628       }
1629       elsif ($common_dir ne '')
1630       {
1631         # Already have a common dir prefix, so how much of it can we preserve?
1632         $common_dir = &main::common_path_prefix ($qunkref->filename, $common_dir);
1633       }
1634     }
1635     else  # only one file in this entry anyway, so common dir not an issue
1636     {
1637       $common_dir = '';
1638     }
1639
1640     if (defined ($qunkref->branch)) {
1641       $all_branches->{$qunkref->branch} = 1;
1642     }
1643     if (defined ($qunkref->tags)) {
1644       foreach my $tag (@{$qunkref->tags}) {
1645         $non_unanimous_tags->{$tag} = 1;
1646       }
1647     }
1648   }
1649
1650   # Any tag held by all qunks will be printed specially... but only if
1651   # there are multiple qunks in the first place!
1652   if ((scalar (@qunkrefs)) > 1) {
1653     foreach my $tag (keys (%$non_unanimous_tags)) {
1654       my $everyone_has_this_tag = 1;
1655       foreach my $qunkref (@qunkrefs) {
1656         if ((! (defined ($qunkref->tags)))
1657             or (! (grep ($_ eq $tag, @{$qunkref->tags})))) {
1658           $everyone_has_this_tag = 0;
1659         }
1660       }
1661       if ($everyone_has_this_tag) {
1662         $unanimous_tags->{$tag} = 1;
1663         delete $non_unanimous_tags->{$tag};
1664       }
1665     }
1666   }
1667
1668   return $common_dir, \@qunkrefs;
1669 }
1670
1671 # -------------------------------------
1672
1673 sub fdatetime {
1674   my $self = shift;
1675
1676   my ($year, $mday, $mon, $wday, $hour, $min);
1677
1678   if ( @_ > 1 ) {
1679     ($year, $mday, $mon, $wday, $hour, $min) = @_;
1680   } else {
1681     my ($time) = @_;
1682     (undef, $min, $hour, $mday, $mon, $year, $wday) =
1683       $UTC_Times ? gmtime($time) : localtime($time);
1684
1685     $year += 1900;
1686     $mon  += 1;
1687     $wday  = $self->wday($wday);
1688   }
1689
1690   my $fdate = $self->fdate($year, $mon, $mday, $wday);
1691
1692   if ($Show_Times) {
1693     my $ftime = $self->ftime($hour, $min);
1694     return "$fdate $ftime";
1695   } else {
1696     return $fdate;
1697   }
1698 }
1699
1700 # -------------------------------------
1701
1702 sub fdate {
1703   my $self = shift;
1704
1705   my ($year, $mday, $mon, $wday);
1706
1707   if ( @_ > 1 ) {
1708     ($year, $mon, $mday, $wday) = @_;
1709   } else {
1710     my ($time) = @_;
1711     (undef, undef, undef, $mday, $mon, $year, $wday) =
1712       $UTC_Times ? gmtime($time) : localtime($time);
1713
1714     $year += 1900;
1715     $mon  += 1;
1716     $wday  = $self->wday($wday);
1717   }
1718
1719   return sprintf '%4u-%02u-%02u%s', $year, $mon, $mday, $wday;
1720 }
1721
1722 # -------------------------------------
1723
1724 sub ftime {
1725   my $self = shift;
1726
1727   my ($hour, $min);
1728
1729   if ( @_ > 1 ) {
1730     ($hour, $min) = @_;
1731   } else {
1732     my ($time) = @_;
1733     (undef, $min, $hour) = $UTC_Times ? gmtime($time) : localtime($time);
1734   }
1735
1736   return sprintf '%02u:%02u', $hour, $min;
1737 }
1738
1739 # ----------------------------------------------------------------------------
1740
1741 package CVS::Utils::ChangeLog::Message;
1742
1743 sub new {
1744   my $class = shift;
1745   my ($msg) = @_;
1746
1747   my %self = (msg => $msg, files => []);
1748
1749   bless \%self, $class;
1750 }
1751
1752 sub add_fileentry {
1753   my $self = shift;
1754   my ($fileentry) = @_;
1755
1756   die "Not a fileentry: $fileentry"
1757     unless $fileentry->isa('CVS::Utils::ChangeLog::FileEntry');
1758
1759   push @{$self->{files}}, $fileentry;
1760 }
1761
1762 sub files { wantarray ? @{$_[0]->{files}} : $_[0]->{files} }
1763
1764 # ----------------------------------------------------------------------------
1765
1766 package CVS::Utils::ChangeLog::FileEntry;
1767
1768 use File::Basename qw( fileparse );
1769
1770 # Each revision of a file has a little data structure (a `qunk')
1771 # associated with it.  That data structure holds not only the
1772 # file's name, but any additional information about the file
1773 # that might be needed in the output, such as the revision
1774 # number, tags, branches, etc.  The reason to have these things
1775 # arranged in a data structure, instead of just appending them
1776 # textually to the file's name, is that we may want to do a
1777 # little rearranging later as we write the output.  For example,
1778 # all the files on a given tag/branch will go together, followed
1779 # by the tag in parentheses (so trunk or otherwise non-tagged
1780 # files would go at the end of the file list for a given log
1781 # message).  This rearrangement is a lot easier to do if we
1782 # don't have to reparse the text.
1783 #
1784 # A qunk looks like this:
1785 #
1786 #   {
1787 #     filename    =>    "hello.c",
1788 #     revision    =>    "1.4.3.2",
1789 #     time        =>    a timegm() return value (moment of commit)
1790 #     tags        =>    [ "tag1", "tag2", ... ],
1791 #     branch      =>    "branchname" # There should be only one, right?
1792 #     roots       =>    [ "branchtag1", "branchtag2", ... ]
1793 #     lines       =>    "+x -y" # or undefined; x and y are integers
1794 #   }
1795
1796 # Single top-level ChangeLog, or one per subdirectory?
1797 my $distributed;
1798 sub distributed { $#_ ? ($distributed = $_[1]) : $distributed; }
1799
1800 sub new {
1801   my $class = shift;
1802   my ($path, $time, $revision, $state, $lines,
1803       $branch_names, $branch_roots, $branch_numbers, $symbolic_names) = @_;
1804
1805   my %self = (time     => $time,
1806               revision => $revision,
1807               state    => $state,
1808               lines    => $lines,
1809               branch_numbers => $branch_numbers,
1810              );
1811
1812   if ( $distributed ) {
1813     @self{qw(filename dir_key)} = fileparse($path);
1814   } else {
1815     @self{qw(filename dir_key)} = ($path, './');
1816   }
1817
1818   { # Scope for $branch_prefix
1819     (my ($branch_prefix) = ($revision =~ /((?:\d+\.)+)\d+/));
1820     $branch_prefix =~ s/\.$//;
1821     if ( $branch_names->{$branch_prefix} ) {
1822       my $branch_name = $branch_names->{$branch_prefix};
1823       $self{branch}   = $branch_name;
1824       $self{branches} = [$branch_name];
1825     }
1826     while ( $branch_prefix =~ s/^(\d+(?:\.\d+\.\d+)+)\.\d+\.\d+$/$1/ ) {
1827       push @{$self{branches}}, $branch_names->{$branch_prefix}
1828         if exists $branch_names->{$branch_prefix};
1829     }
1830   }
1831
1832   # If there's anything in the @branch_roots array, then this
1833   # revision is the root of at least one branch.  We'll display
1834   # them as branch names instead of revision numbers, the
1835   # substitution for which is done directly in the array:
1836   $self{'roots'} = [ map { $branch_names->{$_} } @$branch_roots ]
1837     if @$branch_roots;
1838
1839   if ( exists $symbolic_names->{$revision} ) {
1840     $self{tags} = delete $symbolic_names->{$revision};
1841     &main::delta_check($time, $self{tags});
1842   }
1843
1844   bless \%self, $class;
1845 }
1846
1847 sub filename       { $_[0]->{filename}       }
1848 sub dir_key        { $_[0]->{dir_key}        }
1849 sub revision       { $_[0]->{revision}       }
1850 sub branch         { $_[0]->{branch}         }
1851 sub state          { $_[0]->{state}          }
1852 sub lines          { $_[0]->{lines}          }
1853 sub roots          { $_[0]->{roots}          }
1854 sub branch_numbers { $_[0]->{branch_numbers} }
1855
1856 sub tags        { $_[0]->{tags}     }
1857 sub tags_exists {
1858   exists $_[0]->{tags};
1859 }
1860
1861 # This may someday be used in a more sophisticated calculation of what other
1862 # files are involved in this commit.  For now, we don't use it much except for
1863 # delta mode, because the common-commit-detection algorithm is hypothesized to
1864 # be "good enough" as it stands.
1865 sub time     { $_[0]->{time}     }
1866
1867 # ----------------------------------------------------------------------------
1868
1869 package CVS::Utils::ChangeLog::EntrySetBuilder;
1870
1871 use File::Basename qw( fileparse );
1872 use Time::Local    qw( timegm );
1873
1874 use constant MAILNAME => "/etc/mailname";
1875
1876 # In 'cvs log' output, one long unbroken line of equal signs separates files:
1877 use constant FILE_SEPARATOR => '=' x 77;# . "\n";
1878 # In 'cvs log' output, a shorter line of dashes separates log messages within
1879 # a file:
1880 use constant REV_SEPARATOR  => '-' x 28;# . "\n";
1881
1882 use constant EMPTY_LOG_MESSAGE => '*** empty log message ***';
1883
1884 # -------------------------------------
1885
1886 sub new {
1887   my ($proto) = @_;
1888   my $class = ref $proto || $proto;
1889
1890   my $poobah  = CVS::Utils::ChangeLog::EntrySet->new;
1891   my $self = bless +{ grand_poobah => $poobah }, $class;
1892
1893   $self->clear_file;
1894   $self->maybe_read_user_map_file;
1895   return $self;
1896 }
1897
1898 # -------------------------------------
1899
1900 sub clear_msg {
1901   my ($self) = @_;
1902
1903   # Make way for the next message
1904   undef $self->{rev_msg};
1905   undef $self->{rev_time};
1906   undef $self->{rev_revision};
1907   undef $self->{rev_author};
1908   undef $self->{rev_state};
1909   undef $self->{lines};
1910   $self->{rev_branch_roots} = [];       # For showing which files are branch
1911                                         # ancestors.
1912   $self->{collecting_symbolic_names} = 0;
1913 }
1914
1915 # -------------------------------------
1916
1917 sub clear_file {
1918   my ($self) = @_;
1919   $self->clear_msg;
1920
1921   undef $self->{filename};
1922   $self->{branch_names}   = +{};        # We'll grab branch names while we're
1923                                         # at it.
1924   $self->{branch_numbers} = +{};        # Save some revisions for
1925                                         # @Follow_Branches
1926   $self->{symbolic_names} = +{};        # Where tag names get stored.
1927 }
1928
1929 # -------------------------------------
1930
1931 sub grand_poobah { $_[0]->{grand_poobah} }
1932
1933 # -------------------------------------
1934
1935 sub read_changelog {
1936   my ($self, $command) = @_;
1937
1938   local (*READER, *WRITER);
1939   my $pid;
1940   if (! $Input_From_Stdin) {
1941     pipe(READER, WRITER)
1942       or die "Couldn't form pipe: $!\n";
1943     $pid = fork;
1944     die "Couldn't fork: $!\n"
1945       if ! defined $pid;
1946     if ( ! $pid ) { # child
1947       open STDOUT, '>&=' . fileno WRITER
1948         or die "Couldn't dup stderr to ", fileno WRITER, "\n";
1949       # strangely, some perls give spurious warnings about STDIN being opened
1950       # for output only these close calls precede the STDOUT reopen above.
1951       # I think they must be reusing fd 1.
1952       close READER;
1953       close STDIN;
1954
1955       exec @$command;
1956     }
1957
1958     close WRITER;
1959
1960     &main::debug ("(run \"@$command\")\n");
1961   }
1962   else {
1963     open READER, '-' or die "unable to open stdin for reading";
1964   }
1965
1966   binmode READER;
1967
1968  XX_Log_Source:
1969   while (<READER>) {
1970     chomp;
1971     s!\r$!!;
1972
1973     # If on a new file and don't see filename, skip until we find it, and
1974     # when we find it, grab it.
1975     if ( ! defined $self->{filename} ) {
1976       $self->read_file_path($_);
1977     } elsif ( /^symbolic names:$/ ) {
1978       $self->{collecting_symbolic_names} = 1;
1979     } elsif ( $self->{collecting_symbolic_names} ) {
1980       $self->read_symbolic_name($_);
1981     } elsif ( $_ eq FILE_SEPARATOR and ! defined $self->{rev_revision} ) {
1982       $self->clear_file;
1983     } elsif ( ! defined $self->{rev_revision} ) {
1984         # If have file name, but not revision, and see revision, then grab
1985         # it.  (We collect unconditionally, even though we may or may not
1986         # ever use it.)
1987       $self->read_revision($_);
1988     } elsif ( ! defined $self->{rev_time} ) { # and /^date: /) {
1989       $self->read_date_author_and_state($_);
1990     } elsif ( /^branches:\s+(.*);$/ ) {
1991       $self->read_branches($1);
1992     } elsif ( ! ( $_ eq FILE_SEPARATOR or $_ eq REV_SEPARATOR ) ) {
1993       # If have file name, time, and author, then we're just grabbing
1994       # log message texts:
1995       $self->{rev_msg} .= $_ . "\n";   # Normally, just accumulate the message...
1996     } else {
1997       my $noadd = 0;
1998       if ( ! $self->{rev_msg}
1999            or $self->{rev_msg} =~ /^\s*(\.\s*)?$/
2000            or index($self->{rev_msg}, EMPTY_LOG_MESSAGE) > -1 ) {
2001         # ... until a msg separator is encountered:
2002         # Ensure the message contains something:
2003         $self->clear_msg, $noadd = 1
2004           if $Prune_Empty_Msgs;
2005         $self->{rev_msg} = "[no log message]\n";
2006       }
2007
2008       $self->add_file_entry
2009         unless $noadd;
2010
2011       if ( $_ eq FILE_SEPARATOR ) {
2012         $self->clear_file;
2013       } else {
2014         $self->clear_msg;
2015       }
2016     }
2017   }
2018
2019   close READER
2020     or die "Couldn't close pipe reader: $!\n";
2021   if ( defined $pid ) {
2022     my $rv;
2023     waitpid $pid, 0;
2024     0 == $?
2025       or $!=1, die sprintf("Problem reading log input (pid/exit/signal/core: %d/%d/%d/%d)\n",
2026                            $pid, $? >> 8, $? & 127, $? & 128);
2027   }
2028   return;
2029 }
2030
2031 # -------------------------------------
2032
2033 sub add_file_entry {
2034   $_[0]->grand_poobah->add_fileentry(@{$_[0]}{qw(filename rev_time rev_revision
2035                                                  rev_state lines branch_names
2036                                                  rev_branch_roots
2037                                                  branch_numbers
2038                                                  symbolic_names
2039                                                  rev_author rev_msg)});
2040 }
2041
2042 # -------------------------------------
2043
2044 sub maybe_read_user_map_file {
2045   my ($self) = @_;
2046
2047   my %expansions;
2048   my $User_Map_Input;
2049
2050   if ($User_Map_File)
2051   {
2052     if ( $User_Map_File =~ m{^([-\w\@+=.,\/]+):([-\w\@+=.,\/:]+)} and
2053          !-f $User_Map_File )
2054     {
2055       my $rsh = (exists $ENV{'CVS_RSH'} ? $ENV{'CVS_RSH'} : 'ssh');
2056       $User_Map_Input = "$rsh $1 'cat $2' |";
2057       &main::debug ("(run \"${User_Map_Input}\")\n");
2058     }
2059     else
2060     {
2061       $User_Map_Input = "<$User_Map_File";
2062     }
2063
2064     open (MAPFILE, $User_Map_Input)
2065         or die ("Unable to open $User_Map_File ($!)");
2066
2067     while (<MAPFILE>)
2068     {
2069       next if /^\s*#/;  # Skip comment lines.
2070       next if not /:/;  # Skip lines without colons.
2071
2072       # It is now safe to split on ':'.
2073       my ($username, $expansion) = split ':';
2074       chomp $expansion;
2075       $expansion =~ s/^'(.*)'$/$1/;
2076       $expansion =~ s/^"(.*)"$/$1/;
2077
2078       # If it looks like the expansion has a real name already, then
2079       # we toss the username we got from CVS log.  Otherwise, keep
2080       # it to use in combination with the email address.
2081
2082       if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
2083         # Also, add angle brackets if none present
2084         if (! ($expansion =~ /<\S+@\S+>/)) {
2085           $expansions{$username} = "$username <$expansion>";
2086         }
2087         else {
2088           $expansions{$username} = "$username $expansion";
2089         }
2090       }
2091       else {
2092         $expansions{$username} = $expansion;
2093       }
2094     } # fi ($User_Map_File)
2095
2096     close (MAPFILE);
2097   }
2098
2099   if (defined $User_Passwd_File)
2100   {
2101     if ( ! defined $Domain ) {
2102       if ( -e MAILNAME ) {
2103         chomp($Domain = slurp_file(MAILNAME));
2104       } else {
2105       MAILDOMAIN_CMD:
2106         for ([qw(hostname -d)], 'dnsdomainname', 'domainname') {
2107           my ($text, $exit, $sig, $core) = run_ext($_);
2108           if ( $exit == 0 && $sig == 0 && $core == 0 ) {
2109             chomp $text;
2110             if ( length $text ) {
2111               $Domain = $text;
2112               last MAILDOMAIN_CMD;
2113             }
2114           }
2115         }
2116       }
2117     }
2118
2119     die "No mail domain found\n"
2120       unless defined $Domain;
2121
2122     open (MAPFILE, "<$User_Passwd_File")
2123         or die ("Unable to open $User_Passwd_File ($!)");
2124     while (<MAPFILE>)
2125     {
2126       # all lines are valid
2127       my ($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
2128       my $expansion = '';
2129       ($expansion) = split (',', $gecos)
2130         if defined $gecos && length $gecos;
2131
2132       my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
2133       $expansions{$username} = "$expansion <$mailname>";
2134     }
2135     close (MAPFILE);
2136   }
2137
2138  $self->{usermap} = \%expansions;
2139 }
2140
2141 # -------------------------------------
2142
2143 sub read_file_path {
2144   my ($self, $line) = @_;
2145
2146   my $path;
2147
2148   if ( $line =~ /^Working file: (.*)/ ) {
2149     $path = $1;
2150   } elsif ( defined $RCS_Root
2151             and
2152             $line =~ m|^RCS file: $RCS_Root[/\\](.*),v$| ) {
2153     $path = $1;
2154     $path =~ s!Attic/!!;
2155   } else {
2156     return;
2157   }
2158
2159   if ( @Ignore_Files ) {
2160     my $base;
2161     ($base, undef, undef) = fileparse($path);
2162
2163     my $xpath = $Case_Insensitive ? lc($path) : $path;
2164     return
2165       if grep $path =~ /$_/, @Ignore_Files;
2166   }
2167
2168   $self->{filename} = $path;
2169   return;
2170 }
2171
2172 # -------------------------------------
2173
2174 sub read_symbolic_name {
2175   my ($self, $line) = @_;
2176
2177   # All tag names are listed with whitespace in front in cvs log
2178   # output; so if see non-whitespace, then we're done collecting.
2179   if ( /^\S/ ) {
2180     $self->{collecting_symbolic_names} = 0;
2181     return;
2182   } else {
2183     # we're looking at a tag name, so parse & store it
2184
2185     # According to the Cederqvist manual, in node "Tags", tag names must start
2186     # with an uppercase or lowercase letter and can contain uppercase and
2187     # lowercase letters, digits, `-', and `_'.  However, it's not our place to
2188     # enforce that, so we'll allow anything CVS hands us to be a tag:
2189     my ($tag_name, $tag_rev) = ($line =~ /^\s+([^:]+): ([\d.]+)$/);
2190
2191     # A branch number either has an odd number of digit sections
2192     # (and hence an even number of dots), or has ".0." as the
2193     # second-to-last digit section.  Test for these conditions.
2194     my $real_branch_rev = '';
2195     if ( $tag_rev =~ /^(\d+\.\d+\.)+\d+$/             # Even number of dots...
2196          and
2197          $tag_rev !~ /^(1\.)+1$/ ) {                  # ...but not "1.[1.]1"
2198       $real_branch_rev = $tag_rev;
2199     } elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) {  # Has ".0."
2200       $real_branch_rev = $1 . $3;
2201     }
2202
2203     # If we got a branch, record its number.
2204     if ( $real_branch_rev ) {
2205       $self->{branch_names}->{$real_branch_rev} = $tag_name;
2206       $self->{branch_numbers}->{$tag_name} = $real_branch_rev;
2207       $tag_rev =~ s/^(\d+\.\d+).+/$1/;
2208     }
2209
2210     # regardless if it is a name on a branch, store it as symbolic name so that tag deltas always work
2211     push @{$self->{symbolic_names}->{$tag_rev}}, $tag_name;
2212   }
2213
2214   $self->{collecting_symbolic_names} = 1;
2215   return;
2216 }
2217
2218 # -------------------------------------
2219
2220 sub read_revision {
2221   my ($self, $line) = @_;
2222
2223   my ($revision) = ( $line =~ /^revision (\d+\.[\d.]+)/ );
2224
2225   return
2226     unless $revision;
2227
2228   $self->{rev_revision} = $revision;
2229   return;
2230 }
2231
2232 # -------------------------------------
2233
2234 { # Closure over %gecos_warned
2235 my %gecos_warned;
2236 sub read_date_author_and_state {
2237   my ($self, $line) = @_;
2238
2239   my ($time, $author, $state) = $self->parse_date_author_and_state($line);
2240
2241   if ( defined($self->{usermap}->{$author}) and $self->{usermap}->{$author} ) {
2242     $author = $self->{usermap}->{$author};
2243   } elsif ( defined $Domain or $Gecos == 1 ) {
2244     my $email = $author;
2245     $email = $author."@".$Domain
2246       if defined $Domain && $Domain ne '';
2247
2248     my $pw = getpwnam($author);
2249     my ($fullname, $office, $workphone, $homephone, $gcos);
2250     if ( defined $pw ) {
2251       $gcos = (getpwnam($author))[6];
2252       ($fullname, $office, $workphone, $homephone) =
2253         split /\s*,\s*/, $gcos;
2254     } else {
2255       warn "Couldn't find gecos info for author '$author'\n"
2256         unless $gecos_warned{$author}++;
2257       $fullname = '';
2258     }
2259     for (grep defined, $fullname, $office, $workphone, $homephone) {
2260       s/&/ucfirst(lc($pw->name))/ge;
2261     }
2262     $author = $fullname . "  <" . $email . ">"
2263       if $fullname ne '';
2264   }
2265
2266   $self->{rev_state}  = $state;
2267   $self->{rev_time}   = $time;
2268   $self->{rev_author} = $author;
2269   return;
2270 }
2271 }
2272
2273 # -------------------------------------
2274
2275 sub read_branches {
2276   # A "branches: ..." line here indicates that one or more branches
2277   # are rooted at this revision.  If we're showing branches, then we
2278   # want to show that fact as well, so we collect all the branches
2279   # that this is the latest ancestor of and store them in
2280   # $self->[rev_branch_roots}.  Just for reference, the format of the
2281   # line we're seeing at this point is:
2282   #
2283   #    branches:  1.5.2;  1.5.4;  ...;
2284   #
2285   # Okay, here goes:
2286   my ($self, $line) = @_;
2287
2288   # Ugh.  This really bothers me.  Suppose we see a log entry
2289   # like this:
2290   #
2291   #    ----------------------------
2292   #    revision 1.1
2293   #    date: 1999/10/17 03:07:38;  author: jrandom;  state: Exp;
2294   #    branches:  1.1.2;
2295   #    Intended first line of log message begins here.
2296   #    ----------------------------
2297   #
2298   # The question is, how we can tell the difference between that
2299   # log message and a *two*-line log message whose first line is
2300   #
2301   #    "branches:  1.1.2;"
2302   #
2303   # See the problem?  The output of "cvs log" is inherently
2304   # ambiguous.
2305   #
2306   # For now, we punt: we liberally assume that people don't
2307   # write log messages like that, and just toss a "branches:"
2308   # line if we see it but are not showing branches.  I hope no
2309   # one ever loses real log data because of this.
2310   if ( $Show_Branches ) {
2311     $line =~ s/(1\.)+1;|(1\.)+1$//;  # ignore the trivial branch 1.1.1
2312     $self->{rev_branch_roots} = [split /;\s+/, $line]
2313       if length $line;
2314   }
2315 }
2316
2317 # -------------------------------------
2318
2319 sub parse_date_author_and_state {
2320   my ($self, $line) = @_;
2321   # Parses the date/time and author out of a line like:
2322   #
2323   # date: 1999/02/19 23:29:05;  author: apharris;  state: Exp;
2324   #
2325   # or, in CVS 1.12.9:
2326   #
2327   # date: 2004-06-05 16:10:32 +0000; author: somebody; state: Exp;
2328
2329   my ($year, $mon, $mday, $hours, $min, $secs, $utcOffset, $author, $state, $rest) =
2330     $line =~
2331       m!(\d+)[-/](\d+)[-/](\d+)\s+(\d+):(\d+):(\d+)(\s+[+-]\d{4})?;\s+
2332         author:\s+([^;]+);\s+state:\s+([^;]+);(.*)!x
2333     or  die "Couldn't parse date ``$line''";
2334   die "Bad date or Y2K issues"
2335     unless $year > 1969 and $year < 2258;
2336   # Kinda arbitrary, but useful as a sanity check
2337   my $time = timegm($secs, $min, $hours, $mday, $mon-1, $year-1900);
2338   if ( defined $utcOffset ) {
2339     my ($plusminus, $hour, $minute) = ($utcOffset =~ m/([+-])(\d\d)(\d\d)/);
2340     my $offset = (($hour * 60) + $minute) * 60 * ($plusminus eq '+' ? -1 : 1);
2341     $time += $offset;
2342   }
2343   if ( $rest =~ m!\s+lines:\s+(.*)! ) {
2344     $self->{lines} = $1;
2345   }
2346
2347   return $time, $author, $state;
2348 }
2349
2350 # Subrs ----------------------------------------------------------------------
2351
2352 package main;
2353
2354 sub delta_check {
2355   my ($time, $tags) = @_;
2356
2357   # If we're in 'delta' mode, update the latest observed times for the
2358   # beginning and ending tags, and when we get around to printing output, we
2359   # will simply restrict ourselves to that timeframe...
2360   return
2361     unless $Delta_Mode;
2362
2363   $Delta_StartTime = $time
2364     if $time > $Delta_StartTime and grep { $_ eq $Delta_From } @$tags;
2365
2366   $Delta_EndTime = $time
2367     if $time > $Delta_EndTime and grep { $_ eq $Delta_To } @$tags;
2368 }
2369
2370 sub run_ext {
2371   my ($cmd) = @_;
2372   $cmd = [$cmd]
2373     unless ref $cmd;
2374   local $" = ' ';
2375   my $out = qx"@$cmd 2>&1";
2376   my $rv  = $?;
2377   my ($sig, $core, $exit) = ($? & 127, $? & 128, $? >> 8);
2378   return $out, $exit, $sig, $core;
2379 }
2380
2381 # -------------------------------------
2382
2383 # If accumulating, grab the boundary date from pre-existing ChangeLog.
2384 sub maybe_grab_accumulation_date {
2385   if (! $Cumulative || $Update) {
2386     return '';
2387   }
2388
2389   # else
2390
2391   open (LOG, "$Log_File_Name")
2392       or die ("trouble opening $Log_File_Name for reading ($!)");
2393
2394   my $boundary_date;
2395   while (<LOG>)
2396   {
2397     if (/^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/)
2398     {
2399       $boundary_date = "$1";
2400       last;
2401     }
2402   }
2403
2404   close (LOG);
2405
2406   # convert time from utc to local timezone if the ChangeLog has
2407   # dates/times in utc
2408   if ($UTC_Times && $boundary_date)
2409   {
2410     # convert the utc time to a time value
2411     my ($year,$mon,$mday,$hour,$min) = $boundary_date =~
2412       m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#;
2413     my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900);
2414     # print the timevalue in the local timezone
2415     my ($ignore,$wday);
2416     ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time);
2417     $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u",
2418                             $year+1900,$mon+1,$mday,$hour,$min);
2419   }
2420
2421   return $boundary_date;
2422 }
2423
2424 # -------------------------------------
2425
2426 # Fills up a ChangeLog structure in the current directory.
2427 sub derive_changelog {
2428   my ($command) = @_;
2429
2430   # See "The Plan" above for a full explanation.
2431
2432   # Might be adding to an existing ChangeLog
2433   my $accumulation_date = maybe_grab_accumulation_date;
2434   if ($accumulation_date) {
2435     # Insert -d immediately after 'cvs log'
2436     my $Log_Date_Command = "-d>${accumulation_date}";
2437
2438     my ($log_index) = grep $command->[$_] eq 'log', 0..$#$command;
2439     splice @$command, $log_index+1, 0, $Log_Date_Command;
2440     &debug ("(adding log msg starting from $accumulation_date)\n");
2441   }
2442
2443 #  output_changelog(read_changelog($command));
2444   my $builder = CVS::Utils::ChangeLog::EntrySetBuilder->new;
2445   $builder->read_changelog($command);
2446   $builder->grand_poobah->output_changelog;
2447 }
2448
2449 # -------------------------------------
2450
2451 sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
2452
2453 # -------------------------------------
2454
2455 sub common_path_prefix {
2456   my ($path1, $path2) = @_;
2457
2458   # For compatibility (with older versions of cvs2cl.pl), we think in UN*X
2459   # terms, and mould windoze filenames to match.  Is this really appropriate?
2460   # If a file is checked in under UN*X, and cvs log run on windoze, which way
2461   # do the path separators slope?  Can we use fileparse as per the local
2462   # conventions?  If so, we should probably have a user option to specify an
2463   # OS to emulate to handle stdin-fed logs.  If we did this, we could avoid
2464   # the nasty \-/ transmogrification below.
2465
2466   my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2;
2467
2468   # Transmogrify Windows filenames to look like Unix.
2469   # (It is far more likely that someone is running cvs2cl.pl under
2470   # Windows than that they would genuinely have backslashes in their
2471   # filenames.)
2472   tr!\\!/!
2473     for $dir1, $dir2;
2474
2475   my ($accum1, $accum2, $last_common_prefix) = ('') x 3;
2476
2477   my @path1 = grep length($_), split qr!/!, $dir1;
2478   my @path2 = grep length($_), split qr!/!, $dir2;
2479
2480   my @common_path;
2481   for (0..min($#path1,$#path2)) {
2482     if ( $path1[$_] eq $path2[$_]) {
2483       push @common_path, $path1[$_];
2484     } else {
2485       last;
2486     }
2487   }
2488
2489   return join '', map "$_/", @common_path;
2490 }
2491
2492 # -------------------------------------
2493
2494 sub parse_options {
2495   # Check this internally before setting the global variable.
2496   my $output_file;
2497
2498   # If this gets set, we encountered unknown options and will exit at
2499   # the end of this subroutine.
2500   my $exit_with_admonishment = 0;
2501
2502   # command to generate the log
2503   my @log_source_command = qw( cvs log );
2504
2505   my (@Global_Opts, @Local_Opts);
2506
2507   Getopt::Long::Configure(qw( bundling permute no_getopt_compat
2508                               pass_through no_ignore_case ));
2509   GetOptions('help|usage|h'   => \$Print_Usage,
2510              'debug'          => \$Debug,        # unadvertised option, heh
2511              'version'        => \$Print_Version,
2512
2513              'file|f=s'       => \$output_file,
2514              'accum'          => \$Cumulative,
2515              'update'         => \$Update,
2516              'fsf'            => \$FSF_Style,
2517              'rcs=s'          => \$RCS_Root,
2518              'usermap|U=s'    => \$User_Map_File,
2519              'gecos'          => \$Gecos,
2520              'domain=s'       => \$Domain,
2521              'passwd=s'       => \$User_Passwd_File,
2522              'window|W=i'     => \$Max_Checkin_Duration,
2523              'chrono'         => \$Chronological_Order,
2524              'ignore|I=s'     => \@Ignore_Files,
2525              'case-insensitive|C' => \$Case_Insensitive,
2526              'regexp|R=s'     => \$Regexp_Gate,
2527              'stdin'          => \$Input_From_Stdin,
2528              'stdout'         => \$Output_To_Stdout,
2529              'distributed|d'  => sub { CVS::Utils::ChangeLog::FileEntry->distributed(1) },
2530              'prune|P'        => \$Prune_Empty_Msgs,
2531              'no-wrap'        => \$No_Wrap,
2532              'gmt|utc'        => \$UTC_Times,
2533              'day-of-week|w'  => \$Show_Day_Of_Week,
2534              'revisions|r'    => \$Show_Revisions,
2535              'show-dead'      => \$Show_Dead,
2536              'tags|t'         => \$Show_Tags,
2537              'tagdates|T'     => \$Show_Tag_Dates,
2538              'branches|b'     => \$Show_Branches,
2539              'follow|F=s'     => \@Follow_Branches,
2540              'follow-only=s'  => \@Follow_Only,
2541              'xml-encoding=s' => \$XML_Encoding,
2542              'xml'            => \$XML_Output,
2543              'noxmlns'        => \$No_XML_Namespace,
2544              'no-xml-iso-date' => \$No_XML_ISO_Date,
2545              'no-ancestors'   => \$No_Ancestors,
2546              'lines-modified' => \$Show_Lines_Modified,
2547
2548              'no-indent'    => sub {
2549                $Indent = '';
2550              },
2551
2552              'summary'      => sub {
2553                $Summary = 1;
2554                $After_Header = "\n\n"; # Summary implies --separate-header
2555              },
2556
2557              'no-times'     => sub {
2558                $Show_Times = 0;
2559              },
2560
2561              'no-hide-branch-additions' => sub {
2562                $Hide_Branch_Additions = 0;
2563              },
2564
2565              'no-common-dir'  => sub {
2566                $Common_Dir = 0;
2567              },
2568
2569              'ignore-tag=s'   => sub {
2570                $ignore_tags{$_[1]} = 1;
2571              },
2572
2573              'show-tag=s'     => sub {
2574                $show_tags{$_[1]} = 1;
2575              },
2576
2577              # Deliberately undocumented.  This is not a public interface, and
2578              # may change/disappear at any time.
2579              'test-code=s'    => \$TestCode,
2580
2581              'delta=s'        => sub {
2582                my $arg = $_[1];
2583                if ( $arg =~
2584                     /^([A-Za-z][A-Za-z0-9_\-\]\[]*):([A-Za-z][A-Za-z0-9_\-\]\[]*)$/ ) {
2585                  $Delta_From = $1;
2586                  $Delta_To = $2;
2587                  $Delta_Mode = 1;
2588                } else {
2589                  die "--delta FROM_TAG:TO_TAG is what you meant to say.\n";
2590                }
2591              },
2592
2593              'FSF'             => sub {
2594                $Show_Times = 0;
2595                $Common_Dir = 0;
2596                $No_Extra_Indent = 1;
2597                $Indent = "\t";
2598              },
2599
2600              'header=s'        => sub {
2601                my $narg = $_[1];
2602                $ChangeLog_Header = &slurp_file ($narg);
2603                if (! defined ($ChangeLog_Header)) {
2604                  $ChangeLog_Header = '';
2605                }
2606              },
2607
2608              'global-opts|g=s' => sub {
2609                my $narg = $_[1];
2610                push @Global_Opts, $narg;
2611                splice @log_source_command, 1, 0, $narg;
2612              },
2613
2614              'log-opts|l=s' => sub {
2615                my $narg = $_[1];
2616                push @Local_Opts, $narg;
2617                push @log_source_command, $narg;
2618              },
2619
2620              'mailname=s'   => sub {
2621                my $narg = $_[1];
2622                warn "--mailname is deprecated; please use --domain instead\n";
2623                $Domain = $narg;
2624              },
2625
2626              'separate-header|S' => sub {
2627                $After_Header = "\n\n";
2628                $No_Extra_Indent = 1;
2629              },
2630
2631              'group-within-date' => sub {
2632                $GroupWithinDate = 1;
2633                $Show_Times = 0;
2634              },
2635
2636              'hide-filenames' => sub {
2637                $Hide_Filenames = 1;
2638                $After_Header = '';
2639              },
2640             )
2641     or die "options parsing failed\n";
2642
2643   push @log_source_command, map "$_", @ARGV;
2644
2645   ## Check for contradictions...
2646
2647   if ($Output_To_Stdout && CVS::Utils::ChangeLog::FileEntry->distributed) {
2648     print STDERR "cannot pass both --stdout and --distributed\n";
2649     $exit_with_admonishment = 1;
2650   }
2651
2652   if ($Output_To_Stdout && $output_file) {
2653     print STDERR "cannot pass both --stdout and --file\n";
2654     $exit_with_admonishment = 1;
2655   }
2656
2657   if ($Input_From_Stdin && @Global_Opts) {
2658     print STDERR "cannot pass both --stdin and -g\n";
2659     $exit_with_admonishment = 1;
2660   }
2661
2662   if ($Input_From_Stdin && @Local_Opts) {
2663     print STDERR "cannot pass both --stdin and -l\n";
2664     $exit_with_admonishment = 1;
2665   }
2666
2667   if ($XML_Output && $Cumulative) {
2668     print STDERR "cannot pass both --xml and --accum\n";
2669     $exit_with_admonishment = 1;
2670   }
2671
2672   # Other consistency checks and option-driven logic
2673
2674   # Bleargh.  Compensate for a deficiency of custom wrapping.
2675   if ( ($After_Header ne " ") and $FSF_Style ) {
2676     $After_Header .= "\t";
2677   }
2678
2679   @Ignore_Files = map lc, @Ignore_Files
2680     if $Case_Insensitive;
2681
2682   # Or if any other error message has already been printed out, we
2683   # just leave now:
2684   if ($exit_with_admonishment) {
2685     &usage ();
2686     exit (1);
2687   }
2688   elsif ($Print_Usage) {
2689     &usage ();
2690     exit (0);
2691   }
2692   elsif ($Print_Version) {
2693     &version ();
2694     exit (0);
2695   }
2696
2697   ## Else no problems, so proceed.
2698
2699   if ($output_file) {
2700     $Log_File_Name = $output_file;
2701   }
2702
2703   return \@log_source_command;
2704 }
2705
2706 # -------------------------------------
2707
2708 sub slurp_file {
2709   my $filename = shift || die ("no filename passed to slurp_file()");
2710   my $retstr;
2711
2712   open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");
2713   local $/ = undef;
2714   $retstr = <SLURPEE>;
2715   close (SLURPEE);
2716   return $retstr;
2717 }
2718
2719 # -------------------------------------
2720
2721 sub debug {
2722   if ($Debug) {
2723     my $msg = shift;
2724     print STDERR $msg;
2725   }
2726 }
2727
2728 # -------------------------------------
2729
2730 sub version {
2731   print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
2732 }
2733
2734 # -------------------------------------
2735
2736 sub usage {
2737   &version ();
2738
2739   eval "use Pod::Usage qw( pod2usage )";
2740
2741    if ( $@ ) {
2742     print <<'END';
2743
2744 * Pod::Usage was not found.  The formatting may be suboptimal.  Consider
2745   upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and
2746   versions of perl prior to 5.6 are getting rather rusty, now.  Alternatively,
2747   install Pod::Usage direct from CPAN.
2748 END
2749
2750     local $/ = undef;
2751     my $message = <DATA>;
2752     $message =~ s/^=(head1|item) //gm;
2753     $message =~ s/^=(over|back).*\n//gm;
2754     $message =~ s/\n{3,}/\n\n/g;
2755     print $message;
2756   } else {
2757     print "\n";
2758     pod2usage( -exitval => 'NOEXIT',
2759                -verbose => 1,
2760                -output  => \*STDOUT,
2761              );
2762   }
2763
2764   return;
2765 }
2766
2767 # Main -----------------------------------------------------------------------
2768
2769 my $log_source_command = parse_options;
2770 if ( defined $TestCode ) {
2771   eval $TestCode;
2772   die "Eval failed: '$@'\n"
2773     if $@;
2774 } else {
2775   derive_changelog($log_source_command);
2776 }
2777
2778 __DATA__
2779
2780 =head1 NAME
2781
2782 cvs2cl_by_file.pl - convert cvs log messages to changelogs
2783
2784 =head1 SYNOPSIS
2785
2786 B<cvs2cl> [I<options>] [I<FILE1> [I<FILE2> ...]]
2787 cvs2cl_by_file.pl --delta bioperl-release-1-5-1:bioperl-release-1-5-2
2788
2789 =head1 DESCRIPTION
2790
2791 This is a quick hack version of cvs2cl.pl that simply outputs in a
2792 file-centric way.
2793 Only CVS::Utils::ChangeLog::EntrySet::Output::output_changelog
2794 was altered
2795
2796 Usage:
2797 cvs2cl_by_file.pl --delta bioperl-release-1-5-1:bioperl-release-1-5-2
2798 Generates a file called ChangeLog showing, per file, all the commit
2799 messages since tag bioperl-release-1-5-1 up to tag bioperl-release-1-5-2
2800
2801 Original cvs2cl docs now follow, but some things may not work because of
2802 the hack!
2803
2804
2805 cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by
2806 running "cvs log" and parsing the output. Duplicate log messages get
2807 unified in the Right Way.
2808
2809 The default output of cvs2cl is designed to be compact, formally unambiguous,
2810 but still easy for humans to read.  It should be largely self-explanatory; the
2811 one abbreviation that might not be obvious is "utags".  That stands for
2812 "universal tags" -- a universal tag is one held by all the files in a given
2813 change entry.
2814
2815 If you need output that's easy for a program to parse, use the B<--xml> option.
2816 Note that with XML output, just about all available information is included
2817 with each change entry, whether you asked for it or not, on the theory that
2818 your parser can ignore anything it's not looking for.
2819
2820 If filenames are given as arguments cvs2cl only shows log information for the
2821 named files.
2822
2823 =head1 OPTIONS
2824
2825 =over 4
2826
2827 =item B<-h>, B<-help>, B<--help>, B<-?>
2828
2829 Show a short help and exit.
2830
2831 =item B<--version>
2832
2833 Show version and exit.
2834
2835 =item B<-r>, B<--revisions>
2836
2837 Show revision numbers in output.
2838
2839 =item B<-b>, B<--branches>
2840
2841 Show branch names in revisions when possible.
2842
2843 =item B<-t>, B<--tags>
2844
2845 Show tags (symbolic names) in output.
2846
2847 =item B<-T>, B<--tagdates>
2848
2849 Show tags in output on their first occurance.
2850
2851 =item B<--show-dead>
2852
2853 Show dead files.
2854
2855 =item B<--stdin>
2856
2857 Read from stdin, don't run cvs log.
2858
2859 =item B<--stdout>
2860
2861 Output to stdout not to ChangeLog.
2862
2863 =item B<-d>, B<--distributed>
2864
2865 Put ChangeLogs in subdirs.
2866
2867 =item B<-f> I<FILE>, B<--file> I<FILE>
2868
2869 Write to I<FILE> instead of ChangeLog.
2870
2871 =item B<--fsf>
2872
2873 Use this if log data is in FSF ChangeLog style.
2874
2875 =item B<--FSF>
2876
2877 Attempt strict FSF-standard compatible output.
2878
2879 =item B<-W> I<SECS>, B<--window> I<SECS>
2880
2881 Window of time within which log entries unify.
2882
2883 =item -B<U> I<UFILE>, B<--usermap> I<UFILE>
2884
2885 Expand usernames to email addresses from I<UFILE>.
2886
2887 =item B<--passwd> I<PASSWORDFILE>
2888
2889 Use system passwd file for user name expansion.  If no mail domain is provided
2890 (via B<--domain>), it tries to read one from B</etc/mailname>, output of B<hostname
2891 -d>, B<dnsdomainname>, or B<domain-name>.  cvs2cl exits with an error if none of
2892 those options is successful. Use a domain of '' to prevent the addition of a
2893 mail domain.
2894
2895 =item B<--domain> I<DOMAIN>
2896
2897 Domain to build email addresses from.
2898
2899 =item B<--gecos>
2900
2901 Get user information from GECOS data.
2902
2903 =item B<-R> I<REGEXP>, B<--regexp> I<REGEXP>
2904
2905 Include only entries that match I<REGEXP>.  This option may be used multiple
2906 times.
2907
2908 =item B<-I> I<REGEXP>, B<--ignore> I<REGEXP>
2909
2910 Ignore files whose names match I<REGEXP>.  This option may be used multiple
2911 times.  The regexp is a perl regular expression.  It is matched as is; you may
2912 want to prefix with a ^ or suffix with a $ to anchor the match.
2913
2914 =item B<-C>, B<--case-insensitive>
2915
2916 Any regexp matching is done case-insensitively.
2917
2918 =item B<-F> I<BRANCH>, B<--follow> I<BRANCH>
2919
2920 Show only revisions on or ancestral to I<BRANCH>.
2921
2922 =item B<--follow-only> I<BRANCH>
2923
2924 Like --follow, but sub-branches are not followed.
2925
2926 =item B<--no-ancestors>
2927
2928 When using B<-F>, only track changes since the I<BRANCH> started.
2929
2930 =item B<--no-hide-branch-additions>
2931
2932 By default, entries generated by cvs for a file added on a branch (a dead 1.1
2933 entry) are not shown.  This flag reverses that action.
2934
2935 =item B<-S>, B<--separate-header>
2936
2937 Blank line between each header and log message.
2938
2939 =item B<--summary>
2940
2941 Add CVS change summary information.
2942
2943 =item B<--no-wrap>
2944
2945 Don't auto-wrap log message (recommend B<-S> also).
2946
2947 =item B<--no-indent>
2948
2949 Don't indent log message
2950
2951 =item B<--gmt>, B<--utc>
2952
2953 Show times in GMT/UTC instead of local time.
2954
2955 =item B<--accum>
2956
2957 Add to an existing ChangeLog (incompatible with B<--xml>).
2958
2959 =item B<-w>, B<--day-of-week>
2960
2961 Show day of week.
2962
2963 =item B<--no-times>
2964
2965 Don't show times in output.
2966
2967 =item B<--chrono>
2968
2969 Output log in chronological order (default is reverse chronological order).
2970
2971 =item B<--header> I<FILE>
2972
2973 Get ChangeLog header from I<FILE> ("B<->" means stdin).
2974
2975 =item B<--xml>
2976
2977 Output XML instead of ChangeLog format.
2978
2979 =item B<--xml-encoding> I<ENCODING.>
2980
2981 Insert encoding clause in XML header.
2982
2983 =item B<--noxmlns>
2984
2985 Don't include xmlns= attribute in root element.
2986
2987 =item B<--hide-filenames>
2988
2989 Don't show filenames (ignored for XML output).
2990
2991 =item B<--no-common-dir>
2992
2993 Don't shorten directory names from filenames.
2994
2995 =item B<--rcs> I<CVSROOT>
2996
2997 Handle filenames from raw RCS, for instance those produced by "cvs rlog"
2998 output, stripping the prefix I<CVSROOT>.
2999
3000 =item B<-P>, B<--prune>
3001
3002 Don't show empty log messages.
3003
3004 =item B<--lines-modified>
3005
3006 Output the number of lines added and the number of lines removed for
3007 each checkin (if applicable). At the moment, this only affects the
3008 XML output mode.
3009
3010 =item B<--ignore-tag> I<TAG>
3011
3012 Ignore individual changes that are associated with a given tag.
3013 May be repeated, if so, changes that are associated with any of
3014 the given tags are ignored.
3015
3016 =item B<--show-tag> I<TAG>
3017
3018 Log only individual changes that are associated with a given
3019 tag.  May be repeated, if so, changes that are associated with
3020 any of the given tags are logged.
3021
3022 =item B<--delta> I<FROM_TAG>B<:>I<TO_TAG>
3023
3024 Attempt a delta between two tags (since I<FROM_TAG> up to and
3025 including I<TO_TAG>).  The algorithm is a simple date-based one
3026 (this is a hard problem) so results are imperfect.
3027
3028 =item B<-g> I<OPTS>, B<--global-opts> I<OPTS>
3029
3030 Pass I<OPTS> to cvs like in "cvs I<OPTS> log ...".
3031
3032 =item B<-l> I<OPTS>, B<--log-opts> I<OPTS>
3033
3034 Pass I<OPTS> to cvs log like in "cvs ... log I<OPTS>".
3035
3036 =back
3037
3038 Notes about the options and arguments:
3039
3040 =over 4
3041
3042 =item *
3043
3044 The B<-I> and B<-F> options may appear multiple times.
3045
3046 =item *
3047
3048 To follow trunk revisions, use "B<-F trunk>" ("B<-F TRUNK>" also works).  This is
3049 okay because no would ever, ever be crazy enough to name a branch "trunk",
3050 right?  Right.
3051
3052 =item *
3053
3054 For the B<-U> option, the I<UFILE> should be formatted like CVSROOT/users. That is,
3055 each line of I<UFILE> looks like this:
3056
3057        jrandom:jrandom@red-bean.com
3058
3059 or maybe even like this
3060
3061        jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
3062
3063 Don't forget to quote the portion after the colon if necessary.
3064
3065 =item *
3066
3067 Many people want to filter by date.  To do so, invoke cvs2cl.pl like this:
3068
3069        cvs2cl.pl -l "-d'DATESPEC'"
3070
3071 where DATESPEC is any date specification valid for "cvs log -d".  (Note that
3072 CVS 1.10.7 and below requires there be no space between -d and its argument).
3073
3074 =item *
3075
3076 Dates/times are interpreted in the local time zone.
3077
3078 =item *
3079
3080 Remember to quote the argument to `B<-l>' so that your shell doesn't interpret
3081 spaces as argument separators.
3082
3083 =item *
3084
3085 See the 'Common Options' section of the cvs manual ('info cvs' on UNIX-like
3086 systems) for more information.
3087
3088 =item *
3089
3090 Note that the rules for quoting under windows shells are different.
3091
3092 =item *
3093
3094 To run in an automated environment such as CGI or PHP, suidperl may be needed
3095 in order to execute as the correct user to enable /cvsroot read lock files to
3096 be written for the 'cvs log' command.  This is likely just a case of changing
3097 the /usr/bin/perl command to /usr/bin/suidperl, and explicitly declaring the
3098 PATH variable.
3099
3100 =back
3101
3102 =head1 EXAMPLES
3103
3104 Some examples (working on UNIX shells):
3105
3106       # logs after 6th March, 2003 (inclusive)
3107       cvs2cl.pl -l "-d'>2003-03-06'"
3108       # logs after 4:34PM 6th March, 2003 (inclusive)
3109       cvs2cl.pl -l "-d'>2003-03-06 16:34'"
3110       # logs between 4:46PM 6th March, 2003 (exclusive) and
3111       # 4:34PM 6th March, 2003 (inclusive)
3112       cvs2cl.pl -l "-d'2003-03-06 16:46>2003-03-06 16:34'"
3113
3114 Some examples (on non-UNIX shells):
3115
3116       # Reported to work on windows xp/2000
3117       cvs2cl.pl -l  "-d"">2003-10-18;today<"""
3118
3119 =head1 AUTHORS
3120
3121 =over 4
3122
3123 =item Karl Fogel
3124
3125 =item Melissa O'Neill
3126
3127 =item Martyn J. Pearce
3128
3129 =back
3130
3131 Contributions from
3132
3133 =over 4
3134
3135 =item Mike Ayers
3136
3137 =item Tim Bradshaw
3138
3139 =item Richard Broberg
3140
3141 =item Nathan Bryant
3142
3143 =item Oswald Buddenhagen
3144
3145 =item Neil Conway
3146
3147 =item Arthur de Jong
3148
3149 =item Mark W. Eichin
3150
3151 =item Dave Elcock
3152
3153 =item Reid Ellis
3154
3155 =item Simon Josefsson
3156
3157 =item Robin Hugh Johnson
3158
3159 =item Terry Kane
3160
3161 =item Pete Kempf
3162
3163 =item Akos Kiss
3164
3165 =item Claus Klein
3166
3167 =item Eddie Kohler
3168
3169 =item Richard Laager
3170
3171 =item Kevin Lilly
3172
3173 =item Karl-Heinz Marbaise
3174
3175 =item Mitsuaki Masuhara
3176
3177 =item Henrik Nordstrom
3178
3179 =item Joe Orton
3180
3181 =item Peter Palfrader
3182
3183 =item Thomas Parmelan
3184
3185 =item Jordan Russell
3186
3187 =item Jacek Sliwerski
3188
3189 =item Johannes Stezenbach
3190
3191 =item Joseph Walton
3192
3193 =item Ernie Zapata
3194
3195 =back
3196
3197 =head1 BUGS
3198
3199 Please report bugs to C<bug-cvs2cl@red-bean.com>.
3200
3201 =head1 PREREQUISITES
3202
3203 This script requires C<Text::Wrap>, C<Time::Local>, and C<File::Basename>.  It
3204 also seems to require C<Perl 5.004_04> or higher.
3205
3206 =head1 OPERATING SYSTEM COMPATIBILITY
3207
3208 Should work on any OS.
3209
3210 =head1 SCRIPT CATEGORIES
3211
3212 Version_Control/CVS
3213
3214 =head1 COPYRIGHT
3215
3216 (C) 2001,2002,2003,2004 Martyn J. Pearce E<lt>fluffy@cpan.orgE<gt>, under the GNU GPL.
3217
3218 (C) 1999 Karl Fogel E<lt>kfogel@red-bean.comE<gt>, under the GNU GPL.
3219
3220 cvs2cl.pl is free software; you can redistribute it and/or modify
3221 it under the terms of the GNU General Public License as published by
3222 the Free Software Foundation; either version 2, or (at your option)
3223 any later version.
3224
3225 cvs2cl.pl is distributed in the hope that it will be useful,
3226 but WITHOUT ANY WARRANTY; without even the implied warranty of
3227 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
3228 GNU General Public License for more details.
3229
3230 You may have received a copy of the GNU General Public License
3231 along with cvs2cl.pl; see the file COPYING.  If not, write to the
3232 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
3233 Boston, MA 02111-1307, USA.
3234
3235 =head1 SEE ALSO
3236
3237 cvs(1)
3238