maintenance/cvs2cl_by_file.pl

   1 #!/bin/sh
   2 exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
   3 #!perl -w
   4
   5 ###########################################################################
   6 # This is a quick hack version of cvs2cl.pl (see below)
   7 # that simply outputs in a file-centric mannor.
   8 # only CVS::Utils::ChangeLog::EntrySet::Output::output_changelog
   9 # was altered
  10 #
  11 # Usage:
  12 # cvs2cl_by_file.pl --delta bioperl-release-1-5-1:bioperl-release-1-5-2
  13 # Generates a file called ChangeLog showing, per file, all the commit
  14 # messages since tag bioperl-release-1-5-1 up to tag bioperl-release-1-5-2
  15 # It generates in wikipedia format suitable for immediate pasting into the
  16 # a wiki page for the delta
  17 #
  18 # Sendu Bala <bix@sendu.me.uk>
  19 ###########################################################################
  20
  21
  22 ##############################################################
  23 ###                                                        ###
  24 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
  25 ###                                                        ###
  26 ##############################################################
  27
  28 ##
  29
  30 use strict;
  31
  32 use File::Basename qw( fileparse );
  33 use Getopt::Long   qw( GetOptions );
  34 use Text::Wrap     qw( );
  35 use Time::Local    qw( timegm );
  36 use User::pwent    qw( getpwnam );
  37 use File::Spec;
  38
  39 # The Plan:
  40 #
  41 # Read in the logs for multiple files, spit out a nice ChangeLog that
  42 # mirrors the information entered during `cvs commit'.
  43 #
  44 # The problem presents some challenges. In an ideal world, we could
  45 # detect files with the same author, log message, and checkin time --
  46 # each <filelist, author, time, logmessage> would be a changelog entry.
  47 # We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic*
  48 # so checkins can span a range of times.  Also, the directory structure
  49 # could be hierarchical.
  50 #
  51 # Another question is whether we really want to have the ChangeLog
  52 # exactly reflect commits. An author could issue two related commits,
  53 # with different log entries, reflecting a single logical change to the
  54 # source. GNU style ChangeLogs group these under a single author/date.
  55 # We try to do the same.
  56 #
  57 # So, we parse the output of `cvs log', storing log messages in a
  58 # multilevel hash that stores the mapping:
  59 #   directory => author => time => message => filelist
  60 # As we go, we notice "nearby" commit times and store them together
  61 # (i.e., under the same timestamp), so they appear in the same log
  62 # entry.
  63 #
  64 # When we've read all the logs, we twist this mapping into
  65 # a time => author => message => filelist mapping for each directory.
  66 #
  67 # If we're not using the `--distributed' flag, the directory is always
  68 # considered to be `./', even as descend into subdirectories.
  69
  70 # Call Tree
  71
  72 # name                         number of lines (10.xii.03)
  73 # parse_options                         192
  74 # derive_changelog                       13
  75 # +-maybe_grab_accumulation_date         38
  76 # +-read_changelog                      277
  77 #   +-maybe_read_user_map_file           94
  78 #     +-run_ext                           9
  79 #   +-read_file_path                     29
  80 #   +-read_symbolic_name                 43
  81 #   +-read_revision                      49
  82 #   +-read_date_author_and_state         25
  83 #     +-parse_date_author_and_state      20
  84 #   +-read_branches                      36
  85 # +-output_changelog                    424
  86 #   +-pretty_file_list                  290
  87 #     +-common_path_prefix               35
  88 #   +-preprocess_msg_text                30
  89 #     +-min                               1
  90 #   +-mywrap                             16
  91 #   +-last_line_len                       5
  92 #   +-wrap_log_entry                    177
  93 #
  94 # Utilities
  95 #
  96 # xml_escape                              6
  97 # slurp_file                             11
  98 # debug                                   5
  99 # version                                 2
 100 # usage                                 142
 101
 102 # -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
 103 #
 104 # Note about a bug-slash-opportunity:
 105 # -----------------------------------
 106 #
 107 # There's a bug in Text::Wrap, which affects cvs2cl.  This script
 108 # reveals it:
 109 #
 110 #   #!/usr/bin/perl -w
 111 #
 112 #   use Text::Wrap;
 113 #
 114 #   my $test_text =
 115 #   "This script demonstrates a bug in Text::Wrap.  The very long line
 116 #   following this paragraph will be relocated relative to the surrounding
 117 #   text:
 118 #
 119 #   ====================================================================
 120 #
 121 #   See?  When the bug happens, we'll get the line of equal signs below
 122 #   this paragraph, even though it should be above.";
 123 #
 124 #
 125 #   # Print out the test text with no wrapping:
 126 #   print "$test_text";
 127 #   print "\n";
 128 #   print "\n";
 129 #
 130 #   # Now print it out wrapped, and see the bug:
 131 #   print wrap ("\t", "        ", "$test_text");
 132 #   print "\n";
 133 #   print "\n";
 134 #
 135 # If the line of equal signs were one shorter, then the bug doesn't
 136 # happen.  Interesting.
 137 #
 138 # Anyway, rather than fix this in Text::Wrap, we might as well write a
 139 # new wrap() which has the following much-needed features:
 140 #
 141 # * initial indentation, like current Text::Wrap()
 142 # * subsequent line indentation, like current Text::Wrap()
 143 # * user chooses among: force-break long words, leave them alone, or die()?
 144 # * preserve existing indentation: chopped chunks from an indented line
 145 #   are indented by same (like this line, not counting the asterisk!)
 146 # * optional list of things to preserve on line starts, default ">"
 147 #
 148 # Note that the last two are essentially the same concept, so unify in
 149 # implementation and give a good interface to controlling them.
 150 #
 151 # And how about:
 152 #
 153 # Optionally, when encounter a line pre-indented by same as previous
 154 # line, then strip the newline and refill, but indent by the same.
 155 # Yeah...
 156
 157 # Globals --------------------------------------------------------------------
 158
 159 # In case we have to print it out:
 160 my $VERSION = '$Revision$';
 161 $VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
 162
 163 ## Vars set by options:
 164
 165 # Print debugging messages?
 166 my $Debug = 0;
 167
 168 # Just show version and exit?
 169 my $Print_Version = 0;
 170
 171 # Just print usage message and exit?
 172 my $Print_Usage = 0;
 173
 174 # What file should we generate (defaults to "ChangeLog")?
 175 my $Log_File_Name = "ChangeLog";
 176
 177 # Grab most recent entry date from existing ChangeLog file, just add
 178 # to that ChangeLog.
 179 my $Cumulative = 0;
 180
 181 # `cvs log -d`, this will repeat the last entry in the old log.  This is OK,
 182 # as it guarantees at least one entry in the update changelog, which means
 183 # that there will always be a date to extract for the next update.  The repeat
 184 # entry can be removed in postprocessing, if necessary.
 185
 186 # MJP 2003-08-02
 187 # I don't think this actually does anything useful
 188 my $Update = 0;
 189
 190 # Expand usernames to email addresses based on a map file?
 191 my $User_Map_File = '';
 192 my $User_Passwd_File;
 193 my $Mail_Domain;
 194
 195 # Output log in chronological order? [default is reverse chronological order]
 196 my $Chronological_Order = 0;
 197
 198 # Grab user details via gecos
 199 my $Gecos = 0;
 200
 201 # User domain for gecos email addresses
 202 my $Domain;
 203
 204 # Output to a file or to stdout?
 205 my $Output_To_Stdout = 0;
 206
 207 # Eliminate empty log messages?
 208 my $Prune_Empty_Msgs = 0;
 209
 210 # Tags of which not to output
 211 my %ignore_tags;
 212
 213 # Show only revisions with Tags
 214 my %show_tags;
 215
 216 # Don't call Text::Wrap on the body of the message
 217 my $No_Wrap = 0;
 218
 219 # Indentation of log messages
 220 my $Indent = "\t";
 221
 222 # Don't do any pretty print processing
 223 my $Summary = 0;
 224
 225 # Separates header from log message.  Code assumes it is either " " or
 226 # "\n\n", so if there's ever an option to set it to something else,
 227 # make sure to go through all conditionals that use this var.
 228 my $After_Header = " ";
 229
 230 # XML Encoding
 231 my $XML_Encoding = '';
 232
 233 # Format more for programs than for humans.
 234 my $XML_Output = 0;
 235 my $No_XML_Namespace = 0;
 236 my $No_XML_ISO_Date = 0;
 237
 238 # Do some special tweaks for log data that was written in FSF
 239 # ChangeLog style.
 240 my $FSF_Style = 0;
 241
 242 # Show times in UTC instead of local time
 243 my $UTC_Times = 0;
 244
 245 # Show times in output?
 246 my $Show_Times = 1;
 247
 248 # Show day of week in output?
 249 my $Show_Day_Of_Week = 0;
 250
 251 # Show revision numbers in output?
 252 my $Show_Revisions = 0;
 253
 254 # Show dead files in output?
 255 my $Show_Dead = 0;
 256
 257 # Hide dead trunk files which were created as a result of additions on a
 258 # branch?
 259 my $Hide_Branch_Additions = 1;
 260
 261 # Show tags (symbolic names) in output?
 262 my $Show_Tags = 0;
 263
 264 # Show tags separately in output?
 265 my $Show_Tag_Dates = 0;
 266
 267 # Show branches by symbolic name in output?
 268 my $Show_Branches = 0;
 269
 270 # Show only revisions on these branches or their ancestors.
 271 my @Follow_Branches;
 272 # Show only revisions on these branches or their ancestors; ignore descendent
 273 # branches.
 274 my @Follow_Only;
 275
 276 # Don't bother with files matching this regexp.
 277 my @Ignore_Files;
 278
 279 # How exactly we match entries.  We definitely want "o",
 280 # and user might add "i" by using --case-insensitive option.
 281 my $Case_Insensitive = 0;
 282
 283 # Maybe only show log messages matching a certain regular expression.
 284 my $Regexp_Gate = '';
 285
 286 # Pass this global option string along to cvs, to the left of `log':
 287 my $Global_Opts = '';
 288
 289 # Pass this option string along to the cvs log subcommand:
 290 my $Command_Opts = '';
 291
 292 # Read log output from stdin instead of invoking cvs log?
 293 my $Input_From_Stdin = 0;
 294
 295 # Don't show filenames in output.
 296 my $Hide_Filenames = 0;
 297
 298 # Don't shorten directory names from filenames.
 299 my $Common_Dir = 1;
 300
 301 # Max checkin duration. CVS checkin is not atomic, so we may have checkin
 302 # times that span a range of time. We assume that checkins will last no
 303 # longer than $Max_Checkin_Duration seconds, and that similarly, no
 304 # checkins will happen from the same users with the same message less
 305 # than $Max_Checkin_Duration seconds apart.
 306 my $Max_Checkin_Duration = 180;
 307
 308 # What to put at the front of [each] ChangeLog.
 309 my $ChangeLog_Header = '';
 310
 311 # Whether to enable 'delta' mode, and for what start/end tags.
 312 my $Delta_Mode = 0;
 313 my $Delta_From = '';
 314 my $Delta_To = '';
 315
 316 my $TestCode;
 317
 318 # Whether to parse filenames from the RCS filename, and if so what
 319 # prefix to strip.
 320 my $RCS_Root;
 321
 322 # Whether to output information on the # of lines added and removed
 323 # by each file modification.
 324 my $Show_Lines_Modified = 0;
 325
 326 ## end vars set by options.
 327
 328 # latest observed times for the start/end tags in delta mode
 329 my $Delta_StartTime = 0;
 330 my $Delta_EndTime = 0;
 331
 332 my $No_Ancestors = 0;
 333
 334 my $No_Extra_Indent = 0;
 335
 336 my $GroupWithinDate = 0;
 337
 338 # ----------------------------------------------------------------------------
 339
 340 package CVS::Utils::ChangeLog::EntrySet;
 341
 342 sub new {
 343   my $class = shift;
 344   my %self;
 345   bless \%self, $class;
 346 }
 347
 348 # -------------------------------------
 349
 350 sub output_changelog {
 351   my $output_type = $XML_Output ? 'XML' : 'Text';
 352   my $output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}";
 353   my $output = $output_class->new(follow_branches => \@Follow_Branches,
 354                                   follow_only     => \@Follow_Only,
 355                                   ignore_tags     => \%ignore_tags,
 356                                   show_tags       => \%show_tags,
 357                                  );
 358   $output->output_changelog(@_);
 359 }
 360
 361 # -------------------------------------
 362
 363 sub add_fileentry {
 364   my ($self, $file_full_path, $time, $revision, $state, $lines,
 365       $branch_names, $branch_roots, $branch_numbers,
 366       $symbolic_names, $author, $msg_txt) = @_;
 367
 368       my $qunk =
 369         CVS::Utils::ChangeLog::FileEntry->new($file_full_path, $time, $revision,
 370                                               $state, $lines,
 371                                               $branch_names, $branch_roots,
 372                                               $branch_numbers,
 373                                               $symbolic_names);
 374
 375       # We might be including revision numbers and/or tags and/or
 376       # branch names in the output.  Most of the code from here to
 377       # loop-end deals with organizing these in qunk.
 378
 379       unless ( $Hide_Branch_Additions
 380                and
 381                $msg_txt =~ /file .+ was initially added on branch \S+./ ) {
 382         # Add this file to the list
 383         # (We use many spoonfuls of autovivication magic. Hashes and arrays
 384         # will spring into existence if they aren't there already.)
 385
 386         &main::debug ("(pushing log msg for ". $qunk->dir_key . $qunk->filename . ")\n");
 387
 388         # Store with the files in this commit.  Later we'll loop through
 389         # again, making sure that revisions with the same log message
 390         # and nearby commit times are grouped together as one commit.
 391         $self->{$qunk->dir_key}{$author}{$time}{$msg_txt} =
 392           CVS::Utils::ChangeLog::Message->new($msg_txt)
 393               unless exists $self->{$qunk->dir_key}{$author}{$time}{$msg_txt};
 394         $self->{$qunk->dir_key}{$author}{$time}{$msg_txt}->add_fileentry($qunk);
 395       }
 396
 397 }
 398
 399 # ----------------------------------------------------------------------------
 400
 401 package CVS::Utils::ChangeLog::EntrySet::Output::Text;
 402
 403 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
 404
 405 use File::Basename qw( fileparse );
 406
 407 sub new {
 408   my $class = shift;
 409   my $self = $class->SUPER::new(@_);
 410 }
 411
 412 # -------------------------------------
 413
 414 sub wday {
 415   my $self = shift; my $class = ref $self;
 416   my ($wday) = @_;
 417
 418   return $Show_Day_Of_Week ? ' ' . $class->weekday_en($wday) : '';
 419 }
 420
 421 # -------------------------------------
 422
 423 sub header_line {
 424   my $self = shift;
 425   my ($time, $author, $lastdate) = @_;
 426
 427   my $header_line = '';
 428
 429   my (undef,$min,$hour,$mday,$mon,$year,$wday)
 430     = $UTC_Times ? gmtime($time) : localtime($time);
 431
 432   my $date = $self->fdatetime($time);
 433
 434   if ($Show_Times) {
 435     $header_line =
 436       sprintf "%s  %s\n\n", $date, $author;
 437   } else {
 438     if ( ! defined $lastdate or $date ne $lastdate or ! $GroupWithinDate ) {
 439       if ( $GroupWithinDate ) {
 440         $header_line = "$date\n\n";
 441       } else {
 442         $header_line = "$date  $author\n\n";
 443       }
 444     } else {
 445       $header_line = '';
 446     }
 447   }
 448 }
 449
 450 # -------------------------------------
 451
 452 sub preprocess_msg_text {
 453   my $self = shift;
 454   my ($text) = @_;
 455
 456   $text = $self->SUPER::preprocess_msg_text($text);
 457
 458   unless ( $No_Wrap ) {
 459     # Strip off lone newlines, but only for lines that don't begin with
 460     # whitespace or a mail-quoting character, since we want to preserve
 461     # that kind of formatting.  Also don't strip newlines that follow a
 462     # period; we handle those specially next.  And don't strip
 463     # newlines that precede an open paren.
 464     1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g;
 465
 466     # If a newline follows a period, make sure that when we bring up the
 467     # bottom sentence, it begins with two spaces.
 468     1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g;
 469   }
 470
 471   return $text;
 472 }
 473
 474 # -------------------------------------
 475
 476 # Here we take a bunch of qunks and convert them into printed
 477 # summary that will include all the information the user asked for.
 478 sub pretty_file_list {
 479   my $self = shift;
 480
 481   return ''
 482     if $Hide_Filenames;
 483
 484   my $qunksref = shift;
 485
 486   my @filenames;
 487   my $beauty = '';          # The accumulating header string for this entry.
 488   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
 489   my %unanimous_tags;       # Tags found in all qunks
 490   my %all_branches;         # Branches found in any qunk
 491   my $fbegun = 0;           # Did we begin printing filenames yet?
 492
 493   my ($common_dir, $qunkrefs) =
 494     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref);
 495
 496   my @qunkrefs = @$qunkrefs;
 497
 498   # Not XML output, so complexly compactify for chordate consumption.  At this
 499   # point we have enough global information about all the qunks to organize
 500   # them non-redundantly for output.
 501
 502   if ($common_dir) {
 503     # Note that $common_dir still has its trailing slash
 504     $beauty .= "$common_dir: ";
 505   }
 506
 507   if ($Show_Branches)
 508   {
 509     # For trailing revision numbers.
 510     my @brevisions;
 511
 512     foreach my $branch (keys (%all_branches))
 513     {
 514       foreach my $qunkref (@qunkrefs)
 515       {
 516         if ((defined ($qunkref->branch))
 517             and ($qunkref->branch eq $branch))
 518         {
 519           if ($fbegun) {
 520             # kff todo: comma-delimited in XML too?  Sure.
 521             $beauty .= ", ";
 522           }
 523           else {
 524             $fbegun = 1;
 525           }
 526           my $fname = substr ($qunkref->filename, length ($common_dir));
 527           $beauty .= $fname;
 528           $qunkref->{'printed'} = 1;  # Just setting a mark bit, basically
 529
 530           if ( $Show_Tags and defined $qunkref->tags ) {
 531             my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
 532
 533             if (@tags) {
 534               $beauty .= " (tags: ";
 535               $beauty .= join (', ', @tags);
 536               $beauty .= ")";
 537             }
 538           }
 539
 540           if ($Show_Revisions) {
 541             # Collect the revision numbers' last components, but don't
 542             # print them -- they'll get printed with the branch name
 543             # later.
 544             $qunkref->revision =~ /.+\.([\d]+)$/;
 545             push (@brevisions, $1);
 546
 547             # todo: we're still collecting branch roots, but we're not
 548             # showing them anywhere.  If we do show them, it would be
 549             # nifty to just call them revision "0" on a the branch.
 550             # Yeah, that's the ticket.
 551           }
 552         }
 553       }
 554       $beauty .= " ($branch";
 555       if (@brevisions) {
 556         if ((scalar (@brevisions)) > 1) {
 557           $beauty .= ".[";
 558           $beauty .= (join (',', @brevisions));
 559           $beauty .= "]";
 560         }
 561         else {
 562           # Square brackets are spurious here, since there's no range to
 563           # encapsulate
 564           $beauty .= ".$brevisions[0]";
 565         }
 566       }
 567       $beauty .= ")";
 568     }
 569   }
 570
 571   # Okay; any qunks that were done according to branch are taken care
 572   # of, and marked as printed.  Now print everyone else.
 573
 574   my %fileinfo_printed;
 575   foreach my $qunkref (@qunkrefs)
 576   {
 577     next if (defined ($qunkref->{'printed'}));   # skip if already printed
 578
 579     my $b = substr ($qunkref->filename, length ($common_dir));
 580     # todo: Shlomo's change was this:
 581     # $beauty .= substr ($qunkref->filename,
 582     #              (($common_dir eq "./") ? '' : length ($common_dir)));
 583     $qunkref->{'printed'} = 1;  # Set a mark bit.
 584
 585     if ($Show_Revisions || $Show_Tags || $Show_Dead)
 586     {
 587       my $started_addendum = 0;
 588
 589       if ($Show_Revisions) {
 590         $started_addendum = 1;
 591         $b .= " (";
 592         $b .= $qunkref->revision;
 593       }
 594       if ($Show_Dead && $qunkref->state =~ /dead/)
 595       {
 596         # Deliberately not using $started_addendum. Keeping it simple.
 597         $b .= "[DEAD]";
 598       }
 599       if ($Show_Tags && (defined $qunkref->tags)) {
 600         my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
 601         if ((scalar (@tags)) > 0) {
 602           if ($started_addendum) {
 603             $b .= ", ";
 604           }
 605           else {
 606             $b .= " (tags: ";
 607           }
 608           $b .= join (', ', @tags);
 609           $started_addendum = 1;
 610         }
 611       }
 612       if ($started_addendum) {
 613         $b .= ")";
 614       }
 615     }
 616
 617     unless ( exists $fileinfo_printed{$b} ) {
 618       if ($fbegun) {
 619         $beauty .= ", ";
 620       } else {
 621         $fbegun = 1;
 622       }
 623       $beauty .= $b, $fileinfo_printed{$b} = 1;
 624     }
 625   }
 626
 627   # Unanimous tags always come last.
 628   if ($Show_Tags && %unanimous_tags)
 629   {
 630     $beauty .= " (utags: ";
 631     $beauty .= join (', ', sort keys (%unanimous_tags));
 632     $beauty .= ")";
 633   }
 634
 635   # todo: still have to take care of branch_roots?
 636
 637   $beauty = "$beauty:";
 638
 639   return $beauty;
 640 }
 641
 642 # -------------------------------------
 643
 644 sub output_tagdate {
 645   my $self = shift;
 646   my ($fh, $time, $tag) = @_;
 647
 648   my $fdatetime = $self->fdatetime($time);
 649   print $fh "$fdatetime  tag $tag\n\n";
 650   return;
 651 }
 652
 653 # -------------------------------------
 654
 655 sub format_body {
 656   my $self = shift;
 657   my ($msg, $files, $qunklist) = @_;
 658
 659   my $body;
 660
 661   if ( $No_Wrap and ! $Summary ) {
 662     $msg = $self->preprocess_msg_text($msg);
 663     $files = $self->mywrap("\t", "\t  ", "* $files");
 664     $msg =~ s/\n(.+)/\n$Indent$1/g;
 665     unless ($After_Header eq " ") {
 666       $msg =~ s/^(.+)/$Indent$1/g;
 667     }
 668     if ( $Hide_Filenames ) {
 669       $body = $After_Header . $msg;
 670     } else {
 671       $body = $files . $After_Header . $msg;
 672     }
 673   } elsif ( $Summary ) {
 674     my ($filelist, $qunk);
 675     my (@DeletedQunks, @AddedQunks, @ChangedQunks);
 676
 677     $msg = $self->preprocess_msg_text($msg);
 678     #
 679     #     Sort the files (qunks) according to the operation that was
 680     # performed.  Files which were added have no line change
 681     # indicator, whereas deleted files have state dead.
 682     #
 683     foreach $qunk ( @$qunklist ) {
 684       if ( "dead" eq $qunk->state) {
 685         push @DeletedQunks, $qunk;
 686       } elsif ( ! defined $qunk->lines ) {
 687         push @AddedQunks, $qunk;
 688       } else {
 689         push @ChangedQunks, $qunk;
 690       }
 691     }
 692     #
 693     #     The qunks list was  originally in tree search order.  Let's
 694     # get that back.  The lists, if they exist, will be reversed upon
 695     # processing.
 696     #
 697
 698     #
 699     #     Now write the three sections onto $filelist
 700     #
 701     if ( @DeletedQunks ) {
 702       $filelist .= "\tDeleted:\n";
 703       foreach $qunk ( @DeletedQunks ) {
 704         $filelist .= "\t\t" . $qunk->filename;
 705         $filelist .= " (" . $qunk->revision . ")";
 706         $filelist .= "\n";
 707       }
 708       undef @DeletedQunks;
 709     }
 710
 711     if ( @AddedQunks ) {
 712       $filelist .= "\tAdded:\n";
 713       foreach $qunk (@AddedQunks) {
 714         $filelist .= "\t\t" . $qunk->filename;
 715         $filelist .= " (" . $qunk->revision . ")";
 716         $filelist .= "\n";
 717       }
 718       undef @AddedQunks ;
 719     }
 720
 721     if ( @ChangedQunks ) {
 722       $filelist .= "\tChanged:\n";
 723       foreach $qunk (@ChangedQunks) {
 724         $filelist .= "\t\t" . $qunk->filename;
 725         $filelist .= " (" . $qunk->revision . ")";
 726         $filelist .= ", \"" . $qunk->state . "\"";
 727         $filelist .= ", lines: " . $qunk->lines;
 728         $filelist .= "\n";
 729       }
 730       undef @ChangedQunks;
 731     }
 732
 733     chomp $filelist;
 734
 735     if ( $Hide_Filenames ) {
 736       $filelist = '';
 737     }
 738
 739     $msg =~ s/\n(.*)/\n$Indent$1/g;
 740     unless ( $After_Header eq " " or $FSF_Style ) {
 741       $msg =~ s/^(.*)/$Indent$1/g;
 742     }
 743
 744     unless ( $No_Wrap ) {
 745       if ( $FSF_Style ) {
 746         $msg = $self->wrap_log_entry($msg, '', 69, 69);
 747         chomp($msg);
 748         chomp($msg);
 749       } else {
 750         $msg = $self->mywrap('', $Indent, "$msg");
 751         $msg =~ s/[ \t]+\n/\n/g;
 752       }
 753     }
 754
 755     $body = $filelist . $After_Header . $msg;
 756   } else {  # do wrapping, either FSF-style or regular
 757     my $latter_wrap = $No_Extra_Indent ? $Indent : "$Indent  ";
 758
 759     if ( $FSF_Style ) {
 760       $files = $self->mywrap($Indent, $latter_wrap, "* $files");
 761
 762       my $files_last_line_len = 0;
 763       if ( $After_Header eq " " ) {
 764         $files_last_line_len = $self->last_line_len($files);
 765         $files_last_line_len += 1;  # for $After_Header
 766       }
 767
 768       $msg = $self->wrap_log_entry($msg, $latter_wrap, 69-$files_last_line_len, 69);
 769       $body = $files . $After_Header . $msg;
 770     } else {  # not FSF-style
 771       $msg = $self->preprocess_msg_text($msg);
 772       $body = $files . $After_Header . $msg;
 773       $body = $self->mywrap($Indent, $latter_wrap, "* $body");
 774       $body =~ s/[ \t]+\n/\n/g;
 775     }
 776   }
 777
 778   return $body;
 779 }
 780
 781 # ----------------------------------------------------------------------------
 782
 783 package CVS::Utils::ChangeLog::EntrySet::Output::XML;
 784
 785 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
 786
 787 use File::Basename qw( fileparse );
 788
 789 sub new {
 790   my $class = shift;
 791   my $self = $class->SUPER::new(@_);
 792 }
 793
 794 # -------------------------------------
 795
 796 sub header_line {
 797   my $self = shift;
 798   my ($time, $author, $lastdate) = @_;
 799
 800   my $header_line = '';
 801
 802   my $isoDate;
 803
 804   my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
 805
 806   # Ideally, this would honor $UTC_Times and use +HH:MM syntax
 807   $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
 808                      $y + 1900, $m + 1, $d, $H, $M, $S);
 809
 810   my (undef,$min,$hour,$mday,$mon,$year,$wday)
 811     = $UTC_Times ? gmtime($time) : localtime($time);
 812
 813   my $date = $self->fdatetime($time);
 814   $wday = $self->wday($wday);
 815
 816   $header_line =
 817     sprintf ("<date>%4u-%02u-%02u</date>\n${wday}<time>%02u:%02u</time>\n",
 818              $year+1900, $mon+1, $mday, $hour, $min);
 819   $header_line .= "<isoDate>$isoDate</isoDate>\n"
 820     unless $No_XML_ISO_Date;
 821   $header_line .= sprintf("<author>%s</author>\n" , $author);
 822 }
 823
 824 # -------------------------------------
 825
 826 sub wday {
 827   my $self = shift; my $class = ref $self;
 828   my ($wday) = @_;
 829
 830   return '<weekday>' . $class->weekday_en($wday) . "</weekday>\n";
 831 }
 832
 833 # -------------------------------------
 834
 835 sub escape {
 836   my $self = shift;
 837
 838   my $txt = shift;
 839   $txt =~ s/&/&amp;/g;
 840   $txt =~ s/</&lt;/g;
 841   $txt =~ s/>/&gt;/g;
 842   return $txt;
 843 }
 844
 845 # -------------------------------------
 846
 847 sub output_header {
 848   my $self = shift;
 849   my ($fh) = @_;
 850
 851   my $encoding    =
 852     length $XML_Encoding ? qq'encoding="$XML_Encoding"' : '';
 853   my $version     = 'version="1.0"';
 854   my $declaration =
 855     sprintf '<?xml %s?>', join ' ', grep length, $version, $encoding;
 856   my $root        =
 857     $No_XML_Namespace ?
 858       '<changelog>'     :
 859         '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
 860   print $fh "$declaration\n\n$root\n\n";
 861 }
 862
 863 # -------------------------------------
 864
 865 sub output_footer {
 866   my $self = shift;
 867   my ($fh) = @_;
 868
 869   print $fh "</changelog>\n";
 870 }
 871
 872 # -------------------------------------
 873
 874 sub preprocess_msg_text {
 875   my $self = shift;
 876   my ($text) = @_;
 877
 878   $text = $self->SUPER::preprocess_msg_text($text);
 879
 880   $text = $self->escape($text);
 881   chomp $text;
 882   $text = "<msg>${text}</msg>\n";
 883
 884   return $text;
 885 }
 886
 887 # -------------------------------------
 888
 889 # Here we take a bunch of qunks and convert them into a printed
 890 # summary that will include all the information the user asked for.
 891 sub pretty_file_list {
 892   my $self = shift;
 893   my ($qunksref) = @_;
 894
 895   my $beauty = '';          # The accumulating header string for this entry.
 896   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
 897   my %unanimous_tags;       # Tags found in all qunks
 898   my %all_branches;         # Branches found in any qunk
 899   my $fbegun = 0;           # Did we begin printing filenames yet?
 900
 901   my ($common_dir, $qunkrefs) =
 902     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches),
 903       $qunksref);
 904
 905   my @qunkrefs = @$qunkrefs;
 906
 907   # If outputting XML, then our task is pretty simple, because we
 908   # don't have to detect common dir, common tags, branch prefixing,
 909   # etc.  We just output exactly what we have, and don't worry about
 910   # redundancy or readability.
 911
 912   foreach my $qunkref (@qunkrefs)
 913   {
 914     my $filename    = $qunkref->filename;
 915     my $state       = $qunkref->state;
 916     my $revision    = $qunkref->revision;
 917     my $tags        = $qunkref->tags;
 918     my $branch      = $qunkref->branch;
 919     my $branchroots = $qunkref->roots;
 920     my $lines       = $qunkref->lines;
 921
 922     $filename = $self->escape($filename);   # probably paranoia
 923     $revision = $self->escape($revision);   # definitely paranoia
 924
 925     $beauty .= "<file>\n";
 926     $beauty .= "<name>${filename}</name>\n";
 927     $beauty .= "<cvsstate>${state}</cvsstate>\n";
 928     $beauty .= "<revision>${revision}</revision>\n";
 929
 930     if ($Show_Lines_Modified
 931         && $lines && $lines =~ m/\+(\d+)\s+-(\d+)/) {
 932         $beauty .= "<linesadded>$1</linesadded>\n";
 933         $beauty .= "<linesremoved>$2</linesremoved>\n";
 934     }
 935
 936     if ($branch) {
 937       $branch   = $self->escape($branch);     # more paranoia
 938       $beauty .= "<branch>${branch}</branch>\n";
 939     }
 940     foreach my $tag (@$tags) {
 941       $tag = $self->escape($tag);  # by now you're used to the paranoia
 942       $beauty .= "<tag>${tag}</tag>\n";
 943     }
 944     foreach my $root (@$branchroots) {
 945       $root = $self->escape($root);  # which is good, because it will continue
 946       $beauty .= "<branchroot>${root}</branchroot>\n";
 947     }
 948     $beauty .= "</file>\n";
 949   }
 950
 951   # Theoretically, we could go home now.  But as long as we're here,
 952   # let's print out the common_dir and utags, as a convenience to
 953   # the receiver (after all, earlier code calculated that stuff
 954   # anyway, so we might as well take advantage of it).
 955
 956   if ((scalar (keys (%unanimous_tags))) > 1) {
 957     foreach my $utag ((keys (%unanimous_tags))) {
 958       $utag = $self->escape($utag);   # the usual paranoia
 959       $beauty .= "<utag>${utag}</utag>\n";
 960     }
 961   }
 962   if ($common_dir) {
 963     $common_dir = $self->escape($common_dir);
 964     $beauty .= "<commondir>${common_dir}</commondir>\n";
 965   }
 966
 967   # That's enough for XML, time to go home:
 968   return $beauty;
 969 }
 970
 971 # -------------------------------------
 972
 973 sub output_tagdate {
 974   my $self = shift;
 975   my ($fh, $time, $tag) = @_;
 976
 977   my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
 978
 979   # Ideally, this would honor $UTC_Times and use +HH:MM syntax
 980   my $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
 981                        $y + 1900, $m + 1, $d, $H, $M, $S);
 982
 983   print $fh "<tagdate>\n";
 984   print $fh "<tagisodate>$isoDate</tagisodate>\n";
 985   print $fh "<tagdatetag>$tag</tagdatetag>\n";
 986   print $fh "</tagdate>\n\n";
 987   return;
 988 }
 989
 990 # -------------------------------------
 991
 992 sub output_entry {
 993   my $self = shift;
 994   my ($fh, $entry) = @_;
 995   print $fh "<entry>\n$entry</entry>\n\n";
 996 }
 997
 998 # -------------------------------------
 999
1000 sub format_body {
1001   my $self = shift;
1002   my ($msg, $files, $qunklist) = @_;
1003
1004   $msg = $self->preprocess_msg_text($msg);
1005   return $files . $msg;
1006 }
1007
1008 # ----------------------------------------------------------------------------
1009
1010 package CVS::Utils::ChangeLog::EntrySet::Output;
1011
1012 use Carp           qw( croak );
1013 use File::Basename qw( fileparse );
1014
1015 # Class Utility Functions -------------
1016
1017 { # form closure
1018
1019 my @weekdays = (qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday));
1020 sub weekday_en {
1021   my $class = shift;
1022   return $weekdays[$_[0]];
1023 }
1024
1025 }
1026
1027 # -------------------------------------
1028
1029 sub new {
1030   my ($proto, %args) = @_;
1031   my $class = ref $proto || $proto;
1032
1033   my $follow_branches = delete $args{follow_branches};
1034   my $follow_only     = delete $args{follow_only};
1035   my $ignore_tags     = delete $args{ignore_tags};
1036   my $show_tags       = delete $args{show_tags};
1037   die "Unrecognized arg to EntrySet::Output::new: '$_'\n"
1038     for keys %args;
1039
1040   bless +{follow_branches => $follow_branches,
1041           follow_only     => $follow_only,
1042           show_tags       => $show_tags,
1043           ignore_tags     => $ignore_tags,
1044          }, $class;
1045 }
1046
1047 # Abstract Subrs ----------------------
1048
1049 sub wday               { croak "Whoops.  Abtract method call (wday).\n" }
1050 sub pretty_file_list   { croak "Whoops.  Abtract method call (pretty_file_list).\n" }
1051 sub output_tagdate     { croak "Whoops.  Abtract method call (output_tagdate).\n" }
1052 sub header_line        { croak "Whoops.  Abtract method call (header_line).\n" }
1053
1054 # Instance Subrs ----------------------
1055
1056 sub output_header { }
1057
1058 # -------------------------------------
1059
1060 sub output_entry {
1061   my $self = shift;
1062   my ($fh, $entry) = @_;
1063   print $fh "$entry\n";
1064 }
1065
1066 # -------------------------------------
1067
1068 sub output_footer { }
1069
1070 # -------------------------------------
1071
1072 sub escape { return $_[1] }
1073
1074 # -------------------------------------
1075
1076 sub _revision_is_wanted {
1077   my ($self, $qunk) = @_;
1078
1079   my ($revision, $branch_numbers) = @{$qunk}{qw( revision branch_numbers )};
1080   my $follow_branches = $self->{follow_branches};
1081   my $follow_only     = $self->{follow_only};
1082
1083   for my $ignore_tag (keys %{$self->{ignore_tags}}) {
1084     return
1085       if defined $qunk->{tags} and grep $_ eq $ignore_tag, @{$qunk->{tags}};
1086   }
1087
1088   if ( keys %{$self->{show_tags}} ) {
1089     for my $show_tag (keys %{$self->{show_tags}}) {
1090       return
1091         if ! defined $qunk->{tags} or ! grep $_ eq $show_tag, @{$qunk->{tags}};
1092     }
1093   }
1094
1095   return 1
1096     unless @$follow_branches + @$follow_only; # no follow is follow all
1097
1098   for my $x (map([$_, 1], @$follow_branches),
1099              map([$_, 0], @$follow_only    )) {
1100     my ($branch, $followsub) = @$x;
1101
1102     # Special case for following trunk revisions
1103     return 1
1104       if $branch =~ /^trunk$/i and $revision =~ /^[0-9]+\.[0-9]+$/;
1105
1106     if ( my $branch_number = $branch_numbers->{$branch} ) {
1107       # Are we on one of the follow branches or an ancestor of same?
1108
1109       # If this revision is a prefix of the branch number, or possibly is less
1110       # in the minormost number, OR if this branch number is a prefix of the
1111       # revision, then yes.  Otherwise, no.
1112
1113       # So below, we determine if any of those conditions are met.
1114
1115       # Trivial case: is this revision on the branch?  (Compare this way to
1116       # avoid regexps that screw up Emacs indentation, argh.)
1117       if ( substr($revision, 0, (length($branch_number) + 1))
1118            eq
1119            ($branch_number . ".") ) {
1120         if ( $followsub ) {
1121           return 1;
1122 #        } elsif ( length($revision) == length($branch_number)+2 ) {
1123         } elsif ( substr($revision, length($branch_number)+1) =~ /^\d+$/ ) {
1124           return 1;
1125         }
1126       } elsif ( length($branch_number) > length($revision)
1127                 and
1128                 ! $No_Ancestors ) {
1129         # Non-trivial case: check if rev is ancestral to branch
1130
1131         # r_left still has the trailing "."
1132         my ($r_left, $r_end) = ($revision =~ /^((?:\d+\.)+)(\d+)$/);
1133
1134         # b_left still has trailing "."
1135         # b_mid has no trailing "."
1136         my ($b_left, $b_mid) = ($branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/);
1137         return 1
1138           if $r_left eq $b_left and $r_end <= $b_mid;
1139       }
1140     }
1141   }
1142
1143   return;
1144 }
1145
1146 # -------------------------------------
1147
1148 sub output_changelog {
1149 my $self = shift; my $class = ref $self;
1150     my ($grand_poobah) = @_;
1151     ### Process each ChangeLog
1152
1153     while (my ($dir,$authorhash) = each %$grand_poobah) {
1154         &main::debug ("DOING DIR: $dir\n");
1155
1156         # Here we twist our hash around, from being
1157         #   author => time => message => filelist
1158         # in %$authorhash to
1159         #   time => author => message => filelist
1160         # in %changelog.
1161         #
1162         # This is also where we merge entries.  The algorithm proceeds
1163         # through the timeline of the changelog with a sliding window of
1164         # $Max_Checkin_Duration seconds; within that window, entries that
1165         # have the same log message are merged.
1166         #
1167         # (To save space, we zap %$authorhash after we've copied
1168         # everything out of it.)
1169
1170         # commit messages matching these will be ignored
1171         # should probably read these in from a file since they
1172         # will mostly change each release, but this is a quick hack!
1173         my @skip = ("Updating mailing lists URL",
1174                     "Updated bug reporting",
1175                     "use base, return true",
1176                     "Removed spurious ",
1177                     "cleaning unnecessary ",
1178                     "Updating emails",
1179                     "Improved POD markup",
1180                     "Fixed spaces",
1181                     "I'll be using bioperl.org mail address",
1182                     "Switched vanilla throw",
1183                     "regexp madness",
1184                     "minor edit",
1185                     "pod",
1186                     "pdoc",
1187                     "email address",
1188                     "typo",
1189                     "be explicit",
1190                     "using 'our'",
1191                     "silly email",
1192                     "regex clarity",
1193                     "polishing",
1194                     "Removed unused \"use vars",
1195                     "return, not return undef",
1196                     "lexically scoped file handles",
1197                     "No setting of own \$VERSION",
1198                     "do not return directly from sort",
1199                     "have NAME match module",
1200                     "Updating URLs",
1201                     "Changing emails",
1202                     "Updated doc",
1203                     "No setting own version",
1204                     "no log message");
1205
1206         # not interested in these files
1207         my %files_to_skip = ( AUTHORS => 1,
1208                               Changes => 1,
1209                               'INSTALL.PROGRAMS' => 1,
1210                               README => 1,
1211                               BUGS => 1,
1212                               INSTALL => 1,
1213                               LICENSE => 1,
1214                               DEPENDENCIES => 1,
1215                               DEPRECATED => 1,
1216                               'INSTALL.WIN' => 1,
1217                               MANIFEST => 1,
1218                               'MANIFEST.SKIP' => 1,
1219                               PLATFORMS => 1);
1220
1221         my %changelog;
1222         while (my ($author,$timehash) = each %$authorhash) {
1223             foreach my $time (sort {$a <=> $b} (keys %$timehash)) {
1224                 next if ($Delta_Mode && (($time <= $Delta_StartTime) || ($time > $Delta_EndTime && $Delta_EndTime)));
1225
1226                 my $msghash = $timehash->{$time};
1227                 MSG: while (my ($msg, $qunklist) = each %$msghash) {
1228                     foreach my $skip (@skip) {
1229                         if ($msg =~ /$skip/i) {
1230                             next MSG;
1231                         }
1232                     }
1233                     if ($msg =~ /merge/i && $msg =~ /head/i) {
1234                         next MSG;
1235                     }
1236
1237                     $msg =~ s/\n/ /g;
1238
1239                     foreach my $file (@{$qunklist->files}) {
1240                         $changelog{$file->filename}{$time} = $msg;
1241                     }
1242                 }
1243             }
1244         }
1245         undef (%$authorhash);
1246
1247         ### Now we can write out the ChangeLog!
1248
1249         my ($logfile_here, $logfile_bak, $tmpfile);
1250         my $lastdate;
1251
1252         if (! $Output_To_Stdout) {
1253             $logfile_here =  $dir . $Log_File_Name;
1254             $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem
1255             $tmpfile      = "${logfile_here}.cvs2cl$$.tmp";
1256             $logfile_bak  = "${logfile_here}.bak";
1257
1258             open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
1259         }
1260         else {
1261             open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
1262         }
1263
1264         print LOG_OUT $ChangeLog_Header;
1265
1266         print LOG_OUT "These are detailed notes on changes made between $Delta_From and $Delta_To.\n\n";
1267
1268         my %tag_date_printed;
1269
1270         $self->output_header(\*LOG_OUT);
1271
1272         my @file_list = sort {$a cmp $b} (keys %changelog);
1273         foreach my $file (@file_list) {
1274             # skip files we don't need to see changes for
1275             next if exists $files_to_skip{$file};
1276             next if $file =~ /^t\//;
1277
1278             # convert module filenames to module name
1279             my $module = $file;
1280             if ($module =~ /^Bio/) {
1281                 $module = '<nowiki>'.join("::", File::Spec->splitdir($file)).'</nowiki>';
1282                 $module =~ s/\.pm//;
1283             }
1284             print LOG_OUT "; $module\n";
1285             foreach my $time (sort {$a <=> $b} keys %{$changelog{$file}}) {
1286                 my $msg = $changelog{$file}{$time};
1287
1288                 # uppercase first letter
1289                 $msg =~ s/^(\w)/\U$1/;
1290
1291                 # link bugs to bugzilla
1292                 $msg =~ s/bug.*(\d{4})/{{Bugzilla|$1}}/i;
1293
1294                 print LOG_OUT ": $msg\n";
1295             }
1296             print LOG_OUT "\n";
1297         }
1298
1299         $self->output_footer(\*LOG_OUT);
1300
1301         close (LOG_OUT);
1302
1303         if ( ! $Output_To_Stdout ) {
1304             # If accumulating, append old data to new before renaming.  But
1305             # don't append the most recent entry, since it's already in the
1306             # new log due to CVS's idiosyncratic interpretation of "log -d".
1307             if ($Cumulative && -f $logfile_here) {
1308                 open $NEW_LOG, '>>', $tmpfile
1309                   or die "Could not append to file '$tmpfile': $!\n";
1310
1311                 open $OLD_LOG, '<', $logfile_here
1312                   or die "Could not read file '$logfile_here': $!";
1313
1314                 my $started_first_entry = 0;
1315                 my $passed_first_entry = 0;
1316                 while (<$OLD_LOG>) {
1317                     if ( ! $passed_first_entry ) {
1318                         if (   ( ! $started_first_entry )
1319                             and /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/
1320                             ) {
1321                             $started_first_entry = 1;
1322                         }
1323                         elsif ( /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
1324                             $passed_first_entry = 1;
1325                             print $NEW_LOG $_;
1326                         }
1327                     }
1328                     else {
1329                         print $NEW_LOG $_;
1330                     }
1331                 }
1332                 close $NEW_LOG;
1333                 close $OLD_LOG;
1334             }
1335
1336             if ( -f $logfile_here ) {
1337                 rename $logfile_here, $logfile_bak;
1338             }
1339             rename $tmpfile, $logfile_here;
1340         }
1341     }
1342 }
1343
1344 # -------------------------------------
1345
1346 # Don't call this wrap, because with 5.5.3, that clashes with the
1347 # (unconditional :-( ) export of wrap() from Text::Wrap
1348 sub mywrap {
1349   my $self = shift;
1350   my ($indent1, $indent2, @text) = @_;
1351   # If incoming text looks preformatted, don't get clever
1352   my $text = Text::Wrap::wrap($indent1, $indent2, @text);
1353   if ( grep /^\s+/m, @text ) {
1354     return $text;
1355   }
1356   my @lines = split /\n/, $text;
1357   $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e;
1358   $lines[0] =~ s/^$indent1\s+/$indent1/;
1359   s/^$indent2\s+/$indent2/
1360     for @lines[1..$#lines];
1361   my $newtext = join "\n", @lines;
1362   $newtext .= "\n"
1363     if substr($text, -1) eq "\n";
1364   return $newtext;
1365 }
1366
1367 # -------------------------------------
1368
1369 sub preprocess_msg_text {
1370   my $self = shift;
1371   my ($text) = @_;
1372
1373   # Strip out carriage returns (as they probably result from DOSsy editors).
1374   $text =~ s/\r\n/\n/g;
1375   # If it *looks* like two newlines, make it *be* two newlines:
1376   $text =~ s/\n\s*\n/\n\n/g;
1377
1378   return $text;
1379 }
1380
1381 # -------------------------------------
1382
1383 sub last_line_len {
1384   my $self = shift;
1385
1386   my $files_list = shift;
1387   my @lines = split (/\n/, $files_list);
1388   my $last_line = pop (@lines);
1389   return length ($last_line);
1390 }
1391
1392 # -------------------------------------
1393
1394 # A custom wrap function, sensitive to some common constructs used in
1395 # log entries.
1396 sub wrap_log_entry {
1397   my $self = shift;
1398
1399   my $text = shift;                  # The text to wrap.
1400   my $left_pad_str = shift;          # String to pad with on the left.
1401
1402   # These do NOT take left_pad_str into account:
1403   my $length_remaining = shift;      # Amount left on current line.
1404   my $max_line_length  = shift;      # Amount left for a blank line.
1405
1406   my $wrapped_text = '';             # The accumulating wrapped entry.
1407   my $user_indent = '';              # Inherited user_indent from prev line.
1408
1409   my $first_time = 1;                # First iteration of the loop?
1410   my $suppress_line_start_match = 0; # Set to disable line start checks.
1411
1412   my @lines = split (/\n/, $text);
1413   while (@lines)   # Don't use `foreach' here, it won't work.
1414   {
1415     my $this_line = shift (@lines);
1416     chomp $this_line;
1417
1418     if ($this_line =~ /^(\s+)/) {
1419       $user_indent = $1;
1420     }
1421     else {
1422       $user_indent = '';
1423     }
1424
1425     # If it matches any of the line-start regexps, print a newline now...
1426     if ($suppress_line_start_match)
1427     {
1428       $suppress_line_start_match = 0;
1429     }
1430     elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
1431            || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
1432            || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
1433            || ($this_line =~ /^(\s+)(\S+)/)
1434            || ($this_line =~ /^(\s*)- +/)
1435            || ($this_line =~ /^()\s*$/)
1436            || ($this_line =~ /^(\s*)\*\) +/)
1437            || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
1438     {
1439       # Make a line break immediately, unless header separator is set
1440       # and this line is the first line in the entry, in which case
1441       # we're getting the blank line for free already and shouldn't
1442       # add an extra one.
1443       unless (($After_Header ne " ") and ($first_time))
1444       {
1445         if ($this_line =~ /^()\s*$/) {
1446           $suppress_line_start_match = 1;
1447           $wrapped_text .= "\n${left_pad_str}";
1448         }
1449
1450         $wrapped_text .= "\n${left_pad_str}";
1451       }
1452
1453       $length_remaining = $max_line_length - (length ($user_indent));
1454     }
1455
1456     # Now that any user_indent has been preserved, strip off leading
1457     # whitespace, so up-folding has no ugly side-effects.
1458     $this_line =~ s/^\s*//;
1459
1460     # Accumulate the line, and adjust parameters for next line.
1461     my $this_len = length ($this_line);
1462     if ($this_len == 0)
1463     {
1464       # Blank lines should cancel any user_indent level.
1465       $user_indent = '';
1466       $length_remaining = $max_line_length;
1467     }
1468     elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
1469     {
1470       # Walk backwards from the end.  At first acceptable spot, break
1471       # a new line.
1472       my $idx = $length_remaining - 1;
1473       if ($idx < 0) { $idx = 0 };
1474       while ($idx > 0)
1475       {
1476         if (substr ($this_line, $idx, 1) =~ /\s/)
1477         {
1478           my $line_now = substr ($this_line, 0, $idx);
1479           my $next_line = substr ($this_line, $idx);
1480           $this_line = $line_now;
1481
1482           # Clean whitespace off the end.
1483           chomp $this_line;
1484
1485           # The current line is ready to be printed.
1486           $this_line .= "\n${left_pad_str}";
1487
1488           # Make sure the next line is allowed full room.
1489           $length_remaining = $max_line_length - (length ($user_indent));
1490
1491           # Strip next_line, but then preserve any user_indent.
1492           $next_line =~ s/^\s*//;
1493
1494           # Sneak a peek at the user_indent of the upcoming line, so
1495           # $next_line (which will now precede it) can inherit that
1496           # indent level.  Otherwise, use whatever user_indent level
1497           # we currently have, which might be none.
1498           my $next_next_line = shift (@lines);
1499           if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
1500             $next_line = $1 . $next_line if (defined ($1));
1501             # $length_remaining = $max_line_length - (length ($1));
1502             $next_next_line =~ s/^\s*//;
1503           }
1504           else {
1505             $next_line = $user_indent . $next_line;
1506           }
1507           if (defined ($next_next_line)) {
1508             unshift (@lines, $next_next_line);
1509           }
1510           unshift (@lines, $next_line);
1511
1512           # Our new next line might, coincidentally, begin with one of
1513           # the line-start regexps, so we temporarily turn off
1514           # sensitivity to that until we're past the line.
1515           $suppress_line_start_match = 1;
1516
1517           last;
1518         }
1519         else
1520         {
1521           $idx--;
1522         }
1523       }
1524
1525       if ($idx == 0)
1526       {
1527         # We bottomed out because the line is longer than the
1528         # available space.  But that could be because the space is
1529         # small, or because the line is longer than even the maximum
1530         # possible space.  Handle both cases below.
1531
1532         if ($length_remaining == ($max_line_length - (length ($user_indent))))
1533         {
1534           # The line is simply too long -- there is no hope of ever
1535           # breaking it nicely, so just insert it verbatim, with
1536           # appropriate padding.
1537           $this_line = "\n${left_pad_str}${this_line}";
1538         }
1539         else
1540         {
1541           # Can't break it here, but may be able to on the next round...
1542           unshift (@lines, $this_line);
1543           $length_remaining = $max_line_length - (length ($user_indent));
1544           $this_line = "\n${left_pad_str}";
1545         }
1546       }
1547     }
1548     else  # $this_len < $length_remaining, so tack on what we can.
1549     {
1550       # Leave a note for the next iteration.
1551       $length_remaining = $length_remaining - $this_len;
1552
1553       if ($this_line =~ /\.$/)
1554       {
1555         $this_line .= "  ";
1556         $length_remaining -= 2;
1557       }
1558       else  # not a sentence end
1559       {
1560         $this_line .= " ";
1561         $length_remaining -= 1;
1562       }
1563     }
1564
1565     # Unconditionally indicate that loop has run at least once.
1566     $first_time = 0;
1567
1568     $wrapped_text .= "${user_indent}${this_line}";
1569   }
1570
1571   # One last bit of padding.
1572   $wrapped_text .= "\n";
1573
1574   return $wrapped_text;
1575 }
1576
1577 # -------------------------------------
1578
1579 sub _pretty_file_list {
1580   my $self = shift;
1581
1582   my ($unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref) = @_;
1583
1584   my @qunkrefs =
1585     grep +( ( ! $_->tags_exists
1586               or
1587               ! grep exists $ignore_tags{$_}, @{$_->tags})
1588             and
1589             ( ! keys %show_tags
1590               or
1591               ( $_->tags_exists
1592                 and
1593                 grep exists $show_tags{$_}, @{$_->tags} )
1594             )
1595           ),
1596     @$qunksref;
1597
1598   my $common_dir;           # Dir prefix common to all files ('' if none)
1599
1600   # First, loop over the qunks gathering all the tag/branch names.
1601   # We'll put them all in non_unanimous_tags, and take out the
1602   # unanimous ones later.
1603  QUNKREF:
1604   foreach my $qunkref (@qunkrefs)
1605   {
1606     # Keep track of whether all the files in this commit were in the
1607     # same directory, and memorize it if so.  We can make the output a
1608     # little more compact by mentioning the directory only once.
1609     if ($Common_Dir && (scalar (@qunkrefs)) > 1)
1610     {
1611       if (! (defined ($common_dir)))
1612       {
1613         my ($base, $dir);
1614         ($base, $dir, undef) = fileparse ($qunkref->filename);
1615
1616         if ((! (defined ($dir)))  # this first case is sheer paranoia
1617             or ($dir eq '')
1618             or ($dir eq "./")
1619             or ($dir eq ".\\"))
1620         {
1621           $common_dir = '';
1622         }
1623         else
1624         {
1625           $common_dir = $dir;
1626         }
1627       }
1628       elsif ($common_dir ne '')
1629       {
1630         # Already have a common dir prefix, so how much of it can we preserve?
1631         $common_dir = &main::common_path_prefix ($qunkref->filename, $common_dir);
1632       }
1633     }
1634     else  # only one file in this entry anyway, so common dir not an issue
1635     {
1636       $common_dir = '';
1637     }
1638
1639     if (defined ($qunkref->branch)) {
1640       $all_branches->{$qunkref->branch} = 1;
1641     }
1642     if (defined ($qunkref->tags)) {
1643       foreach my $tag (@{$qunkref->tags}) {
1644         $non_unanimous_tags->{$tag} = 1;
1645       }
1646     }
1647   }
1648
1649   # Any tag held by all qunks will be printed specially... but only if
1650   # there are multiple qunks in the first place!
1651   if ((scalar (@qunkrefs)) > 1) {
1652     foreach my $tag (keys (%$non_unanimous_tags)) {
1653       my $everyone_has_this_tag = 1;
1654       foreach my $qunkref (@qunkrefs) {
1655         if ((! (defined ($qunkref->tags)))
1656             or (! (grep ($_ eq $tag, @{$qunkref->tags})))) {
1657           $everyone_has_this_tag = 0;
1658         }
1659       }
1660       if ($everyone_has_this_tag) {
1661         $unanimous_tags->{$tag} = 1;
1662         delete $non_unanimous_tags->{$tag};
1663       }
1664     }
1665   }
1666
1667   return $common_dir, \@qunkrefs;
1668 }
1669
1670 # -------------------------------------
1671
1672 sub fdatetime {
1673   my $self = shift;
1674
1675   my ($year, $mday, $mon, $wday, $hour, $min);
1676
1677   if ( @_ > 1 ) {
1678     ($year, $mday, $mon, $wday, $hour, $min) = @_;
1679   } else {
1680     my ($time) = @_;
1681     (undef, $min, $hour, $mday, $mon, $year, $wday) =
1682       $UTC_Times ? gmtime($time) : localtime($time);
1683
1684     $year += 1900;
1685     $mon  += 1;
1686     $wday  = $self->wday($wday);
1687   }
1688
1689   my $fdate = $self->fdate($year, $mon, $mday, $wday);
1690
1691   if ($Show_Times) {
1692     my $ftime = $self->ftime($hour, $min);
1693     return "$fdate $ftime";
1694   } else {
1695     return $fdate;
1696   }
1697 }
1698
1699 # -------------------------------------
1700
1701 sub fdate {
1702   my $self = shift;
1703
1704   my ($year, $mday, $mon, $wday);
1705
1706   if ( @_ > 1 ) {
1707     ($year, $mon, $mday, $wday) = @_;
1708   } else {
1709     my ($time) = @_;
1710     (undef, undef, undef, $mday, $mon, $year, $wday) =
1711       $UTC_Times ? gmtime($time) : localtime($time);
1712
1713     $year += 1900;
1714     $mon  += 1;
1715     $wday  = $self->wday($wday);
1716   }
1717
1718   return sprintf '%4u-%02u-%02u%s', $year, $mon, $mday, $wday;
1719 }
1720
1721 # -------------------------------------
1722
1723 sub ftime {
1724   my $self = shift;
1725
1726   my ($hour, $min);
1727
1728   if ( @_ > 1 ) {
1729     ($hour, $min) = @_;
1730   } else {
1731     my ($time) = @_;
1732     (undef, $min, $hour) = $UTC_Times ? gmtime($time) : localtime($time);
1733   }
1734
1735   return sprintf '%02u:%02u', $hour, $min;
1736 }
1737
1738 # ----------------------------------------------------------------------------
1739
1740 package CVS::Utils::ChangeLog::Message;
1741
1742 sub new {
1743   my $class = shift;
1744   my ($msg) = @_;
1745
1746   my %self = (msg => $msg, files => []);
1747
1748   bless \%self, $class;
1749 }
1750
1751 sub add_fileentry {
1752   my $self = shift;
1753   my ($fileentry) = @_;
1754
1755   die "Not a fileentry: $fileentry"
1756     unless $fileentry->isa('CVS::Utils::ChangeLog::FileEntry');
1757
1758   push @{$self->{files}}, $fileentry;
1759 }
1760
1761 sub files { wantarray ? @{$_[0]->{files}} : $_[0]->{files} }
1762
1763 # ----------------------------------------------------------------------------
1764
1765 package CVS::Utils::ChangeLog::FileEntry;
1766
1767 use File::Basename qw( fileparse );
1768
1769 # Each revision of a file has a little data structure (a `qunk')
1770 # associated with it.  That data structure holds not only the
1771 # file's name, but any additional information about the file
1772 # that might be needed in the output, such as the revision
1773 # number, tags, branches, etc.  The reason to have these things
1774 # arranged in a data structure, instead of just appending them
1775 # textually to the file's name, is that we may want to do a
1776 # little rearranging later as we write the output.  For example,
1777 # all the files on a given tag/branch will go together, followed
1778 # by the tag in parentheses (so trunk or otherwise non-tagged
1779 # files would go at the end of the file list for a given log
1780 # message).  This rearrangement is a lot easier to do if we
1781 # don't have to reparse the text.
1782 #
1783 # A qunk looks like this:
1784 #
1785 #   {
1786 #     filename    =>    "hello.c",
1787 #     revision    =>    "1.4.3.2",
1788 #     time        =>    a timegm() return value (moment of commit)
1789 #     tags        =>    [ "tag1", "tag2", ... ],
1790 #     branch      =>    "branchname" # There should be only one, right?
1791 #     roots       =>    [ "branchtag1", "branchtag2", ... ]
1792 #     lines       =>    "+x -y" # or undefined; x and y are integers
1793 #   }
1794
1795 # Single top-level ChangeLog, or one per subdirectory?
1796 my $distributed;
1797 sub distributed { $#_ ? ($distributed = $_[1]) : $distributed; }
1798
1799 sub new {
1800   my $class = shift;
1801   my ($path, $time, $revision, $state, $lines,
1802       $branch_names, $branch_roots, $branch_numbers, $symbolic_names) = @_;
1803
1804   my %self = (time     => $time,
1805               revision => $revision,
1806               state    => $state,
1807               lines    => $lines,
1808               branch_numbers => $branch_numbers,
1809              );
1810
1811   if ( $distributed ) {
1812     @self{qw(filename dir_key)} = fileparse($path);
1813   } else {
1814     @self{qw(filename dir_key)} = ($path, './');
1815   }
1816
1817   { # Scope for $branch_prefix
1818     (my ($branch_prefix) = ($revision =~ /((?:\d+\.)+)\d+/));
1819     $branch_prefix =~ s/\.$//;
1820     if ( $branch_names->{$branch_prefix} ) {
1821       my $branch_name = $branch_names->{$branch_prefix};
1822       $self{branch}   = $branch_name;
1823       $self{branches} = [$branch_name];
1824     }
1825     while ( $branch_prefix =~ s/^(\d+(?:\.\d+\.\d+)+)\.\d+\.\d+$/$1/ ) {
1826       push @{$self{branches}}, $branch_names->{$branch_prefix}
1827         if exists $branch_names->{$branch_prefix};
1828     }
1829   }
1830
1831   # If there's anything in the @branch_roots array, then this
1832   # revision is the root of at least one branch.  We'll display
1833   # them as branch names instead of revision numbers, the
1834   # substitution for which is done directly in the array:
1835   $self{'roots'} = [ map { $branch_names->{$_} } @$branch_roots ]
1836     if @$branch_roots;
1837
1838   if ( exists $symbolic_names->{$revision} ) {
1839     $self{tags} = delete $symbolic_names->{$revision};
1840     &main::delta_check($time, $self{tags});
1841   }
1842
1843   bless \%self, $class;
1844 }
1845
1846 sub filename       { $_[0]->{filename}       }
1847 sub dir_key        { $_[0]->{dir_key}        }
1848 sub revision       { $_[0]->{revision}       }
1849 sub branch         { $_[0]->{branch}         }
1850 sub state          { $_[0]->{state}          }
1851 sub lines          { $_[0]->{lines}          }
1852 sub roots          { $_[0]->{roots}          }
1853 sub branch_numbers { $_[0]->{branch_numbers} }
1854
1855 sub tags        { $_[0]->{tags}     }
1856 sub tags_exists {
1857   exists $_[0]->{tags};
1858 }
1859
1860 # This may someday be used in a more sophisticated calculation of what other
1861 # files are involved in this commit.  For now, we don't use it much except for
1862 # delta mode, because the common-commit-detection algorithm is hypothesized to
1863 # be "good enough" as it stands.
1864 sub time     { $_[0]->{time}     }
1865
1866 # ----------------------------------------------------------------------------
1867
1868 package CVS::Utils::ChangeLog::EntrySetBuilder;
1869
1870 use File::Basename qw( fileparse );
1871 use Time::Local    qw( timegm );
1872
1873 use constant MAILNAME => "/etc/mailname";
1874
1875 # In 'cvs log' output, one long unbroken line of equal signs separates files:
1876 use constant FILE_SEPARATOR => '=' x 77;# . "\n";
1877 # In 'cvs log' output, a shorter line of dashes separates log messages within
1878 # a file:
1879 use constant REV_SEPARATOR  => '-' x 28;# . "\n";
1880
1881 use constant EMPTY_LOG_MESSAGE => '*** empty log message ***';
1882
1883 # -------------------------------------
1884
1885 sub new {
1886   my ($proto) = @_;
1887   my $class = ref $proto || $proto;
1888
1889   my $poobah  = CVS::Utils::ChangeLog::EntrySet->new;
1890   my $self = bless +{ grand_poobah => $poobah }, $class;
1891
1892   $self->clear_file;
1893   $self->maybe_read_user_map_file;
1894   return $self;
1895 }
1896
1897 # -------------------------------------
1898
1899 sub clear_msg {
1900   my ($self) = @_;
1901
1902   # Make way for the next message
1903   undef $self->{rev_msg};
1904   undef $self->{rev_time};
1905   undef $self->{rev_revision};
1906   undef $self->{rev_author};
1907   undef $self->{rev_state};
1908   undef $self->{lines};
1909   $self->{rev_branch_roots} = [];       # For showing which files are branch
1910                                         # ancestors.
1911   $self->{collecting_symbolic_names} = 0;
1912 }
1913
1914 # -------------------------------------
1915
1916 sub clear_file {
1917   my ($self) = @_;
1918   $self->clear_msg;
1919
1920   undef $self->{filename};
1921   $self->{branch_names}   = +{};        # We'll grab branch names while we're
1922                                         # at it.
1923   $self->{branch_numbers} = +{};        # Save some revisions for
1924                                         # @Follow_Branches
1925   $self->{symbolic_names} = +{};        # Where tag names get stored.
1926 }
1927
1928 # -------------------------------------
1929
1930 sub grand_poobah { $_[0]->{grand_poobah} }
1931
1932 # -------------------------------------
1933
1934 sub read_changelog {
1935   my ($self, $command) = @_;
1936
1937   local (*READER, *WRITER);
1938   my $pid;
1939   if (! $Input_From_Stdin) {
1940     pipe(READER, WRITER)
1941       or die "Couldn't form pipe: $!\n";
1942     $pid = fork;
1943     die "Couldn't fork: $!\n"
1944       if ! defined $pid;
1945     if ( ! $pid ) { # child
1946       open STDOUT, '>&=' . fileno WRITER
1947         or die "Couldn't dup stderr to ", fileno WRITER, "\n";
1948       # strangely, some perls give spurious warnings about STDIN being opened
1949       # for output only these close calls precede the STDOUT reopen above.
1950       # I think they must be reusing fd 1.
1951       close READER;
1952       close STDIN;
1953
1954       exec @$command;
1955     }
1956
1957     close WRITER;
1958
1959     &main::debug ("(run \"@$command\")\n");
1960   }
1961   else {
1962     open READER, '-' or die "Unable to open stdin for reading";
1963   }
1964
1965   binmode READER;
1966
1967  XX_Log_Source:
1968   while (<READER>) {
1969     chomp;
1970     s!\r$!!;
1971
1972     # If on a new file and don't see filename, skip until we find it, and
1973     # when we find it, grab it.
1974     if ( ! defined $self->{filename} ) {
1975       $self->read_file_path($_);
1976     } elsif ( /^symbolic names:$/ ) {
1977       $self->{collecting_symbolic_names} = 1;
1978     } elsif ( $self->{collecting_symbolic_names} ) {
1979       $self->read_symbolic_name($_);
1980     } elsif ( $_ eq FILE_SEPARATOR and ! defined $self->{rev_revision} ) {
1981       $self->clear_file;
1982     } elsif ( ! defined $self->{rev_revision} ) {
1983         # If have file name, but not revision, and see revision, then grab
1984         # it.  (We collect unconditionally, even though we may or may not
1985         # ever use it.)
1986       $self->read_revision($_);
1987     } elsif ( ! defined $self->{rev_time} ) { # and /^date: /) {
1988       $self->read_date_author_and_state($_);
1989     } elsif ( /^branches:\s+(.*);$/ ) {
1990       $self->read_branches($1);
1991     } elsif ( ! ( $_ eq FILE_SEPARATOR or $_ eq REV_SEPARATOR ) ) {
1992       # If have file name, time, and author, then we're just grabbing
1993       # log message texts:
1994       $self->{rev_msg} .= $_ . "\n";   # Normally, just accumulate the message...
1995     } else {
1996       my $noadd = 0;
1997       if ( ! $self->{rev_msg}
1998            or $self->{rev_msg} =~ /^\s*(\.\s*)?$/
1999            or index($self->{rev_msg}, EMPTY_LOG_MESSAGE) > -1 ) {
2000         # ... until a msg separator is encountered:
2001         # Ensure the message contains something:
2002         $self->clear_msg, $noadd = 1
2003           if $Prune_Empty_Msgs;
2004         $self->{rev_msg} = "[no log message]\n";
2005       }
2006
2007       $self->add_file_entry
2008         unless $noadd;
2009
2010       if ( $_ eq FILE_SEPARATOR ) {
2011         $self->clear_file;
2012       } else {
2013         $self->clear_msg;
2014       }
2015     }
2016   }
2017
2018   close READER
2019     or die "Couldn't close pipe reader: $!\n";
2020   if ( defined $pid ) {
2021     my $rv;
2022     waitpid $pid, 0;
2023     0 == $?
2024       or $!=1, die sprintf("Problem reading log input (pid/exit/signal/core: %d/%d/%d/%d)\n",
2025                            $pid, $? >> 8, $? & 127, $? & 128);
2026   }
2027   return;
2028 }
2029
2030 # -------------------------------------
2031
2032 sub add_file_entry {
2033   $_[0]->grand_poobah->add_fileentry(@{$_[0]}{qw(filename rev_time rev_revision
2034                                                  rev_state lines branch_names
2035                                                  rev_branch_roots
2036                                                  branch_numbers
2037                                                  symbolic_names
2038                                                  rev_author rev_msg)});
2039 }
2040
2041 # -------------------------------------
2042
2043 sub maybe_read_user_map_file {
2044   my ($self) = @_;
2045
2046   my %expansions;
2047   my $User_Map_Input;
2048
2049   if ($User_Map_File)
2050   {
2051     if ( $User_Map_File =~ m{^([-\w\@+=.,\/]+):([-\w\@+=.,\/:]+)} and
2052          !-f $User_Map_File )
2053     {
2054       my $rsh = (exists $ENV{'CVS_RSH'} ? $ENV{'CVS_RSH'} : 'ssh');
2055       $User_Map_Input = "$rsh $1 'cat $2' |";
2056       &main::debug ("(run \"${User_Map_Input}\")\n");
2057     }
2058     else
2059     {
2060       $User_Map_Input = "<$User_Map_File";
2061     }
2062
2063     open (MAPFILE, $User_Map_Input)
2064         or die ("Unable to open $User_Map_File ($!)");
2065
2066     while (<MAPFILE>)
2067     {
2068       next if /^\s*#/;  # Skip comment lines.
2069       next if not /:/;  # Skip lines without colons.
2070
2071       # It is now safe to split on ':'.
2072       my ($username, $expansion) = split ':';
2073       chomp $expansion;
2074       $expansion =~ s/^'(.*)'$/$1/;
2075       $expansion =~ s/^"(.*)"$/$1/;
2076
2077       # If it looks like the expansion has a real name already, then
2078       # we toss the username we got from CVS log.  Otherwise, keep
2079       # it to use in combination with the email address.
2080
2081       if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
2082         # Also, add angle brackets if none present
2083         if (! ($expansion =~ /<\S+@\S+>/)) {
2084           $expansions{$username} = "$username <$expansion>";
2085         }
2086         else {
2087           $expansions{$username} = "$username $expansion";
2088         }
2089       }
2090       else {
2091         $expansions{$username} = $expansion;
2092       }
2093     } # fi ($User_Map_File)
2094
2095     close (MAPFILE);
2096   }
2097
2098   if (defined $User_Passwd_File)
2099   {
2100     if ( ! defined $Domain ) {
2101       if ( -e MAILNAME ) {
2102         chomp($Domain = slurp_file(MAILNAME));
2103       } else {
2104       MAILDOMAIN_CMD:
2105         for ([qw(hostname -d)], 'dnsdomainname', 'domainname') {
2106           my ($text, $exit, $sig, $core) = run_ext($_);
2107           if ( $exit == 0 && $sig == 0 && $core == 0 ) {
2108             chomp $text;
2109             if ( length $text ) {
2110               $Domain = $text;
2111               last MAILDOMAIN_CMD;
2112             }
2113           }
2114         }
2115       }
2116     }
2117
2118     die "No mail domain found\n"
2119       unless defined $Domain;
2120
2121     open (MAPFILE, "<$User_Passwd_File")
2122         or die ("Unable to open $User_Passwd_File ($!)");
2123     while (<MAPFILE>)
2124     {
2125       # all lines are valid
2126       my ($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
2127       my $expansion = '';
2128       ($expansion) = split (',', $gecos)
2129         if defined $gecos && length $gecos;
2130
2131       my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
2132       $expansions{$username} = "$expansion <$mailname>";
2133     }
2134     close (MAPFILE);
2135   }
2136
2137  $self->{usermap} = \%expansions;
2138 }
2139
2140 # -------------------------------------
2141
2142 sub read_file_path {
2143   my ($self, $line) = @_;
2144
2145   my $path;
2146
2147   if ( $line =~ /^Working file: (.*)/ ) {
2148     $path = $1;
2149   } elsif ( defined $RCS_Root
2150             and
2151             $line =~ m|^RCS file: $RCS_Root[/\\](.*),v$| ) {
2152     $path = $1;
2153     $path =~ s!Attic/!!;
2154   } else {
2155     return;
2156   }
2157
2158   if ( @Ignore_Files ) {
2159     my $base;
2160     ($base, undef, undef) = fileparse($path);
2161
2162     my $xpath = $Case_Insensitive ? lc($path) : $path;
2163     return
2164       if grep $path =~ /$_/, @Ignore_Files;
2165   }
2166
2167   $self->{filename} = $path;
2168   return;
2169 }
2170
2171 # -------------------------------------
2172
2173 sub read_symbolic_name {
2174   my ($self, $line) = @_;
2175
2176   # All tag names are listed with whitespace in front in cvs log
2177   # output; so if see non-whitespace, then we're done collecting.
2178   if ( /^\S/ ) {
2179     $self->{collecting_symbolic_names} = 0;
2180     return;
2181   } else {
2182     # we're looking at a tag name, so parse & store it
2183
2184     # According to the Cederqvist manual, in node "Tags", tag names must start
2185     # with an uppercase or lowercase letter and can contain uppercase and
2186     # lowercase letters, digits, `-', and `_'.  However, it's not our place to
2187     # enforce that, so we'll allow anything CVS hands us to be a tag:
2188     my ($tag_name, $tag_rev) = ($line =~ /^\s+([^:]+): ([\d.]+)$/);
2189
2190     # A branch number either has an odd number of digit sections
2191     # (and hence an even number of dots), or has ".0." as the
2192     # second-to-last digit section.  Test for these conditions.
2193     my $real_branch_rev = '';
2194     if ( $tag_rev =~ /^(\d+\.\d+\.)+\d+$/             # Even number of dots...
2195          and
2196          $tag_rev !~ /^(1\.)+1$/ ) {                  # ...but not "1.[1.]1"
2197       $real_branch_rev = $tag_rev;
2198     } elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) {  # Has ".0."
2199       $real_branch_rev = $1 . $3;
2200     }
2201
2202     # If we got a branch, record its number.
2203     if ( $real_branch_rev ) {
2204       $self->{branch_names}->{$real_branch_rev} = $tag_name;
2205       $self->{branch_numbers}->{$tag_name} = $real_branch_rev;
2206       $tag_rev =~ s/^(\d+\.\d+).+/$1/;
2207     }
2208
2209     # regardless if it is a name on a branch, store it as symbolic name so that tag deltas always work
2210     push @{$self->{symbolic_names}->{$tag_rev}}, $tag_name;
2211   }
2212
2213   $self->{collecting_symbolic_names} = 1;
2214   return;
2215 }
2216
2217 # -------------------------------------
2218
2219 sub read_revision {
2220   my ($self, $line) = @_;
2221
2222   my ($revision) = ( $line =~ /^revision (\d+\.[\d.]+)/ );
2223
2224   return
2225     unless $revision;
2226
2227   $self->{rev_revision} = $revision;
2228   return;
2229 }
2230
2231 # -------------------------------------
2232
2233 { # Closure over %gecos_warned
2234 my %gecos_warned;
2235 sub read_date_author_and_state {
2236   my ($self, $line) = @_;
2237
2238   my ($time, $author, $state) = $self->parse_date_author_and_state($line);
2239
2240   if ( defined($self->{usermap}->{$author}) and $self->{usermap}->{$author} ) {
2241     $author = $self->{usermap}->{$author};
2242   } elsif ( defined $Domain or $Gecos == 1 ) {
2243     my $email = $author;
2244     $email = $author."@".$Domain
2245       if defined $Domain && $Domain ne '';
2246
2247     my $pw = getpwnam($author);
2248     my ($fullname, $office, $workphone, $homephone, $gcos);
2249     if ( defined $pw ) {
2250       $gcos = (getpwnam($author))[6];
2251       ($fullname, $office, $workphone, $homephone) =
2252         split /\s*,\s*/, $gcos;
2253     } else {
2254       warn "Couldn't find gecos info for author '$author'\n"
2255         unless $gecos_warned{$author}++;
2256       $fullname = '';
2257     }
2258     for (grep defined, $fullname, $office, $workphone, $homephone) {
2259       s/&/ucfirst(lc($pw->name))/ge;
2260     }
2261     $author = $fullname . "  <" . $email . ">"
2262       if $fullname ne '';
2263   }
2264
2265   $self->{rev_state}  = $state;
2266   $self->{rev_time}   = $time;
2267   $self->{rev_author} = $author;
2268   return;
2269 }
2270 }
2271
2272 # -------------------------------------
2273
2274 sub read_branches {
2275   # A "branches: ..." line here indicates that one or more branches
2276   # are rooted at this revision.  If we're showing branches, then we
2277   # want to show that fact as well, so we collect all the branches
2278   # that this is the latest ancestor of and store them in
2279   # $self->[rev_branch_roots}.  Just for reference, the format of the
2280   # line we're seeing at this point is:
2281   #
2282   #    branches:  1.5.2;  1.5.4;  ...;
2283   #
2284   # Okay, here goes:
2285   my ($self, $line) = @_;
2286
2287   # Ugh.  This really bothers me.  Suppose we see a log entry
2288   # like this:
2289   #
2290   #    ----------------------------
2291   #    revision 1.1
2292   #    date: 1999/10/17 03:07:38;  author: jrandom;  state: Exp;
2293   #    branches:  1.1.2;
2294   #    Intended first line of log message begins here.
2295   #    ----------------------------
2296   #
2297   # The question is, how we can tell the difference between that
2298   # log message and a *two*-line log message whose first line is
2299   #
2300   #    "branches:  1.1.2;"
2301   #
2302   # See the problem?  The output of "cvs log" is inherently
2303   # ambiguous.
2304   #
2305   # For now, we punt: we liberally assume that people don't
2306   # write log messages like that, and just toss a "branches:"
2307   # line if we see it but are not showing branches.  I hope no
2308   # one ever loses real log data because of this.
2309   if ( $Show_Branches ) {
2310     $line =~ s/(1\.)+1;|(1\.)+1$//;  # ignore the trivial branch 1.1.1
2311     $self->{rev_branch_roots} = [split /;\s+/, $line]
2312       if length $line;
2313   }
2314 }
2315
2316 # -------------------------------------
2317
2318 sub parse_date_author_and_state {
2319   my ($self, $line) = @_;
2320   # Parses the date/time and author out of a line like:
2321   #
2322   # date: 1999/02/19 23:29:05;  author: apharris;  state: Exp;
2323   #
2324   # or, in CVS 1.12.9:
2325   #
2326   # date: 2004-06-05 16:10:32 +0000; author: somebody; state: Exp;
2327
2328   my ($year, $mon, $mday, $hours, $min, $secs, $utcOffset, $author, $state, $rest) =
2329     $line =~
2330       m!(\d+)[-/](\d+)[-/](\d+)\s+(\d+):(\d+):(\d+)(\s+[+-]\d{4})?;\s+
2331         author:\s+([^;]+);\s+state:\s+([^;]+);(.*)!x
2332     or  die "Couldn't parse date ``$line''";
2333   die "Bad date or Y2K issues"
2334     unless $year > 1969 and $year < 2258;
2335   # Kinda arbitrary, but useful as a sanity check
2336   my $time = timegm($secs, $min, $hours, $mday, $mon-1, $year-1900);
2337   if ( defined $utcOffset ) {
2338     my ($plusminus, $hour, $minute) = ($utcOffset =~ m/([+-])(\d\d)(\d\d)/);
2339     my $offset = (($hour * 60) + $minute) * 60 * ($plusminus eq '+' ? -1 : 1);
2340     $time += $offset;
2341   }
2342   if ( $rest =~ m!\s+lines:\s+(.*)! ) {
2343     $self->{lines} = $1;
2344   }
2345
2346   return $time, $author, $state;
2347 }
2348
2349 # Subrs ----------------------------------------------------------------------
2350
2351 package main;
2352
2353 sub delta_check {
2354   my ($time, $tags) = @_;
2355
2356   # If we're in 'delta' mode, update the latest observed times for the
2357   # beginning and ending tags, and when we get around to printing output, we
2358   # will simply restrict ourselves to that timeframe...
2359   return
2360     unless $Delta_Mode;
2361
2362   $Delta_StartTime = $time
2363     if $time > $Delta_StartTime and grep { $_ eq $Delta_From } @$tags;
2364
2365   $Delta_EndTime = $time
2366     if $time > $Delta_EndTime and grep { $_ eq $Delta_To } @$tags;
2367 }
2368
2369 sub run_ext {
2370   my ($cmd) = @_;
2371   $cmd = [$cmd]
2372     unless ref $cmd;
2373   local $" = ' ';
2374   my $out = qx"@$cmd 2>&1";
2375   my $rv  = $?;
2376   my ($sig, $core, $exit) = ($? & 127, $? & 128, $? >> 8);
2377   return $out, $exit, $sig, $core;
2378 }
2379
2380 # -------------------------------------
2381
2382 # If accumulating, grab the boundary date from pre-existing ChangeLog.
2383 sub maybe_grab_accumulation_date {
2384   if (! $Cumulative || $Update) {
2385     return '';
2386   }
2387
2388   # else
2389
2390   open (LOG, "$Log_File_Name")
2391       or die ("trouble opening $Log_File_Name for reading ($!)");
2392
2393   my $boundary_date;
2394   while (<LOG>)
2395   {
2396     if (/^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/)
2397     {
2398       $boundary_date = "$1";
2399       last;
2400     }
2401   }
2402
2403   close (LOG);
2404
2405   # convert time from utc to local timezone if the ChangeLog has
2406   # dates/times in utc
2407   if ($UTC_Times && $boundary_date)
2408   {
2409     # convert the utc time to a time value
2410     my ($year,$mon,$mday,$hour,$min) = $boundary_date =~
2411       m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#;
2412     my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900);
2413     # print the timevalue in the local timezone
2414     my ($ignore,$wday);
2415     ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time);
2416     $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u",
2417                             $year+1900,$mon+1,$mday,$hour,$min);
2418   }
2419
2420   return $boundary_date;
2421 }
2422
2423 # -------------------------------------
2424
2425 # Fills up a ChangeLog structure in the current directory.
2426 sub derive_changelog {
2427   my ($command) = @_;
2428
2429   # See "The Plan" above for a full explanation.
2430
2431   # Might be adding to an existing ChangeLog
2432   my $accumulation_date = maybe_grab_accumulation_date;
2433   if ($accumulation_date) {
2434     # Insert -d immediately after 'cvs log'
2435     my $Log_Date_Command = "-d>${accumulation_date}";
2436
2437     my ($log_index) = grep $command->[$_] eq 'log', 0..$#$command;
2438     splice @$command, $log_index+1, 0, $Log_Date_Command;
2439     &debug ("(adding log msg starting from $accumulation_date)\n");
2440   }
2441
2442 #  output_changelog(read_changelog($command));
2443   my $builder = CVS::Utils::ChangeLog::EntrySetBuilder->new;
2444   $builder->read_changelog($command);
2445   $builder->grand_poobah->output_changelog;
2446 }
2447
2448 # -------------------------------------
2449
2450 sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
2451
2452 # -------------------------------------
2453
2454 sub common_path_prefix {
2455   my ($path1, $path2) = @_;
2456
2457   # For compatibility (with older versions of cvs2cl.pl), we think in UN*X
2458   # terms, and mould windoze filenames to match.  Is this really appropriate?
2459   # If a file is checked in under UN*X, and cvs log run on windoze, which way
2460   # do the path separators slope?  Can we use fileparse as per the local
2461   # conventions?  If so, we should probably have a user option to specify an
2462   # OS to emulate to handle stdin-fed logs.  If we did this, we could avoid
2463   # the nasty \-/ transmogrification below.
2464
2465   my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2;
2466
2467   # Transmogrify Windows filenames to look like Unix.
2468   # (It is far more likely that someone is running cvs2cl.pl under
2469   # Windows than that they would genuinely have backslashes in their
2470   # filenames.)
2471   tr!\\!/!
2472     for $dir1, $dir2;
2473
2474   my ($accum1, $accum2, $last_common_prefix) = ('') x 3;
2475
2476   my @path1 = grep length($_), split qr!/!, $dir1;
2477   my @path2 = grep length($_), split qr!/!, $dir2;
2478
2479   my @common_path;
2480   for (0..min($#path1,$#path2)) {
2481     if ( $path1[$_] eq $path2[$_]) {
2482       push @common_path, $path1[$_];
2483     } else {
2484       last;
2485     }
2486   }
2487
2488   return join '', map "$_/", @common_path;
2489 }
2490
2491 # -------------------------------------
2492
2493 sub parse_options {
2494   # Check this internally before setting the global variable.
2495   my $output_file;
2496
2497   # If this gets set, we encountered unknown options and will exit at
2498   # the end of this subroutine.
2499   my $exit_with_admonishment = 0;
2500
2501   # command to generate the log
2502   my @log_source_command = qw( cvs log );
2503
2504   my (@Global_Opts, @Local_Opts);
2505
2506   Getopt::Long::Configure(qw( bundling permute no_getopt_compat
2507                               pass_through no_ignore_case ));
2508   GetOptions('help|usage|h'   => \$Print_Usage,
2509              'debug'          => \$Debug,        # unadvertised option, heh
2510              'version'        => \$Print_Version,
2511
2512              'file|f=s'       => \$output_file,
2513              'accum'          => \$Cumulative,
2514              'update'         => \$Update,
2515              'fsf'            => \$FSF_Style,
2516              'rcs=s'          => \$RCS_Root,
2517              'usermap|U=s'    => \$User_Map_File,
2518              'gecos'          => \$Gecos,
2519              'domain=s'       => \$Domain,
2520              'passwd=s'       => \$User_Passwd_File,
2521              'window|W=i'     => \$Max_Checkin_Duration,
2522              'chrono'         => \$Chronological_Order,
2523              'ignore|I=s'     => \@Ignore_Files,
2524              'case-insensitive|C' => \$Case_Insensitive,
2525              'regexp|R=s'     => \$Regexp_Gate,
2526              'stdin'          => \$Input_From_Stdin,
2527              'stdout'         => \$Output_To_Stdout,
2528              'distributed|d'  => sub { CVS::Utils::ChangeLog::FileEntry->distributed(1) },
2529              'prune|P'        => \$Prune_Empty_Msgs,
2530              'no-wrap'        => \$No_Wrap,
2531              'gmt|utc'        => \$UTC_Times,
2532              'day-of-week|w'  => \$Show_Day_Of_Week,
2533              'revisions|r'    => \$Show_Revisions,
2534              'show-dead'      => \$Show_Dead,
2535              'tags|t'         => \$Show_Tags,
2536              'tagdates|T'     => \$Show_Tag_Dates,
2537              'branches|b'     => \$Show_Branches,
2538              'follow|F=s'     => \@Follow_Branches,
2539              'follow-only=s'  => \@Follow_Only,
2540              'xml-encoding=s' => \$XML_Encoding,
2541              'xml'            => \$XML_Output,
2542              'noxmlns'        => \$No_XML_Namespace,
2543              'no-xml-iso-date' => \$No_XML_ISO_Date,
2544              'no-ancestors'   => \$No_Ancestors,
2545              'lines-modified' => \$Show_Lines_Modified,
2546
2547              'no-indent'    => sub {
2548                $Indent = '';
2549              },
2550
2551              'summary'      => sub {
2552                $Summary = 1;
2553                $After_Header = "\n\n"; # Summary implies --separate-header
2554              },
2555
2556              'no-times'     => sub {
2557                $Show_Times = 0;
2558              },
2559
2560              'no-hide-branch-additions' => sub {
2561                $Hide_Branch_Additions = 0;
2562              },
2563
2564              'no-common-dir'  => sub {
2565                $Common_Dir = 0;
2566              },
2567
2568              'ignore-tag=s'   => sub {
2569                $ignore_tags{$_[1]} = 1;
2570              },
2571
2572              'show-tag=s'     => sub {
2573                $show_tags{$_[1]} = 1;
2574              },
2575
2576              # Deliberately undocumented.  This is not a public interface, and
2577              # may change/disappear at any time.
2578              'test-code=s'    => \$TestCode,
2579
2580              'delta=s'        => sub {
2581                my $arg = $_[1];
2582                if ( $arg =~
2583                     /^([A-Za-z][A-Za-z0-9_\-\]\[]*):([A-Za-z][A-Za-z0-9_\-\]\[]*)$/ ) {
2584                  $Delta_From = $1;
2585                  $Delta_To = $2;
2586                  $Delta_Mode = 1;
2587                } else {
2588                  die "--delta FROM_TAG:TO_TAG is what you meant to say.\n";
2589                }
2590              },
2591
2592              'FSF'             => sub {
2593                $Show_Times = 0;
2594                $Common_Dir = 0;
2595                $No_Extra_Indent = 1;
2596                $Indent = "\t";
2597              },
2598
2599              'header=s'        => sub {
2600                my $narg = $_[1];
2601                $ChangeLog_Header = &slurp_file ($narg);
2602                if (! defined ($ChangeLog_Header)) {
2603                  $ChangeLog_Header = '';
2604                }
2605              },
2606
2607              'global-opts|g=s' => sub {
2608                my $narg = $_[1];
2609                push @Global_Opts, $narg;
2610                splice @log_source_command, 1, 0, $narg;
2611              },
2612
2613              'log-opts|l=s' => sub {
2614                my $narg = $_[1];
2615                push @Local_Opts, $narg;
2616                push @log_source_command, $narg;
2617              },
2618
2619              'mailname=s'   => sub {
2620                my $narg = $_[1];
2621                warn "--mailname is deprecated; please use --domain instead\n";
2622                $Domain = $narg;
2623              },
2624
2625              'separate-header|S' => sub {
2626                $After_Header = "\n\n";
2627                $No_Extra_Indent = 1;
2628              },
2629
2630              'group-within-date' => sub {
2631                $GroupWithinDate = 1;
2632                $Show_Times = 0;
2633              },
2634
2635              'hide-filenames' => sub {
2636                $Hide_Filenames = 1;
2637                $After_Header = '';
2638              },
2639             )
2640     or die "options parsing failed\n";
2641
2642   push @log_source_command, map "$_", @ARGV;
2643
2644   ## Check for contradictions...
2645
2646   if ($Output_To_Stdout && CVS::Utils::ChangeLog::FileEntry->distributed) {
2647     print STDERR "cannot pass both --stdout and --distributed\n";
2648     $exit_with_admonishment = 1;
2649   }
2650
2651   if ($Output_To_Stdout && $output_file) {
2652     print STDERR "cannot pass both --stdout and --file\n";
2653     $exit_with_admonishment = 1;
2654   }
2655
2656   if ($Input_From_Stdin && @Global_Opts) {
2657     print STDERR "cannot pass both --stdin and -g\n";
2658     $exit_with_admonishment = 1;
2659   }
2660
2661   if ($Input_From_Stdin && @Local_Opts) {
2662     print STDERR "cannot pass both --stdin and -l\n";
2663     $exit_with_admonishment = 1;
2664   }
2665
2666   if ($XML_Output && $Cumulative) {
2667     print STDERR "cannot pass both --xml and --accum\n";
2668     $exit_with_admonishment = 1;
2669   }
2670
2671   # Other consistency checks and option-driven logic
2672
2673   # Bleargh.  Compensate for a deficiency of custom wrapping.
2674   if ( ($After_Header ne " ") and $FSF_Style ) {
2675     $After_Header .= "\t";
2676   }
2677
2678   @Ignore_Files = map lc, @Ignore_Files
2679     if $Case_Insensitive;
2680
2681   # Or if any other error message has already been printed out, we
2682   # just leave now:
2683   if ($exit_with_admonishment) {
2684     &usage ();
2685     exit (1);
2686   }
2687   elsif ($Print_Usage) {
2688     &usage ();
2689     exit (0);
2690   }
2691   elsif ($Print_Version) {
2692     &version ();
2693     exit (0);
2694   }
2695
2696   ## Else no problems, so proceed.
2697
2698   if ($output_file) {
2699     $Log_File_Name = $output_file;
2700   }
2701
2702   return \@log_source_command;
2703 }
2704
2705 # -------------------------------------
2706
2707 sub slurp_file {
2708   my $filename = shift || die ("no filename passed to slurp_file()");
2709   my $retstr;
2710
2711   open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");
2712   local $/ = undef;
2713   $retstr = <SLURPEE>;
2714   close (SLURPEE);
2715   return $retstr;
2716 }
2717
2718 # -------------------------------------
2719
2720 sub debug {
2721   if ($Debug) {
2722     my $msg = shift;
2723     print STDERR $msg;
2724   }
2725 }
2726
2727 # -------------------------------------
2728
2729 sub version {
2730   print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
2731 }
2732
2733 # -------------------------------------
2734
2735 sub usage {
2736   &version ();
2737
2738   eval "use Pod::Usage qw( pod2usage )";
2739
2740    if ( $@ ) {
2741     print <<'END';
2742
2743 * Pod::Usage was not found.  The formatting may be suboptimal.  Consider
2744   upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and
2745   versions of perl prior to 5.6 are getting rather rusty, now.  Alternatively,
2746   install Pod::Usage direct from CPAN.
2747 END
2748
2749     local $/ = undef;
2750     my $message = <DATA>;
2751     $message =~ s/^=(head1|item) //gm;
2752     $message =~ s/^=(over|back).*\n//gm;
2753     $message =~ s/\n{3,}/\n\n/g;
2754     print $message;
2755   } else {
2756     print "\n";
2757     pod2usage( -exitval => 'NOEXIT',
2758                -verbose => 1,
2759                -output  => \*STDOUT,
2760              );
2761   }
2762
2763   return;
2764 }
2765
2766 # Main -----------------------------------------------------------------------
2767
2768 my $log_source_command = parse_options;
2769 if ( defined $TestCode ) {
2770   eval $TestCode;
2771   die "Eval failed: '$@'\n"
2772     if $@;
2773 } else {
2774   derive_changelog($log_source_command);
2775 }
2776
2777 __DATA__
2778
2779 =head1 NAME
2780
2781 cvs2cl_by_file.pl - convert cvs log messages to changelogs
2782
2783 =head1 SYNOPSIS
2784
2785 B<cvs2cl> [I<options>] [I<FILE1> [I<FILE2> ...]]
2786 cvs2cl_by_file.pl --delta bioperl-release-1-5-1:bioperl-release-1-5-2
2787
2788 =head1 DESCRIPTION
2789
2790 This is a quick hack version of cvs2cl.pl that simply outputs in a
2791 file-centric way.
2792 Only CVS::Utils::ChangeLog::EntrySet::Output::output_changelog
2793 was altered
2794
2795 Usage:
2796 cvs2cl_by_file.pl --delta bioperl-release-1-5-1:bioperl-release-1-5-2
2797 Generates a file called ChangeLog showing, per file, all the commit
2798 messages since tag bioperl-release-1-5-1 up to tag bioperl-release-1-5-2
2799
2800 Original cvs2cl docs now follow, but some things may not work because of
2801 the hack!
2802
2803
2804 cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by
2805 running "cvs log" and parsing the output. Duplicate log messages get
2806 unified in the Right Way.
2807
2808 The default output of cvs2cl is designed to be compact, formally unambiguous,
2809 but still easy for humans to read.  It should be largely self-explanatory; the
2810 one abbreviation that might not be obvious is "utags".  That stands for
2811 "universal tags" -- a universal tag is one held by all the files in a given
2812 change entry.
2813
2814 If you need output that's easy for a program to parse, use the B<--xml> option.
2815 Note that with XML output, just about all available information is included
2816 with each change entry, whether you asked for it or not, on the theory that
2817 your parser can ignore anything it's not looking for.
2818
2819 If filenames are given as arguments cvs2cl only shows log information for the
2820 named files.
2821
2822 =head1 OPTIONS
2823
2824 =over 4
2825
2826 =item B<-h>, B<-help>, B<--help>, B<-?>
2827
2828 Show a short help and exit.
2829
2830 =item B<--version>
2831
2832 Show version and exit.
2833
2834 =item B<-r>, B<--revisions>
2835
2836 Show revision numbers in output.
2837
2838 =item B<-b>, B<--branches>
2839
2840 Show branch names in revisions when possible.
2841
2842 =item B<-t>, B<--tags>
2843
2844 Show tags (symbolic names) in output.
2845
2846 =item B<-T>, B<--tagdates>
2847
2848 Show tags in output on their first occurance.
2849
2850 =item B<--show-dead>
2851
2852 Show dead files.
2853
2854 =item B<--stdin>
2855
2856 Read from stdin, don't run cvs log.
2857
2858 =item B<--stdout>
2859
2860 Output to stdout not to ChangeLog.
2861
2862 =item B<-d>, B<--distributed>
2863
2864 Put ChangeLogs in subdirs.
2865
2866 =item B<-f> I<FILE>, B<--file> I<FILE>
2867
2868 Write to I<FILE> instead of ChangeLog.
2869
2870 =item B<--fsf>
2871
2872 Use this if log data is in FSF ChangeLog style.
2873
2874 =item B<--FSF>
2875
2876 Attempt strict FSF-standard compatible output.
2877
2878 =item B<-W> I<SECS>, B<--window> I<SECS>
2879
2880 Window of time within which log entries unify.
2881
2882 =item -B<U> I<UFILE>, B<--usermap> I<UFILE>
2883
2884 Expand usernames to email addresses from I<UFILE>.
2885
2886 =item B<--passwd> I<PASSWORDFILE>
2887
2888 Use system passwd file for user name expansion.  If no mail domain is provided
2889 (via B<--domain>), it tries to read one from B</etc/mailname>, output of B<hostname
2890 -d>, B<dnsdomainname>, or B<domain-name>.  cvs2cl exits with an error if none of
2891 those options is successful. Use a domain of '' to prevent the addition of a
2892 mail domain.
2893
2894 =item B<--domain> I<DOMAIN>
2895
2896 Domain to build email addresses from.
2897
2898 =item B<--gecos>
2899
2900 Get user information from GECOS data.
2901
2902 =item B<-R> I<REGEXP>, B<--regexp> I<REGEXP>
2903
2904 Include only entries that match I<REGEXP>.  This option may be used multiple
2905 times.
2906
2907 =item B<-I> I<REGEXP>, B<--ignore> I<REGEXP>
2908
2909 Ignore files whose names match I<REGEXP>.  This option may be used multiple
2910 times.  The regexp is a perl regular expression.  It is matched as is; you may
2911 want to prefix with a ^ or suffix with a $ to anchor the match.
2912
2913 =item B<-C>, B<--case-insensitive>
2914
2915 Any regexp matching is done case-insensitively.
2916
2917 =item B<-F> I<BRANCH>, B<--follow> I<BRANCH>
2918
2919 Show only revisions on or ancestral to I<BRANCH>.
2920
2921 =item B<--follow-only> I<BRANCH>
2922
2923 Like --follow, but sub-branches are not followed.
2924
2925 =item B<--no-ancestors>
2926
2927 When using B<-F>, only track changes since the I<BRANCH> started.
2928
2929 =item B<--no-hide-branch-additions>
2930
2931 By default, entries generated by cvs for a file added on a branch (a dead 1.1
2932 entry) are not shown.  This flag reverses that action.
2933
2934 =item B<-S>, B<--separate-header>
2935
2936 Blank line between each header and log message.
2937
2938 =item B<--summary>
2939
2940 Add CVS change summary information.
2941
2942 =item B<--no-wrap>
2943
2944 Don't auto-wrap log message (recommend B<-S> also).
2945
2946 =item B<--no-indent>
2947
2948 Don't indent log message
2949
2950 =item B<--gmt>, B<--utc>
2951
2952 Show times in GMT/UTC instead of local time.
2953
2954 =item B<--accum>
2955
2956 Add to an existing ChangeLog (incompatible with B<--xml>).
2957
2958 =item B<-w>, B<--day-of-week>
2959
2960 Show day of week.
2961
2962 =item B<--no-times>
2963
2964 Don't show times in output.
2965
2966 =item B<--chrono>
2967
2968 Output log in chronological order (default is reverse chronological order).
2969
2970 =item B<--header> I<FILE>
2971
2972 Get ChangeLog header from I<FILE> ("B<->" means stdin).
2973
2974 =item B<--xml>
2975
2976 Output XML instead of ChangeLog format.
2977
2978 =item B<--xml-encoding> I<ENCODING.>
2979
2980 Insert encoding clause in XML header.
2981
2982 =item B<--noxmlns>
2983
2984 Don't include xmlns= attribute in root element.
2985
2986 =item B<--hide-filenames>
2987
2988 Don't show filenames (ignored for XML output).
2989
2990 =item B<--no-common-dir>
2991
2992 Don't shorten directory names from filenames.
2993
2994 =item B<--rcs> I<CVSROOT>
2995
2996 Handle filenames from raw RCS, for instance those produced by "cvs rlog"
2997 output, stripping the prefix I<CVSROOT>.
2998
2999 =item B<-P>, B<--prune>
3000
3001 Don't show empty log messages.
3002
3003 =item B<--lines-modified>
3004
3005 Output the number of lines added and the number of lines removed for
3006 each checkin (if applicable). At the moment, this only affects the
3007 XML output mode.
3008
3009 =item B<--ignore-tag> I<TAG>
3010
3011 Ignore individual changes that are associated with a given tag.
3012 May be repeated, if so, changes that are associated with any of
3013 the given tags are ignored.
3014
3015 =item B<--show-tag> I<TAG>
3016
3017 Log only individual changes that are associated with a given
3018 tag.  May be repeated, if so, changes that are associated with
3019 any of the given tags are logged.
3020
3021 =item B<--delta> I<FROM_TAG>B<:>I<TO_TAG>
3022
3023 Attempt a delta between two tags (since I<FROM_TAG> up to and
3024 including I<TO_TAG>).  The algorithm is a simple date-based one
3025 (this is a hard problem) so results are imperfect.
3026
3027 =item B<-g> I<OPTS>, B<--global-opts> I<OPTS>
3028
3029 Pass I<OPTS> to cvs like in "cvs I<OPTS> log ...".
3030
3031 =item B<-l> I<OPTS>, B<--log-opts> I<OPTS>
3032
3033 Pass I<OPTS> to cvs log like in "cvs ... log I<OPTS>".
3034
3035 =back
3036
3037 Notes about the options and arguments:
3038
3039 =over 4
3040
3041 =item *
3042
3043 The B<-I> and B<-F> options may appear multiple times.
3044
3045 =item *
3046
3047 To follow trunk revisions, use "B<-F trunk>" ("B<-F TRUNK>" also works).  This is
3048 okay because no would ever, ever be crazy enough to name a branch "trunk",
3049 right?  Right.
3050
3051 =item *
3052
3053 For the B<-U> option, the I<UFILE> should be formatted like CVSROOT/users. That is,
3054 each line of I<UFILE> looks like this:
3055
3056        jrandom:jrandom@red-bean.com
3057
3058 or maybe even like this
3059
3060        jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
3061
3062 Don't forget to quote the portion after the colon if necessary.
3063
3064 =item *
3065
3066 Many people want to filter by date.  To do so, invoke cvs2cl.pl like this:
3067
3068        cvs2cl.pl -l "-d'DATESPEC'"
3069
3070 where DATESPEC is any date specification valid for "cvs log -d".  (Note that
3071 CVS 1.10.7 and below requires there be no space between -d and its argument).
3072
3073 =item *
3074
3075 Dates/times are interpreted in the local time zone.
3076
3077 =item *
3078
3079 Remember to quote the argument to `B<-l>' so that your shell doesn't interpret
3080 spaces as argument separators.
3081
3082 =item *
3083
3084 See the 'Common Options' section of the cvs manual ('info cvs' on UNIX-like
3085 systems) for more information.
3086
3087 =item *
3088
3089 Note that the rules for quoting under windows shells are different.
3090
3091 =item *
3092
3093 To run in an automated environment such as CGI or PHP, suidperl may be needed
3094 in order to execute as the correct user to enable /cvsroot read lock files to
3095 be written for the 'cvs log' command.  This is likely just a case of changing
3096 the /usr/bin/perl command to /usr/bin/suidperl, and explicitly declaring the
3097 PATH variable.
3098
3099 =back
3100
3101 =head1 EXAMPLES
3102
3103 Some examples (working on UNIX shells):
3104
3105       # logs after 6th March, 2003 (inclusive)
3106       cvs2cl.pl -l "-d'>2003-03-06'"
3107       # logs after 4:34PM 6th March, 2003 (inclusive)
3108       cvs2cl.pl -l "-d'>2003-03-06 16:34'"
3109       # logs between 4:46PM 6th March, 2003 (exclusive) and
3110       # 4:34PM 6th March, 2003 (inclusive)
3111       cvs2cl.pl -l "-d'2003-03-06 16:46>2003-03-06 16:34'"
3112
3113 Some examples (on non-UNIX shells):
3114
3115       # Reported to work on windows xp/2000
3116       cvs2cl.pl -l  "-d"">2003-10-18;today<"""
3117
3118 =head1 AUTHORS
3119
3120 =over 4
3121
3122 =item Karl Fogel
3123
3124 =item Melissa O'Neill
3125
3126 =item Martyn J. Pearce
3127
3128 =back
3129
3130 Contributions from
3131
3132 =over 4
3133
3134 =item Mike Ayers
3135
3136 =item Tim Bradshaw
3137
3138 =item Richard Broberg
3139
3140 =item Nathan Bryant
3141
3142 =item Oswald Buddenhagen
3143
3144 =item Neil Conway
3145
3146 =item Arthur de Jong
3147
3148 =item Mark W. Eichin
3149
3150 =item Dave Elcock
3151
3152 =item Reid Ellis
3153
3154 =item Simon Josefsson
3155
3156 =item Robin Hugh Johnson
3157
3158 =item Terry Kane
3159
3160 =item Pete Kempf
3161
3162 =item Akos Kiss
3163
3164 =item Claus Klein
3165
3166 =item Eddie Kohler
3167
3168 =item Richard Laager
3169
3170 =item Kevin Lilly
3171
3172 =item Karl-Heinz Marbaise
3173
3174 =item Mitsuaki Masuhara
3175
3176 =item Henrik Nordstrom
3177
3178 =item Joe Orton
3179
3180 =item Peter Palfrader
3181
3182 =item Thomas Parmelan
3183
3184 =item Jordan Russell
3185
3186 =item Jacek Sliwerski
3187
3188 =item Johannes Stezenbach
3189
3190 =item Joseph Walton
3191
3192 =item Ernie Zapata
3193
3194 =back
3195
3196 =head1 BUGS
3197
3198 Please report bugs to C<bug-cvs2cl@red-bean.com>.
3199
3200 =head1 PREREQUISITES
3201
3202 This script requires C<Text::Wrap>, C<Time::Local>, and C<File::Basename>.  It
3203 also seems to require C<Perl 5.004_04> or higher.
3204
3205 =head1 OPERATING SYSTEM COMPATIBILITY
3206
3207 Should work on any OS.
3208
3209 =head1 SCRIPT CATEGORIES
3210
3211 Version_Control/CVS
3212
3213 =head1 COPYRIGHT
3214
3215 (C) 2001,2002,2003,2004 Martyn J. Pearce E<lt>fluffy@cpan.orgE<gt>, under the GNU GPL.
3216
3217 (C) 1999 Karl Fogel E<lt>kfogel@red-bean.comE<gt>, under the GNU GPL.
3218
3219 cvs2cl.pl is free software; you can redistribute it and/or modify
3220 it under the terms of the GNU General Public License as published by
3221 the Free Software Foundation; either version 2, or (at your option)
3222 any later version.
3223
3224 cvs2cl.pl is distributed in the hope that it will be useful,
3225 but WITHOUT ANY WARRANTY; without even the implied warranty of
3226 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
3227 GNU General Public License for more details.
3228
3229 You may have received a copy of the GNU General Public License
3230 along with cvs2cl.pl; see the file COPYING.  If not, write to the
3231 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
3232 Boston, MA 02111-1307, USA.
3233
3234 =head1 SEE ALSO
3235
3236 cvs(1)