utils/cvs2cl.pl

   1 #!/bin/sh
   2 exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
   3 #!perl -w
   4
   5
   6 ##############################################################
   7 ###                                                        ###
   8 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
   9 ###                                                        ###
  10 ##############################################################
  11
  12 ## $Revision$
  13 ## $Date$
  14 ## $Author$
  15 ##
  16
  17 use strict;
  18
  19 use File::Basename qw( fileparse );
  20 use Getopt::Long   qw( GetOptions );
  21 use Text::Wrap     qw( );
  22 use User::pwent    qw( getpwnam );
  23
  24 # The Plan:
  25 #
  26 # Read in the logs for multiple files, spit out a nice ChangeLog that
  27 # mirrors the information entered during `cvs commit'.
  28 #
  29 # The problem presents some challenges. In an ideal world, we could
  30 # detect files with the same author, log message, and checkin time --
  31 # each <filelist, author, time, logmessage> would be a changelog entry.
  32 # We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic*
  33 # so checkins can span a range of times.  Also, the directory structure
  34 # could be hierarchical.
  35 #
  36 # Another question is whether we really want to have the ChangeLog
  37 # exactly reflect commits. An author could issue two related commits,
  38 # with different log entries, reflecting a single logical change to the
  39 # source. GNU style ChangeLogs group these under a single author/date.
  40 # We try to do the same.
  41 #
  42 # So, we parse the output of `cvs log', storing log messages in a
  43 # multilevel hash that stores the mapping:
  44 #   directory => author => time => message => filelist
  45 # As we go, we notice "nearby" commit times and store them together
  46 # (i.e., under the same timestamp), so they appear in the same log
  47 # entry.
  48 #
  49 # When we've read all the logs, we twist this mapping into
  50 # a time => author => message => filelist mapping for each directory.
  51 #
  52 # If we're not using the `--distributed' flag, the directory is always
  53 # considered to be `./', even as descend into subdirectories.
  54
  55 # Call Tree
  56
  57 # name                         number of lines (10.xii.03)
  58 # parse_options                         192
  59 # derive_changelog                       13
  60 # +-maybe_grab_accumulation_date         38
  61 # +-read_changelog                      277
  62 #   +-maybe_read_user_map_file           94
  63 #     +-run_ext                           9
  64 #   +-read_file_path                     29
  65 #   +-read_symbolic_name                 43
  66 #   +-read_revision                      49
  67 #   +-read_date_author_and_state         25
  68 #     +-parse_date_author_and_state      20
  69 #   +-read_branches                      36
  70 # +-output_changelog                    424
  71 #   +-pretty_file_list                  290
  72 #     +-common_path_prefix               35
  73 #   +-preprocess_msg_text                30
  74 #     +-min                               1
  75 #   +-mywrap                             16
  76 #   +-last_line_len                       5
  77 #   +-wrap_log_entry                    177
  78 #
  79 # Utilities
  80 #
  81 # xml_escape                              6
  82 # slurp_file                             11
  83 # debug                                   5
  84 # version                                 2
  85 # usage                                 142
  86
  87 # -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
  88 #
  89 # Note about a bug-slash-opportunity:
  90 # -----------------------------------
  91 #
  92 # There's a bug in Text::Wrap, which affects cvs2cl.  This script
  93 # reveals it:
  94 #
  95 #   #!/usr/bin/perl -w
  96 #
  97 #   use Text::Wrap;
  98 #
  99 #   my $test_text =
 100 #   "This script demonstrates a bug in Text::Wrap.  The very long line
 101 #   following this paragraph will be relocated relative to the surrounding
 102 #   text:
 103 #
 104 #   ====================================================================
 105 #
 106 #   See?  When the bug happens, we'll get the line of equal signs below
 107 #   this paragraph, even though it should be above.";
 108 #
 109 #
 110 #   # Print out the test text with no wrapping:
 111 #   print "$test_text";
 112 #   print "\n";
 113 #   print "\n";
 114 #
 115 #   # Now print it out wrapped, and see the bug:
 116 #   print wrap ("\t", "        ", "$test_text");
 117 #   print "\n";
 118 #   print "\n";
 119 #
 120 # If the line of equal signs were one shorter, then the bug doesn't
 121 # happen.  Interesting.
 122 #
 123 # Anyway, rather than fix this in Text::Wrap, we might as well write a
 124 # new wrap() which has the following much-needed features:
 125 #
 126 # * initial indentation, like current Text::Wrap()
 127 # * subsequent line indentation, like current Text::Wrap()
 128 # * user chooses among: force-break long words, leave them alone, or die()?
 129 # * preserve existing indentation: chopped chunks from an indented line
 130 #   are indented by same (like this line, not counting the asterisk!)
 131 # * optional list of things to preserve on line starts, default ">"
 132 #
 133 # Note that the last two are essentially the same concept, so unify in
 134 # implementation and give a good interface to controlling them.
 135 #
 136 # And how about:
 137 #
 138 # Optionally, when encounter a line pre-indented by same as previous
 139 # line, then strip the newline and refill, but indent by the same.
 140 # Yeah...
 141
 142 # Globals --------------------------------------------------------------------
 143
 144 # In case we have to print it out:
 145 my $VERSION = '$Revision$';
 146 $VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
 147
 148 ## Vars set by options:
 149
 150 # Print debugging messages?
 151 my $Debug = 0;
 152
 153 # Just show version and exit?
 154 my $Print_Version = 0;
 155
 156 # Just print usage message and exit?
 157 my $Print_Usage = 0;
 158
 159 # What file should we generate (defaults to "ChangeLog")?
 160 my $Log_File_Name = "ChangeLog";
 161
 162 # Grab most recent entry date from existing ChangeLog file, just add
 163 # to that ChangeLog.
 164 my $Cumulative = 0;
 165
 166 # `cvs log -d`, this will repeat the last entry in the old log.  This is OK,
 167 # as it guarantees at least one entry in the update changelog, which means
 168 # that there will always be a date to extract for the next update.  The repeat
 169 # entry can be removed in postprocessing, if necessary.
 170
 171 # MJP 2003-08-02
 172 # I don't think this actually does anything useful
 173 my $Update = 0;
 174
 175 # Expand usernames to email addresses based on a map file?
 176 my $User_Map_File = '';
 177 my $User_Passwd_File;
 178 my $Mail_Domain;
 179
 180 # Output log in chronological order? [default is reverse chronological order]
 181 my $Chronological_Order = 0;
 182
 183 # Grab user details via gecos
 184 my $Gecos = 0;
 185
 186 # User domain for gecos email addresses
 187 my $Domain;
 188
 189 # Output to a file or to stdout?
 190 my $Output_To_Stdout = 0;
 191
 192 # Eliminate empty log messages?
 193 my $Prune_Empty_Msgs = 0;
 194
 195 # Tags of which not to output
 196 my %ignore_tags;
 197
 198 # Show only revisions with Tags
 199 my %show_tags;
 200
 201 # Don't call Text::Wrap on the body of the message
 202 my $No_Wrap = 0;
 203
 204 # Indentation of log messages
 205 my $Indent = "\t";
 206
 207 # Don't do any pretty print processing
 208 my $Summary = 0;
 209
 210 # Separates header from log message.  Code assumes it is either " " or
 211 # "\n\n", so if there's ever an option to set it to something else,
 212 # make sure to go through all conditionals that use this var.
 213 my $After_Header = " ";
 214
 215 # XML Encoding
 216 my $XML_Encoding = '';
 217
 218 # Format more for programs than for humans.
 219 my $XML_Output = 0;
 220 my $No_XML_Namespace = 0;
 221 my $No_XML_ISO_Date = 0;
 222
 223 # Do some special tweaks for log data that was written in FSF
 224 # ChangeLog style.
 225 my $FSF_Style = 0;
 226
 227 # Show times in UTC instead of local time
 228 my $UTC_Times = 0;
 229
 230 # Show times in output?
 231 my $Show_Times = 1;
 232
 233 # Show day of week in output?
 234 my $Show_Day_Of_Week = 0;
 235
 236 # Show revision numbers in output?
 237 my $Show_Revisions = 0;
 238
 239 # Show dead files in output?
 240 my $Show_Dead = 0;
 241
 242 # Hide dead trunk files which were created as a result of additions on a
 243 # branch?
 244 my $Hide_Branch_Additions = 1;
 245
 246 # Show tags (symbolic names) in output?
 247 my $Show_Tags = 0;
 248
 249 # Show tags separately in output?
 250 my $Show_Tag_Dates = 0;
 251
 252 # Show branches by symbolic name in output?
 253 my $Show_Branches = 0;
 254
 255 # Show only revisions on these branches or their ancestors.
 256 my @Follow_Branches;
 257 # Show only revisions on these branches or their ancestors; ignore descendent
 258 # branches.
 259 my @Follow_Only;
 260
 261 # Don't bother with files matching this regexp.
 262 my @Ignore_Files;
 263
 264 # How exactly we match entries.  We definitely want "o",
 265 # and user might add "i" by using --case-insensitive option.
 266 my $Case_Insensitive = 0;
 267
 268 # Maybe only show log messages matching a certain regular expression.
 269 my $Regexp_Gate = '';
 270
 271 # Pass this global option string along to cvs, to the left of `log':
 272 my $Global_Opts = '';
 273
 274 # Pass this option string along to the cvs log subcommand:
 275 my $Command_Opts = '';
 276
 277 # Read log output from stdin instead of invoking cvs log?
 278 my $Input_From_Stdin = 0;
 279
 280 # Don't show filenames in output.
 281 my $Hide_Filenames = 0;
 282
 283 # Don't shorten directory names from filenames.
 284 my $Common_Dir = 1;
 285
 286 # Max checkin duration. CVS checkin is not atomic, so we may have checkin
 287 # times that span a range of time. We assume that checkins will last no
 288 # longer than $Max_Checkin_Duration seconds, and that similarly, no
 289 # checkins will happen from the same users with the same message less
 290 # than $Max_Checkin_Duration seconds apart.
 291 my $Max_Checkin_Duration = 180;
 292
 293 # What to put at the front of [each] ChangeLog.
 294 my $ChangeLog_Header = '';
 295
 296 # Whether to enable 'delta' mode, and for what start/end tags.
 297 my $Delta_Mode = 0;
 298 my $Delta_From = '';
 299 my $Delta_To = '';
 300
 301 my $TestCode;
 302
 303 # Whether to parse filenames from the RCS filename, and if so what
 304 # prefix to strip.
 305 my $RCS_Root;
 306
 307 # Whether to output information on the # of lines added and removed
 308 # by each file modification.
 309 my $Show_Lines_Modified = 0;
 310
 311 ## end vars set by options.
 312
 313 # latest observed times for the start/end tags in delta mode
 314 my $Delta_StartTime = 0;
 315 my $Delta_EndTime = 0;
 316
 317 my $No_Ancestors = 0;
 318
 319 my $No_Extra_Indent = 0;
 320
 321 my $GroupWithinDate = 0;
 322
 323 # ----------------------------------------------------------------------------
 324
 325 package CVS::Utils::ChangeLog::EntrySet;
 326
 327 sub new {
 328   my $class = shift;
 329   my %self;
 330   bless \%self, $class;
 331 }
 332
 333 # -------------------------------------
 334
 335 sub output_changelog {
 336   my $output_type = $XML_Output ? 'XML' : 'Text';
 337   my $output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}";
 338   my $output = $output_class->new(follow_branches => \@Follow_Branches,
 339                                   follow_only     => \@Follow_Only,
 340                                   ignore_tags     => \%ignore_tags,
 341                                   show_tags       => \%show_tags,
 342                                  );
 343   $output->output_changelog(@_);
 344 }
 345
 346 # -------------------------------------
 347
 348 sub add_fileentry {
 349   my ($self, $file_full_path, $time, $revision, $state, $lines,
 350       $branch_names, $branch_roots, $branch_numbers,
 351       $symbolic_names, $author, $msg_txt) = @_;
 352
 353       my $qunk =
 354         CVS::Utils::ChangeLog::FileEntry->new($file_full_path, $time, $revision,
 355                                               $state, $lines,
 356                                               $branch_names, $branch_roots,
 357                                               $branch_numbers,
 358                                               $symbolic_names);
 359
 360       # We might be including revision numbers and/or tags and/or
 361       # branch names in the output.  Most of the code from here to
 362       # loop-end deals with organizing these in qunk.
 363
 364       unless ( $Hide_Branch_Additions
 365                and
 366                $msg_txt =~ /file .+ was initially added on branch \S+./ ) {
 367         # Add this file to the list
 368         # (We use many spoonfuls of autovivication magic. Hashes and arrays
 369         # will spring into existence if they aren't there already.)
 370
 371         &main::debug ("(pushing log msg for ". $qunk->dir_key . $qunk->filename . ")\n");
 372
 373         # Store with the files in this commit.  Later we'll loop through
 374         # again, making sure that revisions with the same log message
 375         # and nearby commit times are grouped together as one commit.
 376         $self->{$qunk->dir_key}{$author}{$time}{$msg_txt} =
 377           CVS::Utils::ChangeLog::Message->new($msg_txt)
 378               unless exists $self->{$qunk->dir_key}{$author}{$time}{$msg_txt};
 379         $self->{$qunk->dir_key}{$author}{$time}{$msg_txt}->add_fileentry($qunk);
 380       }
 381
 382 }
 383
 384 # ----------------------------------------------------------------------------
 385
 386 package CVS::Utils::ChangeLog::EntrySet::Output::Text;
 387
 388 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
 389
 390 use File::Basename qw( fileparse );
 391
 392 sub new {
 393   my $class = shift;
 394   my $self = $class->SUPER::new(@_);
 395 }
 396
 397 # -------------------------------------
 398
 399 sub wday {
 400   my $self = shift; my $class = ref $self;
 401   my ($wday) = @_;
 402
 403   return $Show_Day_Of_Week ? ' ' . $class->weekday_en($wday) : '';
 404 }
 405
 406 # -------------------------------------
 407
 408 sub header_line {
 409   my $self = shift;
 410   my ($time, $author, $lastdate) = @_;
 411
 412   my $header_line = '';
 413
 414   my (undef,$min,$hour,$mday,$mon,$year,$wday)
 415     = $UTC_Times ? gmtime($time) : localtime($time);
 416
 417   my $date = $self->fdatetime($time);
 418
 419   if ($Show_Times) {
 420     $header_line =
 421       sprintf "%s  %s\n\n", $date, $author;
 422   } else {
 423     if ( ! defined $lastdate or $date ne $lastdate or ! $GroupWithinDate ) {
 424       if ( $GroupWithinDate ) {
 425         $header_line = "$date\n\n";
 426       } else {
 427         $header_line = "$date  $author\n\n";
 428       }
 429     } else {
 430       $header_line = '';
 431     }
 432   }
 433 }
 434
 435 # -------------------------------------
 436
 437 sub preprocess_msg_text {
 438   my $self = shift;
 439   my ($text) = @_;
 440
 441   $text = $self->SUPER::preprocess_msg_text($text);
 442
 443   unless ( $No_Wrap ) {
 444     # Strip off lone newlines, but only for lines that don't begin with
 445     # whitespace or a mail-quoting character, since we want to preserve
 446     # that kind of formatting.  Also don't strip newlines that follow a
 447     # period; we handle those specially next.  And don't strip
 448     # newlines that precede an open paren.
 449     1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g;
 450
 451     # If a newline follows a period, make sure that when we bring up the
 452     # bottom sentence, it begins with two spaces.
 453     1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g;
 454   }
 455
 456   return $text;
 457 }
 458
 459 # -------------------------------------
 460
 461 # Here we take a bunch of qunks and convert them into printed
 462 # summary that will include all the information the user asked for.
 463 sub pretty_file_list {
 464   my $self = shift;
 465
 466   return ''
 467     if $Hide_Filenames;
 468
 469   my $qunksref = shift;
 470
 471   my @filenames;
 472   my $beauty = '';          # The accumulating header string for this entry.
 473   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
 474   my %unanimous_tags;       # Tags found in all qunks
 475   my %all_branches;         # Branches found in any qunk
 476   my $fbegun = 0;           # Did we begin printing filenames yet?
 477
 478   my ($common_dir, $qunkrefs) =
 479     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref);
 480
 481   my @qunkrefs = @$qunkrefs;
 482
 483   # Not XML output, so complexly compactify for chordate consumption.  At this
 484   # point we have enough global information about all the qunks to organize
 485   # them non-redundantly for output.
 486
 487   if ($common_dir) {
 488     # Note that $common_dir still has its trailing slash
 489     $beauty .= "$common_dir: ";
 490   }
 491
 492   if ($Show_Branches)
 493   {
 494     # For trailing revision numbers.
 495     my @brevisions;
 496
 497     foreach my $branch (keys (%all_branches))
 498     {
 499       foreach my $qunkref (@qunkrefs)
 500       {
 501         if ((defined ($qunkref->branch))
 502             and ($qunkref->branch eq $branch))
 503         {
 504           if ($fbegun) {
 505             # kff todo: comma-delimited in XML too?  Sure.
 506             $beauty .= ", ";
 507           }
 508           else {
 509             $fbegun = 1;
 510           }
 511           my $fname = substr ($qunkref->filename, length ($common_dir));
 512           $beauty .= $fname;
 513           $qunkref->{'printed'} = 1;  # Just setting a mark bit, basically
 514
 515           if ( $Show_Tags and defined $qunkref->tags ) {
 516             my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
 517
 518             if (@tags) {
 519               $beauty .= " (tags: ";
 520               $beauty .= join (', ', @tags);
 521               $beauty .= ")";
 522             }
 523           }
 524
 525           if ($Show_Revisions) {
 526             # Collect the revision numbers' last components, but don't
 527             # print them -- they'll get printed with the branch name
 528             # later.
 529             $qunkref->revision =~ /.+\.([\d]+)$/;
 530             push (@brevisions, $1);
 531
 532             # todo: we're still collecting branch roots, but we're not
 533             # showing them anywhere.  If we do show them, it would be
 534             # nifty to just call them revision "0" on a the branch.
 535             # Yeah, that's the ticket.
 536           }
 537         }
 538       }
 539       $beauty .= " ($branch";
 540       if (@brevisions) {
 541         if ((scalar (@brevisions)) > 1) {
 542           $beauty .= ".[";
 543           $beauty .= (join (',', @brevisions));
 544           $beauty .= "]";
 545         }
 546         else {
 547           # Square brackets are spurious here, since there's no range to
 548           # encapsulate
 549           $beauty .= ".$brevisions[0]";
 550         }
 551       }
 552       $beauty .= ")";
 553     }
 554   }
 555
 556   # Okay; any qunks that were done according to branch are taken care
 557   # of, and marked as printed.  Now print everyone else.
 558
 559   my %fileinfo_printed;
 560   foreach my $qunkref (@qunkrefs)
 561   {
 562     next if (defined ($qunkref->{'printed'}));   # skip if already printed
 563
 564     my $b = substr ($qunkref->filename, length ($common_dir));
 565     # todo: Shlomo's change was this:
 566     # $beauty .= substr ($qunkref->filename,
 567     #              (($common_dir eq "./") ? '' : length ($common_dir)));
 568     $qunkref->{'printed'} = 1;  # Set a mark bit.
 569
 570     if ($Show_Revisions || $Show_Tags || $Show_Dead)
 571     {
 572       my $started_addendum = 0;
 573
 574       if ($Show_Revisions) {
 575         $started_addendum = 1;
 576         $b .= " (";
 577         $b .= $qunkref->revision;
 578       }
 579       if ($Show_Dead && $qunkref->state =~ /dead/)
 580       {
 581         # Deliberately not using $started_addendum. Keeping it simple.
 582         $b .= "[DEAD]";
 583       }
 584       if ($Show_Tags && (defined $qunkref->tags)) {
 585         my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
 586         if ((scalar (@tags)) > 0) {
 587           if ($started_addendum) {
 588             $b .= ", ";
 589           }
 590           else {
 591             $b .= " (tags: ";
 592           }
 593           $b .= join (', ', @tags);
 594           $started_addendum = 1;
 595         }
 596       }
 597       if ($started_addendum) {
 598         $b .= ")";
 599       }
 600     }
 601
 602     unless ( exists $fileinfo_printed{$b} ) {
 603       if ($fbegun) {
 604         $beauty .= ", ";
 605       } else {
 606         $fbegun = 1;
 607       }
 608       $beauty .= $b, $fileinfo_printed{$b} = 1;
 609     }
 610   }
 611
 612   # Unanimous tags always come last.
 613   if ($Show_Tags && %unanimous_tags)
 614   {
 615     $beauty .= " (utags: ";
 616     $beauty .= join (', ', sort keys (%unanimous_tags));
 617     $beauty .= ")";
 618   }
 619
 620   # todo: still have to take care of branch_roots?
 621
 622   $beauty = "$beauty:";
 623
 624   return $beauty;
 625 }
 626
 627 # -------------------------------------
 628
 629 sub output_tagdate {
 630   my $self = shift;
 631   my ($fh, $time, $tag) = @_;
 632
 633   my $fdatetime = $self->fdatetime($time);
 634   print $fh "$fdatetime  tag $tag\n\n";
 635   return;
 636 }
 637
 638 # -------------------------------------
 639
 640 sub format_body {
 641   my $self = shift;
 642   my ($msg, $files, $qunklist) = @_;
 643
 644   my $body;
 645
 646   if ( $No_Wrap and ! $Summary ) {
 647     $msg = $self->preprocess_msg_text($msg);
 648     $files = $self->mywrap("\t", "\t  ", "* $files");
 649     $msg =~ s/\n(.+)/\n$Indent$1/g;
 650     unless ($After_Header eq " ") {
 651       $msg =~ s/^(.+)/$Indent$1/g;
 652     }
 653     if ( $Hide_Filenames ) {
 654       $body = $After_Header . $msg;
 655     } else {
 656       $body = $files . $After_Header . $msg;
 657     }
 658   } elsif ( $Summary ) {
 659     my ($filelist, $qunk);
 660     my (@DeletedQunks, @AddedQunks, @ChangedQunks);
 661
 662     $msg = $self->preprocess_msg_text($msg);
 663     #
 664     #     Sort the files (qunks) according to the operation that was
 665     # performed.  Files which were added have no line change
 666     # indicator, whereas deleted files have state dead.
 667     #
 668     foreach $qunk ( @$qunklist ) {
 669       if ( "dead" eq $qunk->state) {
 670         push @DeletedQunks, $qunk;
 671       } elsif ( ! defined $qunk->lines ) {
 672         push @AddedQunks, $qunk;
 673       } else {
 674         push @ChangedQunks, $qunk;
 675       }
 676     }
 677     #
 678     #     The qunks list was  originally in tree search order.  Let's
 679     # get that back.  The lists, if they exist, will be reversed upon
 680     # processing.
 681     #
 682
 683     #
 684     #     Now write the three sections onto $filelist
 685     #
 686     if ( @DeletedQunks ) {
 687       $filelist .= "\tDeleted:\n";
 688       foreach $qunk ( @DeletedQunks ) {
 689         $filelist .= "\t\t" . $qunk->filename;
 690         $filelist .= " (" . $qunk->revision . ")";
 691         $filelist .= "\n";
 692       }
 693       undef @DeletedQunks;
 694     }
 695
 696     if ( @AddedQunks ) {
 697       $filelist .= "\tAdded:\n";
 698       foreach $qunk (@AddedQunks) {
 699         $filelist .= "\t\t" . $qunk->filename;
 700         $filelist .= " (" . $qunk->revision . ")";
 701         $filelist .= "\n";
 702       }
 703       undef @AddedQunks ;
 704     }
 705
 706     if ( @ChangedQunks ) {
 707       $filelist .= "\tChanged:\n";
 708       foreach $qunk (@ChangedQunks) {
 709         $filelist .= "\t\t" . $qunk->filename;
 710         $filelist .= " (" . $qunk->revision . ")";
 711         $filelist .= ", \"" . $qunk->state . "\"";
 712         $filelist .= ", lines: " . $qunk->lines;
 713         $filelist .= "\n";
 714       }
 715       undef @ChangedQunks;
 716     }
 717
 718     chomp $filelist;
 719
 720     if ( $Hide_Filenames ) {
 721       $filelist = '';
 722     }
 723
 724     $msg =~ s/\n(.*)/\n$Indent$1/g;
 725     unless ( $After_Header eq " " or $FSF_Style ) {
 726       $msg =~ s/^(.*)/$Indent$1/g;
 727     }
 728
 729     unless ( $No_Wrap ) {
 730       if ( $FSF_Style ) {
 731         $msg = $self->wrap_log_entry($msg, '', 69, 69);
 732         chomp($msg);
 733         chomp($msg);
 734       } else {
 735         $msg = $self->mywrap('', $Indent, "$msg");
 736         $msg =~ s/[ \t]+\n/\n/g;
 737       }
 738     }
 739
 740     $body = $filelist . $After_Header . $msg;
 741   } else {  # do wrapping, either FSF-style or regular
 742     my $latter_wrap = $No_Extra_Indent ? $Indent : "$Indent  ";
 743
 744     if ( $FSF_Style ) {
 745       $files = $self->mywrap($Indent, $latter_wrap, "* $files");
 746
 747       my $files_last_line_len = 0;
 748       if ( $After_Header eq " " ) {
 749         $files_last_line_len = $self->last_line_len($files);
 750         $files_last_line_len += 1;  # for $After_Header
 751       }
 752
 753       $msg = $self->wrap_log_entry($msg, $latter_wrap, 69-$files_last_line_len, 69);
 754       $body = $files . $After_Header . $msg;
 755     } else {  # not FSF-style
 756       $msg = $self->preprocess_msg_text($msg);
 757       $body = $files . $After_Header . $msg;
 758       $body = $self->mywrap($Indent, $latter_wrap, "* $body");
 759       $body =~ s/[ \t]+\n/\n/g;
 760     }
 761   }
 762
 763   return $body;
 764 }
 765
 766 # ----------------------------------------------------------------------------
 767
 768 package CVS::Utils::ChangeLog::EntrySet::Output::XML;
 769
 770 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
 771
 772 use File::Basename qw( fileparse );
 773
 774 sub new {
 775   my $class = shift;
 776   my $self = $class->SUPER::new(@_);
 777 }
 778
 779 # -------------------------------------
 780
 781 sub header_line {
 782   my $self = shift;
 783   my ($time, $author, $lastdate) = @_;
 784
 785   my $header_line = '';
 786
 787   my $isoDate;
 788
 789   my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
 790
 791   # Ideally, this would honor $UTC_Times and use +HH:MM syntax
 792   $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
 793                      $y + 1900, $m + 1, $d, $H, $M, $S);
 794
 795   my (undef,$min,$hour,$mday,$mon,$year,$wday)
 796     = $UTC_Times ? gmtime($time) : localtime($time);
 797
 798   my $date = $self->fdatetime($time);
 799   $wday = $self->wday($wday);
 800
 801   $header_line =
 802     sprintf ("<date>%4u-%02u-%02u</date>\n${wday}<time>%02u:%02u</time>\n",
 803              $year+1900, $mon+1, $mday, $hour, $min);
 804   $header_line .= "<isoDate>$isoDate</isoDate>\n"
 805     unless $No_XML_ISO_Date;
 806   $header_line .= sprintf("<author>%s</author>\n" , $author);
 807 }
 808
 809 # -------------------------------------
 810
 811 sub wday {
 812   my $self = shift; my $class = ref $self;
 813   my ($wday) = @_;
 814
 815   return '<weekday>' . $class->weekday_en($wday) . "</weekday>\n";
 816 }
 817
 818 # -------------------------------------
 819
 820 sub escape {
 821   my $self = shift;
 822
 823   my $txt = shift;
 824   $txt =~ s/&/&amp;/g;
 825   $txt =~ s/</&lt;/g;
 826   $txt =~ s/>/&gt;/g;
 827   return $txt;
 828 }
 829
 830 # -------------------------------------
 831
 832 sub output_header {
 833   my $self = shift;
 834   my ($fh) = @_;
 835
 836   my $encoding    =
 837     length $XML_Encoding ? qq'encoding="$XML_Encoding"' : '';
 838   my $version     = 'version="1.0"';
 839   my $declaration =
 840     sprintf '<?xml %s?>', join ' ', grep length, $version, $encoding;
 841   my $root        =
 842     $No_XML_Namespace ?
 843       '<changelog>'     :
 844         '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
 845   print $fh "$declaration\n\n$root\n\n";
 846 }
 847
 848 # -------------------------------------
 849
 850 sub output_footer {
 851   my $self = shift;
 852   my ($fh) = @_;
 853
 854   print $fh "</changelog>\n";
 855 }
 856
 857 # -------------------------------------
 858
 859 sub preprocess_msg_text {
 860   my $self = shift;
 861   my ($text) = @_;
 862
 863   $text = $self->SUPER::preprocess_msg_text($text);
 864
 865   $text = $self->escape($text);
 866   chomp $text;
 867   $text = "<msg>${text}</msg>\n";
 868
 869   return $text;
 870 }
 871
 872 # -------------------------------------
 873
 874 # Here we take a bunch of qunks and convert them into a printed
 875 # summary that will include all the information the user asked for.
 876 sub pretty_file_list {
 877   my $self = shift;
 878   my ($qunksref) = @_;
 879
 880   my $beauty = '';          # The accumulating header string for this entry.
 881   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
 882   my %unanimous_tags;       # Tags found in all qunks
 883   my %all_branches;         # Branches found in any qunk
 884   my $fbegun = 0;           # Did we begin printing filenames yet?
 885
 886   my ($common_dir, $qunkrefs) =
 887     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches),
 888       $qunksref);
 889
 890   my @qunkrefs = @$qunkrefs;
 891
 892   # If outputting XML, then our task is pretty simple, because we
 893   # don't have to detect common dir, common tags, branch prefixing,
 894   # etc.  We just output exactly what we have, and don't worry about
 895   # redundancy or readability.
 896
 897   foreach my $qunkref (@qunkrefs)
 898   {
 899     my $filename    = $qunkref->filename;
 900     my $state       = $qunkref->state;
 901     my $revision    = $qunkref->revision;
 902     my $tags        = $qunkref->tags;
 903     my $branch      = $qunkref->branch;
 904     my $branchroots = $qunkref->roots;
 905     my $lines       = $qunkref->lines;
 906
 907     $filename = $self->escape($filename);   # probably paranoia
 908     $revision = $self->escape($revision);   # definitely paranoia
 909
 910     $beauty .= "<file>\n";
 911     $beauty .= "<name>${filename}</name>\n";
 912     $beauty .= "<cvsstate>${state}</cvsstate>\n";
 913     $beauty .= "<revision>${revision}</revision>\n";
 914
 915     if ($Show_Lines_Modified
 916         && $lines && $lines =~ m/\+(\d+)\s+-(\d+)/) {
 917         $beauty .= "<linesadded>$1</linesadded>\n";
 918         $beauty .= "<linesremoved>$2</linesremoved>\n";
 919     }
 920
 921     if ($branch) {
 922       $branch   = $self->escape($branch);     # more paranoia
 923       $beauty .= "<branch>${branch}</branch>\n";
 924     }
 925     foreach my $tag (@$tags) {
 926       $tag = $self->escape($tag);  # by now you're used to the paranoia
 927       $beauty .= "<tag>${tag}</tag>\n";
 928     }
 929     foreach my $root (@$branchroots) {
 930       $root = $self->escape($root);  # which is good, because it will continue
 931       $beauty .= "<branchroot>${root}</branchroot>\n";
 932     }
 933     $beauty .= "</file>\n";
 934   }
 935
 936   # Theoretically, we could go home now.  But as long as we're here,
 937   # let's print out the common_dir and utags, as a convenience to
 938   # the receiver (after all, earlier code calculated that stuff
 939   # anyway, so we might as well take advantage of it).
 940
 941   if ((scalar (keys (%unanimous_tags))) > 1) {
 942     foreach my $utag ((keys (%unanimous_tags))) {
 943       $utag = $self->escape($utag);   # the usual paranoia
 944       $beauty .= "<utag>${utag}</utag>\n";
 945     }
 946   }
 947   if ($common_dir) {
 948     $common_dir = $self->escape($common_dir);
 949     $beauty .= "<commondir>${common_dir}</commondir>\n";
 950   }
 951
 952   # That's enough for XML, time to go home:
 953   return $beauty;
 954 }
 955
 956 # -------------------------------------
 957
 958 sub output_tagdate {
 959   # NOT YET DONE
 960 }
 961
 962 # -------------------------------------
 963
 964 sub output_entry {
 965   my $self = shift;
 966   my ($fh, $entry) = @_;
 967   print $fh "<entry>\n$entry</entry>\n\n";
 968 }
 969
 970 # -------------------------------------
 971
 972 sub format_body {
 973   my $self = shift;
 974   my ($msg, $files, $qunklist) = @_;
 975
 976   $msg = $self->preprocess_msg_text($msg);
 977   return $files . $msg;
 978 }
 979
 980 # ----------------------------------------------------------------------------
 981
 982 package CVS::Utils::ChangeLog::EntrySet::Output;
 983
 984 use Carp           qw( croak );
 985 use File::Basename qw( fileparse );
 986
 987 # Class Utility Functions -------------
 988
 989 { # form closure
 990
 991 my @weekdays = (qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday));
 992 sub weekday_en {
 993   my $class = shift;
 994   return $weekdays[$_[0]];
 995 }
 996
 997 }
 998
 999 # -------------------------------------
1000
1001 sub new {
1002   my ($proto, %args) = @_;
1003   my $class = ref $proto || $proto;
1004
1005   my $follow_branches = delete $args{follow_branches};
1006   my $follow_only     = delete $args{follow_only};
1007   my $ignore_tags     = delete $args{ignore_tags};
1008   my $show_tags       = delete $args{show_tags};
1009   die "Unrecognized arg to EntrySet::Output::new: '$_'\n"
1010     for keys %args;
1011
1012   bless +{follow_branches => $follow_branches,
1013           follow_only     => $follow_only,
1014           show_tags       => $show_tags,
1015           ignore_tags     => $ignore_tags,
1016          }, $class;
1017 }
1018
1019 # Abstract Subrs ----------------------
1020
1021 sub wday               { croak "Whoops.  Abtract method call (wday).\n" }
1022 sub pretty_file_list   { croak "Whoops.  Abtract method call (pretty_file_list).\n" }
1023 sub output_tagdate     { croak "Whoops.  Abtract method call (output_tagdate).\n" }
1024 sub header_line        { croak "Whoops.  Abtract method call (header_line).\n" }
1025
1026 # Instance Subrs ----------------------
1027
1028 sub output_header { }
1029
1030 # -------------------------------------
1031
1032 sub output_entry {
1033   my $self = shift;
1034   my ($fh, $entry) = @_;
1035   print $fh "$entry\n";
1036 }
1037
1038 # -------------------------------------
1039
1040 sub output_footer { }
1041
1042 # -------------------------------------
1043
1044 sub escape { return $_[1] }
1045
1046 # -------------------------------------
1047
1048 sub _revision_is_wanted {
1049   my ($self, $qunk) = @_;
1050
1051   my ($revision, $branch_numbers) = @{$qunk}{qw( revision branch_numbers )};
1052   my $follow_branches = $self->{follow_branches};
1053   my $follow_only     = $self->{follow_only};
1054
1055 #print STDERR "IG: ", join(',', keys %{$self->{ignore_tags}}), "\n";
1056 #print STDERR "IX: ", join(',', @{$qunk->{tags}}), "\n" if defined $qunk->{tags};
1057 #print STDERR "IQ: ", join(',', keys %{$qunk->{branch_numbers}}), "\n" if defined $qunk->{branch_numbers};
1058 #use Data::Dumper; print STDERR Dumper $qunk;
1059
1060   for my $ignore_tag (keys %{$self->{ignore_tags}}) {
1061     return
1062       if defined $qunk->{tags} and grep $_ eq $ignore_tag, @{$qunk->{tags}};
1063   }
1064
1065   if ( keys %{$self->{show_tags}} ) {
1066     for my $show_tag (keys %{$self->{show_tags}}) {
1067       return
1068         if ! defined $qunk->{tags} or ! grep $_ eq $show_tag, @{$qunk->{tags}};
1069     }
1070   }
1071
1072   return 1
1073     unless @$follow_branches + @$follow_only; # no follow is follow all
1074
1075   for my $x (map([$_, 1], @$follow_branches),
1076              map([$_, 0], @$follow_only    )) {
1077     my ($branch, $followsub) = @$x;
1078
1079     # Special case for following trunk revisions
1080     return 1
1081       if $branch =~ /^trunk$/i and $revision =~ /^[0-9]+\.[0-9]+$/;
1082
1083     if ( my $branch_number = $branch_numbers->{$branch} ) {
1084       # Are we on one of the follow branches or an ancestor of same?
1085
1086       # If this revision is a prefix of the branch number, or possibly is less
1087       # in the minormost number, OR if this branch number is a prefix of the
1088       # revision, then yes.  Otherwise, no.
1089
1090       # So below, we determine if any of those conditions are met.
1091
1092       # Trivial case: is this revision on the branch?  (Compare this way to
1093       # avoid regexps that screw up Emacs indentation, argh.)
1094       if ( substr($revision, 0, (length($branch_number) + 1))
1095            eq
1096            ($branch_number . ".") ) {
1097         if ( $followsub ) {
1098           return 1;
1099         } elsif (length($revision) == length($branch_number)+2 ) {
1100           return 1;
1101         }
1102       } elsif ( length($branch_number) > length($revision)
1103                 and
1104                 $No_Ancestors ) {
1105         # Non-trivial case: check if rev is ancestral to branch
1106
1107         # r_left still has the trailing "."
1108         my ($r_left, $r_end) = ($revision =~ /^((?:\d+\.)+)(\d+)$/);
1109
1110         # b_left still has trailing "."
1111         # b_mid has no trailing "."
1112         my ($b_left, $b_mid) = ($branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/);
1113         return 1
1114           if $r_left eq $b_left and $r_end <= $b_mid;
1115       }
1116     }
1117   }
1118
1119   return;
1120 }
1121
1122 # -------------------------------------
1123
1124 sub output_changelog {
1125 my $self = shift; my $class = ref $self;
1126   my ($grand_poobah) = @_;
1127   ### Process each ChangeLog
1128
1129   while (my ($dir,$authorhash) = each %$grand_poobah)
1130   {
1131     &main::debug ("DOING DIR: $dir\n");
1132
1133     # Here we twist our hash around, from being
1134     #   author => time => message => filelist
1135     # in %$authorhash to
1136     #   time => author => message => filelist
1137     # in %changelog.
1138     #
1139     # This is also where we merge entries.  The algorithm proceeds
1140     # through the timeline of the changelog with a sliding window of
1141     # $Max_Checkin_Duration seconds; within that window, entries that
1142     # have the same log message are merged.
1143     #
1144     # (To save space, we zap %$authorhash after we've copied
1145     # everything out of it.)
1146
1147     my %changelog;
1148     while (my ($author,$timehash) = each %$authorhash)
1149     {
1150       my %stamptime;
1151       foreach my $time (sort {$a <=> $b} (keys %$timehash))
1152       {
1153         my $msghash = $timehash->{$time};
1154         while (my ($msg,$qunklist) = each %$msghash)
1155         {
1156           my $stamptime = $stamptime{$msg};
1157           if ((defined $stamptime)
1158               and (($time - $stamptime) < $Max_Checkin_Duration)
1159               and (defined $changelog{$stamptime}{$author}{$msg}))
1160           {
1161             push(@{$changelog{$stamptime}{$author}{$msg}}, $qunklist->files);
1162           }
1163           else {
1164             $changelog{$time}{$author}{$msg} = $qunklist->files;
1165             $stamptime{$msg} = $time;
1166           }
1167         }
1168       }
1169     }
1170     undef (%$authorhash);
1171
1172     ### Now we can write out the ChangeLog!
1173
1174     my ($logfile_here, $logfile_bak, $tmpfile);
1175     my $lastdate;
1176
1177     if (! $Output_To_Stdout) {
1178       $logfile_here =  $dir . $Log_File_Name;
1179       $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem
1180       $tmpfile      = "${logfile_here}.cvs2cl$$.tmp";
1181       $logfile_bak  = "${logfile_here}.bak";
1182
1183       open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
1184     }
1185     else {
1186       open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
1187     }
1188
1189     print LOG_OUT $ChangeLog_Header;
1190
1191     my %tag_date_printed;
1192
1193     $self->output_header(\*LOG_OUT);
1194
1195     my @key_list = ();
1196     if($Chronological_Order) {
1197         @key_list = sort {$a <=> $b} (keys %changelog);
1198     } else {
1199         @key_list = sort {$b <=> $a} (keys %changelog);
1200     }
1201     foreach my $time (@key_list)
1202     {
1203       next if ($Delta_Mode &&
1204                (($time <= $Delta_StartTime) ||
1205                 ($time > $Delta_EndTime && $Delta_EndTime)));
1206
1207       # Set up the date/author line.
1208       # kff todo: do some more XML munging here, on the header
1209       # part of the entry:
1210       my (undef,$min,$hour,$mday,$mon,$year,$wday)
1211           = $UTC_Times ? gmtime($time) : localtime($time);
1212
1213       $wday = $self->wday($wday);
1214       # XML output includes everything else, we might as well make
1215       # it always include Day Of Week too, for consistency.
1216       my $authorhash = $changelog{$time};
1217       if ($Show_Tag_Dates) {
1218         my %tags;
1219         while (my ($author,$mesghash) = each %$authorhash) {
1220           while (my ($msg,$qunk) = each %$mesghash) {
1221             foreach my $qunkref2 (@$qunk) {
1222               if (defined ($qunkref2->tags)) {
1223                 foreach my $tag (@{$qunkref2->tags}) {
1224                   $tags{$tag} = 1;
1225                 }
1226               }
1227             }
1228           }
1229         }
1230         # Sort here for determinism to ease testing
1231         foreach my $tag (sort keys %tags) {
1232           if ( ! defined $tag_date_printed{$tag} ) {
1233             $tag_date_printed{$tag} = $time;
1234             $self->output_tagdate(\*LOG_OUT, $time, $tag);
1235           }
1236         }
1237       }
1238       while (my ($author,$mesghash) = each %$authorhash)
1239       {
1240         # If XML, escape in outer loop to avoid compound quoting:
1241         $author = $self->escape($author);
1242
1243       FOOBIE:
1244         # We sort here to enable predictable ordering for the testing porpoises
1245         for my $msg (sort keys %$mesghash)
1246         {
1247           my $qunklist = $mesghash->{$msg};
1248
1249           my @qunklist =
1250             grep $self->_revision_is_wanted($_), @$qunklist;
1251
1252           next FOOBIE unless @qunklist;
1253
1254           my $files               = $self->pretty_file_list(\@qunklist);
1255           my $header_line;          # date and author
1256           my $wholething;           # $header_line + $body
1257
1258           my $date = $self->fdatetime($time);
1259           $header_line = $self->header_line($time, $author, $lastdate);
1260           $lastdate = $date;
1261
1262           $Text::Wrap::huge = 'overflow'
1263             if $Text::Wrap::VERSION >= 2001.0130;
1264           # Reshape the body according to user preferences.
1265           my $body = $self->format_body($msg, $files, \@qunklist);
1266
1267           $body =~ s/[ \t]+\n/\n/g;
1268           $wholething = $header_line . $body;
1269
1270           # One last check: make sure it passes the regexp test, if the
1271           # user asked for that.  We have to do it here, so that the
1272           # test can match against information in the header as well
1273           # as in the text of the log message.
1274
1275           # How annoying to duplicate so much code just because I
1276           # can't figure out a way to evaluate scalars on the trailing
1277           # operator portion of a regular expression.  Grrr.
1278           if ($Case_Insensitive) {
1279             unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/oi ) ) {
1280               $self->output_entry(\*LOG_OUT, $wholething);
1281             }
1282           }
1283           else {
1284             unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/o ) ) {
1285               $self->output_entry(\*LOG_OUT, $wholething);
1286             }
1287           }
1288         }
1289       }
1290     }
1291
1292     $self->output_footer(\*LOG_OUT);
1293
1294     close (LOG_OUT);
1295
1296     if ( ! $Output_To_Stdout ) {
1297       # If accumulating, append old data to new before renaming.  But
1298       # don't append the most recent entry, since it's already in the
1299       # new log due to CVS's idiosyncratic interpretation of "log -d".
1300       if ($Cumulative && -f $logfile_here) {
1301         open NEW_LOG, ">>$tmpfile"
1302           or die "trouble appending to $tmpfile ($!)";
1303
1304         open OLD_LOG, "<$logfile_here"
1305           or die "trouble reading from $logfile_here ($!)";
1306
1307         my $started_first_entry = 0;
1308         my $passed_first_entry = 0;
1309         while (<OLD_LOG>) {
1310           if ( ! $passed_first_entry ) {
1311             if ( ( ! $started_first_entry )
1312                 and /^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/ ) {
1313               $started_first_entry = 1;
1314             } elsif ( /^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/ ) {
1315               $passed_first_entry = 1;
1316               print NEW_LOG $_;
1317             }
1318           } else {
1319             print NEW_LOG $_;
1320           }
1321         }
1322
1323         close NEW_LOG;
1324         close OLD_LOG;
1325       }
1326
1327       if ( -f $logfile_here ) {
1328         rename $logfile_here, $logfile_bak;
1329       }
1330       rename $tmpfile, $logfile_here;
1331     }
1332   }
1333 }
1334
1335 # -------------------------------------
1336
1337 # Don't call this wrap, because with 5.5.3, that clashes with the
1338 # (unconditional :-( ) export of wrap() from Text::Wrap
1339 sub mywrap {
1340   my $self = shift;
1341   my ($indent1, $indent2, @text) = @_;
1342   # If incoming text looks preformatted, don't get clever
1343   my $text = Text::Wrap::wrap($indent1, $indent2, @text);
1344   if ( grep /^\s+/m, @text ) {
1345     return $text;
1346   }
1347   my @lines = split /\n/, $text;
1348   $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e;
1349   $lines[0] =~ s/^$indent1\s+/$indent1/;
1350   s/^$indent2\s+/$indent2/
1351     for @lines[1..$#lines];
1352   my $newtext = join "\n", @lines;
1353   $newtext .= "\n"
1354     if substr($text, -1) eq "\n";
1355   return $newtext;
1356 }
1357
1358 # -------------------------------------
1359
1360 sub preprocess_msg_text {
1361   my $self = shift;
1362   my ($text) = @_;
1363
1364   # Strip out carriage returns (as they probably result from DOSsy editors).
1365   $text =~ s/\r\n/\n/g;
1366   # If it *looks* like two newlines, make it *be* two newlines:
1367   $text =~ s/\n\s*\n/\n\n/g;
1368
1369   return $text;
1370 }
1371
1372 # -------------------------------------
1373
1374 sub last_line_len {
1375   my $self = shift;
1376
1377   my $files_list = shift;
1378   my @lines = split (/\n/, $files_list);
1379   my $last_line = pop (@lines);
1380   return length ($last_line);
1381 }
1382
1383 # -------------------------------------
1384
1385 # A custom wrap function, sensitive to some common constructs used in
1386 # log entries.
1387 sub wrap_log_entry {
1388   my $self = shift;
1389
1390   my $text = shift;                  # The text to wrap.
1391   my $left_pad_str = shift;          # String to pad with on the left.
1392
1393   # These do NOT take left_pad_str into account:
1394   my $length_remaining = shift;      # Amount left on current line.
1395   my $max_line_length  = shift;      # Amount left for a blank line.
1396
1397   my $wrapped_text = '';             # The accumulating wrapped entry.
1398   my $user_indent = '';              # Inherited user_indent from prev line.
1399
1400   my $first_time = 1;                # First iteration of the loop?
1401   my $suppress_line_start_match = 0; # Set to disable line start checks.
1402
1403   my @lines = split (/\n/, $text);
1404   while (@lines)   # Don't use `foreach' here, it won't work.
1405   {
1406     my $this_line = shift (@lines);
1407     chomp $this_line;
1408
1409     if ($this_line =~ /^(\s+)/) {
1410       $user_indent = $1;
1411     }
1412     else {
1413       $user_indent = '';
1414     }
1415
1416     # If it matches any of the line-start regexps, print a newline now...
1417     if ($suppress_line_start_match)
1418     {
1419       $suppress_line_start_match = 0;
1420     }
1421     elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
1422            || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
1423            || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
1424            || ($this_line =~ /^(\s+)(\S+)/)
1425            || ($this_line =~ /^(\s*)- +/)
1426            || ($this_line =~ /^()\s*$/)
1427            || ($this_line =~ /^(\s*)\*\) +/)
1428            || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
1429     {
1430       # Make a line break immediately, unless header separator is set
1431       # and this line is the first line in the entry, in which case
1432       # we're getting the blank line for free already and shouldn't
1433       # add an extra one.
1434       unless (($After_Header ne " ") and ($first_time))
1435       {
1436         if ($this_line =~ /^()\s*$/) {
1437           $suppress_line_start_match = 1;
1438           $wrapped_text .= "\n${left_pad_str}";
1439         }
1440
1441         $wrapped_text .= "\n${left_pad_str}";
1442       }
1443
1444       $length_remaining = $max_line_length - (length ($user_indent));
1445     }
1446
1447     # Now that any user_indent has been preserved, strip off leading
1448     # whitespace, so up-folding has no ugly side-effects.
1449     $this_line =~ s/^\s*//;
1450
1451     # Accumulate the line, and adjust parameters for next line.
1452     my $this_len = length ($this_line);
1453     if ($this_len == 0)
1454     {
1455       # Blank lines should cancel any user_indent level.
1456       $user_indent = '';
1457       $length_remaining = $max_line_length;
1458     }
1459     elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
1460     {
1461       # Walk backwards from the end.  At first acceptable spot, break
1462       # a new line.
1463       my $idx = $length_remaining - 1;
1464       if ($idx < 0) { $idx = 0 };
1465       while ($idx > 0)
1466       {
1467         if (substr ($this_line, $idx, 1) =~ /\s/)
1468         {
1469           my $line_now = substr ($this_line, 0, $idx);
1470           my $next_line = substr ($this_line, $idx);
1471           $this_line = $line_now;
1472
1473           # Clean whitespace off the end.
1474           chomp $this_line;
1475
1476           # The current line is ready to be printed.
1477           $this_line .= "\n${left_pad_str}";
1478
1479           # Make sure the next line is allowed full room.
1480           $length_remaining = $max_line_length - (length ($user_indent));
1481
1482           # Strip next_line, but then preserve any user_indent.
1483           $next_line =~ s/^\s*//;
1484
1485           # Sneak a peek at the user_indent of the upcoming line, so
1486           # $next_line (which will now precede it) can inherit that
1487           # indent level.  Otherwise, use whatever user_indent level
1488           # we currently have, which might be none.
1489           my $next_next_line = shift (@lines);
1490           if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
1491             $next_line = $1 . $next_line if (defined ($1));
1492             # $length_remaining = $max_line_length - (length ($1));
1493             $next_next_line =~ s/^\s*//;
1494           }
1495           else {
1496             $next_line = $user_indent . $next_line;
1497           }
1498           if (defined ($next_next_line)) {
1499             unshift (@lines, $next_next_line);
1500           }
1501           unshift (@lines, $next_line);
1502
1503           # Our new next line might, coincidentally, begin with one of
1504           # the line-start regexps, so we temporarily turn off
1505           # sensitivity to that until we're past the line.
1506           $suppress_line_start_match = 1;
1507
1508           last;
1509         }
1510         else
1511         {
1512           $idx--;
1513         }
1514       }
1515
1516       if ($idx == 0)
1517       {
1518         # We bottomed out because the line is longer than the
1519         # available space.  But that could be because the space is
1520         # small, or because the line is longer than even the maximum
1521         # possible space.  Handle both cases below.
1522
1523         if ($length_remaining == ($max_line_length - (length ($user_indent))))
1524         {
1525           # The line is simply too long -- there is no hope of ever
1526           # breaking it nicely, so just insert it verbatim, with
1527           # appropriate padding.
1528           $this_line = "\n${left_pad_str}${this_line}";
1529         }
1530         else
1531         {
1532           # Can't break it here, but may be able to on the next round...
1533           unshift (@lines, $this_line);
1534           $length_remaining = $max_line_length - (length ($user_indent));
1535           $this_line = "\n${left_pad_str}";
1536         }
1537       }
1538     }
1539     else  # $this_len < $length_remaining, so tack on what we can.
1540     {
1541       # Leave a note for the next iteration.
1542       $length_remaining = $length_remaining - $this_len;
1543
1544       if ($this_line =~ /\.$/)
1545       {
1546         $this_line .= "  ";
1547         $length_remaining -= 2;
1548       }
1549       else  # not a sentence end
1550       {
1551         $this_line .= " ";
1552         $length_remaining -= 1;
1553       }
1554     }
1555
1556     # Unconditionally indicate that loop has run at least once.
1557     $first_time = 0;
1558
1559     $wrapped_text .= "${user_indent}${this_line}";
1560   }
1561
1562   # One last bit of padding.
1563   $wrapped_text .= "\n";
1564
1565   return $wrapped_text;
1566 }
1567
1568 # -------------------------------------
1569
1570 sub _pretty_file_list {
1571   my $self = shift;
1572
1573   my ($unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref) = @_;
1574
1575   my @qunkrefs =
1576     grep +( ( ! $_->tags_exists
1577               or
1578               ! grep exists $ignore_tags{$_}, @{$_->tags})
1579             and
1580             ( ! keys %show_tags
1581               or
1582               ( $_->tags_exists
1583                 and
1584                 grep exists $show_tags{$_}, @{$_->tags} )
1585             )
1586           ),
1587     @$qunksref;
1588
1589   my $common_dir;           # Dir prefix common to all files ('' if none)
1590
1591   # First, loop over the qunks gathering all the tag/branch names.
1592   # We'll put them all in non_unanimous_tags, and take out the
1593   # unanimous ones later.
1594  QUNKREF:
1595   foreach my $qunkref (@qunkrefs)
1596   {
1597     # Keep track of whether all the files in this commit were in the
1598     # same directory, and memorize it if so.  We can make the output a
1599     # little more compact by mentioning the directory only once.
1600     if ($Common_Dir && (scalar (@qunkrefs)) > 1)
1601     {
1602       if (! (defined ($common_dir)))
1603       {
1604         my ($base, $dir);
1605         ($base, $dir, undef) = fileparse ($qunkref->filename);
1606
1607         if ((! (defined ($dir)))  # this first case is sheer paranoia
1608             or ($dir eq '')
1609             or ($dir eq "./")
1610             or ($dir eq ".\\"))
1611         {
1612           $common_dir = '';
1613         }
1614         else
1615         {
1616           $common_dir = $dir;
1617         }
1618       }
1619       elsif ($common_dir ne '')
1620       {
1621         # Already have a common dir prefix, so how much of it can we preserve?
1622         $common_dir = &main::common_path_prefix ($qunkref->filename, $common_dir);
1623       }
1624     }
1625     else  # only one file in this entry anyway, so common dir not an issue
1626     {
1627       $common_dir = '';
1628     }
1629
1630     if (defined ($qunkref->branch)) {
1631       $all_branches->{$qunkref->branch} = 1;
1632     }
1633     if (defined ($qunkref->tags)) {
1634       foreach my $tag (@{$qunkref->tags}) {
1635         $non_unanimous_tags->{$tag} = 1;
1636       }
1637     }
1638   }
1639
1640   # Any tag held by all qunks will be printed specially... but only if
1641   # there are multiple qunks in the first place!
1642   if ((scalar (@qunkrefs)) > 1) {
1643     foreach my $tag (keys (%$non_unanimous_tags)) {
1644       my $everyone_has_this_tag = 1;
1645       foreach my $qunkref (@qunkrefs) {
1646         if ((! (defined ($qunkref->tags)))
1647             or (! (grep ($_ eq $tag, @{$qunkref->tags})))) {
1648           $everyone_has_this_tag = 0;
1649         }
1650       }
1651       if ($everyone_has_this_tag) {
1652         $unanimous_tags->{$tag} = 1;
1653         delete $non_unanimous_tags->{$tag};
1654       }
1655     }
1656   }
1657
1658   return $common_dir, \@qunkrefs;
1659 }
1660
1661 # -------------------------------------
1662
1663 sub fdatetime {
1664   my $self = shift;
1665
1666   my ($year, $mday, $mon, $wday, $hour, $min);
1667
1668   if ( @_ > 1 ) {
1669     ($year, $mday, $mon, $wday, $hour, $min) = @_;
1670   } else {
1671     my ($time) = @_;
1672     (undef, $min, $hour, $mday, $mon, $year, $wday) =
1673       $UTC_Times ? gmtime($time) : localtime($time);
1674
1675     $year += 1900;
1676     $mon  += 1;
1677     $wday  = $self->wday($wday);
1678   }
1679
1680   my $fdate = $self->fdate($year, $mon, $mday, $wday);
1681
1682   if ($Show_Times) {
1683     my $ftime = $self->ftime($hour, $min);
1684     return "$fdate $ftime";
1685   } else {
1686     return $fdate;
1687   }
1688 }
1689
1690 # -------------------------------------
1691
1692 sub fdate {
1693   my $self = shift;
1694
1695   my ($year, $mday, $mon, $wday);
1696
1697   if ( @_ > 1 ) {
1698     ($year, $mon, $mday, $wday) = @_;
1699   } else {
1700     my ($time) = @_;
1701     (undef, undef, undef, $mday, $mon, $year, $wday) =
1702       $UTC_Times ? gmtime($time) : localtime($time);
1703
1704     $year += 1900;
1705     $mon  += 1;
1706     $wday  = $self->wday($wday);
1707   }
1708
1709   return sprintf '%4u-%02u-%02u%s', $year, $mon, $mday, $wday;
1710 }
1711
1712 # -------------------------------------
1713
1714 sub ftime {
1715   my $self = shift;
1716
1717   my ($hour, $min);
1718
1719   if ( @_ > 1 ) {
1720     ($hour, $min) = @_;
1721   } else {
1722     my ($time) = @_;
1723     (undef, $min, $hour) = $UTC_Times ? gmtime($time) : localtime($time);
1724   }
1725
1726   return sprintf '%02u:%02u', $hour, $min;
1727 }
1728
1729 # ----------------------------------------------------------------------------
1730
1731 package CVS::Utils::ChangeLog::Message;
1732
1733 sub new {
1734   my $class = shift;
1735   my ($msg) = @_;
1736
1737   my %self = (msg => $msg, files => []);
1738
1739   bless \%self, $class;
1740 }
1741
1742 sub add_fileentry {
1743   my $self = shift;
1744   my ($fileentry) = @_;
1745
1746   die "Not a fileentry: $fileentry"
1747     unless $fileentry->isa('CVS::Utils::ChangeLog::FileEntry');
1748
1749   push @{$self->{files}}, $fileentry;
1750 }
1751
1752 sub files { wantarray ? @{$_[0]->{files}} : $_[0]->{files} }
1753
1754 # ----------------------------------------------------------------------------
1755
1756 package CVS::Utils::ChangeLog::FileEntry;
1757
1758 # Each revision of a file has a little data structure (a `qunk')
1759 # associated with it.  That data structure holds not only the
1760 # file's name, but any additional information about the file
1761 # that might be needed in the output, such as the revision
1762 # number, tags, branches, etc.  The reason to have these things
1763 # arranged in a data structure, instead of just appending them
1764 # textually to the file's name, is that we may want to do a
1765 # little rearranging later as we write the output.  For example,
1766 # all the files on a given tag/branch will go together, followed
1767 # by the tag in parentheses (so trunk or otherwise non-tagged
1768 # files would go at the end of the file list for a given log
1769 # message).  This rearrangement is a lot easier to do if we
1770 # don't have to reparse the text.
1771 #
1772 # A qunk looks like this:
1773 #
1774 #   {
1775 #     filename    =>    "hello.c",
1776 #     revision    =>    "1.4.3.2",
1777 #     time        =>    a timegm() return value (moment of commit)
1778 #     tags        =>    [ "tag1", "tag2", ... ],
1779 #     branch      =>    "branchname" # There should be only one, right?
1780 #     roots       =>    [ "branchtag1", "branchtag2", ... ]
1781 #     lines       =>    "+x -y" # or undefined; x and y are integers
1782 #   }
1783
1784 # Single top-level ChangeLog, or one per subdirectory?
1785 my $distributed;
1786 sub distributed { $#_ ? ($distributed = $_[1]) : $distributed; }
1787
1788 sub new {
1789   my $class = shift;
1790   my ($path, $time, $revision, $state, $lines,
1791       $branch_names, $branch_roots, $branch_numbers, $symbolic_names) = @_;
1792
1793   my %self = (time     => $time,
1794               revision => $revision,
1795               state    => $state,
1796               lines    => $lines,
1797               branch_numbers => $branch_numbers,
1798              );
1799
1800   if ( $distributed ) {
1801     @self{qw(filename dir_key)} = fileparse($path);
1802   } else {
1803     @self{qw(filename dir_key)} = ($path, './');
1804   }
1805
1806   { # Scope for $branch_prefix
1807     (my ($branch_prefix) = ($revision =~ /((?:\d+\.)+)\d+/));
1808     $branch_prefix =~ s/\.$//;
1809     if ( $branch_names->{$branch_prefix} ) {
1810       my $branch_name = $branch_names->{$branch_prefix};
1811       $self{branch}   = $branch_name;
1812       $self{branches} = [$branch_name];
1813     }
1814     while ( $branch_prefix =~ s/^(\d+(?:\.\d+\.\d+)+)\.\d+\.\d+$/$1/ ) {
1815       push @{$self{branches}}, $branch_names->{$branch_prefix}
1816         if exists $branch_names->{$branch_prefix};
1817     }
1818   }
1819
1820   # If there's anything in the @branch_roots array, then this
1821   # revision is the root of at least one branch.  We'll display
1822   # them as branch names instead of revision numbers, the
1823   # substitution for which is done directly in the array:
1824   $self{'roots'} = [ map { $branch_names->{$_} } @$branch_roots ]
1825     if @$branch_roots;
1826
1827   if ( exists $symbolic_names->{$revision} ) {
1828     $self{tags} = delete $symbolic_names->{$revision};
1829     &main::delta_check($time, $self{tags});
1830   }
1831
1832   bless \%self, $class;
1833 }
1834
1835 sub filename       { $_[0]->{filename}       }
1836 sub dir_key        { $_[0]->{dir_key}        }
1837 sub revision       { $_[0]->{revision}       }
1838 sub branch         { $_[0]->{branch}         }
1839 sub state          { $_[0]->{state}          }
1840 sub lines          { $_[0]->{lines}          }
1841 sub roots          { $_[0]->{roots}          }
1842 sub branch_numbers { $_[0]->{branch_numbers} }
1843
1844 sub tags        { $_[0]->{tags}     }
1845 sub tags_exists {
1846   exists $_[0]->{tags};
1847 }
1848
1849 # This may someday be used in a more sophisticated calculation of what other
1850 # files are involved in this commit.  For now, we don't use it much except for
1851 # delta mode, because the common-commit-detection algorithm is hypothesized to
1852 # be "good enough" as it stands.
1853 sub time     { $_[0]->{time}     }
1854
1855 # ----------------------------------------------------------------------------
1856
1857 package CVS::Utils::ChangeLog::EntrySetBuilder;
1858
1859 use File::Basename qw( fileparse );
1860 use Time::Local    qw( timegm );
1861
1862 use constant MAILNAME => "/etc/mailname";
1863
1864 # In 'cvs log' output, one long unbroken line of equal signs separates files:
1865 use constant FILE_SEPARATOR => '=' x 77;# . "\n";
1866 # In 'cvs log' output, a shorter line of dashes separates log messages within
1867 # a file:
1868 use constant REV_SEPARATOR  => '-' x 28;# . "\n";
1869
1870 use constant EMPTY_LOG_MESSAGE => '*** empty log message ***';
1871
1872 # -------------------------------------
1873
1874 sub new {
1875   my ($proto) = @_;
1876   my $class = ref $proto || $proto;
1877
1878   my $poobah  = CVS::Utils::ChangeLog::EntrySet->new;
1879   my $self = bless +{ grand_poobah => $poobah }, $class;
1880
1881   $self->clear_file;
1882   $self->maybe_read_user_map_file;
1883   return $self;
1884 }
1885
1886 # -------------------------------------
1887
1888 sub clear_msg {
1889   my ($self) = @_;
1890
1891   # Make way for the next message
1892   undef $self->{rev_msg};
1893   undef $self->{rev_time};
1894   undef $self->{rev_revision};
1895   undef $self->{rev_author};
1896   undef $self->{rev_state};
1897   undef $self->{lines};
1898   $self->{rev_branch_roots} = [];       # For showing which files are branch
1899                                         # ancestors.
1900   $self->{collecting_symbolic_names} = 0;
1901 }
1902
1903 # -------------------------------------
1904
1905 sub clear_file {
1906   my ($self) = @_;
1907   $self->clear_msg;
1908
1909   undef $self->{filename};
1910   $self->{branch_names}   = +{};        # We'll grab branch names while we're
1911                                         # at it.
1912   $self->{branch_numbers} = +{};        # Save some revisions for
1913                                         # @Follow_Branches
1914   $self->{symbolic_names} = +{};        # Where tag names get stored.
1915 }
1916
1917 # -------------------------------------
1918
1919 sub grand_poobah { $_[0]->{grand_poobah} }
1920
1921 # -------------------------------------
1922
1923 sub read_changelog {
1924   my ($self, $command) = @_;
1925
1926 #  my $grand_poobah = CVS::Utils::ChangeLog::EntrySet->new;
1927
1928   if (! $Input_From_Stdin) {
1929     my $Log_Source_Command = join(' ', @$command);
1930     &main::debug ("(run \"${Log_Source_Command}\")\n");
1931     open (LOG_SOURCE, "$Log_Source_Command |")
1932         or die "unable to run \"${Log_Source_Command}\"";
1933   }
1934   else {
1935     open (LOG_SOURCE, "-") or die "unable to open stdin for reading";
1936   }
1937
1938   binmode LOG_SOURCE;
1939
1940  XX_Log_Source:
1941   while (<LOG_SOURCE>) {
1942     chomp;
1943
1944     # If on a new file and don't see filename, skip until we find it, and
1945     # when we find it, grab it.
1946     if ( ! defined $self->{filename} ) {
1947       $self->read_file_path($_);
1948     } elsif ( /^symbolic names:$/ ) {
1949       $self->{collecting_symbolic_names} = 1;
1950     } elsif ( $self->{collecting_symbolic_names} ) {
1951       $self->read_symbolic_name($_);
1952     } elsif ( $_ eq FILE_SEPARATOR and ! defined $self->{rev_revision} ) {
1953       $self->clear_file;
1954     } elsif ( ! defined $self->{rev_revision} ) {
1955         # If have file name, but not revision, and see revision, then grab
1956         # it.  (We collect unconditionally, even though we may or may not
1957         # ever use it.)
1958       $self->read_revision($_);
1959     } elsif ( ! defined $self->{rev_time} ) { # and /^date: /) {
1960       $self->read_date_author_and_state($_);
1961     } elsif ( /^branches:\s+(.*);$/ ) {
1962       $self->read_branches($1);
1963     } elsif ( ! ( $_ eq FILE_SEPARATOR or $_ eq REV_SEPARATOR ) ) {
1964       # If have file name, time, and author, then we're just grabbing
1965       # log message texts:
1966       $self->{rev_msg} .= $_ . "\n";   # Normally, just accumulate the message...
1967     } else {
1968       if ( ! $self->{rev_msg}
1969            or $self->{rev_msg} =~ /^\s*(\.\s*)?$/
1970            or index($self->{rev_msg}, EMPTY_LOG_MESSAGE) > -1 ) {
1971         # ... until a msg separator is encountered:
1972         # Ensure the message contains something:
1973         $self->clear_msg
1974           if $Prune_Empty_Msgs;
1975         $self->{rev_msg} = "[no log message]\n";
1976       }
1977
1978       $self->add_file_entry;
1979
1980       if ( $_ eq FILE_SEPARATOR ) {
1981         $self->clear_file;
1982       } else {
1983         $self->clear_msg;
1984       }
1985     }
1986   }
1987
1988   close LOG_SOURCE
1989     or die sprintf("Problem reading log input (exit/signal/core: %d/%d/%d)\n",
1990                    $? >> 8, $? & 127, $? & 128);
1991   return;
1992 }
1993
1994 # -------------------------------------
1995
1996 sub add_file_entry {
1997   $_[0]->grand_poobah->add_fileentry(@{$_[0]}{qw(filename rev_time rev_revision
1998                                                  rev_state lines branch_names
1999                                                  rev_branch_roots
2000                                                  branch_numbers
2001                                                  symbolic_names
2002                                                  rev_author rev_msg)});
2003 }
2004
2005 # -------------------------------------
2006
2007 sub maybe_read_user_map_file {
2008   my ($self) = @_;
2009
2010   my %expansions;
2011   my $User_Map_Input;
2012
2013   if ($User_Map_File)
2014   {
2015     if ( $User_Map_File =~ m{^([-\w\@+=.,\/]+):([-\w\@+=.,\/:]+)} and
2016          !-f $User_Map_File )
2017     {
2018       my $rsh = (exists $ENV{'CVS_RSH'} ? $ENV{'CVS_RSH'} : 'ssh');
2019       $User_Map_Input = "$rsh $1 'cat $2' |";
2020       &main::debug ("(run \"${User_Map_Input}\")\n");
2021     }
2022     else
2023     {
2024       $User_Map_Input = "<$User_Map_File";
2025     }
2026
2027     open (MAPFILE, $User_Map_Input)
2028         or die ("Unable to open $User_Map_File ($!)");
2029
2030     while (<MAPFILE>)
2031     {
2032       next if /^\s*#/;  # Skip comment lines.
2033       next if not /:/;  # Skip lines without colons.
2034
2035       # It is now safe to split on ':'.
2036       my ($username, $expansion) = split ':';
2037       chomp $expansion;
2038       $expansion =~ s/^'(.*)'$/$1/;
2039       $expansion =~ s/^"(.*)"$/$1/;
2040
2041       # If it looks like the expansion has a real name already, then
2042       # we toss the username we got from CVS log.  Otherwise, keep
2043       # it to use in combination with the email address.
2044
2045       if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
2046         # Also, add angle brackets if none present
2047         if (! ($expansion =~ /<\S+@\S+>/)) {
2048           $expansions{$username} = "$username <$expansion>";
2049         }
2050         else {
2051           $expansions{$username} = "$username $expansion";
2052         }
2053       }
2054       else {
2055         $expansions{$username} = $expansion;
2056       }
2057     } # fi ($User_Map_File)
2058
2059     close (MAPFILE);
2060   }
2061
2062   if (defined $User_Passwd_File)
2063   {
2064     if ( ! defined $Domain ) {
2065       if ( -e MAILNAME ) {
2066         chomp($Domain = slurp_file(MAILNAME));
2067       } else {
2068       MAILDOMAIN_CMD:
2069         for ([qw(hostname -d)], 'dnsdomainname', 'domainname') {
2070           my ($text, $exit, $sig, $core) = run_ext($_);
2071           if ( $exit == 0 && $sig == 0 && $core == 0 ) {
2072             chomp $text;
2073             if ( length $text ) {
2074               $Domain = $text;
2075               last MAILDOMAIN_CMD;
2076             }
2077           }
2078         }
2079       }
2080     }
2081
2082     die "No mail domain found\n"
2083       unless defined $Domain;
2084
2085     open (MAPFILE, "<$User_Passwd_File")
2086         or die ("Unable to open $User_Passwd_File ($!)");
2087     while (<MAPFILE>)
2088     {
2089       # all lines are valid
2090       my ($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
2091       my $expansion = '';
2092       ($expansion) = split (',', $gecos)
2093         if defined $gecos && length $gecos;
2094
2095       my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
2096       $expansions{$username} = "$expansion <$mailname>";
2097     }
2098     close (MAPFILE);
2099   }
2100
2101  $self->{usermap} = \%expansions;
2102 }
2103
2104 # -------------------------------------
2105
2106 sub read_file_path {
2107   my ($self, $line) = @_;
2108
2109   my $path;
2110
2111   if ( $line =~ /^Working file: (.*)/ ) {
2112     $path = $1;
2113   } elsif ( defined $RCS_Root
2114             and
2115             $line =~ m|^RCS file: $RCS_Root[/\\](.*),v$| ) {
2116     $path = $1;
2117     $path =~ s!Attic/!!;
2118   } else {
2119     return;
2120   }
2121
2122   if ( @Ignore_Files ) {
2123     my $base;
2124     ($base, undef, undef) = fileparse($path);
2125
2126     my $xpath = $Case_Insensitive ? lc($path) : $path;
2127     if ( grep index($path, $_) > -1, @Ignore_Files ) {
2128       return;
2129     }
2130   }
2131
2132   $self->{filename} = $path;
2133   return;
2134 }
2135
2136 # -------------------------------------
2137
2138 sub read_symbolic_name {
2139   my ($self, $line) = @_;
2140
2141   # All tag names are listed with whitespace in front in cvs log
2142   # output; so if see non-whitespace, then we're done collecting.
2143   if ( /^\S/ ) {
2144     $self->{collecting_symbolic_names} = 0;
2145     return;
2146   } else {
2147     # we're looking at a tag name, so parse & store it
2148
2149     # According to the Cederqvist manual, in node "Tags", tag names must start
2150     # with an uppercase or lowercase letter and can contain uppercase and
2151     # lowercase letters, digits, `-', and `_'.  However, it's not our place to
2152     # enforce that, so we'll allow anything CVS hands us to be a tag:
2153     my ($tag_name, $tag_rev) = ($line =~ /^\s+([^:]+): ([\d.]+)$/);
2154
2155     # A branch number either has an odd number of digit sections
2156     # (and hence an even number of dots), or has ".0." as the
2157     # second-to-last digit section.  Test for these conditions.
2158     my $real_branch_rev = '';
2159     if ( $tag_rev =~ /^(\d+\.\d+\.)+\d+$/             # Even number of dots...
2160          and
2161          $tag_rev !~ /^(1\.)+1$/ ) {                  # ...but not "1.[1.]1"
2162       $real_branch_rev = $tag_rev;
2163     } elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) {  # Has ".0."
2164       $real_branch_rev = $1 . $3;
2165     }
2166
2167     # If we got a branch, record its number.
2168     if ( $real_branch_rev ) {
2169       $self->{branch_names}->{$real_branch_rev} = $tag_name;
2170       $self->{branch_numbers}->{$tag_name} = $real_branch_rev;
2171     } else {
2172       # Else it's just a regular (non-branch) tag.
2173       push @{$self->{symbolic_names}->{$tag_rev}}, $tag_name;
2174     }
2175   }
2176
2177   $self->{collecting_symbolic_names} = 1;
2178   return;
2179 }
2180
2181 # -------------------------------------
2182
2183 sub read_revision {
2184   my ($self, $line) = @_;
2185
2186   my ($revision) = ( $line =~ /^revision (\d+\.[\d.]+)/ );
2187
2188   return
2189     unless $revision;
2190
2191   $self->{rev_revision} = $revision;
2192   return;
2193 }
2194
2195 # -------------------------------------
2196
2197 { # Closure over %gecos_warned
2198 my %gecos_warned;
2199 sub read_date_author_and_state {
2200   my ($self, $line) = @_;
2201
2202   my ($time, $author, $state) = $self->parse_date_author_and_state($line);
2203
2204   if ( defined($self->{usermap}->{$author}) and $self->{usermap}->{$author} ) {
2205     $author = $self->{usermap}->{$author};
2206   } elsif ( defined $Domain or $Gecos == 1 ) {
2207     my $email = $author;
2208     $email = $author."@".$Domain
2209       if defined $Domain && $Domain ne '';
2210
2211     my $pw = getpwnam($author);
2212     my ($fullname, $office, $workphone, $homephone);
2213     if ( defined $pw ) {
2214       ($fullname, $office, $workphone, $homephone) =
2215         split /\s*,\s*/, $pw->gecos;
2216     } else {
2217       warn "Couldn't find gecos info for author '$author'\n"
2218         unless $gecos_warned{$author}++;
2219       $fullname = '';
2220     }
2221     for (grep defined, $fullname, $office, $workphone, $homephone) {
2222       s/&/ucfirst(lc($pw->name))/ge;
2223     }
2224     $author = $fullname . "  <" . $email . ">"
2225       if $fullname ne '';
2226   }
2227
2228   $self->{rev_state}  = $state;
2229   $self->{rev_time}   = $time;
2230   $self->{rev_author} = $author;
2231   return;
2232 }
2233 }
2234
2235 # -------------------------------------
2236
2237 sub read_branches {
2238   # A "branches: ..." line here indicates that one or more branches
2239   # are rooted at this revision.  If we're showing branches, then we
2240   # want to show that fact as well, so we collect all the branches
2241   # that this is the latest ancestor of and store them in
2242   # $self->[rev_branch_roots}.  Just for reference, the format of the
2243   # line we're seeing at this point is:
2244   #
2245   #    branches:  1.5.2;  1.5.4;  ...;
2246   #
2247   # Okay, here goes:
2248   my ($self, $line) = @_;
2249
2250   # Ugh.  This really bothers me.  Suppose we see a log entry
2251   # like this:
2252   #
2253   #    ----------------------------
2254   #    revision 1.1
2255   #    date: 1999/10/17 03:07:38;  author: jrandom;  state: Exp;
2256   #    branches:  1.1.2;
2257   #    Intended first line of log message begins here.
2258   #    ----------------------------
2259   #
2260   # The question is, how we can tell the difference between that
2261   # log message and a *two*-line log message whose first line is
2262   #
2263   #    "branches:  1.1.2;"
2264   #
2265   # See the problem?  The output of "cvs log" is inherently
2266   # ambiguous.
2267   #
2268   # For now, we punt: we liberally assume that people don't
2269   # write log messages like that, and just toss a "branches:"
2270   # line if we see it but are not showing branches.  I hope no
2271   # one ever loses real log data because of this.
2272   if ( $Show_Branches ) {
2273     $line =~ s/(1\.)+1;|(1\.)+1$//;  # ignore the trivial branch 1.1.1
2274     $self->{rev_branch_roots} = [split /;\s+/, $line]
2275       if length $line;
2276   }
2277 }
2278
2279 # -------------------------------------
2280
2281 sub parse_date_author_and_state {
2282   my ($self, $line) = @_;
2283   # Parses the date/time and author out of a line like:
2284   #
2285   # date: 1999/02/19 23:29:05;  author: apharris;  state: Exp;
2286
2287   my ($year, $mon, $mday, $hours, $min, $secs, $author, $state, $rest) =
2288     $line =~
2289       m!(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+);\s+
2290         author:\s+([^;]+);\s+state:\s+([^;]+);(.*)!x
2291     or  die "Couldn't parse date ``$line''";
2292   die "Bad date or Y2K issues"
2293     unless $year > 1969 and $year < 2258;
2294   # Kinda arbitrary, but useful as a sanity check
2295   my $time = timegm($secs, $min, $hours, $mday, $mon-1, $year-1900);
2296   if ( $rest =~ m!\s+lines:\s+(.*)! ) {
2297     $self->{lines} = $1;
2298   }
2299
2300   return $time, $author, $state;
2301 }
2302
2303 # Subrs ----------------------------------------------------------------------
2304
2305 package main;
2306
2307 sub delta_check {
2308   my ($time, $tags) = @_;
2309
2310   # If we're in 'delta' mode, update the latest observed times for the
2311   # beginning and ending tags, and when we get around to printing output, we
2312   # will simply restrict ourselves to that timeframe...
2313   return
2314     unless $Delta_Mode;
2315
2316   $Delta_StartTime = $time
2317     if $time > $Delta_StartTime and grep { $_ eq $Delta_From } @$tags;
2318
2319   $Delta_EndTime = $time
2320     if $time > $Delta_EndTime and grep { $_ eq $Delta_To } @$tags;
2321 }
2322
2323 sub run_ext {
2324   my ($cmd) = @_;
2325   $cmd = [$cmd]
2326     unless ref $cmd;
2327   local $" = ' ';
2328   my $out = qx"@$cmd 2>&1";
2329   my $rv  = $?;
2330   my ($sig, $core, $exit) = ($? & 127, $? & 128, $? >> 8);
2331   return $out, $exit, $sig, $core;
2332 }
2333
2334 # -------------------------------------
2335
2336 # If accumulating, grab the boundary date from pre-existing ChangeLog.
2337 sub maybe_grab_accumulation_date {
2338   if (! $Cumulative || $Update) {
2339     return '';
2340   }
2341
2342   # else
2343
2344   open (LOG, "$Log_File_Name")
2345       or die ("trouble opening $Log_File_Name for reading ($!)");
2346
2347   my $boundary_date;
2348   while (<LOG>)
2349   {
2350     if (/^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/)
2351     {
2352       $boundary_date = "$1";
2353       last;
2354     }
2355   }
2356
2357   close (LOG);
2358
2359   # convert time from utc to local timezone if the ChangeLog has
2360   # dates/times in utc
2361   if ($UTC_Times && $boundary_date)
2362   {
2363     # convert the utc time to a time value
2364     my ($year,$mon,$mday,$hour,$min) = $boundary_date =~
2365       m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#;
2366     my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900);
2367     # print the timevalue in the local timezone
2368     my ($ignore,$wday);
2369     ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time);
2370     $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u",
2371                             $year+1900,$mon+1,$mday,$hour,$min);
2372   }
2373
2374   return $boundary_date;
2375 }
2376
2377 # -------------------------------------
2378
2379 # Fills up a ChangeLog structure in the current directory.
2380 sub derive_changelog {
2381   my ($command) = @_;
2382
2383   # See "The Plan" above for a full explanation.
2384
2385   # Might be adding to an existing ChangeLog
2386   my $accumulation_date = maybe_grab_accumulation_date;
2387   if ($accumulation_date) {
2388     # Insert -d immediately after 'cvs log'
2389     my $Log_Date_Command = "-d\'>${accumulation_date}\'";
2390
2391     my ($log_index) = grep $command->[$_] eq 'log', 0..$#$command;
2392     splice @$command, $log_index+1, 0, $Log_Date_Command;
2393     &debug ("(adding log msg starting from $accumulation_date)\n");
2394   }
2395
2396 #  output_changelog(read_changelog($command));
2397   my $builder = CVS::Utils::ChangeLog::EntrySetBuilder->new;
2398   $builder->read_changelog($command);
2399   $builder->grand_poobah->output_changelog;
2400 }
2401
2402 # -------------------------------------
2403
2404 sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
2405
2406 # -------------------------------------
2407
2408 sub common_path_prefix {
2409   my ($path1, $path2) = @_;
2410
2411   # For compatibility (with older versions of cvs2cl.pl), we think in UN*X
2412   # terms, and mould windoze filenames to match.  Is this really appropriate?
2413   # If a file is checked in under UN*X, and cvs log run on windoze, which way
2414   # do the path separators slope?  Can we use fileparse as per the local
2415   # conventions?  If so, we should probably have a user option to specify an
2416   # OS to emulate to handle stdin-fed logs.  If we did this, we could avoid
2417   # the nasty \-/ transmogrification below.
2418
2419   my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2;
2420
2421   # Transmogrify Windows filenames to look like Unix.
2422   # (It is far more likely that someone is running cvs2cl.pl under
2423   # Windows than that they would genuinely have backslashes in their
2424   # filenames.)
2425   tr!\\!/!
2426     for $dir1, $dir2;
2427
2428   my ($accum1, $accum2, $last_common_prefix) = ('') x 3;
2429
2430   my @path1 = grep length($_), split qr!/!, $dir1;
2431   my @path2 = grep length($_), split qr!/!, $dir2;
2432
2433   my @common_path;
2434   for (0..min($#path1,$#path2)) {
2435     if ( $path1[$_] eq $path2[$_]) {
2436       push @common_path, $path1[$_];
2437     } else {
2438       last;
2439     }
2440   }
2441
2442   return join '', map "$_/", @common_path;
2443 }
2444
2445 # -------------------------------------
2446 sub parse_options {
2447   # Check this internally before setting the global variable.
2448   my $output_file;
2449
2450   # If this gets set, we encountered unknown options and will exit at
2451   # the end of this subroutine.
2452   my $exit_with_admonishment = 0;
2453
2454   # command to generate the log
2455   my @log_source_command = qw( cvs log );
2456
2457   my (@Global_Opts, @Local_Opts);
2458
2459   Getopt::Long::Configure(qw( bundling permute no_getopt_compat
2460                               pass_through no_ignore_case ));
2461   GetOptions('help|usage|h'   => \$Print_Usage,
2462              'debug'          => \$Debug,        # unadvertised option, heh
2463              'version'        => \$Print_Version,
2464
2465              'file|f=s'       => \$output_file,
2466              'accum'          => \$Cumulative,
2467              'update'         => \$Update,
2468              'fsf'            => \$FSF_Style,
2469              'rcs=s'          => \$RCS_Root,
2470              'usermap|U=s'    => \$User_Map_File,
2471              'gecos'          => \$Gecos,
2472              'domain=s'       => \$Domain,
2473              'passwd=s'       => \$User_Passwd_File,
2474              'window|W=i'     => \$Max_Checkin_Duration,
2475              'chrono'         => \$Chronological_Order,
2476              'ignore|I=s'     => \@Ignore_Files,
2477              'case-insensitive|C' => \$Case_Insensitive,
2478              'regexp|R=s'     => \$Regexp_Gate,
2479              'stdin'          => \$Input_From_Stdin,
2480              'stdout'         => \$Output_To_Stdout,
2481              'distributed|d'  => sub { CVS::Utils::ChangeLog::FileEntry->distributed(1) },
2482              'prune|P'        => \$Prune_Empty_Msgs,
2483              'no-wrap'        => \$No_Wrap,
2484              'gmt|utc'        => \$UTC_Times,
2485              'day-of-week|w'  => \$Show_Day_Of_Week,
2486              'revisions|r'    => \$Show_Revisions,
2487              'show-dead'      => \$Show_Dead,
2488              'tags|t'         => \$Show_Tags,
2489              'tagdates|T'     => \$Show_Tag_Dates,
2490              'branches|b'     => \$Show_Branches,
2491              'follow|F=s'     => \@Follow_Branches,
2492              'follow-only=s'  => \@Follow_Only,
2493              'xml-encoding=s' => \$XML_Encoding,
2494              'xml'            => \$XML_Output,
2495              'noxmlns'        => \$No_XML_Namespace,
2496              'no-xml-iso-date' => \$No_XML_ISO_Date,
2497              'no-ancestors'   => \$No_Ancestors,
2498              'lines-modified' => \$Show_Lines_Modified,
2499
2500              'no-indent'    => sub {
2501                $Indent = '';
2502              },
2503
2504              'summary'      => sub {
2505                $Summary = 1;
2506                $After_Header = "\n\n"; # Summary implies --separate-header
2507              },
2508
2509              'no-times'     => sub {
2510                $Show_Times = 0;
2511              },
2512
2513              'no-hide-branch-additions' => sub {
2514                $Hide_Branch_Additions = 0;
2515              },
2516
2517              'no-common-dir'  => sub {
2518                $Common_Dir = 0;
2519              },
2520
2521              'ignore-tag=s'   => sub {
2522                $ignore_tags{$_[1]} = 1;
2523              },
2524
2525              'show-tag=s'     => sub {
2526                $show_tags{$_[1]} = 1;
2527              },
2528
2529              # Deliberately undocumented.  This is not a public interface, and
2530              # may change/disappear at any time.
2531              'test-code=s'    => \$TestCode,
2532
2533              'delta=s'        => sub {
2534                my $arg = $_[1];
2535                if ( $arg =~
2536                     /^([A-Za-z][A-Za-z0-9_\-\]\[]*):([A-Za-z][A-Za-z0-9_\-\]\[]*)$/ ) {
2537                  $Delta_From = $1;
2538                  $Delta_To = $2;
2539                  $Delta_Mode = 1;
2540                } else {
2541                  die "--delta FROM_TAG:TO_TAG is what you meant to say.\n";
2542                }
2543              },
2544
2545              'FSF'             => sub {
2546                $Show_Times = 0;
2547                $Common_Dir = 0;
2548                $No_Extra_Indent = 1;
2549                $Indent = "\t";
2550              },
2551
2552              'header=s'        => sub {
2553                my $narg = $_[1];
2554                $ChangeLog_Header = &slurp_file ($narg);
2555                if (! defined ($ChangeLog_Header)) {
2556                  $ChangeLog_Header = '';
2557                }
2558              },
2559
2560              'global-opts|g=s' => sub {
2561                my $narg = $_[1];
2562                push @Global_Opts, $narg;
2563                splice @log_source_command, 1, 0, $narg;
2564              },
2565
2566              'log-opts|l=s' => sub {
2567                my $narg = $_[1];
2568                push @Local_Opts, $narg;
2569                push @log_source_command, $narg;
2570              },
2571
2572              'mailname=s'   => sub {
2573                my $narg = $_[1];
2574                warn "--mailname is deprecated; please use --domain instead\n";
2575                $Domain = $narg;
2576              },
2577
2578              'separate-header|S' => sub {
2579                $After_Header = "\n\n";
2580                $No_Extra_Indent = 1;
2581              },
2582
2583              'group-within-date' => sub {
2584                $GroupWithinDate = 1;
2585                $Show_Times = 0;
2586              },
2587
2588              'hide-filenames' => sub {
2589                $Hide_Filenames = 1;
2590                $After_Header = '';
2591              },
2592             )
2593     or die "options parsing failed\n";
2594
2595   push @log_source_command, map "'$_'", @ARGV;
2596
2597   ## Check for contradictions...
2598
2599   if ($Output_To_Stdout && CVS::Utils::ChangeLog::FileEntry->distributed) {
2600     print STDERR "cannot pass both --stdout and --distributed\n";
2601     $exit_with_admonishment = 1;
2602   }
2603
2604   if ($Output_To_Stdout && $output_file) {
2605     print STDERR "cannot pass both --stdout and --file\n";
2606     $exit_with_admonishment = 1;
2607   }
2608
2609   if ($Input_From_Stdin && @Global_Opts) {
2610     print STDERR "cannot pass both --stdin and -g\n";
2611     $exit_with_admonishment = 1;
2612   }
2613
2614   if ($Input_From_Stdin && @Local_Opts) {
2615     print STDERR "cannot pass both --stdin and -l\n";
2616     $exit_with_admonishment = 1;
2617   }
2618
2619   if ($XML_Output && $Cumulative) {
2620     print STDERR "cannot pass both --xml and --accum\n";
2621     $exit_with_admonishment = 1;
2622   }
2623
2624   # Other consistency checks and option-driven logic
2625
2626   # Bleargh.  Compensate for a deficiency of custom wrapping.
2627   if ( ($After_Header ne " ") and $FSF_Style ) {
2628     $After_Header .= "\t";
2629   }
2630
2631   @Ignore_Files = map lc, @Ignore_Files
2632     if $Case_Insensitive;
2633
2634   # Or if any other error message has already been printed out, we
2635   # just leave now:
2636   if ($exit_with_admonishment) {
2637     &usage ();
2638     exit (1);
2639   }
2640   elsif ($Print_Usage) {
2641     &usage ();
2642     exit (0);
2643   }
2644   elsif ($Print_Version) {
2645     &version ();
2646     exit (0);
2647   }
2648
2649   ## Else no problems, so proceed.
2650
2651   if ($output_file) {
2652     $Log_File_Name = $output_file;
2653   }
2654
2655   return \@log_source_command;
2656 }
2657
2658 # -------------------------------------
2659
2660 sub slurp_file {
2661   my $filename = shift || die ("no filename passed to slurp_file()");
2662   my $retstr;
2663
2664   open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");
2665   local $/ = undef;
2666   $retstr = <SLURPEE>;
2667   close (SLURPEE);
2668   return $retstr;
2669 }
2670
2671 # -------------------------------------
2672
2673 sub debug {
2674   if ($Debug) {
2675     my $msg = shift;
2676     print STDERR $msg;
2677   }
2678 }
2679
2680 # -------------------------------------
2681
2682 sub version {
2683   print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
2684 }
2685
2686 # -------------------------------------
2687
2688 sub usage {
2689   &version ();
2690
2691   eval "use Pod::Usage qw( pod2usage )";
2692
2693    if ( $@ ) {
2694     print <<'END';
2695
2696 * Pod::Usage was not found.  The formatting may be suboptimal.  Consider
2697   upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and
2698   versions of perl prior to 5.6 are getting rather rusty, now.  Alternatively,
2699   install Pod::Usage direct from CPAN.
2700 END
2701
2702     local $/ = undef;
2703     my $message = <DATA>;
2704     $message =~ s/^=(head1|item) //gm;
2705     $message =~ s/^=(over|back).*\n//gm;
2706     $message =~ s/\n{3,}/\n\n/g;
2707     print $message;
2708   } else {
2709     print "\n";
2710     pod2usage( -exitval => 'NOEXIT',
2711                -verbose => 1,
2712                -output  => \*STDOUT,
2713              );
2714   }
2715
2716   return;
2717 }
2718
2719 # Main -----------------------------------------------------------------------
2720
2721 my $log_source_command = parse_options;
2722 if ( defined $TestCode ) {
2723   eval $TestCode;
2724   die "Eval failed: '$@'\n"
2725     if $@;
2726 } else {
2727   derive_changelog($log_source_command);
2728 }
2729
2730 __DATA__
2731
2732 =head1 NAME
2733
2734 cvs2cl.pl - convert cvs log messages to changelogs
2735
2736 =head1 SYNOPSIS
2737
2738 B<cvs2cl> [I<options>] [I<FILE1> [I<FILE2> ...]]
2739
2740 =head1 DESCRIPTION
2741
2742 cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by
2743 running "cvs log" and parsing the output. Duplicate log messages get
2744 unified in the Right Way.
2745
2746 The default output of cvs2cl is designed to be compact, formally unambiguous,
2747 but still easy for humans to read.  It should be largely self-explanatory; the
2748 one abbreviation that might not be obvious is "utags".  That stands for
2749 "universal tags" -- a universal tag is one held by all the files in a given
2750 change entry.
2751
2752 If you need output that's easy for a program to parse, use the B<--xml> option.
2753 Note that with XML output, just about all available information is included
2754 with each change entry, whether you asked for it or not, on the theory that
2755 your parser can ignore anything it's not looking for.
2756
2757 If filenames are given as arguments cvs2cl only shows log information for the
2758 named files.
2759
2760 =head1 OPTIONS
2761
2762 =over 4
2763
2764 =item B<-h>, B<-help>, B<--help>, B<-?>
2765
2766 Show a short help and exit.
2767
2768 =item B<--version>
2769
2770 Show version and exit.
2771
2772 =item B<-r>, B<--revisions>
2773
2774 Show revision numbers in output.
2775
2776 =item B<-b>, B<--branches>
2777
2778 Show branch names in revisions when possible.
2779
2780 =item B<-t>, B<--tags>
2781
2782 Show tags (symbolic names) in output.
2783
2784 =item B<-T>, B<--tagdates>
2785
2786 Show tags in output on their first occurance.
2787
2788 =item B<--show-dead>
2789
2790 Show dead files.
2791
2792 =item B<--stdin>
2793
2794 Read from stdin, don't run cvs log.
2795
2796 =item B<--stdout>
2797
2798 Output to stdout not to ChangeLog.
2799
2800 =item B<-d>, B<--distributed>
2801
2802 Put ChangeLogs in subdirs.
2803
2804 =item B<-f> I<FILE>, B<--file> I<FILE>
2805
2806 Write to I<FILE> instead of ChangeLog.
2807
2808 =item B<--fsf>
2809
2810 Use this if log data is in FSF ChangeLog style.
2811
2812 =item B<--FSF>
2813
2814 Attempt strict FSF-standard compatible output.
2815
2816 =item B<-W> I<SECS>, B<--window> I<SECS>
2817
2818 Window of time within which log entries unify.
2819
2820 =item -B<U> I<UFILE>, B<--usermap> I<UFILE>
2821
2822 Expand usernames to email addresses from I<UFILE>.
2823
2824 =item B<--passwd> I<PASSWORDFILE>
2825
2826 Use system passwd file for user name expansion.  If no mail domain is provided
2827 (via B<--domain>), it tries to read one from B</etc/mailname>, output of B<hostname
2828 -d>, B<dnsdomainname>, or B<domain-name>.  cvs2cl exits with an error if none of
2829 those options is successful. Use a domain of '' to prevent the addition of a
2830 mail domain.
2831
2832 =item B<--domain> I<DOMAIN>
2833
2834 Domain to build email addresses from.
2835
2836 =item B<--gecos>
2837
2838 Get user information from GECOS data.
2839
2840 =item B<-R> I<REGEXP>, B<--regexp> I<REGEXP>
2841
2842 Include only entries that match I<REGEXP>.  This option may be used multiple
2843 times.
2844
2845 =item B<-I> I<REGEXP>, B<--ignore> I<REGEXP>
2846
2847 Ignore files whose names match I<REGEXP>.  This option may be used multiple
2848 times.
2849
2850 =item B<-C>, B<--case-insensitive>
2851
2852 Any regexp matching is done case-insensitively.
2853
2854 =item B<-F> I<BRANCH>, B<--follow> I<BRANCH>
2855
2856 Show only revisions on or ancestral to I<BRANCH>.
2857
2858 =item B<--follow-only> I<BRANCH>
2859
2860 Like --follow, but sub-branches are not followed.
2861
2862 =item B<--no-ancestors>
2863
2864 When using B<-F>, only track changes since the I<BRANCH> started.
2865
2866 =item B<--no-hide-branch-additions>
2867
2868 By default, entries generated by cvs for a file added on a branch (a dead 1.1
2869 entry) are not shown.  This flag reverses that action.
2870
2871 =item B<-S>, B<--separate-header>
2872
2873 Blank line between each header and log message.
2874
2875 =item B<--summary>
2876
2877 Add CVS change summary information.
2878
2879 =item B<--no-wrap>
2880
2881 Don't auto-wrap log message (recommend B<-S> also).
2882
2883 =item B<--no-indent>
2884
2885 Don't indent log message
2886
2887 =item B<--gmt>, B<--utc>
2888
2889 Show times in GMT/UTC instead of local time.
2890
2891 =item B<--accum>
2892
2893 Add to an existing ChangeLog (incompatible with B<--xml>).
2894
2895 =item B<-w>, B<--day-of-week>
2896
2897 Show day of week.
2898
2899 =item B<--no-times>
2900
2901 Don't show times in output.
2902
2903 =item B<--chrono>
2904
2905 Output log in chronological order (default is reverse chronological order).
2906
2907 =item B<--header> I<FILE>
2908
2909 Get ChangeLog header from I<FILE> ("B<->" means stdin).
2910
2911 =item B<--xml>
2912
2913 Output XML instead of ChangeLog format.
2914
2915 =item B<--xml-encoding> I<ENCODING.>
2916
2917 Insert encoding clause in XML header.
2918
2919 =item B<--noxmlns>
2920
2921 Don't include xmlns= attribute in root element.
2922
2923 =item B<--hide-filenames>
2924
2925 Don't show filenames (ignored for XML output).
2926
2927 =item B<--no-common-dir>
2928
2929 Don't shorten directory names from filenames.
2930
2931 =item B<--rcs> I<CVSROOT>
2932
2933 Handle filenames from raw RCS, for instance those produced by "cvs rlog"
2934 output, stripping the prefix I<CVSROOT>.
2935
2936 =item B<-P>, B<--prune>
2937
2938 Don't show empty log messages.
2939
2940 =item B<--lines-modified>
2941
2942 Output the number of lines added and the number of lines removed for
2943 each checkin (if applicable). At the moment, this only affects the
2944 XML output mode.
2945
2946 =item B<--ignore-tag> I<TAG>
2947
2948 Ignore individual changes that are associated with a given tag.
2949 May be repeated, if so, changes that are associated with any of
2950 the given tags are ignored.
2951
2952 =item B<--show-tag> I<TAG>
2953
2954 Log only individual changes that are associated with a given
2955 tag.  May be repeated, if so, changes that are associated with
2956 any of the given tags are logged.
2957
2958 =item B<--delta> I<FROM_TAG>B<:>I<TO_TAG>
2959
2960 Attempt a delta between two tags (since I<FROM_TAG> up to and
2961 including I<TO_TAG>).  The algorithm is a simple date-based one
2962 (this is a hard problem) so results are imperfect.
2963
2964 =item B<-g> I<OPTS>, B<--global-opts> I<OPTS>
2965
2966 Pass I<OPTS> to cvs like in "cvs I<OPTS> log ...".
2967
2968 =item B<-l> I<OPTS>, B<--log-opts> I<OPTS>
2969
2970 Pass I<OPTS> to cvs log like in "cvs ... log I<OPTS>".
2971
2972 =back
2973
2974 Notes about the options and arguments:
2975
2976 =over 4
2977
2978 =item *
2979
2980 The B<-I> and B<-F> options may appear multiple times.
2981
2982 =item *
2983
2984 To follow trunk revisions, use "B<-F trunk>" ("B<-F TRUNK>" also works).  This is
2985 okay because no would ever, ever be crazy enough to name a branch "trunk",
2986 right?  Right.
2987
2988 =item *
2989
2990 For the B<-U> option, the I<UFILE> should be formatted like CVSROOT/users. That is,
2991 each line of I<UFILE> looks like this:
2992
2993        jrandom:jrandom@red-bean.com
2994
2995 or maybe even like this
2996
2997        jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
2998
2999 Don't forget to quote the portion after the colon if necessary.
3000
3001 =item *
3002
3003 Many people want to filter by date.  To do so, invoke cvs2cl.pl like this:
3004
3005        cvs2cl.pl -l "-d'DATESPEC'"
3006
3007 where DATESPEC is any date specification valid for "cvs log -d".  (Note that
3008 CVS 1.10.7 and below requires there be no space between -d and its argument).
3009
3010 =item *
3011
3012 Dates/times are interpreted in the local time zone.
3013
3014 =item *
3015
3016 Remember to quote the argument to `B<-l>' so that your shell doesn't interpret
3017 spaces as argument separators.
3018
3019 =item *
3020
3021 See the 'Common Options' section of the cvs manual ('info cvs' on UNIX-like
3022 systems) for more information.
3023
3024 =item *
3025
3026 Note that the rules for quoting under windows shells are different.
3027
3028 =back
3029
3030 =head1 EXAMPLES
3031
3032 Some examples (working on UNIX shells):
3033
3034       # logs after 6th March, 2003 (inclusive)
3035       cvs2cl.pl -l "-d'>2003-03-06'"
3036       # logs after 4:34PM 6th March, 2003 (inclusive)
3037       cvs2cl.pl -l "-d'>2003-03-06 16:34'"
3038       # logs between 4:46PM 6th March, 2003 (exclusive) and
3039       # 4:34PM 6th March, 2003 (inclusive)
3040       cvs2cl.pl -l "-d'2003-03-06 16:46>2003-03-06 16:34'"
3041
3042 Some examples (on non-UNIX shells):
3043
3044       # Reported to work on windows xp/2000
3045       cvs2cl.pl -l  "-d"">2003-10-18;today<"""
3046
3047 =head1 AUTHORS
3048
3049 =over 4
3050
3051 =item Karl Fogel
3052
3053 =item Melissa O'Neill
3054
3055 =item Martyn J. Pearce
3056
3057 =back
3058
3059 Contributions from
3060
3061 =over 4
3062
3063 =item Mike Ayers
3064
3065 =item Tim Bradshaw
3066
3067 =item Richard Broberg
3068
3069 =item Nathan Bryant
3070
3071 =item Oswald Buddenhagen
3072
3073 =item Neil Conway
3074
3075 =item Arthur de Jong
3076
3077 =item Mark W. Eichin
3078
3079 =item Dave Elcock
3080
3081 =item Reid Ellis
3082
3083 =item Simon Josefsson
3084
3085 =item Robin Hugh Johnson
3086
3087 =item Terry Kane
3088
3089 =item Akos Kiss
3090
3091 =item Claus Klein
3092
3093 =item Eddie Kohler
3094
3095 =item Richard Laager
3096
3097 =item Kevin Lilly
3098
3099 =item Karl-Heinz Marbaise
3100
3101 =item Mitsuaki Masuhara
3102
3103 =item Henrik Nordstrom
3104
3105 =item Joe Orton
3106
3107 =item Peter Palfrader
3108
3109 =item Thomas Parmelan
3110
3111 =item Johanne Stezenbach
3112
3113 =item Joseph Walton
3114
3115 =item Ernie Zapata
3116
3117 =back
3118
3119 =head1 BUGS
3120
3121 Please report bugs to C<bug-cvs2cl@red-bean.com>.
3122
3123 =head1 PREREQUISITES
3124
3125 This script requires C<Text::Wrap>, C<Time::Local>, and C<File::Basename>.  It
3126 also seems to require C<Perl 5.004_04> or higher.
3127
3128 =head1 OPERATING SYSTEM COMPATIBILITY
3129
3130 Should work on any OS.
3131
3132 =head1 SCRIPT CATEGORIES
3133
3134 Version_Control/CVS
3135
3136 =head1 COPYRIGHT
3137
3138 (C) 2001,2002,2003,2004 Martyn J. Pearce <fluffy@cpan.org>, under the GNU GPL.
3139
3140 (C) 1999 Karl Fogel <kfogel@red-bean.com>, under the GNU GPL.
3141
3142 cvs2cl.pl is free software; you can redistribute it and/or modify
3143 it under the terms of the GNU General Public License as published by
3144 the Free Software Foundation; either version 2, or (at your option)
3145 any later version.
3146
3147 cvs2cl.pl is distributed in the hope that it will be useful,
3148 but WITHOUT ANY WARRANTY; without even the implied warranty of
3149 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
3150 GNU General Public License for more details.
3151
3152 You may have received a copy of the GNU General Public License
3153 along with cvs2cl.pl; see the file COPYING.  If not, write to the
3154 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
3155 Boston, MA 02111-1307, USA.
3156
3157 =head1 SEE ALSO
3158
3159 cvs(1)
3160