scripts/nightly/cvs2cl.pl

   1 #!/bin/sh
   2 exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
   3 #!perl -w
   4
   5
   6 ##############################################################
   7 ###                                                        ###
   8 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
   9 ###                                                        ###
  10 ##############################################################
  11
  12 ## $Revision$
  13 ## $Date$
  14 ## $Author$
  15 ##
  16
  17 use strict;
  18
  19 use File::Basename qw( fileparse );
  20 use Getopt::Long   qw( GetOptions );
  21 use Text::Wrap     qw( );
  22 use Time::Local    qw( timegm );
  23 use User::pwent    qw( getpwnam );
  24
  25 # The Plan:
  26 #
  27 # Read in the logs for multiple files, spit out a nice ChangeLog that
  28 # mirrors the information entered during `cvs commit'.
  29 #
  30 # The problem presents some challenges. In an ideal world, we could
  31 # detect files with the same author, log message, and checkin time --
  32 # each <filelist, author, time, logmessage> would be a changelog entry.
  33 # We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic*
  34 # so checkins can span a range of times.  Also, the directory structure
  35 # could be hierarchical.
  36 #
  37 # Another question is whether we really want to have the ChangeLog
  38 # exactly reflect commits. An author could issue two related commits,
  39 # with different log entries, reflecting a single logical change to the
  40 # source. GNU style ChangeLogs group these under a single author/date.
  41 # We try to do the same.
  42 #
  43 # So, we parse the output of `cvs log', storing log messages in a
  44 # multilevel hash that stores the mapping:
  45 #   directory => author => time => message => filelist
  46 # As we go, we notice "nearby" commit times and store them together
  47 # (i.e., under the same timestamp), so they appear in the same log
  48 # entry.
  49 #
  50 # When we've read all the logs, we twist this mapping into
  51 # a time => author => message => filelist mapping for each directory.
  52 #
  53 # If we're not using the `--distributed' flag, the directory is always
   54 # considered to be `./', even as we descend into subdirectories.
  55
  56 # Call Tree
  57
  58 # name                         number of lines (10.xii.03)
  59 # parse_options                         192
  60 # derive_changelog                       13
  61 # +-maybe_grab_accumulation_date         38
  62 # +-read_changelog                      277
  63 #   +-maybe_read_user_map_file           94
  64 #     +-run_ext                           9
  65 #   +-read_file_path                     29
  66 #   +-read_symbolic_name                 43
  67 #   +-read_revision                      49
  68 #   +-read_date_author_and_state         25
  69 #     +-parse_date_author_and_state      20
  70 #   +-read_branches                      36
  71 # +-output_changelog                    424
  72 #   +-pretty_file_list                  290
  73 #     +-common_path_prefix               35
  74 #   +-preprocess_msg_text                30
  75 #     +-min                               1
  76 #   +-mywrap                             16
  77 #   +-last_line_len                       5
  78 #   +-wrap_log_entry                    177
  79 #
  80 # Utilities
  81 #
  82 # xml_escape                              6
  83 # slurp_file                             11
  84 # debug                                   5
  85 # version                                 2
  86 # usage                                 142
  87
  88 # -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
  89 #
  90 # Note about a bug-slash-opportunity:
  91 # -----------------------------------
  92 #
  93 # There's a bug in Text::Wrap, which affects cvs2cl.  This script
  94 # reveals it:
  95 #
  96 #   #!/usr/bin/perl -w
  97 #
  98 #   use Text::Wrap;
  99 #
 100 #   my $test_text =
 101 #   "This script demonstrates a bug in Text::Wrap.  The very long line
 102 #   following this paragraph will be relocated relative to the surrounding
 103 #   text:
 104 #
 105 #   ====================================================================
 106 #
 107 #   See?  When the bug happens, we'll get the line of equal signs below
 108 #   this paragraph, even though it should be above.";
 109 #
 110 #
 111 #   # Print out the test text with no wrapping:
 112 #   print "$test_text";
 113 #   print "\n";
 114 #   print "\n";
 115 #
 116 #   # Now print it out wrapped, and see the bug:
 117 #   print wrap ("\t", "        ", "$test_text");
 118 #   print "\n";
 119 #   print "\n";
 120 #
  121 # If the line of equal signs were one character shorter, the bug would
  122 # not happen.  Interesting.
 123 #
 124 # Anyway, rather than fix this in Text::Wrap, we might as well write a
 125 # new wrap() which has the following much-needed features:
 126 #
 127 # * initial indentation, like current Text::Wrap()
 128 # * subsequent line indentation, like current Text::Wrap()
 129 # * user chooses among: force-break long words, leave them alone, or die()?
 130 # * preserve existing indentation: chopped chunks from an indented line
 131 #   are indented by same (like this line, not counting the asterisk!)
 132 # * optional list of things to preserve on line starts, default ">"
 133 #
 134 # Note that the last two are essentially the same concept, so unify in
 135 # implementation and give a good interface to controlling them.
 136 #
 137 # And how about:
 138 #
  139 # Optionally, when we encounter a line pre-indented by the same amount as
  140 # the previous line, strip the newline and refill, but keep that indent.
 141 # Yeah...
 142
 143 # Globals --------------------------------------------------------------------
 144
# File-scoped configuration and state.  Everything declared here is a
# file-level lexical shared by the packages defined later in this file.

# NOTE(review): presumably the file consulted for the local mail domain;
# its use is outside this section -- confirm.
use constant MAILNAME => "/etc/mailname";

# In case we have to print it out:
my $VERSION = '$Revision$';
# Keep only the middle word of an expanded RCS keyword ("$Revision: 1.2 $"
# becomes "1.2").  An unexpanded keyword does not match and is left as is.
$VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;

## Vars set by options:

# Print debugging messages?
my $Debug = 0;

# Just show version and exit?
my $Print_Version = 0;

# Just print usage message and exit?
my $Print_Usage = 0;

# What file should we generate (defaults to "ChangeLog")?
my $Log_File_Name = "ChangeLog";

# Grab most recent entry date from existing ChangeLog file, just add
# to that ChangeLog.
my $Cumulative = 0;

# NOTE(review): the opening of the following note appears to have been lost;
# it seems to describe update mode, where a date is handed to `cvs log -d`.
#
# `cvs log -d`, this will repeat the last entry in the old log.  This is OK,
# as it guarantees at least one entry in the update changelog, which means
# that there will always be a date to extract for the next update.  The repeat
# entry can be removed in postprocessing, if necessary.

# MJP 2003-08-02
# I don't think this actually does anything useful
my $Update = 0;

# Expand usernames to email addresses based on a map file?
my $User_Map_File = '';
my $User_Passwd_File;
my $Mail_Domain;

# Output log in chronological order? [default is reverse chronological order]
my $Chronological_Order = 0;

# Grab user details via gecos
my $Gecos = 0;

# User domain for gecos email addresses
my $Domain;

# Output to a file or to stdout?
my $Output_To_Stdout = 0;

# Eliminate empty log messages?
my $Prune_Empty_Msgs = 0;

# Tags to leave out of the output
my %ignore_tags;

# Show only revisions with Tags
my %show_tags;

# Don't call Text::Wrap on the body of the message
my $No_Wrap = 0;

# Indentation of log messages
my $Indent = "\t";

# Don't do any pretty print processing
my $Summary = 0;

# Separates header from log message.  Code assumes it is either " " or
# "\n\n", so if there's ever an option to set it to something else,
# make sure to go through all conditionals that use this var.
my $After_Header = " ";

# XML Encoding (written into the XML declaration when non-empty)
my $XML_Encoding = '';

# Format more for programs than for humans.
my $XML_Output = 0;
# Omit the xmlns attribute from the root <changelog> element?
my $No_XML_Namespace = 0;
# Omit the <isoDate> element from each XML entry header?
my $No_XML_ISO_Date = 0;

# Do some special tweaks for log data that was written in FSF
# ChangeLog style.
my $FSF_Style = 0;

# Show times in UTC instead of local time
my $UTC_Times = 0;

# Show times in output?
my $Show_Times = 1;

# Show day of week in output?
my $Show_Day_Of_Week = 0;

# Show revision numbers in output?
my $Show_Revisions = 0;

# Show dead files in output?
my $Show_Dead = 0;

# Hide dead trunk files which were created as a result of additions on a
# branch?
my $Hide_Branch_Additions = 1;

# Show tags (symbolic names) in output?
my $Show_Tags = 0;

# Show tags separately in output?
my $Show_Tag_Dates = 0;

# Show branches by symbolic name in output?
my $Show_Branches = 0;

# Show only revisions on these branches or their ancestors.
my @Follow_Branches;

# Don't bother with files matching these regexps.
my @Ignore_Files;

# How exactly we match entries.  We definitely want "o",
# and user might add "i" by using --case-insensitive option.
my $Case_Insensitive = 0;

# Maybe only show log messages matching a certain regular expression.
my $Regexp_Gate = '';

# Pass this global option string along to cvs, to the left of `log':
my $Global_Opts = '';

# Pass this option string along to the cvs log subcommand:
my $Command_Opts = '';

# Read log output from stdin instead of invoking cvs log?
my $Input_From_Stdin = 0;

# Don't show filenames in output.
my $Hide_Filenames = 0;

# Shorten filenames by factoring out their common directory?  On by default.
# NOTE(review): the original comment read "Don't shorten directory names
# from filenames", which presumably describes the option that clears this
# flag -- confirm in parse_options.
my $Common_Dir = 1;

# Max checkin duration. CVS checkin is not atomic, so we may have checkin
# times that span a range of time. We assume that checkins will last no
# longer than $Max_Checkin_Duration seconds, and that similarly, no
# checkins will happen from the same users with the same message less
# than $Max_Checkin_Duration seconds apart.
my $Max_Checkin_Duration = 180;

# What to put at the front of [each] ChangeLog.
my $ChangeLog_Header = '';

# Whether to enable 'delta' mode, and for what start/end tags.
my $Delta_Mode = 0;
my $Delta_From = '';
my $Delta_To = '';

# NOTE(review): not used in this section; purpose not visible from here.
my $TestCode;

# Whether to parse filenames from the RCS filename, and if so what
# prefix to strip.
my $RCS_Root;

## end vars set by options.

# latest observed times for the start/end tags in delta mode
my $Delta_StartTime = 0;
my $Delta_EndTime = 0;

# In 'cvs log' output, one long unbroken line of equal signs separates
# files:
my $file_separator = "======================================="
                   . "======================================";

# In 'cvs log' output, a shorter line of dashes separates log messages
# within a file:
my $logmsg_separator = "----------------------------";

# NOTE(review): not used in this section; presumably limits branch
# following to the named branches themselves -- confirm.
my $No_Ancestors = 0;

# When set, wrapped continuation lines are indented by $Indent alone rather
# than $Indent plus two extra spaces.
my $No_Extra_Indent = 0;

# When set (and times are hidden), the text header shows only the date and
# is omitted for an entry whose date matches the previous entry's.
my $GroupWithinDate = 0;
 327
 328 # ----------------------------------------------------------------------------
 329
 330 package CVS::Utils::ChangeLog::EntrySet;
 331
 332 sub new {
 333   my $class = shift;
 334   my %self;
 335   bless \%self, $class;
 336 }
 337
 338 # -------------------------------------
 339
 340 sub output_changelog {
 341   my $output_type = $XML_Output ? 'XML' : 'Text';
 342   my $output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}";
 343   $output_class->new->output_changelog(@_);
 344 }
 345
 346 # ----------------------------------------------------------------------------
 347
 348 package CVS::Utils::ChangeLog::EntrySet::Output::Text;
 349
 350 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
 351
 352 use File::Basename qw( fileparse );
 353
 354 sub new {
 355   my $class = shift;
 356   bless \(my($ self)), $class;
 357 }
 358
 359 # -------------------------------------
 360
 361 sub wday {
 362   my $self = shift; my $class = ref $self;
 363   my ($wday) = @_;
 364
 365   return $Show_Day_Of_Week ? ' ' . $class->weekday_en($wday) : '';
 366 }
 367
 368 # -------------------------------------
 369
 370 sub header_line {
 371   my $self = shift;
 372   my ($time, $author, $lastdate) = @_;
 373
 374   my $header_line = '';
 375
 376   my (undef,$min,$hour,$mday,$mon,$year,$wday)
 377     = $UTC_Times ? gmtime($time) : localtime($time);
 378
 379   my $date = $self->fdatetime($time);
 380
 381   if ($Show_Times) {
 382     $header_line =
 383       sprintf "%s  %s\n\n", $date, $author;
 384   } else {
 385     if ( ! defined $lastdate or $date ne $lastdate or ! $GroupWithinDate ) {
 386       if ( $GroupWithinDate ) {
 387         $header_line = "$date\n\n";
 388       } else {
 389         $header_line = "$date  $author\n\n";
 390       }
 391     } else {
 392       $header_line = '';
 393     }
 394   }
 395 }
 396
 397 # -------------------------------------
 398
 399 sub preprocess_msg_text {
 400   my $self = shift;
 401   my ($text) = @_;
 402
 403   $text = $self->SUPER::preprocess_msg_text($text);
 404
 405   unless ( $No_Wrap ) {
 406     # Strip off lone newlines, but only for lines that don't begin with
 407     # whitespace or a mail-quoting character, since we want to preserve
 408     # that kind of formatting.  Also don't strip newlines that follow a
 409     # period; we handle those specially next.  And don't strip
 410     # newlines that precede an open paren.
 411     1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g;
 412
 413     # If a newline follows a period, make sure that when we bring up the
 414     # bottom sentence, it begins with two spaces.
 415     1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g;
 416   }
 417
 418   return $text;
 419 }
 420
 421 # -------------------------------------
 422
 423 # Here we take a bunch of qunks and convert them into printed
 424 # summary that will include all the information the user asked for.
 425 sub pretty_file_list {
 426   my $self = shift;
 427
 428   return ''
 429     if $Hide_Filenames;
 430
 431   my $qunksref = shift;
 432
 433   my @filenames;
 434   my $beauty = '';          # The accumulating header string for this entry.
 435   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
 436   my %unanimous_tags;       # Tags found in all qunks
 437   my %all_branches;         # Branches found in any qunk
 438   my $fbegun = 0;           # Did we begin printing filenames yet?
 439
 440   my ($common_dir, $qunkrefs) =
 441     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref);
 442
 443   my @qunkrefs = @$qunkrefs;
 444
 445   # Not XML output, so complexly compactify for chordate consumption.  At this
 446   # point we have enough global information about all the qunks to organize
 447   # them non-redundantly for output.
 448
 449   if ($common_dir) {
 450     # Note that $common_dir still has its trailing slash
 451     $beauty .= "$common_dir: ";
 452   }
 453
 454   if ($Show_Branches)
 455   {
 456     # For trailing revision numbers.
 457     my @brevisions;
 458
 459     foreach my $branch (keys (%all_branches))
 460     {
 461       foreach my $qunkref (@qunkrefs)
 462       {
 463         if ((defined ($qunkref->branch))
 464             and ($qunkref->branch eq $branch))
 465         {
 466           if ($fbegun) {
 467             # kff todo: comma-delimited in XML too?  Sure.
 468             $beauty .= ", ";
 469           }
 470           else {
 471             $fbegun = 1;
 472           }
 473           my $fname = substr ($qunkref->filename, length ($common_dir));
 474           $beauty .= $fname;
 475           $qunkref->{'printed'} = 1;  # Just setting a mark bit, basically
 476
 477           if ( $Show_Tags and defined $qunkref->tags ) {
 478             my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
 479
 480             if (@tags) {
 481               $beauty .= " (tags: ";
 482               $beauty .= join (', ', @tags);
 483               $beauty .= ")";
 484             }
 485           }
 486
 487           if ($Show_Revisions) {
 488             # Collect the revision numbers' last components, but don't
 489             # print them -- they'll get printed with the branch name
 490             # later.
 491             $qunkref->revision =~ /.+\.([\d]+)$/;
 492             push (@brevisions, $1);
 493
 494             # todo: we're still collecting branch roots, but we're not
 495             # showing them anywhere.  If we do show them, it would be
 496             # nifty to just call them revision "0" on a the branch.
 497             # Yeah, that's the ticket.
 498           }
 499         }
 500       }
 501       $beauty .= " ($branch";
 502       if (@brevisions) {
 503         if ((scalar (@brevisions)) > 1) {
 504           $beauty .= ".[";
 505           $beauty .= (join (',', @brevisions));
 506           $beauty .= "]";
 507         }
 508         else {
 509           # Square brackets are spurious here, since there's no range to
 510           # encapsulate
 511           $beauty .= ".$brevisions[0]";
 512         }
 513       }
 514       $beauty .= ")";
 515     }
 516   }
 517
 518   # Okay; any qunks that were done according to branch are taken care
 519   # of, and marked as printed.  Now print everyone else.
 520
 521   my %fileinfo_printed;
 522   foreach my $qunkref (@qunkrefs)
 523   {
 524     next if (defined ($qunkref->{'printed'}));   # skip if already printed
 525
 526     my $b = substr ($qunkref->filename, length ($common_dir));
 527     # todo: Shlomo's change was this:
 528     # $beauty .= substr ($qunkref->filename,
 529     #              (($common_dir eq "./") ? '' : length ($common_dir)));
 530     $qunkref->{'printed'} = 1;  # Set a mark bit.
 531
 532     if ($Show_Revisions || $Show_Tags || $Show_Dead)
 533     {
 534       my $started_addendum = 0;
 535
 536       if ($Show_Revisions) {
 537         $started_addendum = 1;
 538         $b .= " (";
 539         $b .= $qunkref->revision;
 540       }
 541       if ($Show_Dead && $qunkref->state =~ /dead/)
 542       {
 543         # Deliberately not using $started_addendum. Keeping it simple.
 544         $b .= "[DEAD]";
 545       }
 546       if ($Show_Tags && (defined $qunkref->tags)) {
 547         my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
 548         if ((scalar (@tags)) > 0) {
 549           if ($started_addendum) {
 550             $b .= ", ";
 551           }
 552           else {
 553             $b .= " (tags: ";
 554           }
 555           $b .= join (', ', @tags);
 556           $started_addendum = 1;
 557         }
 558       }
 559       if ($started_addendum) {
 560         $b .= ")";
 561       }
 562     }
 563
 564     unless ( exists $fileinfo_printed{$b} ) {
 565       if ($fbegun) {
 566         $beauty .= ", ";
 567       } else {
 568         $fbegun = 1;
 569       }
 570       $beauty .= $b, $fileinfo_printed{$b} = 1;
 571     }
 572   }
 573
 574   # Unanimous tags always come last.
 575   if ($Show_Tags && %unanimous_tags)
 576   {
 577     $beauty .= " (utags: ";
 578     $beauty .= join (', ', sort keys (%unanimous_tags));
 579     $beauty .= ")";
 580   }
 581
 582   # todo: still have to take care of branch_roots?
 583
 584   $beauty = "$beauty:";
 585
 586   return $beauty;
 587 }
 588
 589 # -------------------------------------
 590
 591 sub output_tagdate {
 592   my $self = shift;
 593   my ($fh, $time, $tag) = @_;
 594
 595   my $fdatetime = $self->fdatetime($time);
 596   print $fh "$fdatetime  tag $tag\n\n";
 597   return;
 598 }
 599
 600 # -------------------------------------
 601
 602 sub format_body {
 603   my $self = shift;
 604   my ($msg, $files, $qunklist) = @_;
 605
 606   my $body;
 607
 608   if ( $No_Wrap and ! $Summary ) {
 609     $msg = $self->preprocess_msg_text($msg);
 610     $files = $self->mywrap("\t", "\t  ", "* $files");
 611     $msg =~ s/\n(.+)/\n$Indent$1/g;
 612     unless ($After_Header eq " ") {
 613       $msg =~ s/^(.+)/$Indent$1/g;
 614     }
 615     if ( $Hide_Filenames ) {
 616       $body = $After_Header . $msg;
 617     } else {
 618       $body = $files . $After_Header . $msg;
 619     }
 620   } elsif ( $Summary ) {
 621     my ($filelist, $qunk);
 622     my (@DeletedQunks, @AddedQunks, @ChangedQunks);
 623
 624     $msg = $self->preprocess_msg_text($msg);
 625     #
 626     #     Sort the files (qunks) according to the operation that was
 627     # performed.  Files which were added have no line change
 628     # indicator, whereas deleted files have state dead.
 629     #
 630     foreach $qunk ( @$qunklist ) {
 631       if ( "dead" eq $qunk->state) {
 632         push @DeletedQunks, $qunk;
 633       } elsif ( ! defined $qunk->lines ) {
 634         push @AddedQunks, $qunk;
 635       } else {
 636         push @ChangedQunks, $qunk;
 637       }
 638     }
 639     #
 640     #     The qunks list was  originally in tree search order.  Let's
 641     # get that back.  The lists, if they exist, will be reversed upon
 642     # processing.
 643     #
 644
 645     #
 646     #     Now write the three sections onto $filelist
 647     #
 648     if ( @DeletedQunks ) {
 649       $filelist .= "\tDeleted:\n";
 650       foreach $qunk ( @DeletedQunks ) {
 651         $filelist .= "\t\t" . $qunk->filename;
 652         $filelist .= " (" . $qunk->revision . ")";
 653         $filelist .= "\n";
 654       }
 655       undef @DeletedQunks;
 656     }
 657
 658     if ( @AddedQunks ) {
 659       $filelist .= "\tAdded:\n";
 660       foreach $qunk (@AddedQunks) {
 661         $filelist .= "\t\t" . $qunk->filename;
 662         $filelist .= " (" . $qunk->revision . ")";
 663         $filelist .= "\n";
 664       }
 665       undef @AddedQunks ;
 666     }
 667
 668     if ( @ChangedQunks ) {
 669       $filelist .= "\tChanged:\n";
 670       foreach $qunk (@ChangedQunks) {
 671         $filelist .= "\t\t" . $qunk->filename;
 672         $filelist .= " (" . $qunk->revision . ")";
 673         $filelist .= ", \"" . $qunk->state . "\"";
 674         $filelist .= ", lines: " . $qunk->lines;
 675         $filelist .= "\n";
 676       }
 677       undef @ChangedQunks;
 678     }
 679
 680     chomp $filelist;
 681
 682     if ( $Hide_Filenames ) {
 683       $filelist = '';
 684     }
 685
 686     $msg =~ s/\n(.*)/\n$Indent$1/g;
 687     unless ( $After_Header eq " " or $FSF_Style ) {
 688       $msg =~ s/^(.*)/$Indent$1/g;
 689     }
 690
 691     unless ( $No_Wrap ) {
 692       if ( $FSF_Style ) {
 693         $msg = $self->wrap_log_entry($msg, '', 69, 69);
 694         chomp($msg);
 695         chomp($msg);
 696       } else {
 697         $msg = $self->mywrap('', $Indent, "$msg");
 698         $msg =~ s/[ \t]+\n/\n/g;
 699       }
 700     }
 701
 702     $body = $filelist . $After_Header . $msg;
 703   } else {  # do wrapping, either FSF-style or regular
 704     my $latter_wrap = $No_Extra_Indent ? $Indent : "$Indent  ";
 705
 706     if ( $FSF_Style ) {
 707       $files = $self->mywrap($Indent, $latter_wrap, "* $files");
 708
 709       my $files_last_line_len = 0;
 710       if ( $After_Header eq " " ) {
 711         $files_last_line_len = $self->last_line_len($files);
 712         $files_last_line_len += 1;  # for $After_Header
 713       }
 714
 715       $msg = $self->wrap_log_entry($msg, $latter_wrap, 69-$files_last_line_len, 69);
 716       $body = $files . $After_Header . $msg;
 717     } else {  # not FSF-style
 718       $msg = $self->preprocess_msg_text($msg);
 719       $body = $files . $After_Header . $msg;
 720       $body = $self->mywrap($Indent, $latter_wrap, "* $body");
 721       $body =~ s/[ \t]+\n/\n/g;
 722     }
 723   }
 724
 725   return $body;
 726 }
 727
 728 # ----------------------------------------------------------------------------
 729
 730 package CVS::Utils::ChangeLog::EntrySet::Output::XML;
 731
 732 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
 733
 734 use File::Basename qw( fileparse );
 735
 736 sub new {
 737   my $class = shift;
 738   bless \(my($ self)), $class;
 739 }
 740
 741 # -------------------------------------
 742
 743 sub header_line {
 744   my $self = shift;
 745   my ($time, $author, $lastdate) = @_;
 746
 747   my $header_line = '';
 748
 749   my $isoDate;
 750
 751   my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
 752
 753   # Ideally, this would honor $UTC_Times and use +HH:MM syntax
 754   $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
 755                      $y + 1900, $m + 1, $d, $H, $M, $S);
 756
 757   my (undef,$min,$hour,$mday,$mon,$year,$wday)
 758     = $UTC_Times ? gmtime($time) : localtime($time);
 759
 760   my $date = $self->fdatetime($time);
 761   $wday = $self->wday($wday);
 762
 763   $header_line =
 764     sprintf ("<date>%4u-%02u-%02u</date>\n${wday}<time>%02u:%02u</time>\n",
 765              $year+1900, $mon+1, $mday, $hour, $min);
 766   $header_line .= "<isoDate>$isoDate</isoDate>\n"
 767     unless $No_XML_ISO_Date;
 768   $header_line .= sprintf("<author>%s</author>\n" , $author);
 769 }
 770
 771 # -------------------------------------
 772
 773 sub wday {
 774   my $self = shift; my $class = ref $self;
 775   my ($wday) = @_;
 776
 777   return '<weekday>' . $class->weekday_en($wday) . "</weekday>\n";
 778 }
 779
 780 # -------------------------------------
 781
 782 sub escape {
 783   my $self = shift;
 784
 785   my $txt = shift;
 786   $txt =~ s/&/&amp;/g;
 787   $txt =~ s/</&lt;/g;
 788   $txt =~ s/>/&gt;/g;
 789   return $txt;
 790 }
 791
 792 # -------------------------------------
 793
 794 sub output_header {
 795   my $self = shift;
 796   my ($fh) = @_;
 797
 798   my $encoding    =
 799     length $XML_Encoding ? qq'encoding="$XML_Encoding"' : '';
 800   my $version     = 'version="1.0"';
 801   my $declaration =
 802     sprintf '<?xml %s?>', join ' ', grep length, $version, $encoding;
 803   my $root        =
 804     $No_XML_Namespace ?
 805       '<changelog>'     :
 806         '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
 807   print $fh "$declaration\n\n$root\n\n";
 808 }
 809
 810 # -------------------------------------
 811
 812 sub output_footer {
 813   my $self = shift;
 814   my ($fh) = @_;
 815
 816   print $fh "</changelog>\n";
 817 }
 818
 819 # -------------------------------------
 820
 821 sub preprocess_msg_text {
 822   my $self = shift;
 823   my ($text) = @_;
 824
 825   $text = $self->SUPER::preprocess_msg_text($text);
 826
 827   $text = $self->escape($text);
 828   chomp $text;
 829   $text = "<msg>${text}</msg>\n";
 830
 831   return $text;
 832 }
 833
 834 # -------------------------------------
 835
 836 # Here we take a bunch of qunks and convert them into printed
 837 # summary that will include all the information the user asked for.
 838 sub pretty_file_list {
 839   my $self = shift;
 840   my ($qunksref) = @_;
 841
 842   my $beauty = '';          # The accumulating header string for this entry.
 843   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
 844   my %unanimous_tags;       # Tags found in all qunks
 845   my %all_branches;         # Branches found in any qunk
 846   my $fbegun = 0;           # Did we begin printing filenames yet?
 847
 848   my ($common_dir, $qunkrefs) =
 849     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches),
 850       $qunksref);
 851
 852   my @qunkrefs = @$qunkrefs;
 853
 854   # If outputting XML, then our task is pretty simple, because we
 855   # don't have to detect common dir, common tags, branch prefixing,
 856   # etc.  We just output exactly what we have, and don't worry about
 857   # redundancy or readability.
 858
 859   foreach my $qunkref (@qunkrefs)
 860   {
 861     my $filename    = $qunkref->filename;
 862     my $state       = $qunkref->state;
 863     my $revision    = $qunkref->revision;
 864     my $tags        = $qunkref->tags;
 865     my $branch      = $qunkref->branch;
 866     my $branchroots = $qunkref->roots;
 867
 868     $filename = $self->escape($filename);   # probably paranoia
 869     $revision = $self->escape($revision);   # definitely paranoia
 870
 871     $beauty .= "<file>\n";
 872     $beauty .= "<name>${filename}</name>\n";
 873     $beauty .= "<cvsstate>${state}</cvsstate>\n";
 874     $beauty .= "<revision>${revision}</revision>\n";
 875     if ($branch) {
 876       $branch   = $self->escape($branch);     # more paranoia
 877       $beauty .= "<branch>${branch}</branch>\n";
 878     }
 879     foreach my $tag (@$tags) {
 880       $tag = $self->escape($tag);  # by now you're used to the paranoia
 881       $beauty .= "<tag>${tag}</tag>\n";
 882     }
 883     foreach my $root (@$branchroots) {
 884       $root = $self->escape($root);  # which is good, because it will continue
 885       $beauty .= "<branchroot>${root}</branchroot>\n";
 886     }
 887     $beauty .= "</file>\n";
 888   }
 889
 890   # Theoretically, we could go home now.  But as long as we're here,
 891   # let's print out the common_dir and utags, as a convenience to
 892   # the receiver (after all, earlier code calculated that stuff
 893   # anyway, so we might as well take advantage of it).
 894
 895   if ((scalar (keys (%unanimous_tags))) > 1) {
 896     foreach my $utag ((keys (%unanimous_tags))) {
 897       $utag = $self->escape($utag);   # the usual paranoia
 898       $beauty .= "<utag>${utag}</utag>\n";
 899     }
 900   }
 901   if ($common_dir) {
 902     $common_dir = $self->escape($common_dir);
 903     $beauty .= "<commondir>${common_dir}</commondir>\n";
 904   }
 905
 906   # That's enough for XML, time to go home:
 907   return $beauty;
 908 }
 909
 910 # -------------------------------------
 911
# Tag-date output for XML.  Deliberately an empty stub: nothing is written
# for tag dates in XML mode yet.  Defining it here overrides the base
# class's version of this method, which croaks.
sub output_tagdate {
  # NOT YET DONE
}
 915
 916 # -------------------------------------
 917
 918 sub output_entry {
 919   my $self = shift;
 920   my ($fh, $entry) = @_;
 921   print $fh "<entry>\n$entry</entry>\n\n";
 922 }
 923
 924 # -------------------------------------
 925
 926 sub format_body {
 927   my $self = shift;
 928   my ($msg, $files, $qunklist) = @_;
 929
 930   $msg = $self->preprocess_msg_text($msg);
 931   return $files . $msg;
 932 }
 933
 934 # ----------------------------------------------------------------------------
 935
 936 package CVS::Utils::ChangeLog::EntrySet::Output;
 937
 938 use Carp           qw( croak );
 939 use File::Basename qw( fileparse );
 940
 941 # Class Utility Functions -------------
 942
 943 { # form closure
 944
 945 my @weekdays = (qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday));
 946 sub weekday_en {
 947   my $class = shift;
 948   return $weekdays[$_[0]];
 949 }
 950
 951 }
 952
 953 # Abstract Subrs ----------------------
 954
 955 sub wday               { croak "Whoops.  Abtract method call (wday).\n" }
 956 sub pretty_file_list   { croak "Whoops.  Abtract method call (pretty_file_list).\n" }
 957 sub output_tagdate     { croak "Whoops.  Abtract method call (output_tagdate).\n" }
 958 sub header_line        { croak "Whoops.  Abtract method call (header_line).\n" }
 959
 960 # Instance Subrs ----------------------
 961
# Default header hook: writes nothing.  Subclasses override this when their
# format needs a prologue (the XML output class prints the XML declaration
# and root element here).
sub output_header { }
 963
 964 # -------------------------------------
 965
 966 sub output_entry {
 967   my $self = shift;
 968   my ($fh, $entry) = @_;
 969   print $fh "$entry\n";
 970 }
 971
 972 # -------------------------------------
 973
# Default footer hook: writes nothing.  Subclasses override this when their
# format needs an epilogue (the XML output class closes the root element
# here).
sub output_footer { }
 975
 976 # -------------------------------------
 977
 978 sub escape { return $_[1] }
 979
 980 # -------------------------------------
 981
 982 sub output_changelog {
 983 my $self = shift; my $class = ref $self;
 984   my ($grand_poobah) = @_;
 985   ### Process each ChangeLog
 986
 987   while (my ($dir,$authorhash) = each %$grand_poobah)
 988   {
 989     &main::debug ("DOING DIR: $dir\n");
 990
 991     # Here we twist our hash around, from being
 992     #   author => time => message => filelist
 993     # in %$authorhash to
 994     #   time => author => message => filelist
 995     # in %changelog.
 996     #
 997     # This is also where we merge entries.  The algorithm proceeds
 998     # through the timeline of the changelog with a sliding window of
 999     # $Max_Checkin_Duration seconds; within that window, entries that
1000     # have the same log message are merged.
1001     #
1002     # (To save space, we zap %$authorhash after we've copied
1003     # everything out of it.)
1004
1005     my %changelog;
1006     while (my ($author,$timehash) = each %$authorhash)
1007     {
1008       my %stamptime;
1009       foreach my $time (sort {$a <=> $b} (keys %$timehash))
1010       {
1011         my $msghash = $timehash->{$time};
1012         while (my ($msg,$qunklist) = each %$msghash)
1013         {
1014           my $stamptime = $stamptime{$msg};
1015           if ((defined $stamptime)
1016               and (($time - $stamptime) < $Max_Checkin_Duration)
1017               and (defined $changelog{$stamptime}{$author}{$msg}))
1018           {
1019             push(@{$changelog{$stamptime}{$author}{$msg}}, $qunklist->files);
1020           }
1021           else {
1022             $changelog{$time}{$author}{$msg} = $qunklist->files;
1023             $stamptime{$msg} = $time;
1024           }
1025         }
1026       }
1027     }
1028     undef (%$authorhash);
1029
1030     ### Now we can write out the ChangeLog!
1031
1032     my ($logfile_here, $logfile_bak, $tmpfile);
1033     my $lastdate;
1034
1035     if (! $Output_To_Stdout) {
1036       $logfile_here =  $dir . $Log_File_Name;
1037       $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem
1038       $tmpfile      = "${logfile_here}.cvs2cl$$.tmp";
1039       $logfile_bak  = "${logfile_here}.bak";
1040
1041       open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
1042     }
1043     else {
1044       open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
1045     }
1046
1047     print LOG_OUT $ChangeLog_Header;
1048
1049     my %tag_date_printed;
1050
1051     $self->output_header(\*LOG_OUT);
1052
1053     my @key_list = ();
1054     if($Chronological_Order) {
1055         @key_list = sort {$a <=> $b} (keys %changelog);
1056     } else {
1057         @key_list = sort {$b <=> $a} (keys %changelog);
1058     }
1059     foreach my $time (@key_list)
1060     {
1061       next if ($Delta_Mode &&
1062                (($time <= $Delta_StartTime) ||
1063                 ($time > $Delta_EndTime && $Delta_EndTime)));
1064
1065       # Set up the date/author line.
1066       # kff todo: do some more XML munging here, on the header
1067       # part of the entry:
1068       my (undef,$min,$hour,$mday,$mon,$year,$wday)
1069           = $UTC_Times ? gmtime($time) : localtime($time);
1070
1071       $wday = $self->wday($wday);
1072       # XML output includes everything else, we might as well make
1073       # it always include Day Of Week too, for consistency.
1074       my $authorhash = $changelog{$time};
1075       if ($Show_Tag_Dates) {
1076         my %tags;
1077         while (my ($author,$mesghash) = each %$authorhash) {
1078           while (my ($msg,$qunk) = each %$mesghash) {
1079             foreach my $qunkref2 (@$qunk) {
1080               if (defined ($qunkref2->tags)) {
1081                 foreach my $tag (@{$qunkref2->tags}) {
1082                   $tags{$tag} = 1;
1083                 }
1084               }
1085             }
1086           }
1087         }
1088         # Sort here for determinism to ease testing
1089         foreach my $tag (sort keys %tags) {
1090           if ( ! defined $tag_date_printed{$tag} ) {
1091             $tag_date_printed{$tag} = $time;
1092             $self->output_tagdate(\*LOG_OUT, $time, $tag);
1093           }
1094         }
1095       }
1096       while (my ($author,$mesghash) = each %$authorhash)
1097       {
1098         # If XML, escape in outer loop to avoid compound quoting:
1099         $author = $self->escape($author);
1100
1101       FOOBIE:
1102         # We sort here to enable predictable ordering for the testing porpoises
1103         for my $msg (sort keys %$mesghash)
1104         {
1105           my $qunklist = $mesghash->{$msg};
1106
1107           ## MJP: 19.xii.01 : Exclude @ignore_tags
1108           for my $ignore_tag (keys %ignore_tags) {
1109             next FOOBIE
1110               if grep($_ eq $ignore_tag, map(@{$_->{tags}},
1111                                              grep(defined $_->{tags},
1112                                                   @$qunklist)));
1113           }
1114           ## MJP: 19.xii.01 : End exclude @ignore_tags
1115
1116           # show only files with tag --show-tag $show_tag
1117           if ( keys %show_tags ) {
1118             next FOOBIE
1119               if !grep(exists $show_tags{$_}, map(@{$_->{tags}},
1120                                                   grep(defined $_->{tags},
1121                                                        @$qunklist)));
1122           }
1123
1124           my $files               = $self->pretty_file_list($qunklist);
1125           my $header_line;          # date and author
1126           my $wholething;           # $header_line + $body
1127
1128           my $date = $self->fdatetime($time);
1129           $header_line = $self->header_line($time, $author, $lastdate);
1130           $lastdate = $date;
1131
1132           $Text::Wrap::huge = 'overflow'
1133             if $Text::Wrap::VERSION >= 2001.0130;
1134           # Reshape the body according to user preferences.
1135           my $body = $self->format_body($msg, $files, $qunklist);
1136
1137           $body =~ s/[ \t]+\n/\n/g;
1138           $wholething = $header_line . $body;
1139
1140           # One last check: make sure it passes the regexp test, if the
1141           # user asked for that.  We have to do it here, so that the
1142           # test can match against information in the header as well
1143           # as in the text of the log message.
1144
1145           # How annoying to duplicate so much code just because I
1146           # can't figure out a way to evaluate scalars on the trailing
1147           # operator portion of a regular expression.  Grrr.
1148           if ($Case_Insensitive) {
1149             unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/oi ) ) {
1150               $self->output_entry(\*LOG_OUT, $wholething);
1151             }
1152           }
1153           else {
1154             unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/o ) ) {
1155               $self->output_entry(\*LOG_OUT, $wholething);
1156             }
1157           }
1158         }
1159       }
1160     }
1161
1162     $self->output_footer(\*LOG_OUT);
1163
1164     close (LOG_OUT);
1165
1166     if ( ! $Output_To_Stdout ) {
1167       # If accumulating, append old data to new before renaming.  But
1168       # don't append the most recent entry, since it's already in the
1169       # new log due to CVS's idiosyncratic interpretation of "log -d".
1170       if ($Cumulative && -f $logfile_here) {
1171         open NEW_LOG, ">>$tmpfile"
1172           or die "trouble appending to $tmpfile ($!)";
1173
1174         open OLD_LOG, "<$logfile_here"
1175           or die "trouble reading from $logfile_here ($!)";
1176
1177         my $started_first_entry = 0;
1178         my $passed_first_entry = 0;
1179         while (<OLD_LOG>) {
1180           if ( ! $passed_first_entry ) {
1181             if ( ( ! $started_first_entry )
1182                 and /^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/ ) {
1183               $started_first_entry = 1;
1184             } elsif ( /^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/ ) {
1185               $passed_first_entry = 1;
1186               print NEW_LOG $_;
1187             }
1188           } else {
1189             print NEW_LOG $_;
1190           }
1191         }
1192
1193         close NEW_LOG;
1194         close OLD_LOG;
1195       }
1196
1197       if ( -f $logfile_here ) {
1198         rename $logfile_here, $logfile_bak;
1199       }
1200       rename $tmpfile, $logfile_here;
1201     }
1202   }
1203 }
1204
1205 # -------------------------------------
1206
# Don't call this wrap, because with 5.5.3, that clashes with the
# (unconditional :-( ) export of wrap() from Text::Wrap
#
# Wrap @text with Text::Wrap::wrap, using $indent1 for the first line
# and $indent2 for the rest.  If any piece of @text has a line that
# begins with whitespace, the wrapped text is returned as it is.
# Otherwise any extra whitespace directly after the indent of each line
# is removed.  The result ends in a newline only if the wrapped text
# did.
sub mywrap {
  my $self = shift;
  my ($indent1, $indent2, @text) = @_;
  my $text = Text::Wrap::wrap($indent1, $indent2, @text);
  # If incoming text looks preformatted, don't get clever
  if ( grep /^\s+/m, @text ) {
    return $text;
  }
  my @lines = split /\n/, $text;
  # Rewrite each leading run of eight spaces in the continuation
  # indent as one tab before matching it against the wrapped lines.
  $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e;
  # The indents are literal strings, not patterns: quote them so that
  # an indent such as "* " or "[ " is not read as regex syntax.
  $lines[0] =~ s/^\Q$indent1\E\s+/$indent1/;
  s/^\Q$indent2\E\s+/$indent2/
    for @lines[1..$#lines];
  my $newtext = join "\n", @lines;
  $newtext .= "\n"
    if substr($text, -1) eq "\n";
  return $newtext;
}
1227
1228 # -------------------------------------
1229
# Normalise line endings and blank lines in a log message and return
# the cleaned copy.
sub preprocess_msg_text {
  my ($self, $text) = @_;

  for ($text) {
    # DOS-style CRLF pairs become plain newlines.
    s/\r\n/\n/g;
    # Two newlines with only whitespace between them become exactly
    # two newlines.
    s/\n\s*\n/\n\n/g;
  }

  return $text;
}
1241
1242 # -------------------------------------
1243
# Return the length of the last line of $files_list.  Trailing empty
# lines do not count, because split drops trailing empty fields.
sub last_line_len {
  my ($self, $files_list) = @_;

  my $final_line = (split /\n/, $files_list)[-1];
  return length $final_line;
}
1252
1253 # -------------------------------------
1254
# A custom wrap function, sensitive to some common constructs used in
# log entries.
#
# Arguments (after the invocant), in order:
#   $text             - the log message to wrap
#   $left_pad_str     - string written after every line break inserted
#                       here
#   $length_remaining - room left on the line the text starts on
#   $max_line_length  - room available on a fresh line
# Neither length takes $left_pad_str into account.
#
# Returns the wrapped text, which always ends in a newline.  Also reads
# the file-level $After_Header setting.
sub wrap_log_entry {
  my $self = shift;

  my $text = shift;                  # The text to wrap.
  my $left_pad_str = shift;          # String to pad with on the left.

  # These do NOT take left_pad_str into account:
  my $length_remaining = shift;      # Amount left on current line.
  my $max_line_length  = shift;      # Amount left for a blank line.

  my $wrapped_text = '';             # The accumulating wrapped entry.
  my $user_indent = '';              # Inherited user_indent from prev line.

  my $first_time = 1;                # First iteration of the loop?
  my $suppress_line_start_match = 0; # Set to disable line start checks.

  my @lines = split (/\n/, $text);
  # The body pushes pieces of lines back onto the front of @lines, so
  # the queue has to be re-examined on every pass.
  while (@lines)   # Don't use `foreach' here, it won't work.
  {
    my $this_line = shift (@lines);
    chomp $this_line;

    # Remember this line's own leading whitespace, if it has any.
    if ($this_line =~ /^(\s+)/) {
      $user_indent = $1;
    }
    else {
      $user_indent = '';
    }

    # If it matches any of the line-start regexps, print a newline now...
    if ($suppress_line_start_match)
    {
      # The flag only covers one line; clear it and skip the checks.
      $suppress_line_start_match = 0;
    }
    elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
           || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
           || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
           || ($this_line =~ /^(\s+)(\S+)/)
           || ($this_line =~ /^(\s*)- +/)
           || ($this_line =~ /^()\s*$/)
           || ($this_line =~ /^(\s*)\*\) +/)
           || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
    {
      # Make a line break immediately, unless header separator is set
      # and this line is the first line in the entry, in which case
      # we're getting the blank line for free already and shouldn't
      # add an extra one.
      unless (($After_Header ne " ") and ($first_time))
      {
        # A blank line gets a second break, and the line after it is
        # exempt from the line-start checks.
        if ($this_line =~ /^()\s*$/) {
          $suppress_line_start_match = 1;
          $wrapped_text .= "\n${left_pad_str}";
        }

        $wrapped_text .= "\n${left_pad_str}";
      }

      $length_remaining = $max_line_length - (length ($user_indent));
    }

    # Now that any user_indent has been preserved, strip off leading
    # whitespace, so up-folding has no ugly side-effects.
    $this_line =~ s/^\s*//;

    # Accumulate the line, and adjust parameters for next line.
    my $this_len = length ($this_line);
    if ($this_len == 0)
    {
      # Blank lines should cancel any user_indent level.
      $user_indent = '';
      $length_remaining = $max_line_length;
    }
    elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
    {
      # Walk backwards from the end.  At first acceptable spot, break
      # a new line.
      my $idx = $length_remaining - 1;
      if ($idx < 0) { $idx = 0 };
      while ($idx > 0)
      {
        if (substr ($this_line, $idx, 1) =~ /\s/)
        {
          my $line_now = substr ($this_line, 0, $idx);
          my $next_line = substr ($this_line, $idx);
          $this_line = $line_now;

          # chomp removes a trailing newline only; other trailing
          # whitespace is left in place.
          chomp $this_line;

          # The current line is ready to be printed.
          $this_line .= "\n${left_pad_str}";

          # Make sure the next line is allowed full room.
          $length_remaining = $max_line_length - (length ($user_indent));

          # Strip next_line, but then preserve any user_indent.
          $next_line =~ s/^\s*//;

          # Sneak a peek at the user_indent of the upcoming line, so
          # $next_line (which will now precede it) can inherit that
          # indent level.  Otherwise, use whatever user_indent level
          # we currently have, which might be none.
          my $next_next_line = shift (@lines);
          if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
            $next_line = $1 . $next_line if (defined ($1));
            # $length_remaining = $max_line_length - (length ($1));
            $next_next_line =~ s/^\s*//;
          }
          else {
            $next_line = $user_indent . $next_line;
          }
          # Put the peeked line back, then queue the remainder of the
          # current line in front of it.
          if (defined ($next_next_line)) {
            unshift (@lines, $next_next_line);
          }
          unshift (@lines, $next_line);

          # Our new next line might, coincidentally, begin with one of
          # the line-start regexps, so we temporarily turn off
          # sensitivity to that until we're past the line.
          $suppress_line_start_match = 1;

          last;
        }
        else
        {
          $idx--;
        }
      }

      if ($idx == 0)
      {
        # We bottomed out because the line is longer than the
        # available space.  But that could be because the space is
        # small, or because the line is longer than even the maximum
        # possible space.  Handle both cases below.

        if ($length_remaining == ($max_line_length - (length ($user_indent))))
        {
          # The line is simply too long -- there is no hope of ever
          # breaking it nicely, so just insert it verbatim, with
          # appropriate padding.
          $this_line = "\n${left_pad_str}${this_line}";
        }
        else
        {
          # Can't break it here, but may be able to on the next round...
          unshift (@lines, $this_line);
          $length_remaining = $max_line_length - (length ($user_indent));
          $this_line = "\n${left_pad_str}";
        }
      }
    }
    else  # $this_len < $length_remaining, so tack on what we can.
    {
      # Leave a note for the next iteration.
      $length_remaining = $length_remaining - $this_len;

      # A line ending in a full stop is followed by two spaces, any
      # other line by one.
      if ($this_line =~ /\.$/)
      {
        $this_line .= "  ";
        $length_remaining -= 2;
      }
      else  # not a sentence end
      {
        $this_line .= " ";
        $length_remaining -= 1;
      }
    }

    # Unconditionally indicate that loop has run at least once.
    $first_time = 0;

    $wrapped_text .= "${user_indent}${this_line}";
  }

  # One last bit of padding.
  $wrapped_text .= "\n";

  return $wrapped_text;
}
1437
1438 # -------------------------------------
1439
# Shared groundwork for the pretty_file_list implementations.
#
# Arguments (after the invocant):
#   $unanimous_tags     - hashref, filled with tags carried by every
#                         surviving qunk (only when there are several)
#   $non_unanimous_tags - hashref, filled with all other tags seen
#   $all_branches       - hashref, filled with every branch name seen
#   $qunksref           - arrayref of the qunks for one log message
#
# Qunks carrying a tag listed in %ignore_tags are dropped.  When
# %show_tags is not empty, so are qunks carrying none of those tags.
#
# Returns two values: the directory prefix common to the surviving
# qunks ('' if there is none or it is not wanted, undef if no qunk
# survived) and a reference to the list of surviving qunks.
sub _pretty_file_list {
  my ($self, $unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref)
    = @_;

  # Apply the ignore/show tag filters.
  my @kept;
  foreach my $candidate (@$qunksref) {
    if ( $candidate->tags_exists ) {
      next
        if grep { exists $ignore_tags{$_} } @{ $candidate->tags };
    }
    if ( keys %show_tags ) {
      next
        unless $candidate->tags_exists;
      next
        unless grep { exists $show_tags{$_} } @{ $candidate->tags };
    }
    push @kept, $candidate;
  }

  my $several = ( @kept > 1 );
  my $common_dir;           # Dir prefix common to all files ('' if none)

  # One pass over the survivors: work out the common directory and
  # collect branch and tag names.  Every tag starts out in
  # $non_unanimous_tags; the unanimous ones are moved afterwards.
  foreach my $qunk (@kept) {
    # Mentioning a shared directory once makes the output more compact,
    # but that only makes sense for more than one file.
    if ( $Common_Dir and $several ) {
      if ( ! defined $common_dir ) {
        # First file: its directory is the starting candidate.
        my (undef, $dir) = fileparse($qunk->filename);
        my $no_dir = ( ! defined $dir    # sheer paranoia
                       or $dir eq ''
                       or $dir eq "./"
                       or $dir eq ".\\" );
        $common_dir = $no_dir ? '' : $dir;
      }
      elsif ( $common_dir ne '' ) {
        # Keep as much of the current prefix as this file shares.
        $common_dir
          = &main::common_path_prefix ($qunk->filename, $common_dir);
      }
    }
    else {
      $common_dir = '';
    }

    my $branch = $qunk->branch;
    $all_branches->{$branch} = 1
      if defined $branch;

    my $tags = $qunk->tags;
    if ( defined $tags ) {
      $non_unanimous_tags->{$_} = 1
        for @$tags;
    }
  }

  # A tag held by every qunk is printed specially, but only when there
  # are several qunks to begin with.
  if ( $several ) {
    foreach my $tag (keys %$non_unanimous_tags) {
      my $missing = 0;
      foreach my $qunk (@kept) {
        my $tags = $qunk->tags;
        $missing++
          unless defined $tags and grep { $_ eq $tag } @$tags;
      }
      next
        if $missing;

      $unanimous_tags->{$tag} = 1;
      delete $non_unanimous_tags->{$tag};
    }
  }

  return $common_dir, \@kept;
}
1530
1531 # -------------------------------------
1532
# Format a date, plus the time of day when $Show_Times is set.
#
# Called with one argument it takes an epoch time, which is broken down
# with gmtime or localtime according to $UTC_Times.  Called with more
# it takes the fields directly, in the order
#   ($year, $mday, $mon, $wday, $hour, $min).
sub fdatetime {
  my ($self, @args) = @_;

  my ($year, $mday, $mon, $wday, $hour, $min);

  if ( @args <= 1 ) {
    my @parts = $UTC_Times ? gmtime($args[0]) : localtime($args[0]);
    ($min, $hour, $mday, $mon, $year, $wday) = @parts[1 .. 6];

    $year += 1900;
    $mon  += 1;
    $wday  = $self->wday($wday);
  } else {
    ($year, $mday, $mon, $wday, $hour, $min) = @args;
  }

  my $fdate = $self->fdate($year, $mon, $mday, $wday);

  return $fdate
    unless $Show_Times;

  return $fdate . ' ' . $self->ftime($hour, $min);
}
1559
1560 # -------------------------------------
1561
# Format a date as "YYYY-MM-DD" followed directly by the weekday text.
#
# Called with one argument it takes an epoch time, which is broken down
# with gmtime or localtime according to $UTC_Times and whose weekday is
# passed through $self->wday.  Called with more it takes
#   ($year, $mon, $mday, $wday)
# and uses them as given.
sub fdate {
  my ($self, @args) = @_;

  my ($year, $mday, $mon, $wday);

  if ( @args <= 1 ) {
    my @parts = $UTC_Times ? gmtime($args[0]) : localtime($args[0]);
    ($mday, $mon, $year, $wday) = @parts[3 .. 6];

    $year += 1900;
    $mon  += 1;
    $wday  = $self->wday($wday);
  } else {
    ($year, $mon, $mday, $wday) = @args;
  }

  return sprintf '%4u-%02u-%02u%s', $year, $mon, $mday, $wday;
}
1581
1582 # -------------------------------------
1583
# Format a time of day as "HH:MM".
#
# Called with one argument it takes an epoch time, which is broken down
# with gmtime or localtime according to $UTC_Times.  Called with more
# it takes ($hour, $min) directly.
sub ftime {
  my ($self, @args) = @_;

  my ($hour, $min);

  if ( @args <= 1 ) {
    my @parts = $UTC_Times ? gmtime($args[0]) : localtime($args[0]);
    ($min, $hour) = @parts[1, 2];
  } else {
    ($hour, $min) = @args;
  }

  return sprintf '%02u:%02u', $hour, $min;
}
1598
1599 # ----------------------------------------------------------------------------
1600
1601 package CVS::Utils::ChangeLog::Message;
1602
# Constructor: remember the log message text and start with an empty
# list of file entries.
sub new {
  my ($class, $msg) = @_;

  return bless { msg => $msg, files => [] }, $class;
}
1611
# Append one file entry to this message's file list.
#
# Dies with "Not a fileentry: ..." unless $fileentry is a
# CVS::Utils::ChangeLog::FileEntry (or a subclass of it).  Returns
# what push returns.
sub add_fileentry {
  my $self = shift;
  my ($fileentry) = @_;

  # Calling ->isa on undef or on an unblessed reference is itself
  # fatal.  Trap that so every bad argument gets the same diagnostic.
  my $is_fileentry = do {
    local $@;
    eval { $fileentry->isa('CVS::Utils::ChangeLog::FileEntry') };
  };
  die "Not a fileentry: $fileentry"
    unless $is_fileentry;

  push @{$self->{files}}, $fileentry;
}
1621
# The file entries of this message: as a list in list context,
# otherwise as the underlying array reference itself.
sub files {
  my ($self) = @_;

  my $entries = $self->{files};
  return wantarray ? @$entries : $entries;
}
1623
1624 # ----------------------------------------------------------------------------
1625
1626 package CVS::Utils::ChangeLog::FileEntry;
1627
1628 # Each revision of a file has a little data structure (a `qunk')
1629 # associated with it.  That data structure holds not only the
1630 # file's name, but any additional information about the file
1631 # that might be needed in the output, such as the revision
1632 # number, tags, branches, etc.  The reason to have these things
1633 # arranged in a data structure, instead of just appending them
1634 # textually to the file's name, is that we may want to do a
1635 # little rearranging later as we write the output.  For example,
1636 # all the files on a given tag/branch will go together, followed
1637 # by the tag in parentheses (so trunk or otherwise non-tagged
1638 # files would go at the end of the file list for a given log
1639 # message).  This rearrangement is a lot easier to do if we
1640 # don't have to reparse the text.
1641 #
1642 # A qunk looks like this:
1643 #
1644 #   {
1645 #     filename    =>    "hello.c",
1646 #     revision    =>    "1.4.3.2",
1647 #     time        =>    a timegm() return value (moment of commit)
1648 #     tags        =>    [ "tag1", "tag2", ... ],
1649 #     branch      =>    "branchname" # There should be only one, right?
1650 #     roots       =>    [ "branchtag1", "branchtag2", ... ]
1651 #   }
1652
# Single top-level ChangeLog, or one per subdirectory?
my $distributed;

# Class-level accessor for the flag above.  Called with only the
# invocant it returns the current value.  Any other argument count
# stores $_[1] and returns it; that includes a call with no arguments
# at all, which therefore resets the flag to undef.
sub distributed { $#_ ? ($distributed = $_[1]) : $distributed; }

# Build the qunk for one revision of one file.
#
# Arguments (after the class):
#   $path, $time, $revision, $state, $lines - stored as given; $path is
#                     split into file name and directory key when the
#                     distributed flag is set
#   $branch_names   - hashref, branch revision prefix => branch name
#   $branch_roots   - arrayref of branch numbers rooted at this revision
#   $symbolic_names - hashref, revision => arrayref of tags.  The entry
#                     for $revision is removed from it and becomes this
#                     qunk's tags.
sub new {
  my $class = shift;
  my ($path, $time, $revision, $state, $lines,
      $branch_names, $branch_roots, $symbolic_names) = @_;

  my %self = (time     => $time,
              revision => $revision,
              state    => $state,
              lines    => $lines,
             );

  if ( $distributed ) {
    # Fully qualified on purpose: fileparse is not imported into this
    # package, so the bare name would be an undefined subroutine.
    @self{qw(filename dir_key)} = File::Basename::fileparse($path);
  } else {
    @self{qw(filename dir_key)} = ($path, './');
  }

  # Grab the branch, even though we may or may not need it.  The prefix
  # is the revision number minus its last component.
  my ($branch_prefix) = ($revision =~ /((?:\d+\.)+)\d+/);
  if ( defined $branch_prefix ) {
    $branch_prefix =~ s/\.$//;
    $self{branch} = $branch_names->{$branch_prefix}
      if $branch_names->{$branch_prefix};
  }

  # If there's anything in the @branch_roots array, then this
  # revision is the root of at least one branch.  We'll display
  # them as branch names instead of revision numbers, the
  # substitution for which is done directly in the array:
  $self{'roots'} = [ map { $branch_names->{$_} } @$branch_roots ]
    if @$branch_roots;

  if ( exists $symbolic_names->{$revision} ) {
    $self{tags} = delete $symbolic_names->{$revision};
    &main::delta_check($time, $self{tags});
  }

  return bless \%self, $class;
}
1694
# Read-only accessors, one per field of the qunk hash.
sub filename    { my ($self) = @_; return $self->{filename}; }
sub dir_key     { my ($self) = @_; return $self->{dir_key};  }
sub revision    { my ($self) = @_; return $self->{revision}; }
sub branch      { my ($self) = @_; return $self->{branch};   }
sub state       { my ($self) = @_; return $self->{state};    }
sub lines       { my ($self) = @_; return $self->{lines};    }
sub roots       { my ($self) = @_; return $self->{roots};    }

sub tags        { my ($self) = @_; return $self->{tags};     }

# True when the qunk has a 'tags' key at all, whatever its value.
sub tags_exists {
  my ($self) = @_;
  return exists $self->{tags};
}
1707
1708 # This may someday be used in a more sophisticated calculation of what other
1709 # files are involved in this commit.  For now, we don't use it much except for
1710 # delta mode, because the common-commit-detection algorithm is hypothesized to
1711 # be "good enough" as it stands.
sub time {
  my ($self) = @_;
  return $self->{time};
}
1713
1714 package main;
1715
1716 # Subrs ----------------------------------------------------------------------
1717
# In delta mode, track the latest commit time seen for the starting tag
# ($Delta_From) and for the ending tag ($Delta_To); output is later
# restricted to that timeframe.  Does nothing outside delta mode.
#
#   $time - commit time of the revision
#   $tags - arrayref of the tags on that revision
sub delta_check {
  my ($time, $tags) = @_;

  return
    unless $Delta_Mode;

  if ( $time > $Delta_StartTime ) {
    $Delta_StartTime = $time
      if grep { $_ eq $Delta_From } @$tags;
  }

  if ( $time > $Delta_EndTime ) {
    $Delta_EndTime = $time
      if grep { $_ eq $Delta_To } @$tags;
  }
}
1733
# Run an external command and capture its stdout and stderr together.
#
#   $cmd - a command string, or an arrayref of words that are joined
#          with single spaces
#
# Returns ($output, $exit, $sig, $core), the last three taken from $?:
# the exit code ($? >> 8), the signal number ($? & 127) and the
# core-dump bit ($? & 128).
sub run_ext {
  my ($cmd) = @_;

  my @words        = ref $cmd ? @$cmd : ($cmd);
  my $command_line = join ' ', @words;

  my $out    = qx"$command_line 2>&1";
  my $status = $?;

  return $out, $status >> 8, $status & 127, $status & 128;
}
1744
1745 # -------------------------------------
1746
# If accumulating, grab the boundary date from pre-existing ChangeLog.
#
# Returns '' unless $Cumulative is set and $Update is not.  Otherwise
# returns the first "YYYY-MM-DD HH:MM" stamp found at the start of a
# line in $Log_File_Name, or undef if the file has none.  With
# $UTC_Times the stamp is read as UTC and returned in local time.
# Dies if the file cannot be opened.
sub maybe_grab_accumulation_date {
  if (! $Cumulative || $Update) {
    return '';
  }

  # else

  # Explicit read mode, as for every other file this script reads: a
  # name ending in '|' must not be run as a command.
  open (LOG, "<$Log_File_Name")
      or die ("trouble opening $Log_File_Name for reading ($!)");

  my $boundary_date;
  while (<LOG>)
  {
    if (/^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/)
    {
      $boundary_date = "$1";
      last;
    }
  }

  close (LOG);

  # convert time from utc to local timezone if the ChangeLog has
  # dates/times in utc
  if ($UTC_Times && $boundary_date)
  {
    # convert the utc time to a time value
    my ($year,$mon,$mday,$hour,$min) = $boundary_date =~
      m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#;
    my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900);
    # print the timevalue in the local timezone
    my ($ignore,$wday);
    ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time);
    $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u",
                            $year+1900,$mon+1,$mday,$hour,$min);
  }

  return $boundary_date;
}
1787
1788 # -------------------------------------
1789
# Build the table that maps CVS user names to the text printed for
# them, and return it as a hash (a flat list of username => expansion).
#
# Two optional sources are read, in this order, so the second wins on
# a clash:
#   $User_Map_File    - lines of "username:expansion".  A value of the
#                       form "host:path" that is not an existing local
#                       file is fetched by running `cat` on the remote
#                       host through $CVS_RSH (default ssh).
#   $User_Passwd_File - a passwd-format file.  The expansion is the
#                       first comma-separated gecos field plus
#                       <username@$Domain>.
# Dies if either file cannot be opened, or if a passwd file is given
# and no mail domain can be determined.
sub maybe_read_user_map_file {
  my %expansions;
  my $User_Map_Input;

  if ($User_Map_File)
  {
    if ( $User_Map_File =~ m{^([-\w\@+=.,\/]+):([-\w\@+=.,\/:]+)} and
         !-f $User_Map_File )
    {
      my $rsh = (exists $ENV{'CVS_RSH'} ? $ENV{'CVS_RSH'} : 'ssh');
      $User_Map_Input = "$rsh $1 'cat $2' |";
      &debug ("(run \"${User_Map_Input}\")\n");
    }
    else
    {
      $User_Map_Input = "<$User_Map_File";
    }

    open (MAPFILE, $User_Map_Input)
        or die ("Unable to open $User_Map_File ($!)");

    while (<MAPFILE>)
    {
      next if /^\s*#/;  # Skip comment lines.
      next if not /:/;  # Skip lines without colons.

      # Split on the first colon only, so that an expansion which
      # itself contains a colon is kept whole instead of being cut off.
      my ($username, $expansion) = split /:/, $_, 2;
      chomp $expansion;
      $expansion =~ s/^'(.*)'$/$1/;
      $expansion =~ s/^"(.*)"$/$1/;

      # If it looks like the expansion has a real name already, then
      # we toss the username we got from CVS log.  Otherwise, keep
      # it to use in combination with the email address.

      if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
        # Also, add angle brackets if none present
        if (! ($expansion =~ /<\S+@\S+>/)) {
          $expansions{$username} = "$username <$expansion>";
        }
        else {
          $expansions{$username} = "$username $expansion";
        }
      }
      else {
        $expansions{$username} = $expansion;
      }
    } # end of while (<MAPFILE>)

    close (MAPFILE);
  }

  if (defined $User_Passwd_File)
  {
    # Work out the mail domain if none was given: first the MAILNAME
    # file, then the first helper command that succeeds with output.
    if ( ! defined $Domain ) {
      if ( -e MAILNAME ) {
        chomp($Domain = slurp_file(MAILNAME));
      } else {
      MAILDOMAIN_CMD:
        for ([qw(hostname -d)], 'dnsdomainname', 'domainname') {
          my ($text, $exit, $sig, $core) = run_ext($_);
          if ( $exit == 0 && $sig == 0 && $core == 0 ) {
            chomp $text;
            if ( length $text ) {
              $Domain = $text;
              last MAILDOMAIN_CMD;
            }
          }
        }
      }
    }

    die "No mail domain found\n"
      unless defined $Domain;

    open (MAPFILE, "<$User_Passwd_File")
        or die ("Unable to open $User_Passwd_File ($!)");
    while (<MAPFILE>)
    {
      # all lines are valid
      my ($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
      my $expansion = '';
      ($expansion) = split (',', $gecos)
        if defined $gecos && length $gecos;

      # An empty domain means bare user names are used as addresses.
      my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
      $expansions{$username} = "$expansion <$mailname>";
    }
    close (MAPFILE);
  }

  return %expansions;
}
1884
1885 # -------------------------------------
1886
# Extract the file path from one line of `cvs log` output.
#
# Recognises "Working file: PATH" and, when $RCS_Root is defined,
# "RCS file: $RCS_Root/PATH,v" (from which the first "Attic/" is
# removed).  Returns nothing for any other line, and nothing when the
# path contains one of the @Ignore_Files strings; otherwise returns the
# path.  With $Case_Insensitive the ignore test disregards case.
sub read_file_path {
  my ($line) = @_;

  my $path;

  if ( $line =~ /^Working file: (.*)/ ) {
    $path = $1;
  } elsif ( defined $RCS_Root
            and
            $line =~ m|^RCS file: $RCS_Root[/\\](.*),v$| ) {
    $path = $1;
    $path =~ s!Attic/!!;
  } else {
    return;
  }

  if ( @Ignore_Files ) {
    # Compare the case-folded path when asked to ignore case.  The
    # ignore strings are folded here too, so the test works whether or
    # not they were already lower-cased elsewhere.
    my $xpath = $Case_Insensitive ? lc($path) : $path;
    if ( grep index($xpath, $Case_Insensitive ? lc($_) : $_) > -1,
              @Ignore_Files ) {
      return;
    }
  }

  return $path;
}
1915
1916 # -------------------------------------
1917
# Parse one line from the "symbolic names:" section of `cvs log' output.
#
# Arguments:
#   $line           - the current line of log output.
#   $branch_names   - hashref, branch revision number => tag name; every
#                     branch tag seen is recorded here.
#   $branch_numbers - hashref, tag name => branch revision number; filled
#                     only for tags listed in @Follow_Branches.
#   $symbolic_names - hashref, revision => arrayref of (non-branch) tag
#                     names.
#
# Returns 0 when $line starts with non-whitespace (the tag list is over),
# 1 otherwise.
sub read_symbolic_name {
  my ($line, $branch_names, $branch_numbers, $symbolic_names) = @_;

  # All tag names are listed with whitespace in front in cvs log
  # output; so if we see non-whitespace, then we're done collecting.
  # (Look at the argument, not at whatever $_ happens to hold.)
  return 0
    if $line =~ /^\S/;

  # We're looking at a tag name, so parse & store it.

  # According to the Cederqvist manual, in node "Tags", tag names must start
  # with an uppercase or lowercase letter and can contain uppercase and
  # lowercase letters, digits, `-', and `_'.  However, it's not our place to
  # enforce that, so we'll allow anything CVS hands us to be a tag:
  my ($tag_name, $tag_rev) = ($line =~ /^\s+([^:]+): ([\d.]+)$/);

  # Not a "TAG: REVISION" line (e.g. a blank line): store nothing, but stay
  # in tag-collecting mode.
  return 1
    unless defined $tag_rev;

  # A branch number either has an odd number of digit sections
  # (and hence an even number of dots), or has ".0." as the
  # second-to-last digit section.  Test for these conditions.
  my $real_branch_rev = '';
  if ( $tag_rev =~ /^(\d+\.\d+\.)+\d+$/             # Even number of dots...
       and
       $tag_rev !~ /^(1\.)+1$/ ) {                  # ...but not "1.[1.]1"
    $real_branch_rev = $tag_rev;
  } elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0\.(\d+)/) { # Has ".0."
    $real_branch_rev = $1 . $3;
  }

  # If we got a branch, record its number.
  if ( $real_branch_rev ) {
    $branch_names->{$real_branch_rev} = $tag_name;
    if ( @Follow_Branches ) {
      if ( grep $_ eq $tag_name, @Follow_Branches ) {
        $branch_numbers->{$tag_name} = $real_branch_rev;
      }
    }
  } else {
    # Else it's just a regular (non-branch) tag.
    push @{$symbolic_names->{$tag_rev}}, $tag_name;
  }

  return 1;
}
1962
1963 # -------------------------------------
1964
# Extract the revision number from a "revision X.Y..." line and decide
# whether that revision is wanted.
#
# Arguments:
#   $line           - the current line of log output.
#   $branch_numbers - hashref, branch tag name => branch revision number.
#
# Returns the revision number when
#   - no branches are being followed (@Follow_Branches is empty), or
#   - "trunk" is followed and the revision has exactly two sections, or
#   - the revision lies on a followed branch, or
#   - the revision is an ancestor of a followed branch and $No_Ancestors
#     is not set.
# Returns nothing for non-revision lines and for unwanted revisions.
sub read_revision {
  my ($line, $branch_numbers) = @_;

  my ($revision) = ( $line =~ /^revision (\d+\.[\d.]+)/ );

  return
    unless $revision;

  return $revision
    unless @Follow_Branches;

  foreach my $branch (@Follow_Branches) {
    # Special case for following trunk revisions
    return $revision
      if $branch =~ /^trunk$/i and $revision =~ /^[0-9]+\.[0-9]+$/;

    if ( my $branch_number = $branch_numbers->{$branch} ) {
      # Are we on one of the follow branches or an ancestor of same?

      # If this revision is a prefix of the branch number, or possibly is less
      # in the minormost number, OR if this branch number is a prefix of the
      # revision, then yes.  Otherwise, no.

      # So below, we determine if any of those conditions are met.

      # Trivial case: is this revision on the branch?  (Compare this way to
      # avoid regexps that screw up Emacs indentation, argh.)
      if ( substr($revision, 0, (length($branch_number) + 1))
           eq
           ($branch_number . ".") ) {
        return $revision;
      } elsif ( length($branch_number) > length($revision)
                and
                ! $No_Ancestors ) {
        # Non-trivial case: check if rev is ancestral to branch.  Ancestors
        # are wanted by default; --no-ancestors switches them off, so this
        # test must be skipped when $No_Ancestors is set.

        # r_left still has the trailing "."
        my ($r_left, $r_end) = ($revision =~ /^((?:\d+\.)+)(\d+)$/);

        # b_left still has trailing "."
        # b_mid has no trailing "."
        my ($b_left, $b_mid) = ($branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/);

        # Either pattern may fail on an odd-looking number; treat that as
        # "not an ancestor" rather than comparing undefined values.
        return $revision
          if defined $r_left and defined $b_left
             and $r_left eq $b_left and $r_end <= $b_mid;
      }
    }
  }

  # Else we are following branches, but this revision isn't on the
  # path.  So skip it.
  return;
}
2018
2019 # -------------------------------------
2020
{ # Closure over %gecos_warned
# Authors for whom the "no gecos info" warning was already printed, so that
# each author is reported only once.
my %gecos_warned;

# Parse a "date: ...;  author: ...;  state: ...;" log line and expand the
# author name.
#
# Arguments:
#   $line    - the "date:" line of a revision.
#   $usermap - hashref, username => replacement text for that author.
#
# Returns ($time, $author, $state, $lines) as produced by
# parse_date_author_and_state, except that $author is replaced by
#   - its $usermap entry, when that entry is defined and true; otherwise
#   - "Full Name  <email>" when $Domain is defined or $Gecos is 1 and the
#     password database yields a non-empty full name for the author.
sub read_date_author_and_state {
  my ($line, $usermap) = @_;

  my ($time, $author, $state, $lines) = parse_date_author_and_state($line);

  if ( defined($usermap->{$author}) and $usermap->{$author} ) {
    $author = $usermap->{$author};
  } elsif ( defined $Domain or $Gecos == 1 ) {
    my $email = $author;
    $email = $author."@".$Domain
      if defined $Domain && $Domain ne '';

    my $pw = getpwnam($author);
    my $fullname = '';
    if ( defined $pw ) {
      # Only the first GECOS field (the full name) is used.  The field may be
      # missing or empty, in which case split yields nothing; keep '' then
      # instead of letting $fullname become undefined.
      my $gecos = $pw->gecos;
      ($fullname) = split /\s*,\s*/, $gecos
        if defined $gecos;
      $fullname = ''
        unless defined $fullname;

      # By GECOS convention "&" stands for the capitalised login name.
      $fullname =~ s/&/ucfirst(lc($pw->name))/ge;
    } else {
      warn "Couldn't find gecos info for author '$author'\n"
        unless $gecos_warned{$author}++;
    }

    $author = $fullname . "  <" . $email . ">"
      if $fullname ne '';
  }

  return $time, $author, $state, $lines;
}
}
2055
2056 # -------------------------------------
2057
# Extract the branch numbers from a "branches:  1.5.2;  1.5.4;" log line.
#
# Arguments:
#   $line - the "branches:" line of a revision.
#
# Returns the list of branch numbers rooted at the revision, without the
# trivial branches of the form 1.1, 1.1.1, ...; returns nothing when
# $Show_Branches is off or the line holds no branch list.
sub read_branches {
  my ($line) = @_;

  # Ugh.  This really bothers me.  Suppose we see a log entry
  # like this:
  #
  #    ----------------------------
  #    revision 1.1
  #    date: 1999/10/17 03:07:38;  author: jrandom;  state: Exp;
  #    branches:  1.1.2;
  #    Intended first line of log message begins here.
  #    ----------------------------
  #
  # The question is, how we can tell the difference between that
  # log message and a *two*-line log message whose first line is
  #
  #    "branches:  1.1.2;"
  #
  # See the problem?  The output of "cvs log" is inherently
  # ambiguous.
  #
  # For now, we punt: we liberally assume that people don't
  # write log messages like that, and just toss a "branches:"
  # line if we see it but are not showing branches.  I hope no
  # one ever loses real log data because of this.
  return
    unless $Show_Branches;

  # Parse the argument itself rather than trusting that $1 still holds the
  # caller's capture.
  my ($lst) = ( $line =~ /^branches:\s+(.*);$/ );
  return
    unless defined $lst;

  my @branches;
  foreach my $branch ( split /;/, $lst ) {
    $branch =~ s/^\s+//;
    $branch =~ s/\s+$//;

    next
      if $branch eq '';

    # Ignore the trivial branch 1.1.1.  The whole entry must match, so that
    # e.g. 1.11.1 is not truncated.
    next
      if $branch =~ /^(?:1\.)+1$/;

    push @branches, $branch;
  }

  return @branches;
}
2095
2096 # -------------------------------------
2097
# Read `cvs log' output and collect every revision's log message.
#
# Arguments:
#   $command - arrayref with the log command and its arguments.  Unless
#              $Input_From_Stdin is set, the elements are joined with spaces
#              and the result is run through a pipe; otherwise the log is
#              read from standard input and $command is not used.
#
# Returns the CVS::Utils::ChangeLog::EntrySet object, filled as
#   dir_key => author => time => message text => Message object
# where each Message object has had the matching FileEntry objects added.
#
# Dies when the log source cannot be opened or when closing it fails.
#
# The loop is a line-driven state machine: which of $file_full_path,
# $revision and $time are defined tells it what it is looking for next.
sub read_changelog {
  my ($command) = @_;

  my $grand_poobah = CVS::Utils::ChangeLog::EntrySet->new;

  my $file_full_path;
  my $detected_file_separator;
  my $author;
  my $revision;
  my $time;
  my $state;
  my $lines;
  my $msg_txt;

  # We might be expanding usernames
  my %usermap = maybe_read_user_map_file;

  # In general, it's probably not very maintainable to use state
  # variables like this to tell the loop what it's doing at any given
  # moment, but this is only the first one, and if we never have more
  # than a few of these, it's okay.
  my $collecting_symbolic_names = 0;
  my %symbolic_names;    # Where tag names get stored.
  my %branch_names;      # We'll grab branch names while we're at it.
  my %branch_numbers;    # Save some revisions for @Follow_Branches
  my @branch_roots;      # For showing which files are branch ancestors.

  if (! $Input_From_Stdin) {
    # The command is deliberately run as a single string: its arguments were
    # quoted for the shell when the command was assembled.
    my $Log_Source_Command = join(' ', @$command);
    &debug ("(run \"${Log_Source_Command}\")\n");
    open (LOG_SOURCE, "$Log_Source_Command |")
        or die "unable to run \"${Log_Source_Command}\" ($!)";
  }
  else {
    open (LOG_SOURCE, "-") or die "unable to open stdin for reading ($!)";
  }

  binmode LOG_SOURCE;

 XX_Log_Source:
  while (<LOG_SOURCE>) {
    # Canonicalize line endings
    s/\r$//;

    # If on a new file and don't see filename, skip until we find it, and
    # when we find it, grab it.
    if ( ! defined $file_full_path ) {
      $file_full_path = read_file_path($_);
      next XX_Log_Source;
    } elsif ( /^symbolic names:$/ ) {
      # Collect tag names in case we're asked to print them in the output.
      $collecting_symbolic_names = 1;
      next XX_Log_Source;  # There's no more info on this line, so skip to next
    } elsif ($collecting_symbolic_names) {
      # read_symbolic_name returns 0 once the tag list is over.
      $collecting_symbolic_names =
        read_symbolic_name($_,
                           \(%branch_names, %branch_numbers, %symbolic_names));
      next XX_Log_Source;
    }

    # If have file name, but not revision, and see revision, then grab
    # it.  (We collect unconditionally, even though we may or may not
    # ever use it.)
    if ( ( ! defined $revision) ) {
      $revision = read_revision($_, \%branch_numbers);
      # This breaks, because files with no messages don't get to call clear
      # and so the file picks up messages from the next file in sequence
      #      next XX_Log_Source;
    }

    # If we don't have a revision right now, we couldn't possibly
    # be looking at anything useful.
    if (! (defined ($revision))) {
      $detected_file_separator = /^$file_separator$/o;
      if ($detected_file_separator) {
        # No revisions for this file; can happen, e.g. "cvs log -d DATE"
        goto XX_Clear;
      }
      else {
        next XX_Log_Source;
      }
    }

    # If have file name but not date and author, and see date or
    # author, then grab them:
    unless (defined $time) {
      if (/^date: .*/) {
        ($time, $author, $state, $lines) =
          read_date_author_and_state($_, \%usermap);
      } else {
        $detected_file_separator = /^$file_separator$/o;
        goto XX_Clear
          # No revisions for this file; can happen, e.g. "cvs log -d DATE"
          if $detected_file_separator;
      }

      # If the date/time/author hasn't been found yet, we couldn't
      # possibly care about anything we see.  So skip:
      next XX_Log_Source;
    }

    # A "branches: ..." line here indicates that one or more branches
    # are rooted at this revision.  If we're showing branches, then we
    # want to show that fact as well, so we collect all the branches
    # that this is the latest ancestor of and store them in
    # @branch_roots.  Just for reference, the format of the line we're
    # seeing at this point is:
    #
    #    branches:  1.5.2;  1.5.4;  ...;
    #
    # Okay, here goes:
    if ( /^branches:\s+(.*);$/ ) {
      @branch_roots = read_branches($_);
      next XX_Log_Source;
    }

    # If have file name, time, and author, then we're just grabbing
    # log message texts:
    $detected_file_separator = /^$file_separator$/o;
    if ($detected_file_separator && ! (defined $revision)) {
      # No revisions for this file; can happen, e.g. "cvs log -d DATE"
      goto XX_Clear;
    }
    unless ($detected_file_separator || /^$logmsg_separator$/o)
    {
      $msg_txt .= $_;   # Normally, just accumulate the message...
      next XX_Log_Source;
    }
    # ... until a msg separator is encountered:
    # Ensure the message contains something:
    if ((! $msg_txt)
        || ($msg_txt =~ /^\s*\.\s*$|^\s*$/)
        || ($msg_txt =~ /\*\*\* empty log message \*\*\*/))
    {
      if ($Prune_Empty_Msgs) {
        goto XX_Clear;
      }
      # else
      $msg_txt = "[no log message]\n";
    }

    ### Store it all in the Grand Poobah:
    {
      my $qunk = CVS::Utils::ChangeLog::FileEntry->new($file_full_path, $time, $revision,
                                                   $state, $lines,
                                                   \%branch_names, \@branch_roots,
                                                   \%symbolic_names);

      # We might be including revision numbers and/or tags and/or
      # branch names in the output.  Most of the code from here to
      # loop-end deals with organizing these in qunk.

      unless ( $Hide_Branch_Additions
               and
               $msg_txt =~ /file .+ was initially added on branch \S+./ ) {
        # Add this file to the list
        # (We use many spoonfuls of autovivication magic. Hashes and arrays
        # will spring into existence if they aren't there already.)

        &debug ("(pushing log msg for ". $qunk->dir_key . $qunk->filename . ")\n");

        # Store with the files in this commit.  Later we'll loop through
        # again, making sure that revisions with the same log message
        # and nearby commit times are grouped together as one commit.
        $grand_poobah->{$qunk->dir_key}{$author}{$time}{$msg_txt} =
          CVS::Utils::ChangeLog::Message->new($msg_txt)
              unless exists $grand_poobah->{$qunk->dir_key}{$author}{$time}{$msg_txt};
        $grand_poobah->{$qunk->dir_key}{$author}{$time}{$msg_txt}->add_fileentry($qunk);
      }
    }

  XX_Clear:
    # Make way for the next message
    undef $msg_txt;
    undef $time;
    undef $revision;
    undef $author;
    undef @branch_roots;

    # Maybe even make way for the next file:
    if ($detected_file_separator) {
      undef $file_full_path;
      undef %branch_names;
      undef %branch_numbers;
      undef %symbolic_names;
    }
  }

  # For a piped command, close also reports the exit status of the command.
  close LOG_SOURCE
    or die sprintf("Problem reading log input (exit/signal/core: %d/%d/%d)\n",
                   $? >> 8, $? & 127, $? & 128);

  return $grand_poobah;
}
2292
2293 # -------------------------------------
2294
# Produce the ChangeLog: read the log with read_changelog and have the
# resulting entry set write itself out.
#
# Arguments:
#   $command - arrayref with the log command and its arguments; modified in
#              place when an accumulation date is found.
sub derive_changelog {
  my ($command) = @_;

  # See "The Plan" above for a full explanation.

  # Might be adding to an existing ChangeLog; in that case restrict the log
  # to entries after the accumulation date.
  if ( my $accumulation_date = maybe_grab_accumulation_date() ) {
    my @log_positions = grep { $command->[$_] eq 'log' } 0 .. $#{$command};
    my $date_option   = "-d\'>${accumulation_date}\'";

    # The -d option goes immediately after 'log'
    splice @{$command}, $log_positions[0] + 1, 0, $date_option;
    &debug ("(adding log msg starting from $accumulation_date)\n");
  }

  my $entry_set = read_changelog($command);
  $entry_set->output_changelog;
}
2315
2316 # -------------------------------------
2317
# Parse the date/time, author, state and changed-line counts out of a line
# like one of:
#
#   date: 1999/02/19 23:29:05;  author: apharris;  state: Exp;
#   date: 2004-05-14 10:23:11 +0200;  author: apharris;  state: Exp;  lines: +3 -1;
#
# Arguments:
#   $line - the "date:" line of a revision.
#
# Returns ($time, $author, $state, $lines): $time is in seconds since the
# epoch (UTC), $lines is the text of the "lines:" field or undef when the
# line has none.
#
# Dies if the line cannot be parsed or the year is outside 1970..2257.
sub parse_date_author_and_state {
  my $line = shift;

  # The date may use "/" or "-" as separator and may carry a numeric time
  # zone offset, depending on the version of CVS that wrote the log.
  my ($year, $mon, $mday, $hours, $min, $secs, $utc_offset,
      $author, $state, $rest) =
    $line =~
      m#(\d+)[-/](\d+)[-/](\d+)\s+(\d+):(\d+):(\d+)(?:\s+([+-]\d{4}))?;\s+author:\s+([^;]+);\s+state:\s+([^;]+);(.*)#
          or  die "Couldn't parse date ``$line''";
  die "Bad date or Y2K issues" unless ($year > 1969 and $year < 2258);
  # Kinda arbitrary, but useful as a sanity check

  # Hand timegm the full year: a value below 100 (as $year-1900 is for
  # 1970..1999) would be subject to Time::Local's rolling-century guess.
  my $time = timegm($secs,$min,$hours,$mday,$mon-1,$year);

  if ( defined $utc_offset ) {
    # The timestamp was local to the given offset; convert to UTC.
    my ($sign, $off_hours, $off_mins) =
      ($utc_offset =~ /^([+-])(\d\d)(\d\d)$/);
    my $offset = ($off_hours * 60 + $off_mins) * 60;
    $time += $sign eq '+' ? -$offset : $offset;
  }

  # Stop at the next ";" so that further fields following "lines:" do not
  # end up in the line counts.
  my $lines;
  if ( $rest =~ m#\s+lines:\s+([^;]*)# )
    {
      $lines = $1;
    }
  return ($time, $author, $state, $lines);
}
2339
2340 # -------------------------------------
2341
# Return the smaller of the two numeric arguments (the second on a tie).
sub min {
  my ($first, $second) = @_;

  return $first
    if $first < $second;

  return $second;
}
2343
2344 # -------------------------------------
2345
# Return the leading directory components shared by two file paths.
#
# Arguments:
#   $path1, $path2 - file paths.
#
# Returns the common directories, each followed by "/" (so "a/b/" for
# "a/b/x" and "a/b/c/y"), or the empty string when there are none.
sub common_path_prefix {
  my ($path1, $path2) = @_;

  # For compatibility (with older versions of cvs2cl.pl), we think in UN*X
  # terms, and mould windoze filenames to match.  Is this really appropriate?
  # If a file is checked in under UN*X, and cvs log run on windoze, which way
  # do the path separators slope?  Can we use fileparse as per the local
  # conventions?  If so, we should probably have a user option to specify an
  # OS to emulate to handle stdin-fed logs.  If we did this, we could avoid
  # the nasty \-/ transmogrification below.

  my @components;
  foreach my $path ($path1, $path2) {
    # Only the directory part takes part in the comparison.
    my $dir = (fileparse($path))[1];

    # Transmogrify Windows filenames to look like Unix.
    # (It is far more likely that someone is running cvs2cl.pl under
    # Windows than that they would genuinely have backslashes in their
    # filenames.)
    $dir =~ tr!\\!/!;

    push @components, [ grep length($_), split qr!/!, $dir ];
  }
  my ($parts1, $parts2) = @components;

  # Walk both lists in step and stop at the first difference.
  my $prefix = '';
  foreach my $idx ( 0 .. min($#{$parts1}, $#{$parts2}) ) {
    last
      if $parts1->[$idx] ne $parts2->[$idx];

    $prefix .= $parts1->[$idx] . '/';
  }

  return $prefix;
}
2382
2383 # -------------------------------------
# Parse the command line (@ARGV) into the option globals.
#
# Takes no arguments.  Unrecognised options and file names are left in @ARGV
# by Getopt::Long (pass_through) and are appended, quoted for the shell, to
# the log command.
#
# Returns an arrayref holding the log command: "cvs", any --global-opts,
# "log", any --log-opts, then the quoted remaining arguments.
#
# Exits with status 1 (after printing usage) on contradictory options, and
# with status 0 after printing usage or version when those were requested.
# Dies if option parsing fails or --delta gets a malformed argument.
sub parse_options {
  # Check this internally before setting the global variable.
  my $output_file;

  # If this gets set, we encountered unknown options and will exit at
  # the end of this subroutine.
  my $exit_with_admonishment = 0;

  # command to generate the log
  my @log_source_command = qw( cvs log );

  my (@Global_Opts, @Local_Opts);

  Getopt::Long::Configure(qw( bundling permute no_getopt_compat
                              pass_through no_ignore_case ));
  GetOptions('help|usage|h|?' => \$Print_Usage,  # -? is documented as well
             'debug'          => \$Debug,        # unadvertised option, heh
             'version'        => \$Print_Version,

             'file|f=s'       => \$output_file,
             'accum'          => \$Cumulative,
             'update'         => \$Update,
             'fsf'            => \$FSF_Style,
             'rcs=s'          => \$RCS_Root,
             'usermap|U=s'    => \$User_Map_File,
             'gecos'          => \$Gecos,
             'domain=s'       => \$Domain,
             'passwd=s'       => \$User_Passwd_File,
             'window|W=i'     => \$Max_Checkin_Duration,
             'chrono'         => \$Chronological_Order,
             'ignore|I=s'     => \@Ignore_Files,
             'case-insensitive|C' => \$Case_Insensitive,
             'regexp|R=s'     => \$Regexp_Gate,
             'stdin'          => \$Input_From_Stdin,
             'stdout'         => \$Output_To_Stdout,
             'distributed|d'  => sub { CVS::Utils::ChangeLog::FileEntry->distributed(1) },
             'prune|P'        => \$Prune_Empty_Msgs,
             'no-wrap'        => \$No_Wrap,
             'gmt|utc'        => \$UTC_Times,
             'day-of-week|w'  => \$Show_Day_Of_Week,
             'revisions|r'    => \$Show_Revisions,
             'show-dead'      => \$Show_Dead,
             'tags|t'         => \$Show_Tags,
             'tagdates|T'     => \$Show_Tag_Dates,
             'branches|b'     => \$Show_Branches,
             'follow|F=s'     => \@Follow_Branches,
             'xml-encoding=s' => \$XML_Encoding,
             'xml'            => \$XML_Output,
             'noxmlns'        => \$No_XML_Namespace,
             'no-xml-iso-date' => \$No_XML_ISO_Date,
             'no-ancestors'   => \$No_Ancestors,

             'no-indent'    => sub {
               $Indent = '';
             },

             'summary'      => sub {
               $Summary = 1;
               $After_Header = "\n\n"; # Summary implies --separate-header
             },

             'no-times'     => sub {
               $Show_Times = 0;
             },

             'no-hide-branch-additions' => sub {
               $Hide_Branch_Additions = 0;
             },

             'no-common-dir'  => sub {
               $Common_Dir = 0;
             },

             'ignore-tag=s'   => sub {
               $ignore_tags{$_[1]} = 1;
             },

             'show-tag=s'     => sub {
               $show_tags{$_[1]} = 1;
             },

             # Deliberately undocumented.  This is not a public interface, and
             # may change/disappear at any time.
             'test-code=s'    => \$TestCode,

             'delta=s'        => sub {
               my $arg = $_[1];
               if ( $arg =~
                    /^([A-Za-z][A-Za-z0-9_\-]*):([A-Za-z][A-Za-z0-9_\-]*)$/ ) {
                 $Delta_From = $1;
                 $Delta_To = $2;
                 $Delta_Mode = 1;
               } else {
                 die "--delta FROM_TAG:TO_TAG is what you meant to say.\n";
               }
             },

             'FSF'             => sub {
               $Show_Times = 0;
               $Common_Dir = 0;
               $No_Extra_Indent = 1;
               $Indent = "\t";
             },

             'header=s'        => sub {
               my $narg = $_[1];
               $ChangeLog_Header = &slurp_file ($narg);
               if (! defined ($ChangeLog_Header)) {
                 $ChangeLog_Header = '';
               }
             },

             'global-opts|g=s' => sub {
               my $narg = $_[1];
               push @Global_Opts, $narg;
               # Global options go between "cvs" and "log".
               splice @log_source_command, 1, 0, $narg;
             },

             'log-opts|l=s' => sub {
               my $narg = $_[1];
               push @Local_Opts, $narg;
               push @log_source_command, $narg;
             },

             'mailname=s'   => sub {
               my $narg = $_[1];
               warn "--mailname is deprecated; please use --domain instead\n";
               $Domain = $narg;
             },

             'separate-header|S' => sub {
               $After_Header = "\n\n";
               $No_Extra_Indent = 1;
             },

             'group-within-date' => sub {
               $GroupWithinDate = 1;
               $Show_Times = 0;
             },

             'hide-filenames' => sub {
               $Hide_Filenames = 1;
               $After_Header = '';
             },
            )
    or die "options parsing failed\n";

  # Whatever is left in @ARGV is handed to the log command.  The command is
  # later run as one string, so every argument is wrapped in single quotes;
  # a quote inside an argument has to be written as '\'' or it would end the
  # quoting early.
  foreach my $arg (@ARGV) {
    (my $quoted = $arg) =~ s/'/'\\''/g;
    push @log_source_command, "'$quoted'";
  }

  ## Check for contradictions...

  if ($Output_To_Stdout && CVS::Utils::ChangeLog::FileEntry->distributed) {
    print STDERR "cannot pass both --stdout and --distributed\n";
    $exit_with_admonishment = 1;
  }

  if ($Output_To_Stdout && $output_file) {
    print STDERR "cannot pass both --stdout and --file\n";
    $exit_with_admonishment = 1;
  }

  if ($Input_From_Stdin && @Global_Opts) {
    print STDERR "cannot pass both --stdin and -g\n";
    $exit_with_admonishment = 1;
  }

  if ($Input_From_Stdin && @Local_Opts) {
    print STDERR "cannot pass both --stdin and -l\n";
    $exit_with_admonishment = 1;
  }

  if ($XML_Output && $Cumulative) {
    print STDERR "cannot pass both --xml and --accum\n";
    $exit_with_admonishment = 1;
  }

  # Other consistency checks and option-driven logic

  # Bleargh.  Compensate for a deficiency of custom wrapping.
  if ( ($After_Header ne " ") and $FSF_Style ) {
    $After_Header .= "\t";
  }

  # The ignore strings are folded here; paths are folded when compared.
  @Ignore_Files = map lc, @Ignore_Files
    if $Case_Insensitive;

  # Or if any other error message has already been printed out, we
  # just leave now:
  if ($exit_with_admonishment) {
    &usage ();
    exit (1);
  }
  elsif ($Print_Usage) {
    &usage ();
    exit (0);
  }
  elsif ($Print_Version) {
    &version ();
    exit (0);
  }

  ## Else no problems, so proceed.

  if ($output_file) {
    $Log_File_Name = $output_file;
  }

  return \@log_source_command;
}
2593
2594 # -------------------------------------
2595
# Read a whole file into one string.
#
# Arguments:
#   $filename - name of the file to read; "-" means standard input.
#
# Returns the file's contents.  Dies when no file name is given or the file
# cannot be opened.
sub slurp_file {
  my $filename = shift;

  # Test for an empty name explicitly so that a file called "0" is accepted.
  die ("no filename passed to slurp_file()")
    unless defined $filename and length $filename;

  my $fh;
  if ( $filename eq '-' ) {
    # Keep "-" meaning standard input, as the two-argument open did.
    open ($fh, '<-') or die ("unable to open $filename ($!)");
  } else {
    # Three-argument open takes the name literally: surrounding blanks are
    # kept and a leading "&" is not read as a request to duplicate a handle.
    open ($fh, '<', $filename) or die ("unable to open $filename ($!)");
  }

  # Slurp mode; `local' restores the separator however the sub is left.
  local $/ = undef;
  my $retstr = <$fh>;
  close ($fh);

  return $retstr;
}
2608
2609 # -------------------------------------
2610
# Print the given message on STDERR, but only when $Debug is set (--debug).
sub debug {
  return
    unless $Debug;

  my ($msg) = @_;
  print STDERR $msg;
}
2617
2618 # -------------------------------------
2619
# Print the program name, its version ($VERSION) and licence on standard
# output.
sub version {
  my $banner = "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
  print $banner;
}
2623
2624 # -------------------------------------
2625
# Print the version banner followed by the usage text on standard output.
#
# With Pod::Usage available the text is produced by pod2usage; otherwise a
# notice is printed and the text read from the DATA handle is shown after
# stripping some POD markup.  Does not exit; callers do that.
sub usage {
  &version ();

  # Pod::Usage is loaded on demand, so its absence can be handled here.
  eval "use Pod::Usage qw( pod2usage )";
  my $have_pod_usage = ! $@;

  if ( $have_pod_usage ) {
    print "\n";
    pod2usage( -exitval => 'NOEXIT',
               -verbose => 1,
               -output  => \*STDOUT,
             );
    return;
  }

  print <<'END';

* Pod::Usage was not found.  The formatting may be suboptimal.  Consider
  upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and
  versions of perl prior to 5.6 are getting rather rusty, now.  Alternatively,
  install Pod::Usage direct from CPAN.
END

  # Fallback: read everything from DATA in one go and drop the "=head1" /
  # "=item" prefixes, the "=over" / "=back" lines and surplus blank lines.
  local $/ = undef;
  my $message = <DATA>;
  $message =~ s/^=(head1|item) //gm;
  $message =~ s/^=(over|back).*\n//gm;
  $message =~ s/\n{3,}/\n\n/g;
  print $message;

  return;
}
2656
2657 # Main -----------------------------------------------------------------------
2658
# Parse the command line first; this exits by itself for --help, --version
# and contradictory options.
my $log_source_command = parse_options();

if ( ! defined $TestCode ) {
  # Normal operation: build the ChangeLog.
  derive_changelog($log_source_command);
} else {
  # NOTE(review): --test-code evaluates arbitrary Perl taken from the command
  # line.  It is an undocumented testing hook; never feed it untrusted input.
  eval $TestCode;
  die "Eval failed: '$@'\n"
    if $@;
}
2667
2668 __DATA__
2669
2670 =head1 NAME
2671
2672 cvs2cl.pl - convert cvs log messages to changelogs
2673
2674 =head1 SYNOPSIS
2675
2676 B<cvs2cl> [I<options>] [I<FILE1> [I<FILE2> ...]]
2677
2678 =head1 DESCRIPTION
2679
2680 cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by
2681 running "cvs log" and parsing the output. Duplicate log messages get
2682 unified in the Right Way.
2683
2684 The default output of cvs2cl is designed to be compact, formally unambiguous,
2685 but still easy for humans to read.  It should be largely self-explanatory; the
2686 one abbreviation that might not be obvious is "utags".  That stands for
2687 "universal tags" -- a universal tag is one held by all the files in a given
2688 change entry.
2689
2690 If you need output that's easy for a program to parse, use the B<--xml> option.
2691 Note that with XML output, just about all available information is included
2692 with each change entry, whether you asked for it or not, on the theory that
2693 your parser can ignore anything it's not looking for.
2694
2695 If filenames are given as arguments cvs2cl only shows log information for the
2696 named files.
2697
2698 =head1 OPTIONS
2699
2700 =over 4
2701
2702 =item B<-h>, B<-help>, B<--help>, B<-?>
2703
2704 Show a short help and exit.
2705
2706 =item B<--version>
2707
2708 Show version and exit.
2709
2710 =item B<-r>, B<--revisions>
2711
2712 Show revision numbers in output.
2713
2714 =item B<-b>, B<--branches>
2715
2716 Show branch names in revisions when possible.
2717
2718 =item B<-t>, B<--tags>
2719
2720 Show tags (symbolic names) in output.
2721
2722 =item B<-T>, B<--tagdates>
2723
Show tags in output on their first occurrence.
2725
2726 =item B<--show-dead>
2727
2728 Show dead files.
2729
2730 =item B<--stdin>
2731
2732 Read from stdin, don't run cvs log.
2733
2734 =item B<--stdout>
2735
2736 Output to stdout not to ChangeLog.
2737
2738 =item B<-d>, B<--distributed>
2739
2740 Put ChangeLogs in subdirs.
2741
2742 =item B<-f> I<FILE>, B<--file> I<FILE>
2743
2744 Write to I<FILE> instead of ChangeLog.
2745
2746 =item B<--fsf>
2747
2748 Use this if log data is in FSF ChangeLog style.
2749
2750 =item B<--FSF>
2751
2752 Attempt strict FSF-standard compatible output.
2753
2754 =item B<-W> I<SECS>, B<--window> I<SECS>
2755
2756 Window of time within which log entries unify.
2757
=item B<-U> I<UFILE>, B<--usermap> I<UFILE>
2759
2760 Expand usernames to email addresses from I<UFILE>.
2761
2762 =item B<--passwd> I<PASSWORDFILE>
2763
Use system passwd file for user name expansion.  If no mail domain is provided
(via B<--domain>), it tries to read one from B</etc/mailname> or, if that file
does not exist, from the output of B<hostname -d>, B<dnsdomainname>, or
B<domainname>, in that order.  cvs2cl exits with an error if none of those
options is successful.  Use a domain of '' to prevent the addition of a mail
domain.
2769
2770 =item B<--domain> I<DOMAIN>
2771
2772 Domain to build email addresses from.
2773
2774 =item B<--gecos>
2775
2776 Get user information from GECOS data.
2777
2778 =item B<-R> I<REGEXP>, B<--regexp> I<REGEXP>
2779
2780 Include only entries that match I<REGEXP>.  This option may be used multiple
2781 times.
2782
2783 =item B<-I> I<REGEXP>, B<--ignore> I<REGEXP>
2784
2785 Ignore files whose names match I<REGEXP>.  This option may be used multiple
2786 times.
2787
2788 =item B<-C>, B<--case-insensitive>
2789
2790 Any regexp matching is done case-insensitively.
2791
2792 =item B<-F> I<BRANCH>, B<--follow> I<BRANCH>
2793
2794 Show only revisions on or ancestral to I<BRANCH>.
2795
2796 =item B<--no-ancestors>
2797
2798 When using B<-F>, only track changes since the I<BRANCH> started.
2799
2800 =item B<--no-hide-branch-additions>
2801
2802 By default, entries generated by cvs for a file added on a branch (a dead 1.1
2803 entry) are not shown.  This flag reverses that action.
2804
2805 =item B<-S>, B<--separate-header>
2806
2807 Blank line between each header and log message.
2808
2809 =item B<--summary>
2810
2811 Add CVS change summary information.
2812
2813 =item B<--no-wrap>
2814
2815 Don't auto-wrap log message (recommend B<-S> also).
2816
2817 =item B<--no-indent>
2818
Don't indent log message.
2820
2821 =item B<--gmt>, B<--utc>
2822
2823 Show times in GMT/UTC instead of local time.
2824
2825 =item B<--accum>
2826
2827 Add to an existing ChangeLog (incompatible with B<--xml>).
2828
2829 =item B<-w>, B<--day-of-week>
2830
2831 Show day of week.
2832
2833 =item B<--no-times>
2834
2835 Don't show times in output.
2836
2837 =item B<--chrono>
2838
2839 Output log in chronological order (default is reverse chronological order).
2840
2841 =item B<--header> I<FILE>
2842
2843 Get ChangeLog header from I<FILE> ("B<->" means stdin).
2844
2845 =item B<--xml>
2846
2847 Output XML instead of ChangeLog format.
2848
2849 =item B<--xml-encoding> I<ENCODING>
2850
2851 Insert encoding clause in XML header.
2852
2853 =item B<--noxmlns>
2854
2855 Don't include xmlns= attribute in root element.
2856
2857 =item B<--hide-filenames>
2858
2859 Don't show filenames (ignored for XML output).
2860
2861 =item B<--no-common-dir>
2862
2863 Don't shorten directory names from filenames.
2864
2865 =item B<--rcs> I<CVSROOT>
2866
2867 Handle filenames from raw RCS, for instance those produced by "cvs rlog"
2868 output, stripping the prefix I<CVSROOT>.
2869
2870 =item B<-P>, B<--prune>
2871
2872 Don't show empty log messages.
2873
2874 =item B<--ignore-tag> I<TAG>
2875
2876 Ignore individual changes that are associated with a given tag.
2877 May be repeated; if so, changes that are associated with any of
2878 the given tags are ignored.
2879
2880 =item B<--show-tag> I<TAG>
2881
2882 Log only individual changes that are associated with a given
2883 tag.  May be repeated; if so, changes that are associated with
2884 any of the given tags are logged.
2885
2886 =item B<--delta> I<FROM_TAG>B<:>I<TO_TAG>
2887
2888 Attempt a delta between two tags (since I<FROM_TAG> up to and
2889 including I<TO_TAG>).  The algorithm is a simple date-based one
2890 (this is a hard problem) so results are imperfect.
2891
2892 =item B<-g> I<OPTS>, B<--global-opts> I<OPTS>
2893
2894 Pass I<OPTS> to cvs like in "cvs I<OPTS> log ...".
2895
2896 =item B<-l> I<OPTS>, B<--log-opts> I<OPTS>
2897
2898 Pass I<OPTS> to cvs log like in "cvs ... log I<OPTS>".
2899
2900 =back
2901
2902 Notes about the options and arguments:
2903
2904 =over 4
2905
2906 =item *
2907
2908 The B<-R>, B<-I>, and B<-F> options may appear multiple times.
2909
2910 =item *
2911
2912 To follow trunk revisions, use "B<-F trunk>" ("B<-F TRUNK>" also works).  This is
2913 okay because no one would ever, ever be crazy enough to name a branch "trunk",
2914 right?  Right.
2915
2916 =item *
2917
2918 For the B<-U> option, the I<UFILE> should be formatted like CVSROOT/users. That is,
2919 each line of I<UFILE> looks like this:
2920
2921        jrandom:jrandom@red-bean.com
2922
2923 or maybe even like this:
2924
2925        jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
2926
2927 Don't forget to quote the portion after the colon if necessary.
2928
2929 =item *
2930
2931 Many people want to filter by date.  To do so, invoke cvs2cl.pl like this:
2932
2933        cvs2cl.pl -l "-d'DATESPEC'"
2934
2935 where DATESPEC is any date specification valid for "cvs log -d".  (Note that
2936 CVS 1.10.7 and below require that there be no space between -d and its argument.)
2937
2938 =item *
2939
2940 Dates/times are interpreted in the local time zone.
2941
2942 =item *
2943
2944 Remember to quote the argument to `B<-l>' so that your shell doesn't interpret
2945 spaces as argument separators.
2946
2947 =item *
2948
2949 See the 'Common Options' section of the cvs manual ('info cvs' on UNIX-like
2950 systems) for more information.
2951
2952 =item *
2953
2954 Note that the rules for quoting under Windows shells are different.
2955
2956 =back
2957
2958 =head1 EXAMPLES
2959
2960 Some examples (working on UNIX shells):
2961
2962       # logs after 6th March, 2003 (inclusive)
2963       cvs2cl.pl -l "-d'>2003-03-06'"
2964       # logs after 4:34PM 6th March, 2003 (inclusive)
2965       cvs2cl.pl -l "-d'>2003-03-06 16:34'"
2966       # logs between 4:46PM 6th March, 2003 (exclusive) and
2967       # 4:34PM 6th March, 2003 (inclusive)
2968       cvs2cl.pl -l "-d'2003-03-06 16:46>2003-03-06 16:34'"
2969
2970 Some examples (on non-UNIX shells):
2971
2972       # Reported to work on Windows XP/2000
2973       cvs2cl.pl -l  "-d"">2003-10-18;today<"""
2974
2975 =head1 AUTHORS
2976
2977 =over 4
2978
2979 =item Karl Fogel
2980
2981 =item Melissa O'Neal
2982
2983 =item Martyn J. Pearce
2984
2985 =back
2986
2987 Contributions from
2988
2989 =over 4
2990
2991 =item Mike Ayers
2992
2993 =item Tim Bradshaw
2994
2995 =item Richard Broberg
2996
2997 =item Nathan Bryant
2998
2999 =item Oswald Buddenhagen
3000
3001 =item Arthur de Jong
3002
3003 =item Mark W. Eichin
3004
3005 =item Dave Elcock
3006
3007 =item Reid Ellis
3008
3009 =item Simon Josefsson
3010
3011 =item Robin Hugh Johnson
3012
3013 =item Terry Kane
3014
3015 =item Akos Kiss
3016
3017 =item Claus Klein
3018
3019 =item Eddie Kohler
3020
3021 =item Richard Laager
3022
3023 =item Kevin Lilly
3024
3025 =item Karl-Heinz Marbaise
3026
3027 =item Mitsuaki Masuhara
3028
3029 =item Henrik Nordstrom
3030
3031 =item Joe Orton
3032
3033 =item Peter Palfrader
3034
3035 =item Thomas Parmelan
3036
3037 =item Johanne Stezenbach
3038
3039 =item Joseph Walton
3040
3041 =item Ernie Zapata
3042
3043 =back
3044
3045 =head1 BUGS
3046
3047 Please report bugs to C<bug-cvs2cl@red-bean.com>.
3048
3049 =head1 PREREQUISITES
3050
3051 This script requires C<Text::Wrap>, C<Time::Local>, C<File::Basename>,
3052 C<Getopt::Long>, and C<User::pwent>.  It also seems to require C<Perl 5.004_04> or higher.
3053
3054 =head1 OPERATING SYSTEM COMPATIBILITY
3055
3056 Should work on any OS.
3057
3058 =head1 SCRIPT CATEGORIES
3059
3060 Version_Control/CVS
3061
3062 =head1 COPYRIGHT
3063
3064 (C) 2001,2002,2003,2004 Martyn J. Pearce <fluffy@cpan.org>, under the GNU GPL.
3065
3066 (C) 1999 Karl Fogel <kfogel@red-bean.com>, under the GNU GPL.
3067
3068 cvs2cl.pl is free software; you can redistribute it and/or modify
3069 it under the terms of the GNU General Public License as published by
3070 the Free Software Foundation; either version 2, or (at your option)
3071 any later version.
3072
3073 cvs2cl.pl is distributed in the hope that it will be useful,
3074 but WITHOUT ANY WARRANTY; without even the implied warranty of
3075 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
3076 GNU General Public License for more details.
3077
3078 You may have received a copy of the GNU General Public License
3079 along with cvs2cl.pl; see the file COPYING.  If not, write to the
3080 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
3081 Boston, MA 02111-1307, USA.
3082
3083 =head1 SEE ALSO
3084
3085 cvs(1)
3086