Remove unused functions {LO,PV}TouchesLine() and their callbacks
[geda-pcb/pcjc2.git] / utils / cvs2cl.pl
blob4af57891739cc514ba2cd15a54691d8dadc00cf1
1 #!/bin/sh
2 exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
3 #!perl -w
6 ##############################################################
7 ### ###
8 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
9 ### ###
10 ##############################################################
12 ## $Revision$
13 ## $Date$
14 ## $Author$
17 use strict;
19 use File::Basename qw( fileparse );
20 use Getopt::Long qw( GetOptions );
21 use Text::Wrap qw( );
22 use User::pwent qw( getpwnam );
24 # The Plan:
26 # Read in the logs for multiple files, spit out a nice ChangeLog that
27 # mirrors the information entered during `cvs commit'.
29 # The problem presents some challenges. In an ideal world, we could
30 # detect files with the same author, log message, and checkin time --
31 # each <filelist, author, time, logmessage> would be a changelog entry.
32 # We'd sort them; and spit them out. Unfortunately, CVS is *not atomic*
33 # so checkins can span a range of times. Also, the directory structure
34 # could be hierarchical.
36 # Another question is whether we really want to have the ChangeLog
37 # exactly reflect commits. An author could issue two related commits,
38 # with different log entries, reflecting a single logical change to the
39 # source. GNU style ChangeLogs group these under a single author/date.
40 # We try to do the same.
42 # So, we parse the output of `cvs log', storing log messages in a
43 # multilevel hash that stores the mapping:
44 # directory => author => time => message => filelist
45 # As we go, we notice "nearby" commit times and store them together
46 # (i.e., under the same timestamp), so they appear in the same log
47 # entry.
49 # When we've read all the logs, we twist this mapping into
50 # a time => author => message => filelist mapping for each directory.
52 # If we're not using the `--distributed' flag, the directory is always
53 # considered to be `./', even as descend into subdirectories.
55 # Call Tree
57 # name number of lines (10.xii.03)
58 # parse_options 192
59 # derive_changelog 13
60 # +-maybe_grab_accumulation_date 38
61 # +-read_changelog 277
62 # +-maybe_read_user_map_file 94
63 # +-run_ext 9
64 # +-read_file_path 29
65 # +-read_symbolic_name 43
66 # +-read_revision 49
67 # +-read_date_author_and_state 25
68 # +-parse_date_author_and_state 20
69 # +-read_branches 36
70 # +-output_changelog 424
71 # +-pretty_file_list 290
72 # +-common_path_prefix 35
73 # +-preprocess_msg_text 30
74 # +-min 1
75 # +-mywrap 16
76 # +-last_line_len 5
77 # +-wrap_log_entry 177
79 # Utilities
81 # xml_escape 6
82 # slurp_file 11
83 # debug 5
84 # version 2
85 # usage 142
87 # -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
89 # Note about a bug-slash-opportunity:
90 # -----------------------------------
92 # There's a bug in Text::Wrap, which affects cvs2cl. This script
93 # reveals it:
95 # #!/usr/bin/perl -w
97 # use Text::Wrap;
99 # my $test_text =
100 # "This script demonstrates a bug in Text::Wrap. The very long line
101 # following this paragraph will be relocated relative to the surrounding
102 # text:
104 # ====================================================================
106 # See? When the bug happens, we'll get the line of equal signs below
107 # this paragraph, even though it should be above.";
110 # # Print out the test text with no wrapping:
111 # print "$test_text";
112 # print "\n";
113 # print "\n";
115 # # Now print it out wrapped, and see the bug:
116 # print wrap ("\t", " ", "$test_text");
117 # print "\n";
118 # print "\n";
120 # If the line of equal signs were one shorter, then the bug doesn't
121 # happen. Interesting.
123 # Anyway, rather than fix this in Text::Wrap, we might as well write a
124 # new wrap() which has the following much-needed features:
126 # * initial indentation, like current Text::Wrap()
127 # * subsequent line indentation, like current Text::Wrap()
128 # * user chooses among: force-break long words, leave them alone, or die()?
129 # * preserve existing indentation: chopped chunks from an indented line
130 # are indented by same (like this line, not counting the asterisk!)
131 # * optional list of things to preserve on line starts, default ">"
133 # Note that the last two are essentially the same concept, so unify in
134 # implementation and give a good interface to controlling them.
136 # And how about:
138 # Optionally, when encounter a line pre-indented by same as previous
139 # line, then strip the newline and refill, but indent by the same.
140 # Yeah...
142 # Globals --------------------------------------------------------------------
144 # In case we have to print it out:
145 my $VERSION = '$Revision$';
146 $VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
148 ## Vars set by options:
150 # Print debugging messages?
151 my $Debug = 0;
153 # Just show version and exit?
154 my $Print_Version = 0;
156 # Just print usage message and exit?
157 my $Print_Usage = 0;
159 # What file should we generate (defaults to "ChangeLog")?
160 my $Log_File_Name = "ChangeLog";
162 # Grab most recent entry date from existing ChangeLog file, just add
163 # to that ChangeLog.
164 my $Cumulative = 0;
166 # `cvs log -d`, this will repeat the last entry in the old log. This is OK,
167 # as it guarantees at least one entry in the update changelog, which means
168 # that there will always be a date to extract for the next update. The repeat
169 # entry can be removed in postprocessing, if necessary.
171 # MJP 2003-08-02
172 # I don't think this actually does anything useful
173 my $Update = 0;
175 # Expand usernames to email addresses based on a map file?
176 my $User_Map_File = '';
177 my $User_Passwd_File;
178 my $Mail_Domain;
180 # Output log in chronological order? [default is reverse chronological order]
181 my $Chronological_Order = 0;
183 # Grab user details via gecos
184 my $Gecos = 0;
186 # User domain for gecos email addresses
187 my $Domain;
189 # Output to a file or to stdout?
190 my $Output_To_Stdout = 0;
192 # Eliminate empty log messages?
193 my $Prune_Empty_Msgs = 0;
195 # Tags of which not to output
196 my %ignore_tags;
198 # Show only revisions with Tags
199 my %show_tags;
201 # Don't call Text::Wrap on the body of the message
202 my $No_Wrap = 0;
204 # Indentation of log messages
205 my $Indent = "\t";
207 # Don't do any pretty print processing
208 my $Summary = 0;
210 # Separates header from log message. Code assumes it is either " " or
211 # "\n\n", so if there's ever an option to set it to something else,
212 # make sure to go through all conditionals that use this var.
213 my $After_Header = " ";
215 # XML Encoding
216 my $XML_Encoding = '';
218 # Format more for programs than for humans.
219 my $XML_Output = 0;
220 my $No_XML_Namespace = 0;
221 my $No_XML_ISO_Date = 0;
223 # Do some special tweaks for log data that was written in FSF
224 # ChangeLog style.
225 my $FSF_Style = 0;
227 # Show times in UTC instead of local time
228 my $UTC_Times = 0;
230 # Show times in output?
231 my $Show_Times = 1;
233 # Show day of week in output?
234 my $Show_Day_Of_Week = 0;
236 # Show revision numbers in output?
237 my $Show_Revisions = 0;
239 # Show dead files in output?
240 my $Show_Dead = 0;
242 # Hide dead trunk files which were created as a result of additions on a
243 # branch?
244 my $Hide_Branch_Additions = 1;
246 # Show tags (symbolic names) in output?
247 my $Show_Tags = 0;
249 # Show tags separately in output?
250 my $Show_Tag_Dates = 0;
252 # Show branches by symbolic name in output?
253 my $Show_Branches = 0;
255 # Show only revisions on these branches or their ancestors.
256 my @Follow_Branches;
257 # Show only revisions on these branches or their ancestors; ignore descendent
258 # branches.
259 my @Follow_Only;
261 # Don't bother with files matching this regexp.
262 my @Ignore_Files;
264 # How exactly we match entries. We definitely want "o",
265 # and user might add "i" by using --case-insensitive option.
266 my $Case_Insensitive = 0;
268 # Maybe only show log messages matching a certain regular expression.
269 my $Regexp_Gate = '';
271 # Pass this global option string along to cvs, to the left of `log':
272 my $Global_Opts = '';
274 # Pass this option string along to the cvs log subcommand:
275 my $Command_Opts = '';
277 # Read log output from stdin instead of invoking cvs log?
278 my $Input_From_Stdin = 0;
280 # Don't show filenames in output.
281 my $Hide_Filenames = 0;
283 # Don't shorten directory names from filenames.
284 my $Common_Dir = 1;
286 # Max checkin duration. CVS checkin is not atomic, so we may have checkin
287 # times that span a range of time. We assume that checkins will last no
288 # longer than $Max_Checkin_Duration seconds, and that similarly, no
289 # checkins will happen from the same users with the same message less
290 # than $Max_Checkin_Duration seconds apart.
291 my $Max_Checkin_Duration = 180;
293 # What to put at the front of [each] ChangeLog.
294 my $ChangeLog_Header = '';
296 # Whether to enable 'delta' mode, and for what start/end tags.
297 my $Delta_Mode = 0;
298 my $Delta_From = '';
299 my $Delta_To = '';
301 my $TestCode;
303 # Whether to parse filenames from the RCS filename, and if so what
304 # prefix to strip.
305 my $RCS_Root;
307 # Whether to output information on the # of lines added and removed
308 # by each file modification.
309 my $Show_Lines_Modified = 0;
311 ## end vars set by options.
313 # latest observed times for the start/end tags in delta mode
314 my $Delta_StartTime = 0;
315 my $Delta_EndTime = 0;
317 my $No_Ancestors = 0;
319 my $No_Extra_Indent = 0;
321 my $GroupWithinDate = 0;
323 # ----------------------------------------------------------------------------
325 package CVS::Utils::ChangeLog::EntrySet;
327 sub new {
328 my $class = shift;
329 my %self;
330 bless \%self, $class;
333 # -------------------------------------
335 sub output_changelog {
336 my $output_type = $XML_Output ? 'XML' : 'Text';
337 my $output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}";
338 my $output = $output_class->new(follow_branches => \@Follow_Branches,
339 follow_only => \@Follow_Only,
340 ignore_tags => \%ignore_tags,
341 show_tags => \%show_tags,
343 $output->output_changelog(@_);
346 # -------------------------------------
348 sub add_fileentry {
349 my ($self, $file_full_path, $time, $revision, $state, $lines,
350 $branch_names, $branch_roots, $branch_numbers,
351 $symbolic_names, $author, $msg_txt) = @_;
353 my $qunk =
354 CVS::Utils::ChangeLog::FileEntry->new($file_full_path, $time, $revision,
355 $state, $lines,
356 $branch_names, $branch_roots,
357 $branch_numbers,
358 $symbolic_names);
360 # We might be including revision numbers and/or tags and/or
361 # branch names in the output. Most of the code from here to
362 # loop-end deals with organizing these in qunk.
364 unless ( $Hide_Branch_Additions
366 $msg_txt =~ /file .+ was initially added on branch \S+./ ) {
367 # Add this file to the list
368 # (We use many spoonfuls of autovivication magic. Hashes and arrays
369 # will spring into existence if they aren't there already.)
371 &main::debug ("(pushing log msg for ". $qunk->dir_key . $qunk->filename . ")\n");
373 # Store with the files in this commit. Later we'll loop through
374 # again, making sure that revisions with the same log message
375 # and nearby commit times are grouped together as one commit.
376 $self->{$qunk->dir_key}{$author}{$time}{$msg_txt} =
377 CVS::Utils::ChangeLog::Message->new($msg_txt)
378 unless exists $self->{$qunk->dir_key}{$author}{$time}{$msg_txt};
379 $self->{$qunk->dir_key}{$author}{$time}{$msg_txt}->add_fileentry($qunk);
384 # ----------------------------------------------------------------------------
386 package CVS::Utils::ChangeLog::EntrySet::Output::Text;
388 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
390 use File::Basename qw( fileparse );
392 sub new {
393 my $class = shift;
394 my $self = $class->SUPER::new(@_);
397 # -------------------------------------
399 sub wday {
400 my $self = shift; my $class = ref $self;
401 my ($wday) = @_;
403 return $Show_Day_Of_Week ? ' ' . $class->weekday_en($wday) : '';
406 # -------------------------------------
408 sub header_line {
409 my $self = shift;
410 my ($time, $author, $lastdate) = @_;
412 my $header_line = '';
414 my (undef,$min,$hour,$mday,$mon,$year,$wday)
415 = $UTC_Times ? gmtime($time) : localtime($time);
417 my $date = $self->fdatetime($time);
419 if ($Show_Times) {
420 $header_line =
421 sprintf "%s %s\n\n", $date, $author;
422 } else {
423 if ( ! defined $lastdate or $date ne $lastdate or ! $GroupWithinDate ) {
424 if ( $GroupWithinDate ) {
425 $header_line = "$date\n\n";
426 } else {
427 $header_line = "$date $author\n\n";
429 } else {
430 $header_line = '';
435 # -------------------------------------
437 sub preprocess_msg_text {
438 my $self = shift;
439 my ($text) = @_;
441 $text = $self->SUPER::preprocess_msg_text($text);
443 unless ( $No_Wrap ) {
444 # Strip off lone newlines, but only for lines that don't begin with
445 # whitespace or a mail-quoting character, since we want to preserve
446 # that kind of formatting. Also don't strip newlines that follow a
447 # period; we handle those specially next. And don't strip
448 # newlines that precede an open paren.
449 1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g;
451 # If a newline follows a period, make sure that when we bring up the
452 # bottom sentence, it begins with two spaces.
453 1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2 $3/g;
456 return $text;
459 # -------------------------------------
461 # Here we take a bunch of qunks and convert them into printed
462 # summary that will include all the information the user asked for.
463 sub pretty_file_list {
464 my $self = shift;
466 return ''
467 if $Hide_Filenames;
469 my $qunksref = shift;
471 my @filenames;
472 my $beauty = ''; # The accumulating header string for this entry.
473 my %non_unanimous_tags; # Tags found in a proper subset of qunks
474 my %unanimous_tags; # Tags found in all qunks
475 my %all_branches; # Branches found in any qunk
476 my $fbegun = 0; # Did we begin printing filenames yet?
478 my ($common_dir, $qunkrefs) =
479 $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref);
481 my @qunkrefs = @$qunkrefs;
483 # Not XML output, so complexly compactify for chordate consumption. At this
484 # point we have enough global information about all the qunks to organize
485 # them non-redundantly for output.
487 if ($common_dir) {
488 # Note that $common_dir still has its trailing slash
489 $beauty .= "$common_dir: ";
492 if ($Show_Branches)
494 # For trailing revision numbers.
495 my @brevisions;
497 foreach my $branch (keys (%all_branches))
499 foreach my $qunkref (@qunkrefs)
501 if ((defined ($qunkref->branch))
502 and ($qunkref->branch eq $branch))
504 if ($fbegun) {
505 # kff todo: comma-delimited in XML too? Sure.
506 $beauty .= ", ";
508 else {
509 $fbegun = 1;
511 my $fname = substr ($qunkref->filename, length ($common_dir));
512 $beauty .= $fname;
513 $qunkref->{'printed'} = 1; # Just setting a mark bit, basically
515 if ( $Show_Tags and defined $qunkref->tags ) {
516 my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
518 if (@tags) {
519 $beauty .= " (tags: ";
520 $beauty .= join (', ', @tags);
521 $beauty .= ")";
525 if ($Show_Revisions) {
526 # Collect the revision numbers' last components, but don't
527 # print them -- they'll get printed with the branch name
528 # later.
529 $qunkref->revision =~ /.+\.([\d]+)$/;
530 push (@brevisions, $1);
532 # todo: we're still collecting branch roots, but we're not
533 # showing them anywhere. If we do show them, it would be
534 # nifty to just call them revision "0" on a the branch.
535 # Yeah, that's the ticket.
539 $beauty .= " ($branch";
540 if (@brevisions) {
541 if ((scalar (@brevisions)) > 1) {
542 $beauty .= ".[";
543 $beauty .= (join (',', @brevisions));
544 $beauty .= "]";
546 else {
547 # Square brackets are spurious here, since there's no range to
548 # encapsulate
549 $beauty .= ".$brevisions[0]";
552 $beauty .= ")";
556 # Okay; any qunks that were done according to branch are taken care
557 # of, and marked as printed. Now print everyone else.
559 my %fileinfo_printed;
560 foreach my $qunkref (@qunkrefs)
562 next if (defined ($qunkref->{'printed'})); # skip if already printed
564 my $b = substr ($qunkref->filename, length ($common_dir));
565 # todo: Shlomo's change was this:
566 # $beauty .= substr ($qunkref->filename,
567 # (($common_dir eq "./") ? '' : length ($common_dir)));
568 $qunkref->{'printed'} = 1; # Set a mark bit.
570 if ($Show_Revisions || $Show_Tags || $Show_Dead)
572 my $started_addendum = 0;
574 if ($Show_Revisions) {
575 $started_addendum = 1;
576 $b .= " (";
577 $b .= $qunkref->revision;
579 if ($Show_Dead && $qunkref->state =~ /dead/)
581 # Deliberately not using $started_addendum. Keeping it simple.
582 $b .= "[DEAD]";
584 if ($Show_Tags && (defined $qunkref->tags)) {
585 my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
586 if ((scalar (@tags)) > 0) {
587 if ($started_addendum) {
588 $b .= ", ";
590 else {
591 $b .= " (tags: ";
593 $b .= join (', ', @tags);
594 $started_addendum = 1;
597 if ($started_addendum) {
598 $b .= ")";
602 unless ( exists $fileinfo_printed{$b} ) {
603 if ($fbegun) {
604 $beauty .= ", ";
605 } else {
606 $fbegun = 1;
608 $beauty .= $b, $fileinfo_printed{$b} = 1;
612 # Unanimous tags always come last.
613 if ($Show_Tags && %unanimous_tags)
615 $beauty .= " (utags: ";
616 $beauty .= join (', ', sort keys (%unanimous_tags));
617 $beauty .= ")";
620 # todo: still have to take care of branch_roots?
622 $beauty = "$beauty:";
624 return $beauty;
627 # -------------------------------------
629 sub output_tagdate {
630 my $self = shift;
631 my ($fh, $time, $tag) = @_;
633 my $fdatetime = $self->fdatetime($time);
634 print $fh "$fdatetime tag $tag\n\n";
635 return;
638 # -------------------------------------
640 sub format_body {
641 my $self = shift;
642 my ($msg, $files, $qunklist) = @_;
644 my $body;
646 if ( $No_Wrap and ! $Summary ) {
647 $msg = $self->preprocess_msg_text($msg);
648 $files = $self->mywrap("\t", "\t ", "* $files");
649 $msg =~ s/\n(.+)/\n$Indent$1/g;
650 unless ($After_Header eq " ") {
651 $msg =~ s/^(.+)/$Indent$1/g;
653 if ( $Hide_Filenames ) {
654 $body = $After_Header . $msg;
655 } else {
656 $body = $files . $After_Header . $msg;
658 } elsif ( $Summary ) {
659 my ($filelist, $qunk);
660 my (@DeletedQunks, @AddedQunks, @ChangedQunks);
662 $msg = $self->preprocess_msg_text($msg);
664 # Sort the files (qunks) according to the operation that was
665 # performed. Files which were added have no line change
666 # indicator, whereas deleted files have state dead.
668 foreach $qunk ( @$qunklist ) {
669 if ( "dead" eq $qunk->state) {
670 push @DeletedQunks, $qunk;
671 } elsif ( ! defined $qunk->lines ) {
672 push @AddedQunks, $qunk;
673 } else {
674 push @ChangedQunks, $qunk;
678 # The qunks list was originally in tree search order. Let's
679 # get that back. The lists, if they exist, will be reversed upon
680 # processing.
684 # Now write the three sections onto $filelist
686 if ( @DeletedQunks ) {
687 $filelist .= "\tDeleted:\n";
688 foreach $qunk ( @DeletedQunks ) {
689 $filelist .= "\t\t" . $qunk->filename;
690 $filelist .= " (" . $qunk->revision . ")";
691 $filelist .= "\n";
693 undef @DeletedQunks;
696 if ( @AddedQunks ) {
697 $filelist .= "\tAdded:\n";
698 foreach $qunk (@AddedQunks) {
699 $filelist .= "\t\t" . $qunk->filename;
700 $filelist .= " (" . $qunk->revision . ")";
701 $filelist .= "\n";
703 undef @AddedQunks ;
706 if ( @ChangedQunks ) {
707 $filelist .= "\tChanged:\n";
708 foreach $qunk (@ChangedQunks) {
709 $filelist .= "\t\t" . $qunk->filename;
710 $filelist .= " (" . $qunk->revision . ")";
711 $filelist .= ", \"" . $qunk->state . "\"";
712 $filelist .= ", lines: " . $qunk->lines;
713 $filelist .= "\n";
715 undef @ChangedQunks;
718 chomp $filelist;
720 if ( $Hide_Filenames ) {
721 $filelist = '';
724 $msg =~ s/\n(.*)/\n$Indent$1/g;
725 unless ( $After_Header eq " " or $FSF_Style ) {
726 $msg =~ s/^(.*)/$Indent$1/g;
729 unless ( $No_Wrap ) {
730 if ( $FSF_Style ) {
731 $msg = $self->wrap_log_entry($msg, '', 69, 69);
732 chomp($msg);
733 chomp($msg);
734 } else {
735 $msg = $self->mywrap('', $Indent, "$msg");
736 $msg =~ s/[ \t]+\n/\n/g;
740 $body = $filelist . $After_Header . $msg;
741 } else { # do wrapping, either FSF-style or regular
742 my $latter_wrap = $No_Extra_Indent ? $Indent : "$Indent ";
744 if ( $FSF_Style ) {
745 $files = $self->mywrap($Indent, $latter_wrap, "* $files");
747 my $files_last_line_len = 0;
748 if ( $After_Header eq " " ) {
749 $files_last_line_len = $self->last_line_len($files);
750 $files_last_line_len += 1; # for $After_Header
753 $msg = $self->wrap_log_entry($msg, $latter_wrap, 69-$files_last_line_len, 69);
754 $body = $files . $After_Header . $msg;
755 } else { # not FSF-style
756 $msg = $self->preprocess_msg_text($msg);
757 $body = $files . $After_Header . $msg;
758 $body = $self->mywrap($Indent, $latter_wrap, "* $body");
759 $body =~ s/[ \t]+\n/\n/g;
763 return $body;
766 # ----------------------------------------------------------------------------
768 package CVS::Utils::ChangeLog::EntrySet::Output::XML;
770 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
772 use File::Basename qw( fileparse );
774 sub new {
775 my $class = shift;
776 my $self = $class->SUPER::new(@_);
779 # -------------------------------------
781 sub header_line {
782 my $self = shift;
783 my ($time, $author, $lastdate) = @_;
785 my $header_line = '';
787 my $isoDate;
789 my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
791 # Ideally, this would honor $UTC_Times and use +HH:MM syntax
792 $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
793 $y + 1900, $m + 1, $d, $H, $M, $S);
795 my (undef,$min,$hour,$mday,$mon,$year,$wday)
796 = $UTC_Times ? gmtime($time) : localtime($time);
798 my $date = $self->fdatetime($time);
799 $wday = $self->wday($wday);
801 $header_line =
802 sprintf ("<date>%4u-%02u-%02u</date>\n${wday}<time>%02u:%02u</time>\n",
803 $year+1900, $mon+1, $mday, $hour, $min);
804 $header_line .= "<isoDate>$isoDate</isoDate>\n"
805 unless $No_XML_ISO_Date;
806 $header_line .= sprintf("<author>%s</author>\n" , $author);
809 # -------------------------------------
811 sub wday {
812 my $self = shift; my $class = ref $self;
813 my ($wday) = @_;
815 return '<weekday>' . $class->weekday_en($wday) . "</weekday>\n";
818 # -------------------------------------
820 sub escape {
821 my $self = shift;
823 my $txt = shift;
824 $txt =~ s/&/&amp;/g;
825 $txt =~ s/</&lt;/g;
826 $txt =~ s/>/&gt;/g;
827 return $txt;
830 # -------------------------------------
832 sub output_header {
833 my $self = shift;
834 my ($fh) = @_;
836 my $encoding =
837 length $XML_Encoding ? qq'encoding="$XML_Encoding"' : '';
838 my $version = 'version="1.0"';
839 my $declaration =
840 sprintf '<?xml %s?>', join ' ', grep length, $version, $encoding;
841 my $root =
842 $No_XML_Namespace ?
843 '<changelog>' :
844 '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
845 print $fh "$declaration\n\n$root\n\n";
848 # -------------------------------------
850 sub output_footer {
851 my $self = shift;
852 my ($fh) = @_;
854 print $fh "</changelog>\n";
857 # -------------------------------------
859 sub preprocess_msg_text {
860 my $self = shift;
861 my ($text) = @_;
863 $text = $self->SUPER::preprocess_msg_text($text);
865 $text = $self->escape($text);
866 chomp $text;
867 $text = "<msg>${text}</msg>\n";
869 return $text;
872 # -------------------------------------
874 # Here we take a bunch of qunks and convert them into a printed
875 # summary that will include all the information the user asked for.
876 sub pretty_file_list {
877 my $self = shift;
878 my ($qunksref) = @_;
880 my $beauty = ''; # The accumulating header string for this entry.
881 my %non_unanimous_tags; # Tags found in a proper subset of qunks
882 my %unanimous_tags; # Tags found in all qunks
883 my %all_branches; # Branches found in any qunk
884 my $fbegun = 0; # Did we begin printing filenames yet?
886 my ($common_dir, $qunkrefs) =
887 $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches),
888 $qunksref);
890 my @qunkrefs = @$qunkrefs;
892 # If outputting XML, then our task is pretty simple, because we
893 # don't have to detect common dir, common tags, branch prefixing,
894 # etc. We just output exactly what we have, and don't worry about
895 # redundancy or readability.
897 foreach my $qunkref (@qunkrefs)
899 my $filename = $qunkref->filename;
900 my $state = $qunkref->state;
901 my $revision = $qunkref->revision;
902 my $tags = $qunkref->tags;
903 my $branch = $qunkref->branch;
904 my $branchroots = $qunkref->roots;
905 my $lines = $qunkref->lines;
907 $filename = $self->escape($filename); # probably paranoia
908 $revision = $self->escape($revision); # definitely paranoia
910 $beauty .= "<file>\n";
911 $beauty .= "<name>${filename}</name>\n";
912 $beauty .= "<cvsstate>${state}</cvsstate>\n";
913 $beauty .= "<revision>${revision}</revision>\n";
915 if ($Show_Lines_Modified
916 && $lines && $lines =~ m/\+(\d+)\s+-(\d+)/) {
917 $beauty .= "<linesadded>$1</linesadded>\n";
918 $beauty .= "<linesremoved>$2</linesremoved>\n";
921 if ($branch) {
922 $branch = $self->escape($branch); # more paranoia
923 $beauty .= "<branch>${branch}</branch>\n";
925 foreach my $tag (@$tags) {
926 $tag = $self->escape($tag); # by now you're used to the paranoia
927 $beauty .= "<tag>${tag}</tag>\n";
929 foreach my $root (@$branchroots) {
930 $root = $self->escape($root); # which is good, because it will continue
931 $beauty .= "<branchroot>${root}</branchroot>\n";
933 $beauty .= "</file>\n";
936 # Theoretically, we could go home now. But as long as we're here,
937 # let's print out the common_dir and utags, as a convenience to
938 # the receiver (after all, earlier code calculated that stuff
939 # anyway, so we might as well take advantage of it).
941 if ((scalar (keys (%unanimous_tags))) > 1) {
942 foreach my $utag ((keys (%unanimous_tags))) {
943 $utag = $self->escape($utag); # the usual paranoia
944 $beauty .= "<utag>${utag}</utag>\n";
947 if ($common_dir) {
948 $common_dir = $self->escape($common_dir);
949 $beauty .= "<commondir>${common_dir}</commondir>\n";
952 # That's enough for XML, time to go home:
953 return $beauty;
956 # -------------------------------------
958 sub output_tagdate {
959 # NOT YET DONE
962 # -------------------------------------
964 sub output_entry {
965 my $self = shift;
966 my ($fh, $entry) = @_;
967 print $fh "<entry>\n$entry</entry>\n\n";
970 # -------------------------------------
972 sub format_body {
973 my $self = shift;
974 my ($msg, $files, $qunklist) = @_;
976 $msg = $self->preprocess_msg_text($msg);
977 return $files . $msg;
980 # ----------------------------------------------------------------------------
982 package CVS::Utils::ChangeLog::EntrySet::Output;
984 use Carp qw( croak );
985 use File::Basename qw( fileparse );
987 # Class Utility Functions -------------
989 { # form closure
991 my @weekdays = (qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday));
992 sub weekday_en {
993 my $class = shift;
994 return $weekdays[$_[0]];
999 # -------------------------------------
1001 sub new {
1002 my ($proto, %args) = @_;
1003 my $class = ref $proto || $proto;
1005 my $follow_branches = delete $args{follow_branches};
1006 my $follow_only = delete $args{follow_only};
1007 my $ignore_tags = delete $args{ignore_tags};
1008 my $show_tags = delete $args{show_tags};
1009 die "Unrecognized arg to EntrySet::Output::new: '$_'\n"
1010 for keys %args;
1012 bless +{follow_branches => $follow_branches,
1013 follow_only => $follow_only,
1014 show_tags => $show_tags,
1015 ignore_tags => $ignore_tags,
1016 }, $class;
1019 # Abstract Subrs ----------------------
1021 sub wday { croak "Whoops. Abtract method call (wday).\n" }
1022 sub pretty_file_list { croak "Whoops. Abtract method call (pretty_file_list).\n" }
1023 sub output_tagdate { croak "Whoops. Abtract method call (output_tagdate).\n" }
1024 sub header_line { croak "Whoops. Abtract method call (header_line).\n" }
1026 # Instance Subrs ----------------------
1028 sub output_header { }
1030 # -------------------------------------
1032 sub output_entry {
1033 my $self = shift;
1034 my ($fh, $entry) = @_;
1035 print $fh "$entry\n";
1038 # -------------------------------------
1040 sub output_footer { }
1042 # -------------------------------------
1044 sub escape { return $_[1] }
1046 # -------------------------------------
1048 sub _revision_is_wanted {
1049 my ($self, $qunk) = @_;
1051 my ($revision, $branch_numbers) = @{$qunk}{qw( revision branch_numbers )};
1052 my $follow_branches = $self->{follow_branches};
1053 my $follow_only = $self->{follow_only};
1055 #print STDERR "IG: ", join(',', keys %{$self->{ignore_tags}}), "\n";
1056 #print STDERR "IX: ", join(',', @{$qunk->{tags}}), "\n" if defined $qunk->{tags};
1057 #print STDERR "IQ: ", join(',', keys %{$qunk->{branch_numbers}}), "\n" if defined $qunk->{branch_numbers};
1058 #use Data::Dumper; print STDERR Dumper $qunk;
1060 for my $ignore_tag (keys %{$self->{ignore_tags}}) {
1061 return
1062 if defined $qunk->{tags} and grep $_ eq $ignore_tag, @{$qunk->{tags}};
1065 if ( keys %{$self->{show_tags}} ) {
1066 for my $show_tag (keys %{$self->{show_tags}}) {
1067 return
1068 if ! defined $qunk->{tags} or ! grep $_ eq $show_tag, @{$qunk->{tags}};
1072 return 1
1073 unless @$follow_branches + @$follow_only; # no follow is follow all
1075 for my $x (map([$_, 1], @$follow_branches),
1076 map([$_, 0], @$follow_only )) {
1077 my ($branch, $followsub) = @$x;
1079 # Special case for following trunk revisions
1080 return 1
1081 if $branch =~ /^trunk$/i and $revision =~ /^[0-9]+\.[0-9]+$/;
1083 if ( my $branch_number = $branch_numbers->{$branch} ) {
1084 # Are we on one of the follow branches or an ancestor of same?
1086 # If this revision is a prefix of the branch number, or possibly is less
1087 # in the minormost number, OR if this branch number is a prefix of the
1088 # revision, then yes. Otherwise, no.
1090 # So below, we determine if any of those conditions are met.
1092 # Trivial case: is this revision on the branch? (Compare this way to
1093 # avoid regexps that screw up Emacs indentation, argh.)
1094 if ( substr($revision, 0, (length($branch_number) + 1))
1096 ($branch_number . ".") ) {
1097 if ( $followsub ) {
1098 return 1;
1099 } elsif (length($revision) == length($branch_number)+2 ) {
1100 return 1;
1102 } elsif ( length($branch_number) > length($revision)
1104 $No_Ancestors ) {
1105 # Non-trivial case: check if rev is ancestral to branch
1107 # r_left still has the trailing "."
1108 my ($r_left, $r_end) = ($revision =~ /^((?:\d+\.)+)(\d+)$/);
1110 # b_left still has trailing "."
1111 # b_mid has no trailing "."
1112 my ($b_left, $b_mid) = ($branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/);
1113 return 1
1114 if $r_left eq $b_left and $r_end <= $b_mid;
1119 return;
1122 # -------------------------------------
1124 sub output_changelog {
1125 my $self = shift; my $class = ref $self;
1126 my ($grand_poobah) = @_;
1127 ### Process each ChangeLog
1129 while (my ($dir,$authorhash) = each %$grand_poobah)
1131 &main::debug ("DOING DIR: $dir\n");
1133 # Here we twist our hash around, from being
1134 # author => time => message => filelist
1135 # in %$authorhash to
1136 # time => author => message => filelist
1137 # in %changelog.
1139 # This is also where we merge entries. The algorithm proceeds
1140 # through the timeline of the changelog with a sliding window of
1141 # $Max_Checkin_Duration seconds; within that window, entries that
1142 # have the same log message are merged.
1144 # (To save space, we zap %$authorhash after we've copied
1145 # everything out of it.)
1147 my %changelog;
1148 while (my ($author,$timehash) = each %$authorhash)
1150 my %stamptime;
1151 foreach my $time (sort {$a <=> $b} (keys %$timehash))
1153 my $msghash = $timehash->{$time};
1154 while (my ($msg,$qunklist) = each %$msghash)
1156 my $stamptime = $stamptime{$msg};
1157 if ((defined $stamptime)
1158 and (($time - $stamptime) < $Max_Checkin_Duration)
1159 and (defined $changelog{$stamptime}{$author}{$msg}))
1161 push(@{$changelog{$stamptime}{$author}{$msg}}, $qunklist->files);
1163 else {
1164 $changelog{$time}{$author}{$msg} = $qunklist->files;
1165 $stamptime{$msg} = $time;
1170 undef (%$authorhash);
1172 ### Now we can write out the ChangeLog!
1174 my ($logfile_here, $logfile_bak, $tmpfile);
1175 my $lastdate;
1177 if (! $Output_To_Stdout) {
1178 $logfile_here = $dir . $Log_File_Name;
1179 $logfile_here =~ s/^\.\/\//\//; # fix any leading ".//" problem
1180 $tmpfile = "${logfile_here}.cvs2cl$$.tmp";
1181 $logfile_bak = "${logfile_here}.bak";
1183 open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
1185 else {
1186 open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
1189 print LOG_OUT $ChangeLog_Header;
1191 my %tag_date_printed;
1193 $self->output_header(\*LOG_OUT);
1195 my @key_list = ();
1196 if($Chronological_Order) {
1197 @key_list = sort {$a <=> $b} (keys %changelog);
1198 } else {
1199 @key_list = sort {$b <=> $a} (keys %changelog);
1201 foreach my $time (@key_list)
1203 next if ($Delta_Mode &&
1204 (($time <= $Delta_StartTime) ||
1205 ($time > $Delta_EndTime && $Delta_EndTime)));
1207 # Set up the date/author line.
1208 # kff todo: do some more XML munging here, on the header
1209 # part of the entry:
1210 my (undef,$min,$hour,$mday,$mon,$year,$wday)
1211 = $UTC_Times ? gmtime($time) : localtime($time);
1213 $wday = $self->wday($wday);
1214 # XML output includes everything else, we might as well make
1215 # it always include Day Of Week too, for consistency.
1216 my $authorhash = $changelog{$time};
1217 if ($Show_Tag_Dates) {
1218 my %tags;
1219 while (my ($author,$mesghash) = each %$authorhash) {
1220 while (my ($msg,$qunk) = each %$mesghash) {
1221 foreach my $qunkref2 (@$qunk) {
1222 if (defined ($qunkref2->tags)) {
1223 foreach my $tag (@{$qunkref2->tags}) {
1224 $tags{$tag} = 1;
1230 # Sort here for determinism to ease testing
1231 foreach my $tag (sort keys %tags) {
1232 if ( ! defined $tag_date_printed{$tag} ) {
1233 $tag_date_printed{$tag} = $time;
1234 $self->output_tagdate(\*LOG_OUT, $time, $tag);
1238 while (my ($author,$mesghash) = each %$authorhash)
1240 # If XML, escape in outer loop to avoid compound quoting:
1241 $author = $self->escape($author);
1243 FOOBIE:
1244 # We sort here to enable predictable ordering for the testing porpoises
1245 for my $msg (sort keys %$mesghash)
1247 my $qunklist = $mesghash->{$msg};
1249 my @qunklist =
1250 grep $self->_revision_is_wanted($_), @$qunklist;
1252 next FOOBIE unless @qunklist;
1254 my $files = $self->pretty_file_list(\@qunklist);
1255 my $header_line; # date and author
1256 my $wholething; # $header_line + $body
1258 my $date = $self->fdatetime($time);
1259 $header_line = $self->header_line($time, $author, $lastdate);
1260 $lastdate = $date;
1262 $Text::Wrap::huge = 'overflow'
1263 if $Text::Wrap::VERSION >= 2001.0130;
1264 # Reshape the body according to user preferences.
1265 my $body = $self->format_body($msg, $files, \@qunklist);
1267 $body =~ s/[ \t]+\n/\n/g;
1268 $wholething = $header_line . $body;
1270 # One last check: make sure it passes the regexp test, if the
1271 # user asked for that. We have to do it here, so that the
1272 # test can match against information in the header as well
1273 # as in the text of the log message.
1275 # How annoying to duplicate so much code just because I
1276 # can't figure out a way to evaluate scalars on the trailing
1277 # operator portion of a regular expression. Grrr.
1278 if ($Case_Insensitive) {
1279 unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/oi ) ) {
1280 $self->output_entry(\*LOG_OUT, $wholething);
1283 else {
1284 unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/o ) ) {
1285 $self->output_entry(\*LOG_OUT, $wholething);
1292 $self->output_footer(\*LOG_OUT);
1294 close (LOG_OUT);
1296 if ( ! $Output_To_Stdout ) {
1297 # If accumulating, append old data to new before renaming. But
1298 # don't append the most recent entry, since it's already in the
1299 # new log due to CVS's idiosyncratic interpretation of "log -d".
1300 if ($Cumulative && -f $logfile_here) {
1301 open NEW_LOG, ">>$tmpfile"
1302 or die "trouble appending to $tmpfile ($!)";
1304 open OLD_LOG, "<$logfile_here"
1305 or die "trouble reading from $logfile_here ($!)";
1307 my $started_first_entry = 0;
1308 my $passed_first_entry = 0;
1309 while (<OLD_LOG>) {
1310 if ( ! $passed_first_entry ) {
1311 if ( ( ! $started_first_entry )
1312 and /^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/ ) {
1313 $started_first_entry = 1;
1314 } elsif ( /^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/ ) {
1315 $passed_first_entry = 1;
1316 print NEW_LOG $_;
1318 } else {
1319 print NEW_LOG $_;
1323 close NEW_LOG;
1324 close OLD_LOG;
1327 if ( -f $logfile_here ) {
1328 rename $logfile_here, $logfile_bak;
1330 rename $tmpfile, $logfile_here;
1335 # -------------------------------------
1337 # Don't call this wrap, because with 5.5.3, that clashes with the
1338 # (unconditional :-( ) export of wrap() from Text::Wrap
1339 sub mywrap {
1340 my $self = shift;
1341 my ($indent1, $indent2, @text) = @_;
1342 # If incoming text looks preformatted, don't get clever
1343 my $text = Text::Wrap::wrap($indent1, $indent2, @text);
1344 if ( grep /^\s+/m, @text ) {
1345 return $text;
1347 my @lines = split /\n/, $text;
1348 $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e;
1349 $lines[0] =~ s/^$indent1\s+/$indent1/;
1350 s/^$indent2\s+/$indent2/
1351 for @lines[1..$#lines];
1352 my $newtext = join "\n", @lines;
1353 $newtext .= "\n"
1354 if substr($text, -1) eq "\n";
1355 return $newtext;
1358 # -------------------------------------
1360 sub preprocess_msg_text {
1361 my $self = shift;
1362 my ($text) = @_;
1364 # Strip out carriage returns (as they probably result from DOSsy editors).
1365 $text =~ s/\r\n/\n/g;
1366 # If it *looks* like two newlines, make it *be* two newlines:
1367 $text =~ s/\n\s*\n/\n\n/g;
1369 return $text;
1372 # -------------------------------------
1374 sub last_line_len {
1375 my $self = shift;
1377 my $files_list = shift;
1378 my @lines = split (/\n/, $files_list);
1379 my $last_line = pop (@lines);
1380 return length ($last_line);
1383 # -------------------------------------
1385 # A custom wrap function, sensitive to some common constructs used in
1386 # log entries.
1387 sub wrap_log_entry {
1388 my $self = shift;
1390 my $text = shift; # The text to wrap.
1391 my $left_pad_str = shift; # String to pad with on the left.
1393 # These do NOT take left_pad_str into account:
1394 my $length_remaining = shift; # Amount left on current line.
1395 my $max_line_length = shift; # Amount left for a blank line.
1397 my $wrapped_text = ''; # The accumulating wrapped entry.
1398 my $user_indent = ''; # Inherited user_indent from prev line.
1400 my $first_time = 1; # First iteration of the loop?
1401 my $suppress_line_start_match = 0; # Set to disable line start checks.
1403 my @lines = split (/\n/, $text);
1404 while (@lines) # Don't use `foreach' here, it won't work.
1406 my $this_line = shift (@lines);
1407 chomp $this_line;
1409 if ($this_line =~ /^(\s+)/) {
1410 $user_indent = $1;
1412 else {
1413 $user_indent = '';
1416 # If it matches any of the line-start regexps, print a newline now...
1417 if ($suppress_line_start_match)
1419 $suppress_line_start_match = 0;
1421 elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
1422 || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
1423 || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
1424 || ($this_line =~ /^(\s+)(\S+)/)
1425 || ($this_line =~ /^(\s*)- +/)
1426 || ($this_line =~ /^()\s*$/)
1427 || ($this_line =~ /^(\s*)\*\) +/)
1428 || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
1430 # Make a line break immediately, unless header separator is set
1431 # and this line is the first line in the entry, in which case
1432 # we're getting the blank line for free already and shouldn't
1433 # add an extra one.
1434 unless (($After_Header ne " ") and ($first_time))
1436 if ($this_line =~ /^()\s*$/) {
1437 $suppress_line_start_match = 1;
1438 $wrapped_text .= "\n${left_pad_str}";
1441 $wrapped_text .= "\n${left_pad_str}";
1444 $length_remaining = $max_line_length - (length ($user_indent));
1447 # Now that any user_indent has been preserved, strip off leading
1448 # whitespace, so up-folding has no ugly side-effects.
1449 $this_line =~ s/^\s*//;
1451 # Accumulate the line, and adjust parameters for next line.
1452 my $this_len = length ($this_line);
1453 if ($this_len == 0)
1455 # Blank lines should cancel any user_indent level.
1456 $user_indent = '';
1457 $length_remaining = $max_line_length;
1459 elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
1461 # Walk backwards from the end. At first acceptable spot, break
1462 # a new line.
1463 my $idx = $length_remaining - 1;
1464 if ($idx < 0) { $idx = 0 };
1465 while ($idx > 0)
1467 if (substr ($this_line, $idx, 1) =~ /\s/)
1469 my $line_now = substr ($this_line, 0, $idx);
1470 my $next_line = substr ($this_line, $idx);
1471 $this_line = $line_now;
1473 # Clean whitespace off the end.
1474 chomp $this_line;
1476 # The current line is ready to be printed.
1477 $this_line .= "\n${left_pad_str}";
1479 # Make sure the next line is allowed full room.
1480 $length_remaining = $max_line_length - (length ($user_indent));
1482 # Strip next_line, but then preserve any user_indent.
1483 $next_line =~ s/^\s*//;
1485 # Sneak a peek at the user_indent of the upcoming line, so
1486 # $next_line (which will now precede it) can inherit that
1487 # indent level. Otherwise, use whatever user_indent level
1488 # we currently have, which might be none.
1489 my $next_next_line = shift (@lines);
1490 if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
1491 $next_line = $1 . $next_line if (defined ($1));
1492 # $length_remaining = $max_line_length - (length ($1));
1493 $next_next_line =~ s/^\s*//;
1495 else {
1496 $next_line = $user_indent . $next_line;
1498 if (defined ($next_next_line)) {
1499 unshift (@lines, $next_next_line);
1501 unshift (@lines, $next_line);
1503 # Our new next line might, coincidentally, begin with one of
1504 # the line-start regexps, so we temporarily turn off
1505 # sensitivity to that until we're past the line.
1506 $suppress_line_start_match = 1;
1508 last;
1510 else
1512 $idx--;
1516 if ($idx == 0)
1518 # We bottomed out because the line is longer than the
1519 # available space. But that could be because the space is
1520 # small, or because the line is longer than even the maximum
1521 # possible space. Handle both cases below.
1523 if ($length_remaining == ($max_line_length - (length ($user_indent))))
1525 # The line is simply too long -- there is no hope of ever
1526 # breaking it nicely, so just insert it verbatim, with
1527 # appropriate padding.
1528 $this_line = "\n${left_pad_str}${this_line}";
1530 else
1532 # Can't break it here, but may be able to on the next round...
1533 unshift (@lines, $this_line);
1534 $length_remaining = $max_line_length - (length ($user_indent));
1535 $this_line = "\n${left_pad_str}";
1539 else # $this_len < $length_remaining, so tack on what we can.
1541 # Leave a note for the next iteration.
1542 $length_remaining = $length_remaining - $this_len;
1544 if ($this_line =~ /\.$/)
1546 $this_line .= " ";
1547 $length_remaining -= 2;
1549 else # not a sentence end
1551 $this_line .= " ";
1552 $length_remaining -= 1;
1556 # Unconditionally indicate that loop has run at least once.
1557 $first_time = 0;
1559 $wrapped_text .= "${user_indent}${this_line}";
1562 # One last bit of padding.
1563 $wrapped_text .= "\n";
1565 return $wrapped_text;
1568 # -------------------------------------
1570 sub _pretty_file_list {
1571 my $self = shift;
1573 my ($unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref) = @_;
1575 my @qunkrefs =
1576 grep +( ( ! $_->tags_exists
1578 ! grep exists $ignore_tags{$_}, @{$_->tags})
1580 ( ! keys %show_tags
1582 ( $_->tags_exists
1584 grep exists $show_tags{$_}, @{$_->tags} )
1587 @$qunksref;
1589 my $common_dir; # Dir prefix common to all files ('' if none)
1591 # First, loop over the qunks gathering all the tag/branch names.
1592 # We'll put them all in non_unanimous_tags, and take out the
1593 # unanimous ones later.
1594 QUNKREF:
1595 foreach my $qunkref (@qunkrefs)
1597 # Keep track of whether all the files in this commit were in the
1598 # same directory, and memorize it if so. We can make the output a
1599 # little more compact by mentioning the directory only once.
1600 if ($Common_Dir && (scalar (@qunkrefs)) > 1)
1602 if (! (defined ($common_dir)))
1604 my ($base, $dir);
1605 ($base, $dir, undef) = fileparse ($qunkref->filename);
1607 if ((! (defined ($dir))) # this first case is sheer paranoia
1608 or ($dir eq '')
1609 or ($dir eq "./")
1610 or ($dir eq ".\\"))
1612 $common_dir = '';
1614 else
1616 $common_dir = $dir;
1619 elsif ($common_dir ne '')
1621 # Already have a common dir prefix, so how much of it can we preserve?
1622 $common_dir = &main::common_path_prefix ($qunkref->filename, $common_dir);
1625 else # only one file in this entry anyway, so common dir not an issue
1627 $common_dir = '';
1630 if (defined ($qunkref->branch)) {
1631 $all_branches->{$qunkref->branch} = 1;
1633 if (defined ($qunkref->tags)) {
1634 foreach my $tag (@{$qunkref->tags}) {
1635 $non_unanimous_tags->{$tag} = 1;
1640 # Any tag held by all qunks will be printed specially... but only if
1641 # there are multiple qunks in the first place!
1642 if ((scalar (@qunkrefs)) > 1) {
1643 foreach my $tag (keys (%$non_unanimous_tags)) {
1644 my $everyone_has_this_tag = 1;
1645 foreach my $qunkref (@qunkrefs) {
1646 if ((! (defined ($qunkref->tags)))
1647 or (! (grep ($_ eq $tag, @{$qunkref->tags})))) {
1648 $everyone_has_this_tag = 0;
1651 if ($everyone_has_this_tag) {
1652 $unanimous_tags->{$tag} = 1;
1653 delete $non_unanimous_tags->{$tag};
1658 return $common_dir, \@qunkrefs;
1661 # -------------------------------------
1663 sub fdatetime {
1664 my $self = shift;
1666 my ($year, $mday, $mon, $wday, $hour, $min);
1668 if ( @_ > 1 ) {
1669 ($year, $mday, $mon, $wday, $hour, $min) = @_;
1670 } else {
1671 my ($time) = @_;
1672 (undef, $min, $hour, $mday, $mon, $year, $wday) =
1673 $UTC_Times ? gmtime($time) : localtime($time);
1675 $year += 1900;
1676 $mon += 1;
1677 $wday = $self->wday($wday);
1680 my $fdate = $self->fdate($year, $mon, $mday, $wday);
1682 if ($Show_Times) {
1683 my $ftime = $self->ftime($hour, $min);
1684 return "$fdate $ftime";
1685 } else {
1686 return $fdate;
1690 # -------------------------------------
1692 sub fdate {
1693 my $self = shift;
1695 my ($year, $mday, $mon, $wday);
1697 if ( @_ > 1 ) {
1698 ($year, $mon, $mday, $wday) = @_;
1699 } else {
1700 my ($time) = @_;
1701 (undef, undef, undef, $mday, $mon, $year, $wday) =
1702 $UTC_Times ? gmtime($time) : localtime($time);
1704 $year += 1900;
1705 $mon += 1;
1706 $wday = $self->wday($wday);
1709 return sprintf '%4u-%02u-%02u%s', $year, $mon, $mday, $wday;
1712 # -------------------------------------
1714 sub ftime {
1715 my $self = shift;
1717 my ($hour, $min);
1719 if ( @_ > 1 ) {
1720 ($hour, $min) = @_;
1721 } else {
1722 my ($time) = @_;
1723 (undef, $min, $hour) = $UTC_Times ? gmtime($time) : localtime($time);
1726 return sprintf '%02u:%02u', $hour, $min;
1729 # ----------------------------------------------------------------------------
1731 package CVS::Utils::ChangeLog::Message;
1733 sub new {
1734 my $class = shift;
1735 my ($msg) = @_;
1737 my %self = (msg => $msg, files => []);
1739 bless \%self, $class;
1742 sub add_fileentry {
1743 my $self = shift;
1744 my ($fileentry) = @_;
1746 die "Not a fileentry: $fileentry"
1747 unless $fileentry->isa('CVS::Utils::ChangeLog::FileEntry');
1749 push @{$self->{files}}, $fileentry;
1752 sub files { wantarray ? @{$_[0]->{files}} : $_[0]->{files} }
1754 # ----------------------------------------------------------------------------
1756 package CVS::Utils::ChangeLog::FileEntry;
1758 # Each revision of a file has a little data structure (a `qunk')
1759 # associated with it. That data structure holds not only the
1760 # file's name, but any additional information about the file
1761 # that might be needed in the output, such as the revision
1762 # number, tags, branches, etc. The reason to have these things
1763 # arranged in a data structure, instead of just appending them
1764 # textually to the file's name, is that we may want to do a
1765 # little rearranging later as we write the output. For example,
1766 # all the files on a given tag/branch will go together, followed
1767 # by the tag in parentheses (so trunk or otherwise non-tagged
1768 # files would go at the end of the file list for a given log
1769 # message). This rearrangement is a lot easier to do if we
1770 # don't have to reparse the text.
1772 # A qunk looks like this:
1775 # filename => "hello.c",
1776 # revision => "1.4.3.2",
1777 # time => a timegm() return value (moment of commit)
1778 # tags => [ "tag1", "tag2", ... ],
1779 # branch => "branchname" # There should be only one, right?
1780 # roots => [ "branchtag1", "branchtag2", ... ]
1781 # lines => "+x -y" # or undefined; x and y are integers
1784 # Single top-level ChangeLog, or one per subdirectory?
1785 my $distributed;
1786 sub distributed { $#_ ? ($distributed = $_[1]) : $distributed; }
1788 sub new {
1789 my $class = shift;
1790 my ($path, $time, $revision, $state, $lines,
1791 $branch_names, $branch_roots, $branch_numbers, $symbolic_names) = @_;
1793 my %self = (time => $time,
1794 revision => $revision,
1795 state => $state,
1796 lines => $lines,
1797 branch_numbers => $branch_numbers,
1800 if ( $distributed ) {
1801 @self{qw(filename dir_key)} = fileparse($path);
1802 } else {
1803 @self{qw(filename dir_key)} = ($path, './');
1806 { # Scope for $branch_prefix
1807 (my ($branch_prefix) = ($revision =~ /((?:\d+\.)+)\d+/));
1808 $branch_prefix =~ s/\.$//;
1809 if ( $branch_names->{$branch_prefix} ) {
1810 my $branch_name = $branch_names->{$branch_prefix};
1811 $self{branch} = $branch_name;
1812 $self{branches} = [$branch_name];
1814 while ( $branch_prefix =~ s/^(\d+(?:\.\d+\.\d+)+)\.\d+\.\d+$/$1/ ) {
1815 push @{$self{branches}}, $branch_names->{$branch_prefix}
1816 if exists $branch_names->{$branch_prefix};
1820 # If there's anything in the @branch_roots array, then this
1821 # revision is the root of at least one branch. We'll display
1822 # them as branch names instead of revision numbers, the
1823 # substitution for which is done directly in the array:
1824 $self{'roots'} = [ map { $branch_names->{$_} } @$branch_roots ]
1825 if @$branch_roots;
1827 if ( exists $symbolic_names->{$revision} ) {
1828 $self{tags} = delete $symbolic_names->{$revision};
1829 &main::delta_check($time, $self{tags});
1832 bless \%self, $class;
1835 sub filename { $_[0]->{filename} }
1836 sub dir_key { $_[0]->{dir_key} }
1837 sub revision { $_[0]->{revision} }
1838 sub branch { $_[0]->{branch} }
1839 sub state { $_[0]->{state} }
1840 sub lines { $_[0]->{lines} }
1841 sub roots { $_[0]->{roots} }
1842 sub branch_numbers { $_[0]->{branch_numbers} }
1844 sub tags { $_[0]->{tags} }
1845 sub tags_exists {
1846 exists $_[0]->{tags};
1849 # This may someday be used in a more sophisticated calculation of what other
1850 # files are involved in this commit. For now, we don't use it much except for
1851 # delta mode, because the common-commit-detection algorithm is hypothesized to
1852 # be "good enough" as it stands.
1853 sub time { $_[0]->{time} }
1855 # ----------------------------------------------------------------------------
1857 package CVS::Utils::ChangeLog::EntrySetBuilder;
1859 use File::Basename qw( fileparse );
1860 use Time::Local qw( timegm );
1862 use constant MAILNAME => "/etc/mailname";
1864 # In 'cvs log' output, one long unbroken line of equal signs separates files:
1865 use constant FILE_SEPARATOR => '=' x 77;# . "\n";
1866 # In 'cvs log' output, a shorter line of dashes separates log messages within
1867 # a file:
1868 use constant REV_SEPARATOR => '-' x 28;# . "\n";
1870 use constant EMPTY_LOG_MESSAGE => '*** empty log message ***';
1872 # -------------------------------------
1874 sub new {
1875 my ($proto) = @_;
1876 my $class = ref $proto || $proto;
1878 my $poobah = CVS::Utils::ChangeLog::EntrySet->new;
1879 my $self = bless +{ grand_poobah => $poobah }, $class;
1881 $self->clear_file;
1882 $self->maybe_read_user_map_file;
1883 return $self;
1886 # -------------------------------------
1888 sub clear_msg {
1889 my ($self) = @_;
1891 # Make way for the next message
1892 undef $self->{rev_msg};
1893 undef $self->{rev_time};
1894 undef $self->{rev_revision};
1895 undef $self->{rev_author};
1896 undef $self->{rev_state};
1897 undef $self->{lines};
1898 $self->{rev_branch_roots} = []; # For showing which files are branch
1899 # ancestors.
1900 $self->{collecting_symbolic_names} = 0;
1903 # -------------------------------------
1905 sub clear_file {
1906 my ($self) = @_;
1907 $self->clear_msg;
1909 undef $self->{filename};
1910 $self->{branch_names} = +{}; # We'll grab branch names while we're
1911 # at it.
1912 $self->{branch_numbers} = +{}; # Save some revisions for
1913 # @Follow_Branches
1914 $self->{symbolic_names} = +{}; # Where tag names get stored.
1917 # -------------------------------------
1919 sub grand_poobah { $_[0]->{grand_poobah} }
1921 # -------------------------------------
1923 sub read_changelog {
1924 my ($self, $command) = @_;
1926 # my $grand_poobah = CVS::Utils::ChangeLog::EntrySet->new;
1928 if (! $Input_From_Stdin) {
1929 my $Log_Source_Command = join(' ', @$command);
1930 &main::debug ("(run \"${Log_Source_Command}\")\n");
1931 open (LOG_SOURCE, "$Log_Source_Command |")
1932 or die "unable to run \"${Log_Source_Command}\"";
1934 else {
1935 open (LOG_SOURCE, "-") or die "unable to open stdin for reading";
1938 binmode LOG_SOURCE;
1940 XX_Log_Source:
1941 while (<LOG_SOURCE>) {
1942 chomp;
1944 # If on a new file and don't see filename, skip until we find it, and
1945 # when we find it, grab it.
1946 if ( ! defined $self->{filename} ) {
1947 $self->read_file_path($_);
1948 } elsif ( /^symbolic names:$/ ) {
1949 $self->{collecting_symbolic_names} = 1;
1950 } elsif ( $self->{collecting_symbolic_names} ) {
1951 $self->read_symbolic_name($_);
1952 } elsif ( $_ eq FILE_SEPARATOR and ! defined $self->{rev_revision} ) {
1953 $self->clear_file;
1954 } elsif ( ! defined $self->{rev_revision} ) {
1955 # If have file name, but not revision, and see revision, then grab
1956 # it. (We collect unconditionally, even though we may or may not
1957 # ever use it.)
1958 $self->read_revision($_);
1959 } elsif ( ! defined $self->{rev_time} ) { # and /^date: /) {
1960 $self->read_date_author_and_state($_);
1961 } elsif ( /^branches:\s+(.*);$/ ) {
1962 $self->read_branches($1);
1963 } elsif ( ! ( $_ eq FILE_SEPARATOR or $_ eq REV_SEPARATOR ) ) {
1964 # If have file name, time, and author, then we're just grabbing
1965 # log message texts:
1966 $self->{rev_msg} .= $_ . "\n"; # Normally, just accumulate the message...
1967 } else {
1968 if ( ! $self->{rev_msg}
1969 or $self->{rev_msg} =~ /^\s*(\.\s*)?$/
1970 or index($self->{rev_msg}, EMPTY_LOG_MESSAGE) > -1 ) {
1971 # ... until a msg separator is encountered:
1972 # Ensure the message contains something:
1973 $self->clear_msg
1974 if $Prune_Empty_Msgs;
1975 $self->{rev_msg} = "[no log message]\n";
1978 $self->add_file_entry;
1980 if ( $_ eq FILE_SEPARATOR ) {
1981 $self->clear_file;
1982 } else {
1983 $self->clear_msg;
1988 close LOG_SOURCE
1989 or die sprintf("Problem reading log input (exit/signal/core: %d/%d/%d)\n",
1990 $? >> 8, $? & 127, $? & 128);
1991 return;
1994 # -------------------------------------
1996 sub add_file_entry {
1997 $_[0]->grand_poobah->add_fileentry(@{$_[0]}{qw(filename rev_time rev_revision
1998 rev_state lines branch_names
1999 rev_branch_roots
2000 branch_numbers
2001 symbolic_names
2002 rev_author rev_msg)});
2005 # -------------------------------------
2007 sub maybe_read_user_map_file {
2008 my ($self) = @_;
2010 my %expansions;
2011 my $User_Map_Input;
2013 if ($User_Map_File)
2015 if ( $User_Map_File =~ m{^([-\w\@+=.,\/]+):([-\w\@+=.,\/:]+)} and
2016 !-f $User_Map_File )
2018 my $rsh = (exists $ENV{'CVS_RSH'} ? $ENV{'CVS_RSH'} : 'ssh');
2019 $User_Map_Input = "$rsh $1 'cat $2' |";
2020 &main::debug ("(run \"${User_Map_Input}\")\n");
2022 else
2024 $User_Map_Input = "<$User_Map_File";
2027 open (MAPFILE, $User_Map_Input)
2028 or die ("Unable to open $User_Map_File ($!)");
2030 while (<MAPFILE>)
2032 next if /^\s*#/; # Skip comment lines.
2033 next if not /:/; # Skip lines without colons.
2035 # It is now safe to split on ':'.
2036 my ($username, $expansion) = split ':';
2037 chomp $expansion;
2038 $expansion =~ s/^'(.*)'$/$1/;
2039 $expansion =~ s/^"(.*)"$/$1/;
2041 # If it looks like the expansion has a real name already, then
2042 # we toss the username we got from CVS log. Otherwise, keep
2043 # it to use in combination with the email address.
2045 if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
2046 # Also, add angle brackets if none present
2047 if (! ($expansion =~ /<\S+@\S+>/)) {
2048 $expansions{$username} = "$username <$expansion>";
2050 else {
2051 $expansions{$username} = "$username $expansion";
2054 else {
2055 $expansions{$username} = $expansion;
2057 } # fi ($User_Map_File)
2059 close (MAPFILE);
2062 if (defined $User_Passwd_File)
2064 if ( ! defined $Domain ) {
2065 if ( -e MAILNAME ) {
2066 chomp($Domain = slurp_file(MAILNAME));
2067 } else {
2068 MAILDOMAIN_CMD:
2069 for ([qw(hostname -d)], 'dnsdomainname', 'domainname') {
2070 my ($text, $exit, $sig, $core) = run_ext($_);
2071 if ( $exit == 0 && $sig == 0 && $core == 0 ) {
2072 chomp $text;
2073 if ( length $text ) {
2074 $Domain = $text;
2075 last MAILDOMAIN_CMD;
2082 die "No mail domain found\n"
2083 unless defined $Domain;
2085 open (MAPFILE, "<$User_Passwd_File")
2086 or die ("Unable to open $User_Passwd_File ($!)");
2087 while (<MAPFILE>)
2089 # all lines are valid
2090 my ($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
2091 my $expansion = '';
2092 ($expansion) = split (',', $gecos)
2093 if defined $gecos && length $gecos;
2095 my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
2096 $expansions{$username} = "$expansion <$mailname>";
2098 close (MAPFILE);
2101 $self->{usermap} = \%expansions;
2104 # -------------------------------------
2106 sub read_file_path {
2107 my ($self, $line) = @_;
2109 my $path;
2111 if ( $line =~ /^Working file: (.*)/ ) {
2112 $path = $1;
2113 } elsif ( defined $RCS_Root
2115 $line =~ m|^RCS file: $RCS_Root[/\\](.*),v$| ) {
2116 $path = $1;
2117 $path =~ s!Attic/!!;
2118 } else {
2119 return;
2122 if ( @Ignore_Files ) {
2123 my $base;
2124 ($base, undef, undef) = fileparse($path);
2126 my $xpath = $Case_Insensitive ? lc($path) : $path;
2127 if ( grep index($path, $_) > -1, @Ignore_Files ) {
2128 return;
2132 $self->{filename} = $path;
2133 return;
2136 # -------------------------------------
2138 sub read_symbolic_name {
2139 my ($self, $line) = @_;
2141 # All tag names are listed with whitespace in front in cvs log
2142 # output; so if see non-whitespace, then we're done collecting.
2143 if ( /^\S/ ) {
2144 $self->{collecting_symbolic_names} = 0;
2145 return;
2146 } else {
2147 # we're looking at a tag name, so parse & store it
2149 # According to the Cederqvist manual, in node "Tags", tag names must start
2150 # with an uppercase or lowercase letter and can contain uppercase and
2151 # lowercase letters, digits, `-', and `_'. However, it's not our place to
2152 # enforce that, so we'll allow anything CVS hands us to be a tag:
2153 my ($tag_name, $tag_rev) = ($line =~ /^\s+([^:]+): ([\d.]+)$/);
2155 # A branch number either has an odd number of digit sections
2156 # (and hence an even number of dots), or has ".0." as the
2157 # second-to-last digit section. Test for these conditions.
2158 my $real_branch_rev = '';
2159 if ( $tag_rev =~ /^(\d+\.\d+\.)+\d+$/ # Even number of dots...
2161 $tag_rev !~ /^(1\.)+1$/ ) { # ...but not "1.[1.]1"
2162 $real_branch_rev = $tag_rev;
2163 } elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) { # Has ".0."
2164 $real_branch_rev = $1 . $3;
2167 # If we got a branch, record its number.
2168 if ( $real_branch_rev ) {
2169 $self->{branch_names}->{$real_branch_rev} = $tag_name;
2170 $self->{branch_numbers}->{$tag_name} = $real_branch_rev;
2171 } else {
2172 # Else it's just a regular (non-branch) tag.
2173 push @{$self->{symbolic_names}->{$tag_rev}}, $tag_name;
2177 $self->{collecting_symbolic_names} = 1;
2178 return;
2181 # -------------------------------------
2183 sub read_revision {
2184 my ($self, $line) = @_;
2186 my ($revision) = ( $line =~ /^revision (\d+\.[\d.]+)/ );
2188 return
2189 unless $revision;
2191 $self->{rev_revision} = $revision;
2192 return;
2195 # -------------------------------------
2197 { # Closure over %gecos_warned
2198 my %gecos_warned;
2199 sub read_date_author_and_state {
2200 my ($self, $line) = @_;
2202 my ($time, $author, $state) = $self->parse_date_author_and_state($line);
2204 if ( defined($self->{usermap}->{$author}) and $self->{usermap}->{$author} ) {
2205 $author = $self->{usermap}->{$author};
2206 } elsif ( defined $Domain or $Gecos == 1 ) {
2207 my $email = $author;
2208 $email = $author."@".$Domain
2209 if defined $Domain && $Domain ne '';
2211 my $pw = getpwnam($author);
2212 my ($fullname, $office, $workphone, $homephone);
2213 if ( defined $pw ) {
2214 ($fullname, $office, $workphone, $homephone) =
2215 split /\s*,\s*/, $pw->gecos;
2216 } else {
2217 warn "Couldn't find gecos info for author '$author'\n"
2218 unless $gecos_warned{$author}++;
2219 $fullname = '';
2221 for (grep defined, $fullname, $office, $workphone, $homephone) {
2222 s/&/ucfirst(lc($pw->name))/ge;
2224 $author = $fullname . " <" . $email . ">"
2225 if $fullname ne '';
2228 $self->{rev_state} = $state;
2229 $self->{rev_time} = $time;
2230 $self->{rev_author} = $author;
2231 return;
2235 # -------------------------------------
2237 sub read_branches {
2238 # A "branches: ..." line here indicates that one or more branches
2239 # are rooted at this revision. If we're showing branches, then we
2240 # want to show that fact as well, so we collect all the branches
2241 # that this is the latest ancestor of and store them in
2242 # $self->[rev_branch_roots}. Just for reference, the format of the
2243 # line we're seeing at this point is:
2245 # branches: 1.5.2; 1.5.4; ...;
2247 # Okay, here goes:
2248 my ($self, $line) = @_;
2250 # Ugh. This really bothers me. Suppose we see a log entry
2251 # like this:
2253 # ----------------------------
2254 # revision 1.1
2255 # date: 1999/10/17 03:07:38; author: jrandom; state: Exp;
2256 # branches: 1.1.2;
2257 # Intended first line of log message begins here.
2258 # ----------------------------
2260 # The question is, how we can tell the difference between that
2261 # log message and a *two*-line log message whose first line is
2263 # "branches: 1.1.2;"
2265 # See the problem? The output of "cvs log" is inherently
2266 # ambiguous.
2268 # For now, we punt: we liberally assume that people don't
2269 # write log messages like that, and just toss a "branches:"
2270 # line if we see it but are not showing branches. I hope no
2271 # one ever loses real log data because of this.
2272 if ( $Show_Branches ) {
2273 $line =~ s/(1\.)+1;|(1\.)+1$//; # ignore the trivial branch 1.1.1
2274 $self->{rev_branch_roots} = [split /;\s+/, $line]
2275 if length $line;
2279 # -------------------------------------
2281 sub parse_date_author_and_state {
2282 my ($self, $line) = @_;
2283 # Parses the date/time and author out of a line like:
2285 # date: 1999/02/19 23:29:05; author: apharris; state: Exp;
2287 my ($year, $mon, $mday, $hours, $min, $secs, $author, $state, $rest) =
2288 $line =~
2289 m!(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+);\s+
2290 author:\s+([^;]+);\s+state:\s+([^;]+);(.*)!x
2291 or die "Couldn't parse date ``$line''";
2292 die "Bad date or Y2K issues"
2293 unless $year > 1969 and $year < 2258;
2294 # Kinda arbitrary, but useful as a sanity check
2295 my $time = timegm($secs, $min, $hours, $mday, $mon-1, $year-1900);
2296 if ( $rest =~ m!\s+lines:\s+(.*)! ) {
2297 $self->{lines} = $1;
2300 return $time, $author, $state;
2303 # Subrs ----------------------------------------------------------------------
2305 package main;
2307 sub delta_check {
2308 my ($time, $tags) = @_;
2310 # If we're in 'delta' mode, update the latest observed times for the
2311 # beginning and ending tags, and when we get around to printing output, we
2312 # will simply restrict ourselves to that timeframe...
2313 return
2314 unless $Delta_Mode;
2316 $Delta_StartTime = $time
2317 if $time > $Delta_StartTime and grep { $_ eq $Delta_From } @$tags;
2319 $Delta_EndTime = $time
2320 if $time > $Delta_EndTime and grep { $_ eq $Delta_To } @$tags;
2323 sub run_ext {
2324 my ($cmd) = @_;
2325 $cmd = [$cmd]
2326 unless ref $cmd;
2327 local $" = ' ';
2328 my $out = qx"@$cmd 2>&1";
2329 my $rv = $?;
2330 my ($sig, $core, $exit) = ($? & 127, $? & 128, $? >> 8);
2331 return $out, $exit, $sig, $core;
2334 # -------------------------------------
2336 # If accumulating, grab the boundary date from pre-existing ChangeLog.
2337 sub maybe_grab_accumulation_date {
2338 if (! $Cumulative || $Update) {
2339 return '';
2342 # else
2344 open (LOG, "$Log_File_Name")
2345 or die ("trouble opening $Log_File_Name for reading ($!)");
2347 my $boundary_date;
2348 while (<LOG>)
2350 if (/^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/)
2352 $boundary_date = "$1";
2353 last;
2357 close (LOG);
2359 # convert time from utc to local timezone if the ChangeLog has
2360 # dates/times in utc
2361 if ($UTC_Times && $boundary_date)
2363 # convert the utc time to a time value
2364 my ($year,$mon,$mday,$hour,$min) = $boundary_date =~
2365 m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#;
2366 my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900);
2367 # print the timevalue in the local timezone
2368 my ($ignore,$wday);
2369 ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time);
2370 $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u",
2371 $year+1900,$mon+1,$mday,$hour,$min);
2374 return $boundary_date;
2377 # -------------------------------------
2379 # Fills up a ChangeLog structure in the current directory.
2380 sub derive_changelog {
2381 my ($command) = @_;
2383 # See "The Plan" above for a full explanation.
2385 # Might be adding to an existing ChangeLog
2386 my $accumulation_date = maybe_grab_accumulation_date;
2387 if ($accumulation_date) {
2388 # Insert -d immediately after 'cvs log'
2389 my $Log_Date_Command = "-d\'>${accumulation_date}\'";
2391 my ($log_index) = grep $command->[$_] eq 'log', 0..$#$command;
2392 splice @$command, $log_index+1, 0, $Log_Date_Command;
2393 &debug ("(adding log msg starting from $accumulation_date)\n");
2396 # output_changelog(read_changelog($command));
2397 my $builder = CVS::Utils::ChangeLog::EntrySetBuilder->new;
2398 $builder->read_changelog($command);
2399 $builder->grand_poobah->output_changelog;
2402 # -------------------------------------
2404 sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
2406 # -------------------------------------
2408 sub common_path_prefix {
2409 my ($path1, $path2) = @_;
2411 # For compatibility (with older versions of cvs2cl.pl), we think in UN*X
2412 # terms, and mould windoze filenames to match. Is this really appropriate?
2413 # If a file is checked in under UN*X, and cvs log run on windoze, which way
2414 # do the path separators slope? Can we use fileparse as per the local
2415 # conventions? If so, we should probably have a user option to specify an
2416 # OS to emulate to handle stdin-fed logs. If we did this, we could avoid
2417 # the nasty \-/ transmogrification below.
2419 my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2;
2421 # Transmogrify Windows filenames to look like Unix.
2422 # (It is far more likely that someone is running cvs2cl.pl under
2423 # Windows than that they would genuinely have backslashes in their
2424 # filenames.)
2425 tr!\\!/!
2426 for $dir1, $dir2;
2428 my ($accum1, $accum2, $last_common_prefix) = ('') x 3;
2430 my @path1 = grep length($_), split qr!/!, $dir1;
2431 my @path2 = grep length($_), split qr!/!, $dir2;
2433 my @common_path;
2434 for (0..min($#path1,$#path2)) {
2435 if ( $path1[$_] eq $path2[$_]) {
2436 push @common_path, $path1[$_];
2437 } else {
2438 last;
2442 return join '', map "$_/", @common_path;
2445 # -------------------------------------
2446 sub parse_options {
2447 # Check this internally before setting the global variable.
2448 my $output_file;
2450 # If this gets set, we encountered unknown options and will exit at
2451 # the end of this subroutine.
2452 my $exit_with_admonishment = 0;
2454 # command to generate the log
2455 my @log_source_command = qw( cvs log );
2457 my (@Global_Opts, @Local_Opts);
2459 Getopt::Long::Configure(qw( bundling permute no_getopt_compat
2460 pass_through no_ignore_case ));
2461 GetOptions('help|usage|h' => \$Print_Usage,
2462 'debug' => \$Debug, # unadvertised option, heh
2463 'version' => \$Print_Version,
2465 'file|f=s' => \$output_file,
2466 'accum' => \$Cumulative,
2467 'update' => \$Update,
2468 'fsf' => \$FSF_Style,
2469 'rcs=s' => \$RCS_Root,
2470 'usermap|U=s' => \$User_Map_File,
2471 'gecos' => \$Gecos,
2472 'domain=s' => \$Domain,
2473 'passwd=s' => \$User_Passwd_File,
2474 'window|W=i' => \$Max_Checkin_Duration,
2475 'chrono' => \$Chronological_Order,
2476 'ignore|I=s' => \@Ignore_Files,
2477 'case-insensitive|C' => \$Case_Insensitive,
2478 'regexp|R=s' => \$Regexp_Gate,
2479 'stdin' => \$Input_From_Stdin,
2480 'stdout' => \$Output_To_Stdout,
2481 'distributed|d' => sub { CVS::Utils::ChangeLog::FileEntry->distributed(1) },
2482 'prune|P' => \$Prune_Empty_Msgs,
2483 'no-wrap' => \$No_Wrap,
2484 'gmt|utc' => \$UTC_Times,
2485 'day-of-week|w' => \$Show_Day_Of_Week,
2486 'revisions|r' => \$Show_Revisions,
2487 'show-dead' => \$Show_Dead,
2488 'tags|t' => \$Show_Tags,
2489 'tagdates|T' => \$Show_Tag_Dates,
2490 'branches|b' => \$Show_Branches,
2491 'follow|F=s' => \@Follow_Branches,
2492 'follow-only=s' => \@Follow_Only,
2493 'xml-encoding=s' => \$XML_Encoding,
2494 'xml' => \$XML_Output,
2495 'noxmlns' => \$No_XML_Namespace,
2496 'no-xml-iso-date' => \$No_XML_ISO_Date,
2497 'no-ancestors' => \$No_Ancestors,
2498 'lines-modified' => \$Show_Lines_Modified,
2500 'no-indent' => sub {
2501 $Indent = '';
2504 'summary' => sub {
2505 $Summary = 1;
2506 $After_Header = "\n\n"; # Summary implies --separate-header
2509 'no-times' => sub {
2510 $Show_Times = 0;
2513 'no-hide-branch-additions' => sub {
2514 $Hide_Branch_Additions = 0;
2517 'no-common-dir' => sub {
2518 $Common_Dir = 0;
2521 'ignore-tag=s' => sub {
2522 $ignore_tags{$_[1]} = 1;
2525 'show-tag=s' => sub {
2526 $show_tags{$_[1]} = 1;
2529 # Deliberately undocumented. This is not a public interface, and
2530 # may change/disappear at any time.
2531 'test-code=s' => \$TestCode,
2533 'delta=s' => sub {
2534 my $arg = $_[1];
2535 if ( $arg =~
2536 /^([A-Za-z][A-Za-z0-9_\-\]\[]*):([A-Za-z][A-Za-z0-9_\-\]\[]*)$/ ) {
2537 $Delta_From = $1;
2538 $Delta_To = $2;
2539 $Delta_Mode = 1;
2540 } else {
2541 die "--delta FROM_TAG:TO_TAG is what you meant to say.\n";
2545 'FSF' => sub {
2546 $Show_Times = 0;
2547 $Common_Dir = 0;
2548 $No_Extra_Indent = 1;
2549 $Indent = "\t";
2552 'header=s' => sub {
2553 my $narg = $_[1];
2554 $ChangeLog_Header = &slurp_file ($narg);
2555 if (! defined ($ChangeLog_Header)) {
2556 $ChangeLog_Header = '';
2560 'global-opts|g=s' => sub {
2561 my $narg = $_[1];
2562 push @Global_Opts, $narg;
2563 splice @log_source_command, 1, 0, $narg;
2566 'log-opts|l=s' => sub {
2567 my $narg = $_[1];
2568 push @Local_Opts, $narg;
2569 push @log_source_command, $narg;
2572 'mailname=s' => sub {
2573 my $narg = $_[1];
2574 warn "--mailname is deprecated; please use --domain instead\n";
2575 $Domain = $narg;
2578 'separate-header|S' => sub {
2579 $After_Header = "\n\n";
2580 $No_Extra_Indent = 1;
2583 'group-within-date' => sub {
2584 $GroupWithinDate = 1;
2585 $Show_Times = 0;
2588 'hide-filenames' => sub {
2589 $Hide_Filenames = 1;
2590 $After_Header = '';
2593 or die "options parsing failed\n";
2595 push @log_source_command, map "'$_'", @ARGV;
2597 ## Check for contradictions...
2599 if ($Output_To_Stdout && CVS::Utils::ChangeLog::FileEntry->distributed) {
2600 print STDERR "cannot pass both --stdout and --distributed\n";
2601 $exit_with_admonishment = 1;
2604 if ($Output_To_Stdout && $output_file) {
2605 print STDERR "cannot pass both --stdout and --file\n";
2606 $exit_with_admonishment = 1;
2609 if ($Input_From_Stdin && @Global_Opts) {
2610 print STDERR "cannot pass both --stdin and -g\n";
2611 $exit_with_admonishment = 1;
2614 if ($Input_From_Stdin && @Local_Opts) {
2615 print STDERR "cannot pass both --stdin and -l\n";
2616 $exit_with_admonishment = 1;
2619 if ($XML_Output && $Cumulative) {
2620 print STDERR "cannot pass both --xml and --accum\n";
2621 $exit_with_admonishment = 1;
2624 # Other consistency checks and option-driven logic
2626 # Bleargh. Compensate for a deficiency of custom wrapping.
2627 if ( ($After_Header ne " ") and $FSF_Style ) {
2628 $After_Header .= "\t";
2631 @Ignore_Files = map lc, @Ignore_Files
2632 if $Case_Insensitive;
2634 # Or if any other error message has already been printed out, we
2635 # just leave now:
2636 if ($exit_with_admonishment) {
2637 &usage ();
2638 exit (1);
2640 elsif ($Print_Usage) {
2641 &usage ();
2642 exit (0);
2644 elsif ($Print_Version) {
2645 &version ();
2646 exit (0);
2649 ## Else no problems, so proceed.
2651 if ($output_file) {
2652 $Log_File_Name = $output_file;
2655 return \@log_source_command;
2658 # -------------------------------------
2660 sub slurp_file {
2661 my $filename = shift || die ("no filename passed to slurp_file()");
2662 my $retstr;
2664 open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");
2665 local $/ = undef;
2666 $retstr = <SLURPEE>;
2667 close (SLURPEE);
2668 return $retstr;
2671 # -------------------------------------
2673 sub debug {
2674 if ($Debug) {
2675 my $msg = shift;
2676 print STDERR $msg;
2680 # -------------------------------------
2682 sub version {
2683 print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
2686 # -------------------------------------
2688 sub usage {
2689 &version ();
2691 eval "use Pod::Usage qw( pod2usage )";
2693 if ( $@ ) {
2694 print <<'END';
2696 * Pod::Usage was not found. The formatting may be suboptimal. Consider
2697 upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and
2698 versions of perl prior to 5.6 are getting rather rusty, now. Alternatively,
2699 install Pod::Usage direct from CPAN.
2702 local $/ = undef;
2703 my $message = <DATA>;
2704 $message =~ s/^=(head1|item) //gm;
2705 $message =~ s/^=(over|back).*\n//gm;
2706 $message =~ s/\n{3,}/\n\n/g;
2707 print $message;
2708 } else {
2709 print "\n";
2710 pod2usage( -exitval => 'NOEXIT',
2711 -verbose => 1,
2712 -output => \*STDOUT,
2716 return;
2719 # Main -----------------------------------------------------------------------
2721 my $log_source_command = parse_options;
2722 if ( defined $TestCode ) {
2723 eval $TestCode;
2724 die "Eval failed: '$@'\n"
2725 if $@;
2726 } else {
2727 derive_changelog($log_source_command);
2730 __DATA__
2732 =head1 NAME
2734 cvs2cl.pl - convert cvs log messages to changelogs
2736 =head1 SYNOPSIS
2738 B<cvs2cl> [I<options>] [I<FILE1> [I<FILE2> ...]]
2740 =head1 DESCRIPTION
2742 cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by
2743 running "cvs log" and parsing the output. Duplicate log messages get
2744 unified in the Right Way.
2746 The default output of cvs2cl is designed to be compact, formally unambiguous,
2747 but still easy for humans to read. It should be largely self-explanatory; the
2748 one abbreviation that might not be obvious is "utags". That stands for
2749 "universal tags" -- a universal tag is one held by all the files in a given
2750 change entry.
2752 If you need output that's easy for a program to parse, use the B<--xml> option.
2753 Note that with XML output, just about all available information is included
2754 with each change entry, whether you asked for it or not, on the theory that
2755 your parser can ignore anything it's not looking for.
2757 If filenames are given as arguments cvs2cl only shows log information for the
2758 named files.
2760 =head1 OPTIONS
2762 =over 4
2764 =item B<-h>, B<-help>, B<--help>, B<-?>
2766 Show a short help and exit.
2768 =item B<--version>
2770 Show version and exit.
2772 =item B<-r>, B<--revisions>
2774 Show revision numbers in output.
2776 =item B<-b>, B<--branches>
2778 Show branch names in revisions when possible.
2780 =item B<-t>, B<--tags>
2782 Show tags (symbolic names) in output.
2784 =item B<-T>, B<--tagdates>
2786 Show tags in output on their first occurance.
2788 =item B<--show-dead>
2790 Show dead files.
2792 =item B<--stdin>
2794 Read from stdin, don't run cvs log.
2796 =item B<--stdout>
2798 Output to stdout not to ChangeLog.
2800 =item B<-d>, B<--distributed>
2802 Put ChangeLogs in subdirs.
2804 =item B<-f> I<FILE>, B<--file> I<FILE>
2806 Write to I<FILE> instead of ChangeLog.
2808 =item B<--fsf>
2810 Use this if log data is in FSF ChangeLog style.
2812 =item B<--FSF>
2814 Attempt strict FSF-standard compatible output.
2816 =item B<-W> I<SECS>, B<--window> I<SECS>
2818 Window of time within which log entries unify.
2820 =item -B<U> I<UFILE>, B<--usermap> I<UFILE>
2822 Expand usernames to email addresses from I<UFILE>.
2824 =item B<--passwd> I<PASSWORDFILE>
2826 Use system passwd file for user name expansion. If no mail domain is provided
2827 (via B<--domain>), it tries to read one from B</etc/mailname>, output of B<hostname
2828 -d>, B<dnsdomainname>, or B<domain-name>. cvs2cl exits with an error if none of
2829 those options is successful. Use a domain of '' to prevent the addition of a
2830 mail domain.
2832 =item B<--domain> I<DOMAIN>
2834 Domain to build email addresses from.
2836 =item B<--gecos>
2838 Get user information from GECOS data.
2840 =item B<-R> I<REGEXP>, B<--regexp> I<REGEXP>
2842 Include only entries that match I<REGEXP>. This option may be used multiple
2843 times.
2845 =item B<-I> I<REGEXP>, B<--ignore> I<REGEXP>
2847 Ignore files whose names match I<REGEXP>. This option may be used multiple
2848 times.
2850 =item B<-C>, B<--case-insensitive>
2852 Any regexp matching is done case-insensitively.
2854 =item B<-F> I<BRANCH>, B<--follow> I<BRANCH>
2856 Show only revisions on or ancestral to I<BRANCH>.
2858 =item B<--follow-only> I<BRANCH>
2860 Like --follow, but sub-branches are not followed.
2862 =item B<--no-ancestors>
2864 When using B<-F>, only track changes since the I<BRANCH> started.
2866 =item B<--no-hide-branch-additions>
2868 By default, entries generated by cvs for a file added on a branch (a dead 1.1
2869 entry) are not shown. This flag reverses that action.
2871 =item B<-S>, B<--separate-header>
2873 Blank line between each header and log message.
2875 =item B<--summary>
2877 Add CVS change summary information.
2879 =item B<--no-wrap>
2881 Don't auto-wrap log message (recommend B<-S> also).
2883 =item B<--no-indent>
2885 Don't indent log message
2887 =item B<--gmt>, B<--utc>
2889 Show times in GMT/UTC instead of local time.
2891 =item B<--accum>
2893 Add to an existing ChangeLog (incompatible with B<--xml>).
2895 =item B<-w>, B<--day-of-week>
2897 Show day of week.
2899 =item B<--no-times>
2901 Don't show times in output.
2903 =item B<--chrono>
2905 Output log in chronological order (default is reverse chronological order).
2907 =item B<--header> I<FILE>
2909 Get ChangeLog header from I<FILE> ("B<->" means stdin).
2911 =item B<--xml>
2913 Output XML instead of ChangeLog format.
2915 =item B<--xml-encoding> I<ENCODING.>
2917 Insert encoding clause in XML header.
2919 =item B<--noxmlns>
2921 Don't include xmlns= attribute in root element.
2923 =item B<--hide-filenames>
2925 Don't show filenames (ignored for XML output).
2927 =item B<--no-common-dir>
2929 Don't shorten directory names from filenames.
2931 =item B<--rcs> I<CVSROOT>
2933 Handle filenames from raw RCS, for instance those produced by "cvs rlog"
2934 output, stripping the prefix I<CVSROOT>.
2936 =item B<-P>, B<--prune>
2938 Don't show empty log messages.
2940 =item B<--lines-modified>
2942 Output the number of lines added and the number of lines removed for
2943 each checkin (if applicable). At the moment, this only affects the
2944 XML output mode.
2946 =item B<--ignore-tag> I<TAG>
2948 Ignore individual changes that are associated with a given tag.
2949 May be repeated, if so, changes that are associated with any of
2950 the given tags are ignored.
2952 =item B<--show-tag> I<TAG>
2954 Log only individual changes that are associated with a given
2955 tag. May be repeated, if so, changes that are associated with
2956 any of the given tags are logged.
2958 =item B<--delta> I<FROM_TAG>B<:>I<TO_TAG>
2960 Attempt a delta between two tags (since I<FROM_TAG> up to and
2961 including I<TO_TAG>). The algorithm is a simple date-based one
2962 (this is a hard problem) so results are imperfect.
2964 =item B<-g> I<OPTS>, B<--global-opts> I<OPTS>
2966 Pass I<OPTS> to cvs like in "cvs I<OPTS> log ...".
2968 =item B<-l> I<OPTS>, B<--log-opts> I<OPTS>
2970 Pass I<OPTS> to cvs log like in "cvs ... log I<OPTS>".
2972 =back
2974 Notes about the options and arguments:
2976 =over 4
2978 =item *
2980 The B<-I> and B<-F> options may appear multiple times.
2982 =item *
2984 To follow trunk revisions, use "B<-F trunk>" ("B<-F TRUNK>" also works). This is
2985 okay because no would ever, ever be crazy enough to name a branch "trunk",
2986 right? Right.
2988 =item *
2990 For the B<-U> option, the I<UFILE> should be formatted like CVSROOT/users. That is,
2991 each line of I<UFILE> looks like this:
2993 jrandom:jrandom@red-bean.com
2995 or maybe even like this
2997 jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
2999 Don't forget to quote the portion after the colon if necessary.
3001 =item *
3003 Many people want to filter by date. To do so, invoke cvs2cl.pl like this:
3005 cvs2cl.pl -l "-d'DATESPEC'"
3007 where DATESPEC is any date specification valid for "cvs log -d". (Note that
3008 CVS 1.10.7 and below requires there be no space between -d and its argument).
3010 =item *
3012 Dates/times are interpreted in the local time zone.
3014 =item *
3016 Remember to quote the argument to `B<-l>' so that your shell doesn't interpret
3017 spaces as argument separators.
3019 =item *
3021 See the 'Common Options' section of the cvs manual ('info cvs' on UNIX-like
3022 systems) for more information.
3024 =item *
3026 Note that the rules for quoting under windows shells are different.
3028 =back
3030 =head1 EXAMPLES
3032 Some examples (working on UNIX shells):
3034 # logs after 6th March, 2003 (inclusive)
3035 cvs2cl.pl -l "-d'>2003-03-06'"
3036 # logs after 4:34PM 6th March, 2003 (inclusive)
3037 cvs2cl.pl -l "-d'>2003-03-06 16:34'"
3038 # logs between 4:46PM 6th March, 2003 (exclusive) and
3039 # 4:34PM 6th March, 2003 (inclusive)
3040 cvs2cl.pl -l "-d'2003-03-06 16:46>2003-03-06 16:34'"
3042 Some examples (on non-UNIX shells):
3044 # Reported to work on windows xp/2000
3045 cvs2cl.pl -l "-d"">2003-10-18;today<"""
3047 =head1 AUTHORS
3049 =over 4
3051 =item Karl Fogel
3053 =item Melissa O'Neill
3055 =item Martyn J. Pearce
3057 =back
3059 Contributions from
3061 =over 4
3063 =item Mike Ayers
3065 =item Tim Bradshaw
3067 =item Richard Broberg
3069 =item Nathan Bryant
3071 =item Oswald Buddenhagen
3073 =item Neil Conway
3075 =item Arthur de Jong
3077 =item Mark W. Eichin
3079 =item Dave Elcock
3081 =item Reid Ellis
3083 =item Simon Josefsson
3085 =item Robin Hugh Johnson
3087 =item Terry Kane
3089 =item Akos Kiss
3091 =item Claus Klein
3093 =item Eddie Kohler
3095 =item Richard Laager
3097 =item Kevin Lilly
3099 =item Karl-Heinz Marbaise
3101 =item Mitsuaki Masuhara
3103 =item Henrik Nordstrom
3105 =item Joe Orton
3107 =item Peter Palfrader
3109 =item Thomas Parmelan
3111 =item Johanne Stezenbach
3113 =item Joseph Walton
3115 =item Ernie Zapata
3117 =back
3119 =head1 BUGS
3121 Please report bugs to C<bug-cvs2cl@red-bean.com>.
3123 =head1 PREREQUISITES
3125 This script requires C<Text::Wrap>, C<Time::Local>, and C<File::Basename>. It
3126 also seems to require C<Perl 5.004_04> or higher.
3128 =head1 OPERATING SYSTEM COMPATIBILITY
3130 Should work on any OS.
3132 =head1 SCRIPT CATEGORIES
3134 Version_Control/CVS
3136 =head1 COPYRIGHT
3138 (C) 2001,2002,2003,2004 Martyn J. Pearce <fluffy@cpan.org>, under the GNU GPL.
3140 (C) 1999 Karl Fogel <kfogel@red-bean.com>, under the GNU GPL.
3142 cvs2cl.pl is free software; you can redistribute it and/or modify
3143 it under the terms of the GNU General Public License as published by
3144 the Free Software Foundation; either version 2, or (at your option)
3145 any later version.
3147 cvs2cl.pl is distributed in the hope that it will be useful,
3148 but WITHOUT ANY WARRANTY; without even the implied warranty of
3149 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3150 GNU General Public License for more details.
3152 You may have received a copy of the GNU General Public License
3153 along with cvs2cl.pl; see the file COPYING. If not, write to the
3154 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
3155 Boston, MA 02111-1307, USA.
3157 =head1 SEE ALSO
3159 cvs(1)