2 exec perl
-w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
5 ###########################################################################
6 # This is a quick hack version of cvs2cl.pl (see below)
7 # that simply outputs in a file-centric mannor.
8 # only CVS::Utils::ChangeLog::EntrySet::Output::output_changelog
12 # cvs2cl_by_file.pl --delta bioperl-release-1-5-1:bioperl-release-1-5-2
13 # Generates a file called ChangeLog showing, per file, all the commit
14 # messages since tag bioperl-release-1-5-1 up to tag bioperl-release-1-5-2
15 # It generates in wikipedia format suitable for immediate pasting into the
16 # a wiki page for the delta
18 # Sendu Bala <bix@sendu.me.uk>
19 ###########################################################################
22 ##############################################################
24 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
26 ##############################################################
32 use File
::Basename qw
( fileparse
);
33 use Getopt
::Long qw
( GetOptions
);
35 use Time
::Local qw
( timegm
);
36 use User
::pwent qw
( getpwnam
);
41 # Read in the logs for multiple files, spit out a nice ChangeLog that
42 # mirrors the information entered during `cvs commit'.
44 # The problem presents some challenges. In an ideal world, we could
45 # detect files with the same author, log message, and checkin time --
46 # each <filelist, author, time, logmessage> would be a changelog entry.
47 # We'd sort them; and spit them out. Unfortunately, CVS is *not atomic*
48 # so checkins can span a range of times. Also, the directory structure
49 # could be hierarchical.
51 # Another question is whether we really want to have the ChangeLog
52 # exactly reflect commits. An author could issue two related commits,
53 # with different log entries, reflecting a single logical change to the
54 # source. GNU style ChangeLogs group these under a single author/date.
55 # We try to do the same.
57 # So, we parse the output of `cvs log', storing log messages in a
58 # multilevel hash that stores the mapping:
59 # directory => author => time => message => filelist
60 # As we go, we notice "nearby" commit times and store them together
61 # (i.e., under the same timestamp), so they appear in the same log
64 # When we've read all the logs, we twist this mapping into
65 # a time => author => message => filelist mapping for each directory.
67 # If we're not using the `--distributed' flag, the directory is always
68 # considered to be `./', even as descend into subdirectories.
72 # name number of lines (10.xii.03)
75 # +-maybe_grab_accumulation_date 38
76 # +-read_changelog 277
77 # +-maybe_read_user_map_file 94
80 # +-read_symbolic_name 43
82 # +-read_date_author_and_state 25
83 # +-parse_date_author_and_state 20
85 # +-output_changelog 424
86 # +-pretty_file_list 290
87 # +-common_path_prefix 35
88 # +-preprocess_msg_text 30
92 # +-wrap_log_entry 177
102 # -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
104 # Note about a bug-slash-opportunity:
105 # -----------------------------------
107 # There's a bug in Text::Wrap, which affects cvs2cl. This script
115 # "This script demonstrates a bug in Text::Wrap. The very long line
116 # following this paragraph will be relocated relative to the surrounding
119 # ====================================================================
121 # See? When the bug happens, we'll get the line of equal signs below
122 # this paragraph, even though it should be above.";
125 # # Print out the test text with no wrapping:
126 # print "$test_text";
130 # # Now print it out wrapped, and see the bug:
131 # print wrap ("\t", " ", "$test_text");
135 # If the line of equal signs were one shorter, then the bug doesn't
136 # happen. Interesting.
138 # Anyway, rather than fix this in Text::Wrap, we might as well write a
139 # new wrap() which has the following much-needed features:
141 # * initial indentation, like current Text::Wrap()
142 # * subsequent line indentation, like current Text::Wrap()
143 # * user chooses among: force-break long words, leave them alone, or die()?
144 # * preserve existing indentation: chopped chunks from an indented line
145 # are indented by same (like this line, not counting the asterisk!)
146 # * optional list of things to preserve on line starts, default ">"
148 # Note that the last two are essentially the same concept, so unify in
149 # implementation and give a good interface to controlling them.
153 # Optionally, when encounter a line pre-indented by same as previous
154 # line, then strip the newline and refill, but indent by the same.
157 # Globals --------------------------------------------------------------------
159 # In case we have to print it out:
160 my
$VERSION = '$Revision$';
161 $VERSION =~ s
/\S
+\s
+(\S
+)\s
+\S
+/$1/;
163 ## Vars set by options:
165 # Print debugging messages?
168 # Just show version and exit?
169 my
$Print_Version = 0;
171 # Just print usage message and exit?
174 # What file should we generate (defaults to "ChangeLog")?
175 my
$Log_File_Name = "ChangeLog";
177 # Grab most recent entry date from existing ChangeLog file, just add
181 # `cvs log -d`, this will repeat the last entry in the old log. This is OK,
182 # as it guarantees at least one entry in the update changelog, which means
183 # that there will always be a date to extract for the next update. The repeat
184 # entry can be removed in postprocessing, if necessary.
187 # I don't think this actually does anything useful
190 # Expand usernames to email addresses based on a map file?
191 my
$User_Map_File = '';
192 my
$User_Passwd_File;
195 # Output log in chronological order? [default is reverse chronological order]
196 my
$Chronological_Order = 0;
198 # Grab user details via gecos
201 # User domain for gecos email addresses
204 # Output to a file or to stdout?
205 my
$Output_To_Stdout = 0;
207 # Eliminate empty log messages?
208 my
$Prune_Empty_Msgs = 0;
210 # Tags of which not to output
213 # Show only revisions with Tags
216 # Don't call Text::Wrap on the body of the message
219 # Indentation of log messages
222 # Don't do any pretty print processing
225 # Separates header from log message. Code assumes it is either " " or
226 # "\n\n", so if there's ever an option to set it to something else,
227 # make sure to go through all conditionals that use this var.
228 my
$After_Header = " ";
231 my
$XML_Encoding = '';
233 # Format more for programs than for humans.
235 my
$No_XML_Namespace = 0;
236 my
$No_XML_ISO_Date = 0;
238 # Do some special tweaks for log data that was written in FSF
242 # Show times in UTC instead of local time
245 # Show times in output?
248 # Show day of week in output?
249 my
$Show_Day_Of_Week = 0;
251 # Show revision numbers in output?
252 my
$Show_Revisions = 0;
254 # Show dead files in output?
257 # Hide dead trunk files which were created as a result of additions on a
259 my
$Hide_Branch_Additions = 1;
261 # Show tags (symbolic names) in output?
264 # Show tags separately in output?
265 my
$Show_Tag_Dates = 0;
267 # Show branches by symbolic name in output?
268 my
$Show_Branches = 0;
270 # Show only revisions on these branches or their ancestors.
272 # Show only revisions on these branches or their ancestors; ignore descendent
276 # Don't bother with files matching this regexp.
279 # How exactly we match entries. We definitely want "o",
280 # and user might add "i" by using --case-insensitive option.
281 my
$Case_Insensitive = 0;
283 # Maybe only show log messages matching a certain regular expression.
284 my
$Regexp_Gate = '';
286 # Pass this global option string along to cvs, to the left of `log':
287 my
$Global_Opts = '';
289 # Pass this option string along to the cvs log subcommand:
290 my
$Command_Opts = '';
292 # Read log output from stdin instead of invoking cvs log?
293 my
$Input_From_Stdin = 0;
295 # Don't show filenames in output.
296 my
$Hide_Filenames = 0;
298 # Don't shorten directory names from filenames.
301 # Max checkin duration. CVS checkin is not atomic, so we may have checkin
302 # times that span a range of time. We assume that checkins will last no
303 # longer than $Max_Checkin_Duration seconds, and that similarly, no
304 # checkins will happen from the same users with the same message less
305 # than $Max_Checkin_Duration seconds apart.
306 my
$Max_Checkin_Duration = 180;
308 # What to put at the front of [each] ChangeLog.
309 my
$ChangeLog_Header = '';
311 # Whether to enable 'delta' mode, and for what start/end tags.
318 # Whether to parse filenames from the RCS filename, and if so what
322 # Whether to output information on the # of lines added and removed
323 # by each file modification.
324 my
$Show_Lines_Modified = 0;
326 ## end vars set by options.
328 # latest observed times for the start/end tags in delta mode
329 my
$Delta_StartTime = 0;
330 my
$Delta_EndTime = 0;
332 my
$No_Ancestors = 0;
334 my
$No_Extra_Indent = 0;
336 my
$GroupWithinDate = 0;
338 # ----------------------------------------------------------------------------
340 package CVS
::Utils
::ChangeLog
::EntrySet
;
345 bless \
%self
, $class;
348 # -------------------------------------
350 sub output_changelog
{
351 my
$output_type = $XML_Output ?
'XML' : 'Text';
352 my
$output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}";
353 my
$output = $output_class->new
(follow_branches
=> \@Follow_Branches
,
354 follow_only
=> \@Follow_Only
,
355 ignore_tags
=> \
%ignore_tags
,
356 show_tags
=> \
%show_tags
,
358 $output->output_changelog
(@_
);
361 # -------------------------------------
364 my
($self, $file_full_path, $time, $revision, $state, $lines,
365 $branch_names, $branch_roots, $branch_numbers,
366 $symbolic_names, $author, $msg_txt) = @_
;
369 CVS
::Utils
::ChangeLog
::FileEntry-
>new
($file_full_path, $time, $revision,
371 $branch_names, $branch_roots,
375 # We might be including revision numbers and/or tags and/or
376 # branch names in the output. Most of the code from here to
377 # loop-end deals with organizing these in qunk.
379 unless
( $Hide_Branch_Additions
381 $msg_txt =~
/file .
+ was initially added on branch \S
+.
/ ) {
382 # Add this file to the list
383 # (We use many spoonfuls of autovivication magic. Hashes and arrays
384 # will spring into existence if they aren't there already.)
386 &main
::debug
("(pushing log msg for ".
$qunk->dir_key .
$qunk->filename .
")\n");
388 # Store with the files in this commit. Later we'll loop through
389 # again, making sure that revisions with the same log message
390 # and nearby commit times are grouped together as one commit.
391 $self->{$qunk->dir_key
}{$author}{$time}{$msg_txt} =
392 CVS
::Utils
::ChangeLog
::Message-
>new
($msg_txt)
393 unless exists
$self->{$qunk->dir_key
}{$author}{$time}{$msg_txt};
394 $self->{$qunk->dir_key
}{$author}{$time}{$msg_txt}->add_fileentry
($qunk);
399 # ----------------------------------------------------------------------------
401 package CVS
::Utils
::ChangeLog
::EntrySet
::Output
::Text
;
403 use base qw
( CVS
::Utils
::ChangeLog
::EntrySet
::Output
);
405 use File
::Basename qw
( fileparse
);
409 my
$self = $class->SUPER
::new
(@_
);
412 # -------------------------------------
415 my
$self = shift; my
$class = ref
$self;
418 return $Show_Day_Of_Week ?
' ' .
$class->weekday_en
($wday) : '';
421 # -------------------------------------
425 my
($time, $author, $lastdate) = @_
;
427 my
$header_line = '';
429 my
(undef
,$min,$hour,$mday,$mon,$year,$wday)
430 = $UTC_Times ? gmtime
($time) : localtime
($time);
432 my
$date = $self->fdatetime
($time);
436 sprintf
"%s %s\n\n", $date, $author;
438 if ( ! defined
$lastdate or
$date ne
$lastdate or
! $GroupWithinDate ) {
439 if ( $GroupWithinDate ) {
440 $header_line = "$date\n\n";
442 $header_line = "$date $author\n\n";
450 # -------------------------------------
452 sub preprocess_msg_text
{
456 $text = $self->SUPER
::preprocess_msg_text
($text);
458 unless
( $No_Wrap ) {
459 # Strip off lone newlines, but only for lines that don't begin with
460 # whitespace or a mail-quoting character, since we want to preserve
461 # that kind of formatting. Also don't strip newlines that follow a
462 # period; we handle those specially next. And don't strip
463 # newlines that precede an open paren.
464 1 while $text =~ s
/(^|
\n)([^
>\s
].
*[^.
\n])\n([^
>\n])/$1$2 $3/g
;
466 # If a newline follows a period, make sure that when we bring up the
467 # bottom sentence, it begins with two spaces.
468 1 while $text =~ s
/(^|
\n)([^
>\s
].
*)\n([^
>\n])/$1$2 $3/g
;
474 # -------------------------------------
476 # Here we take a bunch of qunks and convert them into printed
477 # summary that will include all the information the user asked for.
478 sub pretty_file_list
{
484 my
$qunksref = shift;
487 my
$beauty = ''; # The accumulating header string for this entry.
488 my
%non_unanimous_tags
; # Tags found in a proper subset of qunks
489 my
%unanimous_tags
; # Tags found in all qunks
490 my
%all_branches
; # Branches found in any qunk
491 my
$fbegun = 0; # Did we begin printing filenames yet?
493 my
($common_dir, $qunkrefs) =
494 $self->_pretty_file_list
(\
(%unanimous_tags
, %non_unanimous_tags
, %all_branches
), $qunksref);
496 my @qunkrefs
= @
$qunkrefs;
498 # Not XML output, so complexly compactify for chordate consumption. At this
499 # point we have enough global information about all the qunks to organize
500 # them non-redundantly for output.
503 # Note that $common_dir still has its trailing slash
504 $beauty .
= "$common_dir: ";
509 # For trailing revision numbers.
512 foreach my
$branch (keys
(%all_branches
))
514 foreach my
$qunkref (@qunkrefs
)
516 if ((defined
($qunkref->branch
))
517 and
($qunkref->branch eq
$branch))
520 # kff todo: comma-delimited in XML too? Sure.
526 my
$fname = substr
($qunkref->filename
, length
($common_dir));
528 $qunkref->{'printed'} = 1; # Just setting a mark bit, basically
530 if ( $Show_Tags and defined
$qunkref->tags
) {
531 my @tags
= grep ($non_unanimous_tags{$_}, @
{$qunkref->tags
});
534 $beauty .
= " (tags: ";
535 $beauty .
= join (', ', @tags
);
540 if ($Show_Revisions) {
541 # Collect the revision numbers' last components, but don't
542 # print them -- they'll get printed with the branch name
544 $qunkref->revision
=~
/.
+\.
([\d
]+)$
/;
545 push
(@brevisions
, $1);
547 # todo: we're still collecting branch roots, but we're not
548 # showing them anywhere. If we do show them, it would be
549 # nifty to just call them revision "0" on a the branch.
550 # Yeah, that's the ticket.
554 $beauty .
= " ($branch";
556 if ((scalar
(@brevisions
)) > 1) {
558 $beauty .
= (join (',', @brevisions
));
562 # Square brackets are spurious here, since there's no range to
564 $beauty .
= ".$brevisions[0]";
571 # Okay; any qunks that were done according to branch are taken care
572 # of, and marked as printed. Now print everyone else.
574 my
%fileinfo_printed
;
575 foreach my
$qunkref (@qunkrefs
)
577 next
if (defined
($qunkref->{'printed'})); # skip if already printed
579 my
$b = substr
($qunkref->filename
, length
($common_dir));
580 # todo: Shlomo's change was this:
581 # $beauty .= substr ($qunkref->filename,
582 # (($common_dir eq "./") ? '' : length ($common_dir)));
583 $qunkref->{'printed'} = 1; # Set a mark bit.
585 if ($Show_Revisions ||
$Show_Tags ||
$Show_Dead)
587 my
$started_addendum = 0;
589 if ($Show_Revisions) {
590 $started_addendum = 1;
592 $b .
= $qunkref->revision
;
594 if ($Show_Dead && $qunkref->state
=~
/dead
/)
596 # Deliberately not using $started_addendum. Keeping it simple.
599 if ($Show_Tags && (defined
$qunkref->tags
)) {
600 my @tags
= grep ($non_unanimous_tags{$_}, @
{$qunkref->tags
});
601 if ((scalar
(@tags
)) > 0) {
602 if ($started_addendum) {
608 $b .
= join (', ', @tags
);
609 $started_addendum = 1;
612 if ($started_addendum) {
617 unless
( exists
$fileinfo_printed{$b} ) {
623 $beauty .
= $b, $fileinfo_printed{$b} = 1;
627 # Unanimous tags always come last.
628 if ($Show_Tags && %unanimous_tags
)
630 $beauty .
= " (utags: ";
631 $beauty .
= join (', ', sort keys
(%unanimous_tags
));
635 # todo: still have to take care of branch_roots?
637 $beauty = "$beauty:";
642 # -------------------------------------
646 my
($fh, $time, $tag) = @_
;
648 my
$fdatetime = $self->fdatetime
($time);
649 print
$fh "$fdatetime tag $tag\n\n";
653 # -------------------------------------
657 my
($msg, $files, $qunklist) = @_
;
661 if ( $No_Wrap and
! $Summary ) {
662 $msg = $self->preprocess_msg_text
($msg);
663 $files = $self->mywrap
("\t", "\t ", "* $files");
664 $msg =~ s
/\n(.
+)/\n$Indent$1/g
;
665 unless
($After_Header eq
" ") {
666 $msg =~ s
/^
(.
+)/$Indent$1/g
;
668 if ( $Hide_Filenames ) {
669 $body = $After_Header .
$msg;
671 $body = $files .
$After_Header .
$msg;
673 } elsif
( $Summary ) {
674 my
($filelist, $qunk);
675 my
(@DeletedQunks
, @AddedQunks
, @ChangedQunks
);
677 $msg = $self->preprocess_msg_text
($msg);
679 # Sort the files (qunks) according to the operation that was
680 # performed. Files which were added have no line change
681 # indicator, whereas deleted files have state dead.
683 foreach
$qunk ( @
$qunklist ) {
684 if ( "dead" eq
$qunk->state
) {
685 push @DeletedQunks
, $qunk;
686 } elsif
( ! defined
$qunk->lines
) {
687 push @AddedQunks
, $qunk;
689 push @ChangedQunks
, $qunk;
693 # The qunks list was originally in tree search order. Let's
694 # get that back. The lists, if they exist, will be reversed upon
699 # Now write the three sections onto $filelist
701 if ( @DeletedQunks
) {
702 $filelist .
= "\tDeleted:\n";
703 foreach
$qunk ( @DeletedQunks
) {
704 $filelist .
= "\t\t" .
$qunk->filename
;
705 $filelist .
= " (" .
$qunk->revision .
")";
712 $filelist .
= "\tAdded:\n";
713 foreach
$qunk (@AddedQunks
) {
714 $filelist .
= "\t\t" .
$qunk->filename
;
715 $filelist .
= " (" .
$qunk->revision .
")";
721 if ( @ChangedQunks
) {
722 $filelist .
= "\tChanged:\n";
723 foreach
$qunk (@ChangedQunks
) {
724 $filelist .
= "\t\t" .
$qunk->filename
;
725 $filelist .
= " (" .
$qunk->revision .
")";
726 $filelist .
= ", \"" .
$qunk->state .
"\"";
727 $filelist .
= ", lines: " .
$qunk->lines
;
735 if ( $Hide_Filenames ) {
739 $msg =~ s
/\n(.
*)/\n$Indent$1/g
;
740 unless
( $After_Header eq
" " or
$FSF_Style ) {
741 $msg =~ s
/^
(.
*)/$Indent$1/g
;
744 unless
( $No_Wrap ) {
746 $msg = $self->wrap_log_entry
($msg, '', 69, 69);
750 $msg = $self->mywrap
('', $Indent, "$msg");
751 $msg =~ s
/[ \t]+\n/\n/g
;
755 $body = $filelist .
$After_Header .
$msg;
756 } else { # do wrapping, either FSF-style or regular
757 my
$latter_wrap = $No_Extra_Indent ?
$Indent : "$Indent ";
760 $files = $self->mywrap
($Indent, $latter_wrap, "* $files");
762 my
$files_last_line_len = 0;
763 if ( $After_Header eq
" " ) {
764 $files_last_line_len = $self->last_line_len
($files);
765 $files_last_line_len += 1; # for $After_Header
768 $msg = $self->wrap_log_entry
($msg, $latter_wrap, 69-$files_last_line_len, 69);
769 $body = $files .
$After_Header .
$msg;
770 } else { # not FSF-style
771 $msg = $self->preprocess_msg_text
($msg);
772 $body = $files .
$After_Header .
$msg;
773 $body = $self->mywrap
($Indent, $latter_wrap, "* $body");
774 $body =~ s
/[ \t]+\n/\n/g
;
781 # ----------------------------------------------------------------------------
783 package CVS
::Utils
::ChangeLog
::EntrySet
::Output
::XML
;
785 use base qw
( CVS
::Utils
::ChangeLog
::EntrySet
::Output
);
787 use File
::Basename qw
( fileparse
);
791 my
$self = $class->SUPER
::new
(@_
);
794 # -------------------------------------
798 my
($time, $author, $lastdate) = @_
;
800 my
$header_line = '';
804 my
($y, $m, $d, $H, $M, $S) = (gmtime
($time))[5,4,3,2,1,0];
806 # Ideally, this would honor $UTC_Times and use +HH:MM syntax
807 $isoDate = sprintf
("%04d-%02d-%02dT%02d:%02d:%02dZ",
808 $y + 1900, $m + 1, $d, $H, $M, $S);
810 my
(undef
,$min,$hour,$mday,$mon,$year,$wday)
811 = $UTC_Times ? gmtime
($time) : localtime
($time);
813 my
$date = $self->fdatetime
($time);
814 $wday = $self->wday
($wday);
817 sprintf
("<date>%4u-%02u-%02u</date>\n${wday}<time>%02u:%02u</time>\n",
818 $year+1900, $mon+1, $mday, $hour, $min);
819 $header_line .
= "<isoDate>$isoDate</isoDate>\n"
820 unless
$No_XML_ISO_Date;
821 $header_line .
= sprintf
("<author>%s</author>\n" , $author);
824 # -------------------------------------
827 my
$self = shift; my
$class = ref
$self;
830 return '<weekday>' .
$class->weekday_en
($wday) .
"</weekday>\n";
833 # -------------------------------------
845 # -------------------------------------
852 length
$XML_Encoding ? qq
'encoding="$XML_Encoding"' : '';
853 my
$version = 'version="1.0"';
855 sprintf
'<?xml %s?>', join ' ', grep length
, $version, $encoding;
859 '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
860 print
$fh "$declaration\n\n$root\n\n";
863 # -------------------------------------
869 print
$fh "</changelog>\n";
872 # -------------------------------------
874 sub preprocess_msg_text
{
878 $text = $self->SUPER
::preprocess_msg_text
($text);
880 $text = $self->escape
($text);
882 $text = "<msg>${text}</msg>\n";
887 # -------------------------------------
889 # Here we take a bunch of qunks and convert them into a printed
890 # summary that will include all the information the user asked for.
891 sub pretty_file_list
{
895 my
$beauty = ''; # The accumulating header string for this entry.
896 my
%non_unanimous_tags
; # Tags found in a proper subset of qunks
897 my
%unanimous_tags
; # Tags found in all qunks
898 my
%all_branches
; # Branches found in any qunk
899 my
$fbegun = 0; # Did we begin printing filenames yet?
901 my
($common_dir, $qunkrefs) =
902 $self->_pretty_file_list
(\
(%unanimous_tags
, %non_unanimous_tags
, %all_branches
),
905 my @qunkrefs
= @
$qunkrefs;
907 # If outputting XML, then our task is pretty simple, because we
908 # don't have to detect common dir, common tags, branch prefixing,
909 # etc. We just output exactly what we have, and don't worry about
910 # redundancy or readability.
912 foreach my
$qunkref (@qunkrefs
)
914 my
$filename = $qunkref->filename
;
915 my
$state = $qunkref->state
;
916 my
$revision = $qunkref->revision
;
917 my
$tags = $qunkref->tags
;
918 my
$branch = $qunkref->branch
;
919 my
$branchroots = $qunkref->roots
;
920 my
$lines = $qunkref->lines
;
922 $filename = $self->escape
($filename); # probably paranoia
923 $revision = $self->escape
($revision); # definitely paranoia
925 $beauty .
= "<file>\n";
926 $beauty .
= "<name>${filename}</name>\n";
927 $beauty .
= "<cvsstate>${state}</cvsstate>\n";
928 $beauty .
= "<revision>${revision}</revision>\n";
930 if ($Show_Lines_Modified
931 && $lines && $lines =~ m
/\
+(\d
+)\s
+-(\d
+)/) {
932 $beauty .
= "<linesadded>$1</linesadded>\n";
933 $beauty .
= "<linesremoved>$2</linesremoved>\n";
937 $branch = $self->escape
($branch); # more paranoia
938 $beauty .
= "<branch>${branch}</branch>\n";
940 foreach my
$tag (@
$tags) {
941 $tag = $self->escape
($tag); # by now you're used to the paranoia
942 $beauty .
= "<tag>${tag}</tag>\n";
944 foreach my
$root (@
$branchroots) {
945 $root = $self->escape
($root); # which is good, because it will continue
946 $beauty .
= "<branchroot>${root}</branchroot>\n";
948 $beauty .
= "</file>\n";
951 # Theoretically, we could go home now. But as long as we're here,
952 # let's print out the common_dir and utags, as a convenience to
953 # the receiver (after all, earlier code calculated that stuff
954 # anyway, so we might as well take advantage of it).
956 if ((scalar
(keys
(%unanimous_tags
))) > 1) {
957 foreach my
$utag ((keys
(%unanimous_tags
))) {
958 $utag = $self->escape
($utag); # the usual paranoia
959 $beauty .
= "<utag>${utag}</utag>\n";
963 $common_dir = $self->escape
($common_dir);
964 $beauty .
= "<commondir>${common_dir}</commondir>\n";
967 # That's enough for XML, time to go home:
971 # -------------------------------------
975 my
($fh, $time, $tag) = @_
;
977 my
($y, $m, $d, $H, $M, $S) = (gmtime
($time))[5,4,3,2,1,0];
979 # Ideally, this would honor $UTC_Times and use +HH:MM syntax
980 my
$isoDate = sprintf
("%04d-%02d-%02dT%02d:%02d:%02dZ",
981 $y + 1900, $m + 1, $d, $H, $M, $S);
983 print
$fh "<tagdate>\n";
984 print
$fh "<tagisodate>$isoDate</tagisodate>\n";
985 print
$fh "<tagdatetag>$tag</tagdatetag>\n";
986 print
$fh "</tagdate>\n\n";
990 # -------------------------------------
994 my
($fh, $entry) = @_
;
995 print
$fh "<entry>\n$entry</entry>\n\n";
998 # -------------------------------------
1002 my
($msg, $files, $qunklist) = @_
;
1004 $msg = $self->preprocess_msg_text
($msg);
1005 return $files .
$msg;
1008 # ----------------------------------------------------------------------------
1010 package CVS
::Utils
::ChangeLog
::EntrySet
::Output
;
1012 use Carp qw
( croak
);
1013 use File
::Basename qw
( fileparse
);
1015 # Class Utility Functions -------------
1019 my @weekdays
= (qw
(Sunday Monday Tuesday Wednesday Thursday Friday Saturday
));
1022 return $weekdays[$_[0]];
1027 # -------------------------------------
1030 my
($proto, %args
) = @_
;
1031 my
$class = ref
$proto ||
$proto;
1033 my
$follow_branches = delete
$args{follow_branches
};
1034 my
$follow_only = delete
$args{follow_only
};
1035 my
$ignore_tags = delete
$args{ignore_tags
};
1036 my
$show_tags = delete
$args{show_tags
};
1037 die
"Unrecognized arg to EntrySet::Output::new: '$_'\n"
1040 bless
+{follow_branches
=> $follow_branches,
1041 follow_only
=> $follow_only,
1042 show_tags
=> $show_tags,
1043 ignore_tags
=> $ignore_tags,
1047 # Abstract Subrs ----------------------
1049 sub wday
{ croak
"Whoops. Abtract method call (wday).\n" }
1050 sub pretty_file_list
{ croak
"Whoops. Abtract method call (pretty_file_list).\n" }
1051 sub output_tagdate
{ croak
"Whoops. Abtract method call (output_tagdate).\n" }
1052 sub header_line
{ croak
"Whoops. Abtract method call (header_line).\n" }
1054 # Instance Subrs ----------------------
1056 sub output_header
{ }
1058 # -------------------------------------
1062 my
($fh, $entry) = @_
;
1063 print
$fh "$entry\n";
1066 # -------------------------------------
1068 sub output_footer
{ }
1070 # -------------------------------------
1072 sub escape
{ return $_[1] }
1074 # -------------------------------------
1076 sub _revision_is_wanted
{
1077 my
($self, $qunk) = @_
;
1079 my
($revision, $branch_numbers) = @
{$qunk}{qw
( revision branch_numbers
)};
1080 my
$follow_branches = $self->{follow_branches
};
1081 my
$follow_only = $self->{follow_only
};
1083 for my
$ignore_tag (keys
%{$self->{ignore_tags
}}) {
1085 if defined
$qunk->{tags
} and
grep $_ eq
$ignore_tag, @
{$qunk->{tags
}};
1088 if ( keys
%{$self->{show_tags
}} ) {
1089 for my
$show_tag (keys
%{$self->{show_tags
}}) {
1091 if ! defined
$qunk->{tags
} or
! grep $_ eq
$show_tag, @
{$qunk->{tags
}};
1096 unless @
$follow_branches + @
$follow_only; # no follow is follow all
1098 for my
$x (map
([$_, 1], @
$follow_branches),
1099 map
([$_, 0], @
$follow_only )) {
1100 my
($branch, $followsub) = @
$x;
1102 # Special case for following trunk revisions
1104 if $branch =~
/^trunk$
/i and
$revision =~
/^
[0-9]+\.
[0-9]+$
/;
1106 if ( my
$branch_number = $branch_numbers->{$branch} ) {
1107 # Are we on one of the follow branches or an ancestor of same?
1109 # If this revision is a prefix of the branch number, or possibly is less
1110 # in the minormost number, OR if this branch number is a prefix of the
1111 # revision, then yes. Otherwise, no.
1113 # So below, we determine if any of those conditions are met.
1115 # Trivial case: is this revision on the branch? (Compare this way to
1116 # avoid regexps that screw up Emacs indentation, argh.)
1117 if ( substr
($revision, 0, (length
($branch_number) + 1))
1119 ($branch_number .
".") ) {
1122 # } elsif ( length($revision) == length($branch_number)+2 ) {
1123 } elsif
( substr
($revision, length
($branch_number)+1) =~
/^\d
+$
/ ) {
1126 } elsif
( length
($branch_number) > length
($revision)
1129 # Non-trivial case: check if rev is ancestral to branch
1131 # r_left still has the trailing "."
1132 my
($r_left, $r_end) = ($revision =~
/^
((?
:\d
+\.
)+)(\d
+)$
/);
1134 # b_left still has trailing "."
1135 # b_mid has no trailing "."
1136 my
($b_left, $b_mid) = ($branch_number =~
/^
((?
:\d
+\.
)+)(\d
+)\.\d
+$
/);
1138 if $r_left eq
$b_left and
$r_end <= $b_mid;
1146 # -------------------------------------
1148 sub output_changelog
{
1149 my
$self = shift; my
$class = ref
$self;
1150 my
($grand_poobah) = @_
;
1151 ### Process each ChangeLog
1153 while (my
($dir,$authorhash) = each
%$grand_poobah) {
1154 &main
::debug
("DOING DIR: $dir\n");
1156 # Here we twist our hash around, from being
1157 # author => time => message => filelist
1158 # in %$authorhash to
1159 # time => author => message => filelist
1162 # This is also where we merge entries. The algorithm proceeds
1163 # through the timeline of the changelog with a sliding window of
1164 # $Max_Checkin_Duration seconds; within that window, entries that
1165 # have the same log message are merged.
1167 # (To save space, we zap %$authorhash after we've copied
1168 # everything out of it.)
1170 # commit messages matching these will be ignored
1171 # should probably read these in from a file since they
1172 # will mostly change each release, but this is a quick hack!
1173 my @skip
= ("Updating mailing lists URL",
1174 "Updated bug reporting",
1175 "use base, return true",
1176 "Removed spurious ",
1177 "cleaning unnecessary ",
1179 "Improved POD markup",
1181 "I'll be using bioperl.org mail address",
1182 "Switched vanilla throw",
1194 "Removed unused \"use vars",
1195 "return, not return undef",
1196 "lexically scoped file handles",
1197 "No setting of own \$VERSION",
1198 "do not return directly from sort",
1199 "have NAME match module",
1203 "No setting own version",
1206 # not interested in these files
1207 my
%files_to_skip
= ( AUTHORS
=> 1,
1209 'INSTALL.PROGRAMS' => 1,
1218 'MANIFEST.SKIP' => 1,
1222 while (my
($author,$timehash) = each
%$authorhash) {
1223 foreach my
$time (sort {$a <=> $b} (keys
%$timehash)) {
1224 next
if ($Delta_Mode && (($time <= $Delta_StartTime) ||
($time > $Delta_EndTime && $Delta_EndTime)));
1226 my
$msghash = $timehash->{$time};
1227 MSG
: while (my
($msg, $qunklist) = each
%$msghash) {
1228 foreach my
$skip (@skip
) {
1229 if ($msg =~
/$skip/i
) {
1233 if ($msg =~
/merge
/i
&& $msg =~
/head
/i
) {
1239 foreach my
$file (@
{$qunklist->files
}) {
1240 $changelog{$file->filename
}{$time} = $msg;
1245 undef
(%$authorhash);
1247 ### Now we can write out the ChangeLog!
1249 my
($logfile_here, $logfile_bak, $tmpfile);
1252 if (! $Output_To_Stdout) {
1253 $logfile_here = $dir .
$Log_File_Name;
1254 $logfile_here =~ s
/^\.\
/\
//\
//; # fix any leading ".//" problem
1255 $tmpfile = "${logfile_here}.cvs2cl$$.tmp";
1256 $logfile_bak = "${logfile_here}.bak";
1258 open
(LOG_OUT
, ">$tmpfile") or die
"Unable to open \"$tmpfile\"";
1261 open
(LOG_OUT
, ">-") or die
"Unable to open stdout for writing";
1264 print LOG_OUT
$ChangeLog_Header;
1266 print LOG_OUT
"These are detailed notes on changes made between $Delta_From and $Delta_To.\n\n";
1268 my
%tag_date_printed
;
1270 $self->output_header
(\
*LOG_OUT
);
1272 my @file_list
= sort {$a cmp $b} (keys
%changelog
);
1273 foreach my
$file (@file_list
) {
1274 # skip files we don't need to see changes for
1275 next
if exists
$files_to_skip{$file};
1276 next
if $file =~
/^t\
//;
1278 # convert module filenames to module name
1280 if ($module =~
/^Bio
/) {
1281 $module = '<nowiki>'.
join("::", File
::Spec-
>splitdir
($file)).
'</nowiki>';
1282 $module =~ s
/\.pm
//;
1284 print LOG_OUT
"; $module\n";
1285 foreach my
$time (sort {$a <=> $b} keys
%{$changelog{$file}}) {
1286 my
$msg = $changelog{$file}{$time};
1288 # uppercase first letter
1289 $msg =~ s
/^
(\w
)/\U
$1/;
1291 # link bugs to bugzilla
1292 $msg =~ s
/bug.
*(\d
{4})/{{Bugzilla|
$1}}/i
;
1294 print LOG_OUT
": $msg\n";
1299 $self->output_footer
(\
*LOG_OUT
);
1303 if ( ! $Output_To_Stdout ) {
1304 # If accumulating, append old data to new before renaming. But
1305 # don't append the most recent entry, since it's already in the
1306 # new log due to CVS's idiosyncratic interpretation of "log -d".
1307 if ($Cumulative && -f $logfile_here) {
1308 open
$NEW_LOG, '>>', $tmpfile
1309 or die
"Could not append to file '$tmpfile': $!\n";
1311 open
$OLD_LOG, '<', $logfile_here
1312 or die
"Could not read file '$logfile_here': $!";
1314 my
$started_first_entry = 0;
1315 my
$passed_first_entry = 0;
1316 while (<$OLD_LOG>) {
1317 if ( ! $passed_first_entry ) {
1318 if ( ( ! $started_first_entry )
1319 and
/^
(\d\d\d\d-\d\d-\d\d\s
+(\w
+\s
+)?\d\d
:\d\d
)/
1321 $started_first_entry = 1;
1323 elsif
( /^
(\d\d\d\d-\d\d-\d\d\s
+(\w
+\s
+)?\d\d
:\d\d
)/ ) {
1324 $passed_first_entry = 1;
1336 if ( -f $logfile_here ) {
1337 rename
$logfile_here, $logfile_bak;
1339 rename
$tmpfile, $logfile_here;
1344 # -------------------------------------
1346 # Don't call this wrap, because with 5.5.3, that clashes with the
1347 # (unconditional :-( ) export of wrap() from Text::Wrap
1350 my
($indent1, $indent2, @text
) = @_
;
1351 # If incoming text looks preformatted, don't get clever
1352 my
$text = Text
::Wrap
::wrap
($indent1, $indent2, @text
);
1353 if ( grep /^\s
+/m
, @text
) {
1356 my @lines
= split /\n/, $text;
1357 $indent2 =~ s
!^
((?
: {8})+)!"\t" x
(length
($1)/8)!e
;
1358 $lines[0] =~ s
/^
$indent1\s
+/$indent1/;
1359 s
/^
$indent2\s
+/$indent2/
1360 for @lines
[1..
$#lines];
1361 my
$newtext = join "\n", @lines
;
1363 if substr
($text, -1) eq
"\n";
1367 # -------------------------------------
1369 sub preprocess_msg_text
{
1373 # Strip out carriage returns (as they probably result from DOSsy editors).
1374 $text =~ s
/\r\n/\n/g
;
1375 # If it *looks* like two newlines, make it *be* two newlines:
1376 $text =~ s
/\n\s
*\n/\n\n/g
;
1381 # -------------------------------------
1386 my
$files_list = shift;
1387 my @lines
= split (/\n/, $files_list);
1388 my
$last_line = pop
(@lines
);
1389 return length
($last_line);
1392 # -------------------------------------
1394 # A custom wrap function, sensitive to some common constructs used in
1396 sub wrap_log_entry
{
1399 my
$text = shift; # The text to wrap.
1400 my
$left_pad_str = shift; # String to pad with on the left.
1402 # These do NOT take left_pad_str into account:
1403 my
$length_remaining = shift; # Amount left on current line.
1404 my
$max_line_length = shift; # Amount left for a blank line.
1406 my
$wrapped_text = ''; # The accumulating wrapped entry.
1407 my
$user_indent = ''; # Inherited user_indent from prev line.
1409 my
$first_time = 1; # First iteration of the loop?
1410 my
$suppress_line_start_match = 0; # Set to disable line start checks.
1412 my @lines
= split (/\n/, $text);
1413 while (@lines
) # Don't use `foreach' here, it won't work.
1415 my
$this_line = shift (@lines
);
1418 if ($this_line =~
/^
(\s
+)/) {
1425 # If it matches any of the line-start regexps, print a newline now...
1426 if ($suppress_line_start_match)
1428 $suppress_line_start_match = 0;
1430 elsif
(($this_line =~
/^
(\s
*)\
*\s
+[a-zA-Z0-9
]/)
1431 ||
($this_line =~
/^
(\s
*)\
* [a-zA-Z0-9_\.\
/\
+-]+/)
1432 ||
($this_line =~
/^
(\s
*)\
([a-zA-Z0-9_\.\
/\
+-]+(\
)|
,\s
*)/)
1433 ||
($this_line =~
/^
(\s
+)(\S
+)/)
1434 ||
($this_line =~
/^
(\s
*)- +/)
1435 ||
($this_line =~
/^
()\s
*$
/)
1436 ||
($this_line =~
/^
(\s
*)\
*\
) +/)
1437 ||
($this_line =~
/^
(\s
*)[a-zA-Z0-9
](\
)|\.|\
:) +/))
1439 # Make a line break immediately, unless header separator is set
1440 # and this line is the first line in the entry, in which case
1441 # we're getting the blank line for free already and shouldn't
1443 unless
(($After_Header ne
" ") and
($first_time))
1445 if ($this_line =~
/^
()\s
*$
/) {
1446 $suppress_line_start_match = 1;
1447 $wrapped_text .
= "\n${left_pad_str}";
1450 $wrapped_text .
= "\n${left_pad_str}";
1453 $length_remaining = $max_line_length - (length
($user_indent));
1456 # Now that any user_indent has been preserved, strip off leading
1457 # whitespace, so up-folding has no ugly side-effects.
1458 $this_line =~ s
/^\s
*//;
1460 # Accumulate the line, and adjust parameters for next line.
1461 my
$this_len = length
($this_line);
1464 # Blank lines should cancel any user_indent level.
1466 $length_remaining = $max_line_length;
1468 elsif
($this_len >= $length_remaining) # Line too long, try breaking it.
1470 # Walk backwards from the end. At first acceptable spot, break
1472 my
$idx = $length_remaining - 1;
1473 if ($idx < 0) { $idx = 0 };
1476 if (substr
($this_line, $idx, 1) =~
/\s
/)
1478 my
$line_now = substr
($this_line, 0, $idx);
1479 my
$next_line = substr
($this_line, $idx);
1480 $this_line = $line_now;
1482 # Clean whitespace off the end.
1485 # The current line is ready to be printed.
1486 $this_line .
= "\n${left_pad_str}";
1488 # Make sure the next line is allowed full room.
1489 $length_remaining = $max_line_length - (length
($user_indent));
1491 # Strip next_line, but then preserve any user_indent.
1492 $next_line =~ s
/^\s
*//;
1494 # Sneak a peek at the user_indent of the upcoming line, so
1495 # $next_line (which will now precede it) can inherit that
1496 # indent level. Otherwise, use whatever user_indent level
1497 # we currently have, which might be none.
1498 my
$next_next_line = shift (@lines
);
1499 if ((defined
($next_next_line)) && ($next_next_line =~
/^
(\s
+)/)) {
1500 $next_line = $1 .
$next_line if (defined
($1));
1501 # $length_remaining = $max_line_length - (length ($1));
1502 $next_next_line =~ s
/^\s
*//;
1505 $next_line = $user_indent .
$next_line;
1507 if (defined
($next_next_line)) {
1508 unshift
(@lines
, $next_next_line);
1510 unshift
(@lines
, $next_line);
1512 # Our new next line might, coincidentally, begin with one of
1513 # the line-start regexps, so we temporarily turn off
1514 # sensitivity to that until we're past the line.
1515 $suppress_line_start_match = 1;
1527 # We bottomed out because the line is longer than the
1528 # available space. But that could be because the space is
1529 # small, or because the line is longer than even the maximum
1530 # possible space. Handle both cases below.
1532 if ($length_remaining == ($max_line_length - (length
($user_indent))))
1534 # The line is simply too long -- there is no hope of ever
1535 # breaking it nicely, so just insert it verbatim, with
1536 # appropriate padding.
1537 $this_line = "\n${left_pad_str}${this_line}";
1541 # Can't break it here, but may be able to on the next round...
1542 unshift
(@lines
, $this_line);
1543 $length_remaining = $max_line_length - (length
($user_indent));
1544 $this_line = "\n${left_pad_str}";
1548 else # $this_len < $length_remaining, so tack on what we can.
1550 # Leave a note for the next iteration.
1551 $length_remaining = $length_remaining - $this_len;
1553 if ($this_line =~
/\.$
/)
1556 $length_remaining -= 2;
1558 else # not a sentence end
1561 $length_remaining -= 1;
1565 # Unconditionally indicate that loop has run at least once.
1568 $wrapped_text .
= "${user_indent}${this_line}";
1571 # One last bit of padding.
1572 $wrapped_text .
= "\n";
1574 return $wrapped_text;
1577 # -------------------------------------
1579 sub _pretty_file_list
{
1582 my
($unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref) = @_
;
1585 grep +( ( ! $_->tags_exists
1587 ! grep exists
$ignore_tags{$_}, @
{$_->tags
})
1593 grep exists
$show_tags{$_}, @
{$_->tags
} )
1598 my
$common_dir; # Dir prefix common to all files ('' if none)
1600 # First, loop over the qunks gathering all the tag/branch names.
1601 # We'll put them all in non_unanimous_tags, and take out the
1602 # unanimous ones later.
1604 foreach my
$qunkref (@qunkrefs
)
1606 # Keep track of whether all the files in this commit were in the
1607 # same directory, and memorize it if so. We can make the output a
1608 # little more compact by mentioning the directory only once.
1609 if ($Common_Dir && (scalar
(@qunkrefs
)) > 1)
1611 if (! (defined
($common_dir)))
1614 ($base, $dir, undef
) = fileparse
($qunkref->filename
);
1616 if ((! (defined
($dir))) # this first case is sheer paranoia
1628 elsif
($common_dir ne
'')
1630 # Already have a common dir prefix, so how much of it can we preserve?
1631 $common_dir = &main
::common_path_prefix
($qunkref->filename
, $common_dir);
1634 else # only one file in this entry anyway, so common dir not an issue
1639 if (defined
($qunkref->branch
)) {
1640 $all_branches->{$qunkref->branch
} = 1;
1642 if (defined
($qunkref->tags
)) {
1643 foreach my
$tag (@
{$qunkref->tags
}) {
1644 $non_unanimous_tags->{$tag} = 1;
1649 # Any tag held by all qunks will be printed specially... but only if
1650 # there are multiple qunks in the first place!
1651 if ((scalar
(@qunkrefs
)) > 1) {
1652 foreach my
$tag (keys
(%$non_unanimous_tags)) {
1653 my
$everyone_has_this_tag = 1;
1654 foreach my
$qunkref (@qunkrefs
) {
1655 if ((! (defined
($qunkref->tags
)))
1656 or
(! (grep ($_ eq
$tag, @
{$qunkref->tags
})))) {
1657 $everyone_has_this_tag = 0;
1660 if ($everyone_has_this_tag) {
1661 $unanimous_tags->{$tag} = 1;
1662 delete
$non_unanimous_tags->{$tag};
1667 return $common_dir, \@qunkrefs
;
1670 # -------------------------------------
1675 my
($year, $mday, $mon, $wday, $hour, $min);
1678 ($year, $mday, $mon, $wday, $hour, $min) = @_
;
1681 (undef
, $min, $hour, $mday, $mon, $year, $wday) =
1682 $UTC_Times ? gmtime
($time) : localtime
($time);
1686 $wday = $self->wday
($wday);
1689 my
$fdate = $self->fdate
($year, $mon, $mday, $wday);
1692 my
$ftime = $self->ftime
($hour, $min);
1693 return "$fdate $ftime";
1699 # -------------------------------------
1704 my
($year, $mday, $mon, $wday);
1707 ($year, $mon, $mday, $wday) = @_
;
1710 (undef
, undef
, undef
, $mday, $mon, $year, $wday) =
1711 $UTC_Times ? gmtime
($time) : localtime
($time);
1715 $wday = $self->wday
($wday);
1718 return sprintf
'%4u-%02u-%02u%s', $year, $mon, $mday, $wday;
1721 # -------------------------------------
1732 (undef
, $min, $hour) = $UTC_Times ? gmtime
($time) : localtime
($time);
1735 return sprintf
'%02u:%02u', $hour, $min;
1738 # ----------------------------------------------------------------------------
1740 package CVS
::Utils
::ChangeLog
::Message
;
1746 my
%self
= (msg
=> $msg, files
=> []);
1748 bless \
%self
, $class;
1753 my
($fileentry) = @_
;
1755 die
"Not a fileentry: $fileentry"
1756 unless
$fileentry->isa
('CVS::Utils::ChangeLog::FileEntry');
1758 push @
{$self->{files
}}, $fileentry;
1761 sub files
{ wantarray ? @
{$_[0]->{files
}} : $_[0]->{files
} }
1763 # ----------------------------------------------------------------------------
1765 package CVS
::Utils
::ChangeLog
::FileEntry
;
1767 use File
::Basename qw
( fileparse
);
1769 # Each revision of a file has a little data structure (a `qunk')
1770 # associated with it. That data structure holds not only the
1771 # file's name, but any additional information about the file
1772 # that might be needed in the output, such as the revision
1773 # number, tags, branches, etc. The reason to have these things
1774 # arranged in a data structure, instead of just appending them
1775 # textually to the file's name, is that we may want to do a
1776 # little rearranging later as we write the output. For example,
1777 # all the files on a given tag/branch will go together, followed
1778 # by the tag in parentheses (so trunk or otherwise non-tagged
1779 # files would go at the end of the file list for a given log
1780 # message). This rearrangement is a lot easier to do if we
1781 # don't have to reparse the text.
1783 # A qunk looks like this:
1786 # filename => "hello.c",
1787 # revision => "1.4.3.2",
1788 # time => a timegm() return value (moment of commit)
1789 # tags => [ "tag1", "tag2", ... ],
1790 # branch => "branchname" # There should be only one, right?
1791 # roots => [ "branchtag1", "branchtag2", ... ]
1792 # lines => "+x -y" # or undefined; x and y are integers
1795 # Single top-level ChangeLog, or one per subdirectory?
1797 sub distributed
{ $#_ ?
($distributed = $_[1]) : $distributed; }
1801 my
($path, $time, $revision, $state, $lines,
1802 $branch_names, $branch_roots, $branch_numbers, $symbolic_names) = @_
;
1804 my
%self
= (time => $time,
1805 revision
=> $revision,
1808 branch_numbers
=> $branch_numbers,
1811 if ( $distributed ) {
1812 @self
{qw
(filename dir_key
)} = fileparse
($path);
1814 @self
{qw
(filename dir_key
)} = ($path, './');
1817 { # Scope for $branch_prefix
1818 (my
($branch_prefix) = ($revision =~
/((?
:\d
+\.
)+)\d
+/));
1819 $branch_prefix =~ s
/\.$
//;
1820 if ( $branch_names->{$branch_prefix} ) {
1821 my
$branch_name = $branch_names->{$branch_prefix};
1822 $self{branch
} = $branch_name;
1823 $self{branches
} = [$branch_name];
1825 while ( $branch_prefix =~ s
/^
(\d
+(?
:\.\d
+\.\d
+)+)\.\d
+\.\d
+$
/$1/ ) {
1826 push @
{$self{branches
}}, $branch_names->{$branch_prefix}
1827 if exists
$branch_names->{$branch_prefix};
1831 # If there's anything in the @branch_roots array, then this
1832 # revision is the root of at least one branch. We'll display
1833 # them as branch names instead of revision numbers, the
1834 # substitution for which is done directly in the array:
1835 $self{'roots'} = [ map
{ $branch_names->{$_} } @
$branch_roots ]
1838 if ( exists
$symbolic_names->{$revision} ) {
1839 $self{tags
} = delete
$symbolic_names->{$revision};
1840 &main
::delta_check
($time, $self{tags
});
1843 bless \
%self
, $class;
1846 sub filename
{ $_[0]->{filename
} }
1847 sub dir_key
{ $_[0]->{dir_key
} }
1848 sub revision
{ $_[0]->{revision
} }
1849 sub branch
{ $_[0]->{branch
} }
1850 sub state
{ $_[0]->{state
} }
1851 sub lines
{ $_[0]->{lines
} }
1852 sub roots
{ $_[0]->{roots
} }
1853 sub branch_numbers
{ $_[0]->{branch_numbers
} }
1855 sub tags
{ $_[0]->{tags
} }
1857 exists
$_[0]->{tags
};
1860 # This may someday be used in a more sophisticated calculation of what other
1861 # files are involved in this commit. For now, we don't use it much except for
1862 # delta mode, because the common-commit-detection algorithm is hypothesized to
1863 # be "good enough" as it stands.
1864 sub
time { $_[0]->{time} }
1866 # ----------------------------------------------------------------------------
1868 package CVS
::Utils
::ChangeLog
::EntrySetBuilder
;
1870 use File
::Basename qw
( fileparse
);
1871 use Time
::Local qw
( timegm
);
1873 use constant MAILNAME
=> "/etc/mailname";
1875 # In 'cvs log' output, one long unbroken line of equal signs separates files:
1876 use constant FILE_SEPARATOR
=> '=' x
77;# . "\n";
1877 # In 'cvs log' output, a shorter line of dashes separates log messages within
1879 use constant REV_SEPARATOR
=> '-' x
28;# . "\n";
1881 use constant EMPTY_LOG_MESSAGE
=> '*** empty log message ***';
1883 # -------------------------------------
1887 my
$class = ref
$proto ||
$proto;
1889 my
$poobah = CVS
::Utils
::ChangeLog
::EntrySet-
>new
;
1890 my
$self = bless
+{ grand_poobah
=> $poobah }, $class;
1893 $self->maybe_read_user_map_file
;
1897 # -------------------------------------
1902 # Make way for the next message
1903 undef
$self->{rev_msg
};
1904 undef
$self->{rev_time
};
1905 undef
$self->{rev_revision
};
1906 undef
$self->{rev_author
};
1907 undef
$self->{rev_state
};
1908 undef
$self->{lines
};
1909 $self->{rev_branch_roots
} = []; # For showing which files are branch
1911 $self->{collecting_symbolic_names
} = 0;
1914 # -------------------------------------
1920 undef
$self->{filename
};
1921 $self->{branch_names
} = +{}; # We'll grab branch names while we're
1923 $self->{branch_numbers
} = +{}; # Save some revisions for
1925 $self->{symbolic_names
} = +{}; # Where tag names get stored.
1928 # -------------------------------------
1930 sub grand_poobah
{ $_[0]->{grand_poobah
} }
1932 # -------------------------------------
1934 sub read_changelog
{
1935 my
($self, $command) = @_
;
1937 local (*READER
, *WRITER
);
1939 if (! $Input_From_Stdin) {
1940 pipe
(READER
, WRITER
)
1941 or die
"Couldn't form pipe: $!\n";
1943 die
"Couldn't fork: $!\n"
1945 if ( ! $pid ) { # child
1946 open STDOUT
, '>&=' . fileno WRITER
1947 or die
"Couldn't dup stderr to ", fileno WRITER
, "\n";
1948 # strangely, some perls give spurious warnings about STDIN being opened
1949 # for output only these close calls precede the STDOUT reopen above.
1950 # I think they must be reusing fd 1.
1959 &main
::debug
("(run \"@$command\")\n");
1962 open READER
, '-' or die
"Unable to open stdin for reading";
1972 # If on a new file and don't see filename, skip until we find it, and
1973 # when we find it, grab it.
1974 if ( ! defined
$self->{filename
} ) {
1975 $self->read_file_path
($_);
1976 } elsif
( /^symbolic names
:$
/ ) {
1977 $self->{collecting_symbolic_names
} = 1;
1978 } elsif
( $self->{collecting_symbolic_names
} ) {
1979 $self->read_symbolic_name
($_);
1980 } elsif
( $_ eq FILE_SEPARATOR and
! defined
$self->{rev_revision
} ) {
1982 } elsif
( ! defined
$self->{rev_revision
} ) {
1983 # If have file name, but not revision, and see revision, then grab
1984 # it. (We collect unconditionally, even though we may or may not
1986 $self->read_revision
($_);
1987 } elsif
( ! defined
$self->{rev_time
} ) { # and /^date: /) {
1988 $self->read_date_author_and_state
($_);
1989 } elsif
( /^branches
:\s
+(.
*);$
/ ) {
1990 $self->read_branches
($1);
1991 } elsif
( ! ( $_ eq FILE_SEPARATOR or
$_ eq REV_SEPARATOR
) ) {
1992 # If have file name, time, and author, then we're just grabbing
1993 # log message texts:
1994 $self->{rev_msg
} .
= $_ .
"\n"; # Normally, just accumulate the message...
1997 if ( ! $self->{rev_msg
}
1998 or
$self->{rev_msg
} =~
/^\s
*(\.\s
*)?$
/
1999 or index
($self->{rev_msg
}, EMPTY_LOG_MESSAGE
) > -1 ) {
2000 # ... until a msg separator is encountered:
2001 # Ensure the message contains something:
2002 $self->clear_msg
, $noadd = 1
2003 if $Prune_Empty_Msgs;
2004 $self->{rev_msg
} = "[no log message]\n";
2007 $self->add_file_entry
2010 if ( $_ eq FILE_SEPARATOR
) {
2019 or die
"Couldn't close pipe reader: $!\n";
2020 if ( defined
$pid ) {
2024 or $
!=1, die sprintf
("Problem reading log input (pid/exit/signal/core: %d/%d/%d/%d)\n",
2025 $pid, $?
>> 8, $?
& 127, $?
& 128);
2030 # -------------------------------------
2032 sub add_file_entry
{
2033 $_[0]->grand_poobah-
>add_fileentry
(@
{$_[0]}{qw
(filename rev_time rev_revision
2034 rev_state lines branch_names
2038 rev_author rev_msg
)});
2041 # -------------------------------------
2043 sub maybe_read_user_map_file
{
2051 if ( $User_Map_File =~ m
{^
([-\w\@
+=.
,\
/]+):([-\w\@
+=.
,\
/:]+)} and
2052 !-f $User_Map_File )
2054 my
$rsh = (exists
$ENV{'CVS_RSH'} ?
$ENV{'CVS_RSH'} : 'ssh');
2055 $User_Map_Input = "$rsh $1 'cat $2' |";
2056 &main
::debug
("(run \"${User_Map_Input}\")\n");
2060 $User_Map_Input = "<$User_Map_File";
2063 open
(MAPFILE
, $User_Map_Input)
2064 or die
("Unable to open $User_Map_File ($!)");
2068 next
if /^\s
*#/; # Skip comment lines.
2069 next
if not
/:/; # Skip lines without colons.
2071 # It is now safe to split on ':'.
2072 my
($username, $expansion) = split ':';
2074 $expansion =~ s
/^
'(.*)'$
/$1/;
2075 $expansion =~ s
/^
"(.*)"$
/$1/;
2077 # If it looks like the expansion has a real name already, then
2078 # we toss the username we got from CVS log. Otherwise, keep
2079 # it to use in combination with the email address.
2081 if ($expansion =~
/^\s
*<{0,1}\S
+@.
*/) {
2082 # Also, add angle brackets if none present
2083 if (! ($expansion =~
/<\S
+@\S
+>/)) {
2084 $expansions{$username} = "$username <$expansion>";
2087 $expansions{$username} = "$username $expansion";
2091 $expansions{$username} = $expansion;
2093 } # fi ($User_Map_File)
2098 if (defined
$User_Passwd_File)
2100 if ( ! defined
$Domain ) {
2101 if ( -e MAILNAME
) {
2102 chomp
($Domain = slurp_file
(MAILNAME
));
2105 for ([qw
(hostname
-d)], 'dnsdomainname', 'domainname') {
2106 my
($text, $exit, $sig, $core) = run_ext
($_);
2107 if ( $exit == 0 && $sig == 0 && $core == 0 ) {
2109 if ( length
$text ) {
2111 last MAILDOMAIN_CMD
;
2118 die
"No mail domain found\n"
2119 unless defined
$Domain;
2121 open
(MAPFILE
, "<$User_Passwd_File")
2122 or die
("Unable to open $User_Passwd_File ($!)");
2125 # all lines are valid
2126 my
($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
2128 ($expansion) = split (',', $gecos)
2129 if defined
$gecos && length
$gecos;
2131 my
$mailname = $Domain eq
'' ?
$username : "$username\@$Domain";
2132 $expansions{$username} = "$expansion <$mailname>";
2137 $self->{usermap
} = \
%expansions
;
2140 # -------------------------------------
2142 sub read_file_path
{
2143 my
($self, $line) = @_
;
2147 if ( $line =~
/^Working
file: (.
*)/ ) {
2149 } elsif
( defined
$RCS_Root
2151 $line =~ m|^RCS
file: $RCS_Root[/\\](.
*),v$|
) {
2153 $path =~ s
!Attic
/!!;
2158 if ( @Ignore_Files
) {
2160 ($base, undef
, undef
) = fileparse
($path);
2162 my
$xpath = $Case_Insensitive ? lc
($path) : $path;
2164 if grep $path =~
/$_/, @Ignore_Files
;
2167 $self->{filename
} = $path;
2171 # -------------------------------------
2173 sub read_symbolic_name
{
2174 my
($self, $line) = @_
;
2176 # All tag names are listed with whitespace in front in cvs log
2177 # output; so if see non-whitespace, then we're done collecting.
2179 $self->{collecting_symbolic_names
} = 0;
2182 # we're looking at a tag name, so parse & store it
2184 # According to the Cederqvist manual, in node "Tags", tag names must start
2185 # with an uppercase or lowercase letter and can contain uppercase and
2186 # lowercase letters, digits, `-', and `_'. However, it's not our place to
2187 # enforce that, so we'll allow anything CVS hands us to be a tag:
2188 my
($tag_name, $tag_rev) = ($line =~
/^\s
+([^
:]+): ([\d.
]+)$
/);
2190 # A branch number either has an odd number of digit sections
2191 # (and hence an even number of dots), or has ".0." as the
2192 # second-to-last digit section. Test for these conditions.
2193 my
$real_branch_rev = '';
2194 if ( $tag_rev =~
/^
(\d
+\.\d
+\.
)+\d
+$
/ # Even number of dots...
2196 $tag_rev !~
/^
(1\.
)+1$
/ ) { # ...but not "1.[1.]1"
2197 $real_branch_rev = $tag_rev;
2198 } elsif
($tag_rev =~
/(\d
+\.
(\d
+\.
)+)0.
(\d
+)/) { # Has ".0."
2199 $real_branch_rev = $1 .
$3;
2202 # If we got a branch, record its number.
2203 if ( $real_branch_rev ) {
2204 $self->{branch_names
}->{$real_branch_rev} = $tag_name;
2205 $self->{branch_numbers
}->{$tag_name} = $real_branch_rev;
2206 $tag_rev =~ s
/^
(\d
+\.\d
+).
+/$1/;
2209 # regardless if it is a name on a branch, store it as symbolic name so that tag deltas always work
2210 push @
{$self->{symbolic_names
}->{$tag_rev}}, $tag_name;
2213 $self->{collecting_symbolic_names
} = 1;
2217 # -------------------------------------
2220 my
($self, $line) = @_
;
2222 my
($revision) = ( $line =~
/^revision
(\d
+\.
[\d.
]+)/ );
2227 $self->{rev_revision
} = $revision;
2231 # -------------------------------------
2233 { # Closure over %gecos_warned
2235 sub read_date_author_and_state
{
2236 my
($self, $line) = @_
;
2238 my
($time, $author, $state) = $self->parse_date_author_and_state
($line);
2240 if ( defined
($self->{usermap
}->{$author}) and
$self->{usermap
}->{$author} ) {
2241 $author = $self->{usermap
}->{$author};
2242 } elsif
( defined
$Domain or
$Gecos == 1 ) {
2243 my
$email = $author;
2244 $email = $author.
"@".
$Domain
2245 if defined
$Domain && $Domain ne
'';
2247 my
$pw = getpwnam
($author);
2248 my
($fullname, $office, $workphone, $homephone, $gcos);
2249 if ( defined
$pw ) {
2250 $gcos = (getpwnam
($author))[6];
2251 ($fullname, $office, $workphone, $homephone) =
2252 split /\s
*,\s
*/, $gcos;
2254 warn
"Couldn't find gecos info for author '$author'\n"
2255 unless
$gecos_warned{$author}++;
2258 for (grep defined
, $fullname, $office, $workphone, $homephone) {
2259 s
/&/ucfirst
(lc
($pw->name
))/ge
;
2261 $author = $fullname .
" <" .
$email .
">"
2265 $self->{rev_state
} = $state;
2266 $self->{rev_time
} = $time;
2267 $self->{rev_author
} = $author;
2272 # -------------------------------------
2275 # A "branches: ..." line here indicates that one or more branches
2276 # are rooted at this revision. If we're showing branches, then we
2277 # want to show that fact as well, so we collect all the branches
2278 # that this is the latest ancestor of and store them in
2279 # $self->[rev_branch_roots}. Just for reference, the format of the
2280 # line we're seeing at this point is:
2282 # branches: 1.5.2; 1.5.4; ...;
2285 my
($self, $line) = @_
;
2287 # Ugh. This really bothers me. Suppose we see a log entry
2290 # ----------------------------
2292 # date: 1999/10/17 03:07:38; author: jrandom; state: Exp;
2294 # Intended first line of log message begins here.
2295 # ----------------------------
2297 # The question is, how we can tell the difference between that
2298 # log message and a *two*-line log message whose first line is
2300 # "branches: 1.1.2;"
2302 # See the problem? The output of "cvs log" is inherently
2305 # For now, we punt: we liberally assume that people don't
2306 # write log messages like that, and just toss a "branches:"
2307 # line if we see it but are not showing branches. I hope no
2308 # one ever loses real log data because of this.
2309 if ( $Show_Branches ) {
2310 $line =~ s
/(1\.
)+1;|
(1\.
)+1$
//; # ignore the trivial branch 1.1.1
2311 $self->{rev_branch_roots
} = [split /;\s
+/, $line]
2316 # -------------------------------------
2318 sub parse_date_author_and_state
{
2319 my
($self, $line) = @_
;
2320 # Parses the date/time and author out of a line like:
2322 # date: 1999/02/19 23:29:05; author: apharris; state: Exp;
2324 # or, in CVS 1.12.9:
2326 # date: 2004-06-05 16:10:32 +0000; author: somebody; state: Exp;
2328 my
($year, $mon, $mday, $hours, $min, $secs, $utcOffset, $author, $state, $rest) =
2330 m
!(\d
+)[-/](\d
+)[-/](\d
+)\s
+(\d
+):(\d
+):(\d
+)(\s
+[+-]\d
{4})?
;\s
+
2331 author
:\s
+([^
;]+);\s
+state
:\s
+([^
;]+);(.
*)!x
2332 or die
"Couldn't parse date ``$line''";
2333 die
"Bad date or Y2K issues"
2334 unless
$year > 1969 and
$year < 2258;
2335 # Kinda arbitrary, but useful as a sanity check
2336 my
$time = timegm
($secs, $min, $hours, $mday, $mon-1, $year-1900);
2337 if ( defined
$utcOffset ) {
2338 my
($plusminus, $hour, $minute) = ($utcOffset =~ m
/([+-])(\d\d
)(\d\d
)/);
2339 my
$offset = (($hour * 60) + $minute) * 60 * ($plusminus eq
'+' ?
-1 : 1);
2342 if ( $rest =~ m
!\s
+lines
:\s
+(.
*)! ) {
2343 $self->{lines
} = $1;
2346 return $time, $author, $state;
2349 # Subrs ----------------------------------------------------------------------
2354 my
($time, $tags) = @_
;
2356 # If we're in 'delta' mode, update the latest observed times for the
2357 # beginning and ending tags, and when we get around to printing output, we
2358 # will simply restrict ourselves to that timeframe...
2362 $Delta_StartTime = $time
2363 if $time > $Delta_StartTime and
grep { $_ eq
$Delta_From } @
$tags;
2365 $Delta_EndTime = $time
2366 if $time > $Delta_EndTime and
grep { $_ eq
$Delta_To } @
$tags;
2374 my $out = qx"@
$cmd 2>&1";
2376 my ($sig, $core, $exit) = ($? & 127, $? & 128, $? >> 8);
2377 return $out, $exit, $sig, $core;
2380 # -------------------------------------
2382 # If accumulating, grab the boundary date from pre-existing ChangeLog.
2383 sub maybe_grab_accumulation_date {
2384 if (! $Cumulative || $Update) {
2390 open (LOG, "$Log_File_Name")
2391 or die ("trouble opening
$Log_File_Name for reading
($
!)");
2396 if (/^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/)
2398 $boundary_date = "$1";
2405 # convert time from utc to local timezone if the ChangeLog has
2406 # dates/times in utc
2407 if ($UTC_Times && $boundary_date)
2409 # convert the utc time to a time value
2410 my ($year,$mon,$mday,$hour,$min) = $boundary_date =~
2411 m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#;
2412 my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900);
2413 # print the timevalue in the local timezone
2415 ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time);
2416 $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u",
2417 $year+1900,$mon+1,$mday,$hour,$min);
2420 return $boundary_date;
2423 # -------------------------------------
2425 # Fills up a ChangeLog structure in the current directory.
2426 sub derive_changelog {
2429 # See "The Plan
" above for a full explanation.
2431 # Might be adding to an existing ChangeLog
2432 my $accumulation_date = maybe_grab_accumulation_date;
2433 if ($accumulation_date) {
2434 # Insert -d immediately after 'cvs log'
2435 my $Log_Date_Command = "-d>${accumulation_date}";
2437 my ($log_index) = grep $command->[$_] eq 'log', 0..$#$command;
2438 splice @$command, $log_index+1, 0, $Log_Date_Command;
2439 &debug ("(adding log msg starting from
$accumulation_date)\n");
2442 # output_changelog(read_changelog($command));
2443 my $builder = CVS::Utils::ChangeLog::EntrySetBuilder->new;
2444 $builder->read_changelog($command);
2445 $builder->grand_poobah->output_changelog;
2448 # -------------------------------------
2450 sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
2452 # -------------------------------------
2454 sub common_path_prefix {
2455 my ($path1, $path2) = @_;
2457 # For compatibility (with older versions of cvs2cl.pl), we think in UN*X
2458 # terms, and mould windoze filenames to match. Is this really appropriate?
2459 # If a file is checked in under UN*X, and cvs log run on windoze, which way
2460 # do the path separators slope? Can we use fileparse as per the local
2461 # conventions? If so, we should probably have a user option to specify an
2462 # OS to emulate to handle stdin-fed logs. If we did this, we could avoid
2463 # the nasty \-/ transmogrification below.
2465 my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2;
2467 # Transmogrify Windows filenames to look like Unix.
2468 # (It is far more likely that someone is running cvs2cl.pl under
2469 # Windows than that they would genuinely have backslashes in their
2474 my ($accum1, $accum2, $last_common_prefix) = ('') x 3;
2476 my @path1 = grep length($_), split qr!/!, $dir1;
2477 my @path2 = grep length($_), split qr!/!, $dir2;
2480 for (0..min($#path1,$#path2)) {
2481 if ( $path1[$_] eq $path2[$_]) {
2482 push @common_path, $path1[$_];
2488 return join '', map "$_/", @common_path;
2491 # -------------------------------------
2494 # Check this internally before setting the global variable.
2497 # If this gets set, we encountered unknown options and will exit at
2498 # the end of this subroutine.
2499 my $exit_with_admonishment = 0;
2501 # command to generate the log
2502 my @log_source_command = qw( cvs log );
2504 my (@Global_Opts, @Local_Opts);
2506 Getopt::Long::Configure(qw( bundling permute no_getopt_compat
2507 pass_through no_ignore_case ));
2508 GetOptions('help|usage|h' => \$Print_Usage,
2509 'debug' => \$Debug, # unadvertised option, heh
2510 'version' => \$Print_Version,
2512 'file|f=s' => \$output_file,
2513 'accum' => \$Cumulative,
2514 'update' => \$Update,
2515 'fsf' => \$FSF_Style,
2516 'rcs=s' => \$RCS_Root,
2517 'usermap|U=s' => \$User_Map_File,
2519 'domain=s' => \$Domain,
2520 'passwd=s' => \$User_Passwd_File,
2521 'window|W=i' => \$Max_Checkin_Duration,
2522 'chrono' => \$Chronological_Order,
2523 'ignore|I=s' => \@Ignore_Files,
2524 'case-insensitive|C' => \$Case_Insensitive,
2525 'regexp|R=s' => \$Regexp_Gate,
2526 'stdin' => \$Input_From_Stdin,
2527 'stdout' => \$Output_To_Stdout,
2528 'distributed|d' => sub { CVS::Utils::ChangeLog::FileEntry->distributed(1) },
2529 'prune|P' => \$Prune_Empty_Msgs,
2530 'no-wrap' => \$No_Wrap,
2531 'gmt|utc' => \$UTC_Times,
2532 'day-of-week|w' => \$Show_Day_Of_Week,
2533 'revisions|r' => \$Show_Revisions,
2534 'show-dead' => \$Show_Dead,
2535 'tags|t' => \$Show_Tags,
2536 'tagdates|T' => \$Show_Tag_Dates,
2537 'branches|b' => \$Show_Branches,
2538 'follow|F=s' => \@Follow_Branches,
2539 'follow-only=s' => \@Follow_Only,
2540 'xml-encoding=s' => \$XML_Encoding,
2541 'xml' => \$XML_Output,
2542 'noxmlns' => \$No_XML_Namespace,
2543 'no-xml-iso-date' => \$No_XML_ISO_Date,
2544 'no-ancestors' => \$No_Ancestors,
2545 'lines-modified' => \$Show_Lines_Modified,
2547 'no-indent' => sub {
2553 $After_Header = "\n\n"; # Summary implies --separate-header
2560 'no-hide-branch-additions' => sub {
2561 $Hide_Branch_Additions = 0;
2564 'no-common-dir' => sub {
2568 'ignore-tag=s' => sub {
2569 $ignore_tags{$_[1]} = 1;
2572 'show-tag=s' => sub {
2573 $show_tags{$_[1]} = 1;
2576 # Deliberately undocumented. This is not a public interface, and
2577 # may change/disappear at any time.
2578 'test-code=s' => \$TestCode,
2583 /^([A-Za-z][A-Za-z0-9_\-\]\[]*):([A-Za-z][A-Za-z0-9_\-\]\[]*)$/ ) {
2588 die "--delta FROM_TAG
:TO_TAG is what you meant to say.
\n";
2595 $No_Extra_Indent = 1;
2601 $ChangeLog_Header = &slurp_file ($narg);
2602 if (! defined ($ChangeLog_Header)) {
2603 $ChangeLog_Header = '';
2607 'global-opts|g=s' => sub {
2609 push @Global_Opts, $narg;
2610 splice @log_source_command, 1, 0, $narg;
2613 'log-opts|l=s' => sub {
2615 push @Local_Opts, $narg;
2616 push @log_source_command, $narg;
2619 'mailname=s' => sub {
2621 warn "--mailname is deprecated
; please use
--domain instead
\n";
2625 'separate-header|S' => sub {
2626 $After_Header = "\n\n";
2627 $No_Extra_Indent = 1;
2630 'group-within-date' => sub {
2631 $GroupWithinDate = 1;
2635 'hide-filenames' => sub {
2636 $Hide_Filenames = 1;
2640 or die "options parsing failed
\n";
2642 push @log_source_command, map "$_", @ARGV;
2644 ## Check for contradictions...
2646 if ($Output_To_Stdout && CVS::Utils::ChangeLog::FileEntry->distributed) {
2647 print STDERR "cannot pass both
--stdout and
--distributed\n";
2648 $exit_with_admonishment = 1;
2651 if ($Output_To_Stdout && $output_file) {
2652 print STDERR "cannot pass both
--stdout and
--file\n";
2653 $exit_with_admonishment = 1;
2656 if ($Input_From_Stdin && @Global_Opts) {
2657 print STDERR "cannot pass both
--stdin and
-g\n";
2658 $exit_with_admonishment = 1;
2661 if ($Input_From_Stdin && @Local_Opts) {
2662 print STDERR "cannot pass both
--stdin and
-l\n";
2663 $exit_with_admonishment = 1;
2666 if ($XML_Output && $Cumulative) {
2667 print STDERR "cannot pass both
--xml and
--accum\n";
2668 $exit_with_admonishment = 1;
2671 # Other consistency checks and option-driven logic
2673 # Bleargh. Compensate for a deficiency of custom wrapping.
2674 if ( ($After_Header ne " ") and $FSF_Style ) {
2675 $After_Header .= "\t";
2678 @Ignore_Files = map lc, @Ignore_Files
2679 if $Case_Insensitive;
2681 # Or if any other error message has already been printed out, we
2683 if ($exit_with_admonishment) {
2687 elsif ($Print_Usage) {
2691 elsif ($Print_Version) {
2696 ## Else no problems, so proceed.
2699 $Log_File_Name = $output_file;
2702 return \@log_source_command;
2705 # -------------------------------------
2708 my $filename = shift || die ("no filename passed to slurp_file
()");
2711 open (SLURPEE, "<${filename}") or die ("unable to open
$filename ($
!)");
2713 $retstr = <SLURPEE>;
2718 # -------------------------------------
2727 # -------------------------------------
2730 print "cvs2cl.pl version
${VERSION}; distributed under the GNU GPL.
\n";
2733 # -------------------------------------
2738 eval "use Pod
::Usage qw
( pod2usage
)";
2743 * Pod::Usage was not found. The formatting may be suboptimal. Consider
2744 upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and
2745 versions of perl prior to 5.6 are getting rather rusty, now. Alternatively,
2746 install Pod::Usage direct from CPAN.
2750 my $message = <DATA>;
2751 $message =~ s/^=(head1|item) //gm;
2752 $message =~ s/^=(over|back).*\n//gm;
2753 $message =~ s/\n{3,}/\n\n/g;
2757 pod2usage( -exitval => 'NOEXIT',
2759 -output => \*STDOUT,
2766 # Main -----------------------------------------------------------------------
2768 my $log_source_command = parse_options;
2769 if ( defined $TestCode ) {
2771 die "Eval failed
: '$@'\n"
2774 derive_changelog($log_source_command);
2781 cvs2cl_by_file.pl - convert cvs log messages to changelogs
2785 B<cvs2cl> [I<options>] [I<FILE1> [I<FILE2> ...]]
2786 cvs2cl_by_file.pl --delta bioperl-release-1-5-1:bioperl-release-1-5-2
2790 This is a quick hack version of cvs2cl.pl that simply outputs in a
2792 Only CVS::Utils::ChangeLog::EntrySet::Output::output_changelog
2796 cvs2cl_by_file.pl --delta bioperl-release-1-5-1:bioperl-release-1-5-2
2797 Generates a file called ChangeLog showing, per file, all the commit
2798 messages since tag bioperl-release-1-5-1 up to tag bioperl-release-1-5-2
2800 Original cvs2cl docs now follow, but some things may not work because of
2804 cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by
2805 running "cvs log
" and parsing the output. Duplicate log messages get
2806 unified in the Right Way.
2808 The default output of cvs2cl is designed to be compact, formally unambiguous,
2809 but still easy for humans to read. It should be largely self-explanatory; the
2810 one abbreviation that might not be obvious is "utags
". That stands for
2811 "universal tags
" -- a universal tag is one held by all the files in a given
2814 If you need output that's easy for a program to parse, use the B<--xml> option.
2815 Note that with XML output, just about all available information is included
2816 with each change entry, whether you asked for it or not, on the theory that
2817 your parser can ignore anything it's not looking for.
2819 If filenames are given as arguments cvs2cl only shows log information for the
2826 =item B<-h>, B<-help>, B<--help>, B<-?>
2828 Show a short help and exit.
2832 Show version and exit.
2834 =item B<-r>, B<--revisions>
2836 Show revision numbers in output.
2838 =item B<-b>, B<--branches>
2840 Show branch names in revisions when possible.
2842 =item B<-t>, B<--tags>
2844 Show tags (symbolic names) in output.
2846 =item B<-T>, B<--tagdates>
2848 Show tags in output on their first occurance.
2850 =item B<--show-dead>
2856 Read from stdin, don't run cvs log.
2860 Output to stdout not to ChangeLog.
2862 =item B<-d>, B<--distributed>
2864 Put ChangeLogs in subdirs.
2866 =item B<-f> I<FILE>, B<--file> I<FILE>
2868 Write to I<FILE> instead of ChangeLog.
2872 Use this if log data is in FSF ChangeLog style.
2876 Attempt strict FSF-standard compatible output.
2878 =item B<-W> I<SECS>, B<--window> I<SECS>
2880 Window of time within which log entries unify.
2882 =item -B<U> I<UFILE>, B<--usermap> I<UFILE>
2884 Expand usernames to email addresses from I<UFILE>.
2886 =item B<--passwd> I<PASSWORDFILE>
2888 Use system passwd file for user name expansion. If no mail domain is provided
2889 (via B<--domain>), it tries to read one from B</etc/mailname>, output of B<hostname
2890 -d>, B<dnsdomainname>, or B<domain-name>. cvs2cl exits with an error if none of
2891 those options is successful. Use a domain of '' to prevent the addition of a
2894 =item B<--domain> I<DOMAIN>
2896 Domain to build email addresses from.
2900 Get user information from GECOS data.
2902 =item B<-R> I<REGEXP>, B<--regexp> I<REGEXP>
2904 Include only entries that match I<REGEXP>. This option may be used multiple
2907 =item B<-I> I<REGEXP>, B<--ignore> I<REGEXP>
2909 Ignore files whose names match I<REGEXP>. This option may be used multiple
2910 times. The regexp is a perl regular expression. It is matched as is; you may
2911 want to prefix with a ^ or suffix with a $ to anchor the match.
2913 =item B<-C>, B<--case-insensitive>
2915 Any regexp matching is done case-insensitively.
2917 =item B<-F> I<BRANCH>, B<--follow> I<BRANCH>
2919 Show only revisions on or ancestral to I<BRANCH>.
2921 =item B<--follow-only> I<BRANCH>
2923 Like --follow, but sub-branches are not followed.
2925 =item B<--no-ancestors>
2927 When using B<-F>, only track changes since the I<BRANCH> started.
2929 =item B<--no-hide-branch-additions>
2931 By default, entries generated by cvs for a file added on a branch (a dead 1.1
2932 entry) are not shown. This flag reverses that action.
2934 =item B<-S>, B<--separate-header>
2936 Blank line between each header and log message.
2940 Add CVS change summary information.
2944 Don't auto-wrap log message (recommend B<-S> also).
2946 =item B<--no-indent>
2948 Don't indent log message
2950 =item B<--gmt>, B<--utc>
2952 Show times in GMT/UTC instead of local time.
2956 Add to an existing ChangeLog (incompatible with B<--xml>).
2958 =item B<-w>, B<--day-of-week>
2964 Don't show times in output.
2968 Output log in chronological order (default is reverse chronological order).
2970 =item B<--header> I<FILE>
2972 Get ChangeLog header from I<FILE> ("B
<->" means stdin).
2976 Output XML instead of ChangeLog format.
2978 =item B<--xml-encoding> I<ENCODING.>
2980 Insert encoding clause in XML header.
2984 Don't include xmlns= attribute in root element.
2986 =item B<--hide-filenames>
2988 Don't show filenames (ignored for XML output).
2990 =item B<--no-common-dir>
2992 Don't shorten directory names from filenames.
2994 =item B<--rcs> I<CVSROOT>
2996 Handle filenames from raw RCS, for instance those produced by "cvs rlog
"
2997 output, stripping the prefix I<CVSROOT>.
2999 =item B<-P>, B<--prune>
3001 Don't show empty log messages.
3003 =item B<--lines-modified>
3005 Output the number of lines added and the number of lines removed for
3006 each checkin (if applicable). At the moment, this only affects the
3009 =item B<--ignore-tag> I<TAG>
3011 Ignore individual changes that are associated with a given tag.
3012 May be repeated, if so, changes that are associated with any of
3013 the given tags are ignored.
3015 =item B<--show-tag> I<TAG>
3017 Log only individual changes that are associated with a given
3018 tag. May be repeated, if so, changes that are associated with
3019 any of the given tags are logged.
3021 =item B<--delta> I<FROM_TAG>B<:>I<TO_TAG>
3023 Attempt a delta between two tags (since I<FROM_TAG> up to and
3024 including I<TO_TAG>). The algorithm is a simple date-based one
3025 (this is a hard problem) so results are imperfect.
3027 =item B<-g> I<OPTS>, B<--global-opts> I<OPTS>
3029 Pass I<OPTS> to cvs like in "cvs I
<OPTS
> log ...
".
3031 =item B<-l> I<OPTS>, B<--log-opts> I<OPTS>
3033 Pass I<OPTS> to cvs log like in "cvs ... log I
<OPTS
>".
3037 Notes about the options and arguments:
3043 The B<-I> and B<-F> options may appear multiple times.
3047 To follow trunk revisions, use "B
<-F trunk
>" ("B
<-F TRUNK
>" also works). This is
3048 okay because no would ever, ever be crazy enough to name a branch "trunk
",
3053 For the B<-U> option, the I<UFILE> should be formatted like CVSROOT/users. That is,
3054 each line of I<UFILE> looks like this:
3056 jrandom:jrandom@red-bean.com
3058 or maybe even like this
3060 jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
3062 Don't forget to quote the portion after the colon if necessary.
3066 Many people want to filter by date. To do so, invoke cvs2cl.pl like this:
3068 cvs2cl.pl -l "-d'DATESPEC'"
3070 where DATESPEC is any date specification valid for "cvs log
-d". (Note that
3071 CVS 1.10.7 and below requires there be no space between -d and its argument).
3075 Dates/times are interpreted in the local time zone.
3079 Remember to quote the argument to `B<-l>' so that your shell doesn't interpret
3080 spaces as argument separators.
3084 See the 'Common Options' section of the cvs manual ('info cvs' on UNIX-like
3085 systems) for more information.
3089 Note that the rules for quoting under windows shells are different.
3093 To run in an automated environment such as CGI or PHP, suidperl may be needed
3094 in order to execute as the correct user to enable /cvsroot read lock files to
3095 be written for the 'cvs log' command. This is likely just a case of changing
3096 the /usr/bin/perl command to /usr/bin/suidperl, and explicitly declaring the
3103 Some examples (working on UNIX shells):
3105 # logs after 6th March, 2003 (inclusive)
3106 cvs2cl.pl -l "-d'>2003-03-06'"
3107 # logs after 4:34PM 6th March, 2003 (inclusive)
3108 cvs2cl.pl -l "-d'>2003-03-06 16:34'"
3109 # logs between 4:46PM 6th March, 2003 (exclusive) and
3110 # 4:34PM 6th March, 2003 (inclusive)
3111 cvs2cl.pl -l "-d'2003-03-06 16:46>2003-03-06 16:34'"
3113 Some examples (on non-UNIX shells):
3115 # Reported to work on windows xp/2000
3116 cvs2cl.pl -l "-d"">2003-10-18;today
<"""
3124 =item Melissa O'Neill
3126 =item Martyn J. Pearce
3138 =item Richard Broberg
3142 =item Oswald Buddenhagen
3146 =item Arthur de Jong
3148 =item Mark W. Eichin
3154 =item Simon Josefsson
3156 =item Robin Hugh Johnson
3168 =item Richard Laager
3172 =item Karl-Heinz Marbaise
3174 =item Mitsuaki Masuhara
3176 =item Henrik Nordstrom
3180 =item Peter Palfrader
3182 =item Thomas Parmelan
3184 =item Jordan Russell
3186 =item Jacek Sliwerski
3188 =item Johannes Stezenbach
3198 Please report bugs to C<bug-cvs2cl@red-bean.com>.
3200 =head1 PREREQUISITES
3202 This script requires C<Text::Wrap>, C<Time::Local>, and C<File::Basename>. It
3203 also seems to require C<Perl 5.004_04> or higher.
3205 =head1 OPERATING SYSTEM COMPATIBILITY
3207 Should work on any OS.
3209 =head1 SCRIPT CATEGORIES
3215 (C) 2001,2002,2003,2004 Martyn J. Pearce E<lt>fluffy@cpan.orgE<gt>, under the GNU GPL.
3217 (C) 1999 Karl Fogel E<lt>kfogel@red-bean.comE<gt>, under the GNU GPL.
3219 cvs2cl.pl is free software; you can redistribute it and/or modify
3220 it under the terms of the GNU General Public License as published by
3221 the Free Software Foundation; either version 2, or (at your option)
3224 cvs2cl.pl is distributed in the hope that it will be useful,
3225 but WITHOUT ANY WARRANTY; without even the implied warranty of
3226 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3227 GNU General Public License for more details.
3229 You may have received a copy of the GNU General Public License
3230 along with cvs2cl.pl; see the file COPYING. If not, write to the
3231 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
3232 Boston, MA 02111-1307, USA.