fix memory leak parsing core.commentchar
[git/git-svn.git] / git-cvsimport.perl
blob73d367cea8f130bc7721803e397df710e7cfff58
1 #!/usr/bin/perl
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to aggregate CVS check-ins into related changes.
7 # Fortunately, "cvsps" does that for us; all we have to do is to parse
8 # its output.
10 # Checking out the files is done by a single long-running CVS connection
11 # / server process.
13 # The head revision is on branch "origin" by default.
14 # You can change that with the '-o' option.
16 use 5.008;
17 use strict;
18 use warnings;
19 use Getopt::Long;
20 use File::Spec;
21 use File::Temp qw(tempfile tmpnam);
22 use File::Path qw(mkpath);
23 use File::Basename qw(basename dirname);
24 use Time::Local;
25 use IO::Socket;
26 use IO::Pipe;
27 use POSIX qw(strftime tzset dup2 ENOENT);
28 use IPC::Open2;
29 use Git qw(get_tz_offset);
31 $SIG{'PIPE'}="IGNORE";
32 set_timezone('UTC');
34 our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
35 my (%conv_author_name, %conv_author_email, %conv_author_tz);
37 sub usage(;$) {
38 my $msg = shift;
39 print(STDERR "Error: $msg\n") if $msg;
40 print STDERR <<END;
41 usage: git cvsimport # fetch/update GIT from CVS
42 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
43 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
44 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
45 [-r remote] [-R] [CVS_module]
46 END
47 exit(1);
50 sub read_author_info($) {
51 my ($file) = @_;
52 my $user;
53 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
55 while (<$f>) {
56 # Expected format is this:
57 # exon=Andreas Ericsson <ae@op5.se>
58 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
59 $user = $1;
60 $conv_author_name{$user} = $2;
61 $conv_author_email{$user} = $3;
63 # or with an optional timezone:
64 # spawn=Simon Pawn <spawn@frog-pond.org> America/Chicago
65 elsif (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*(\S+?)\s*$/) {
66 $user = $1;
67 $conv_author_name{$user} = $2;
68 $conv_author_email{$user} = $3;
69 $conv_author_tz{$user} = $4;
71 # However, we also read from CVSROOT/users format
72 # to ease migration.
73 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
74 my $mapped;
75 ($user, $mapped) = ($1, $3);
76 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
77 $conv_author_name{$user} = $1;
78 $conv_author_email{$user} = $2;
80 elsif ($mapped =~ /^<?(.*)>?$/) {
81 $conv_author_name{$user} = $user;
82 $conv_author_email{$user} = $1;
85 # NEEDSWORK: Maybe warn on unrecognized lines?
87 close ($f);
90 sub write_author_info($) {
91 my ($file) = @_;
92 open my $f, '>', $file or
93 die("Failed to open $file for writing: $!");
95 foreach (keys %conv_author_name) {
96 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>";
97 print $f " $conv_author_tz{$_}" if ($conv_author_tz{$_});
98 print $f "\n";
100 close ($f);
103 # Versions of perl before 5.10.0 may not automatically check $TZ each
104 # time localtime is run (most platforms will do so only the first time).
105 # We can work around this by using tzset() to update the internal
106 # variable whenever we change the environment.
107 sub set_timezone {
108 $ENV{TZ} = shift;
109 tzset();
112 # convert getopts specs for use by git config
113 my %longmap = (
114 'A:' => 'authors-file',
115 'M:' => 'merge-regex',
116 'P:' => undef,
117 'R' => 'track-revisions',
118 'S:' => 'ignore-paths',
121 sub read_repo_config {
122 # Split the string between characters, unless there is a ':'
123 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
124 my @opts = split(/ *(?!:)/, shift);
125 foreach my $o (@opts) {
126 my $key = $o;
127 $key =~ s/://g;
128 my $arg = 'git config';
129 $arg .= ' --bool' if ($o !~ /:$/);
130 my $ckey = $key;
132 if (exists $longmap{$o}) {
133 # An uppercase option like -R cannot be
134 # expressed in the configuration, as the
135 # variable names are downcased.
136 $ckey = $longmap{$o};
137 next if (! defined $ckey);
138 $ckey =~ s/-//g;
140 chomp(my $tmp = `$arg --get cvsimport.$ckey`);
141 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
142 no strict 'refs';
143 my $opt_name = "opt_" . $key;
144 if (!$$opt_name) {
145 $$opt_name = $tmp;
151 my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
152 read_repo_config($opts);
153 Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
155 # turn the Getopt::Std specification in a Getopt::Long one,
156 # with support for multiple -M options
157 GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
158 or usage();
159 usage if $opt_h;
161 if (@ARGV == 0) {
162 chomp(my $module = `git config --get cvsimport.module`);
163 push(@ARGV, $module) if $? == 0;
165 @ARGV <= 1 or usage("You can't specify more than one CVS module");
167 if ($opt_d) {
168 $ENV{"CVSROOT"} = $opt_d;
169 } elsif (-f 'CVS/Root') {
170 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
171 $opt_d = <$f>;
172 chomp $opt_d;
173 close $f;
174 $ENV{"CVSROOT"} = $opt_d;
175 } elsif ($ENV{"CVSROOT"}) {
176 $opt_d = $ENV{"CVSROOT"};
177 } else {
178 usage("CVSROOT needs to be set");
180 $opt_s ||= "-";
181 $opt_a ||= 0;
183 my $git_tree = $opt_C;
184 $git_tree ||= ".";
186 my $remote;
187 if (defined $opt_r) {
188 $remote = 'refs/remotes/' . $opt_r;
189 $opt_o ||= "master";
190 } else {
191 $opt_o ||= "origin";
192 $remote = 'refs/heads';
195 my $cvs_tree;
196 if ($#ARGV == 0) {
197 $cvs_tree = $ARGV[0];
198 } elsif (-f 'CVS/Repository') {
199 open my $f, '<', 'CVS/Repository' or
200 die 'Failed to open CVS/Repository';
201 $cvs_tree = <$f>;
202 chomp $cvs_tree;
203 close $f;
204 } else {
205 usage("CVS module has to be specified");
208 our @mergerx = ();
209 if ($opt_m) {
210 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
212 if (@opt_M) {
213 push (@mergerx, map { qr/$_/ } @opt_M);
216 # Remember UTC of our starting time
217 # we'll want to avoid importing commits
218 # that are too recent
219 our $starttime = time();
221 select(STDERR); $|=1; select(STDOUT);
224 package CVSconn;
225 # Basic CVS dialog.
226 # We're only interested in connecting and downloading, so ...
228 use File::Spec;
229 use File::Temp qw(tempfile);
230 use POSIX qw(strftime dup2);
232 sub new {
233 my ($what,$repo,$subdir) = @_;
234 $what=ref($what) if ref($what);
236 my $self = {};
237 $self->{'buffer'} = "";
238 bless($self,$what);
240 $repo =~ s#/+$##;
241 $self->{'fullrep'} = $repo;
242 $self->conn();
244 $self->{'subdir'} = $subdir;
245 $self->{'lines'} = undef;
247 return $self;
250 sub find_password_entry {
251 my ($cvspass, @cvsroot) = @_;
252 my ($file, $delim) = @$cvspass;
253 my $pass;
254 local ($_);
256 if (open(my $fh, $file)) {
257 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
258 CVSPASSFILE:
259 while (<$fh>) {
260 chomp;
261 s/^\/\d+\s+//;
262 my ($w, $p) = split($delim,$_,2);
263 for my $cvsroot (@cvsroot) {
264 if ($w eq $cvsroot) {
265 $pass = $p;
266 last CVSPASSFILE;
270 close($fh);
272 return $pass;
275 sub conn {
276 my $self = shift;
277 my $repo = $self->{'fullrep'};
278 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
279 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
281 my ($proxyhost,$proxyport);
282 if ($param && ($param =~ m/proxy=([^;]+)/)) {
283 $proxyhost = $1;
284 # Default proxyport, if not specified, is 8080.
285 $proxyport = 8080;
286 if ($ENV{"CVS_PROXY_PORT"}) {
287 $proxyport = $ENV{"CVS_PROXY_PORT"};
289 if ($param =~ m/proxyport=([^;]+)/) {
290 $proxyport = $1;
293 $repo ||= '/';
295 # if username is not explicit in CVSROOT, then use current user, as cvs would
296 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
297 my $rr2 = "-";
298 unless ($port) {
299 $rr2 = ":pserver:$user\@$serv:$repo";
300 $port=2401;
302 my $rr = ":pserver:$user\@$serv:$port$repo";
304 if ($pass) {
305 $pass = $self->_scramble($pass);
306 } else {
307 my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
308 [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
309 my @loc = ();
310 foreach my $cvspass (@cvspass) {
311 my $p = find_password_entry($cvspass, $rr, $rr2);
312 if ($p) {
313 push @loc, $cvspass->[0];
314 $pass = $p;
318 if (1 < @loc) {
319 die("Multiple cvs password files have ".
320 "entries for CVSROOT $opt_d: @loc");
321 } elsif (!$pass) {
322 $pass = "A";
326 my ($s, $rep);
327 if ($proxyhost) {
329 # Use a HTTP Proxy. Only works for HTTP proxies that
330 # don't require user authentication
332 # See: http://www.ietf.org/rfc/rfc2817.txt
334 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
335 die "Socket to $proxyhost: $!\n" unless defined $s;
336 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
337 or die "Write to $proxyhost: $!\n";
338 $s->flush();
340 $rep = <$s>;
342 # The answer should look like 'HTTP/1.x 2yy ....'
343 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
344 die "Proxy connect: $rep\n";
346 # Skip up to the empty line of the proxy server output
347 # including the response headers.
348 while ($rep = <$s>) {
349 last if (!defined $rep ||
350 $rep eq "\n" ||
351 $rep eq "\r\n");
353 } else {
354 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
355 die "Socket to $serv: $!\n" unless defined $s;
358 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
359 or die "Write to $serv: $!\n";
360 $s->flush();
362 $rep = <$s>;
364 if ($rep ne "I LOVE YOU\n") {
365 $rep="<unknown>" unless $rep;
366 die "AuthReply: $rep\n";
368 $self->{'socketo'} = $s;
369 $self->{'socketi'} = $s;
370 } else { # local or ext: Fork off our own cvs server.
371 my $pr = IO::Pipe->new();
372 my $pw = IO::Pipe->new();
373 my $pid = fork();
374 die "Fork: $!\n" unless defined $pid;
375 my $cvs = 'cvs';
376 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
377 my $rsh = 'rsh';
378 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
380 my @cvs = ($cvs, 'server');
381 my ($local, $user, $host);
382 $local = $repo =~ s/:local://;
383 if (!$local) {
384 $repo =~ s/:ext://;
385 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
386 ($user, $host) = ($1, $2);
388 if (!$local) {
389 if ($user) {
390 unshift @cvs, $rsh, '-l', $user, $host;
391 } else {
392 unshift @cvs, $rsh, $host;
396 unless ($pid) {
397 $pr->writer();
398 $pw->reader();
399 dup2($pw->fileno(),0);
400 dup2($pr->fileno(),1);
401 $pr->close();
402 $pw->close();
403 exec(@cvs);
405 $pw->writer();
406 $pr->reader();
407 $self->{'socketo'} = $pw;
408 $self->{'socketi'} = $pr;
410 $self->{'socketo'}->write("Root $repo\n");
412 # Trial and error says that this probably is the minimum set
413 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
415 $self->{'socketo'}->write("valid-requests\n");
416 $self->{'socketo'}->flush();
418 my $rep=$self->readline();
419 die "Failed to read from server" unless defined $rep;
420 chomp($rep);
421 if ($rep !~ s/^Valid-requests\s*//) {
422 $rep="<unknown>" unless $rep;
423 die "Expected Valid-requests from server, but got: $rep\n";
425 chomp(my $res=$self->readline());
426 die "validReply: $res\n" if $res ne "ok";
428 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
429 $self->{'repo'} = $repo;
432 sub readline {
433 my ($self) = @_;
434 return $self->{'socketi'}->getline();
437 sub _file {
438 # Request a file with a given revision.
439 # Trial and error says this is a good way to do it. :-/
440 my ($self,$fn,$rev) = @_;
441 $self->{'socketo'}->write("Argument -N\n") or return undef;
442 $self->{'socketo'}->write("Argument -P\n") or return undef;
443 # -kk: Linus' version doesn't use it - defaults to off
444 if ($opt_k) {
445 $self->{'socketo'}->write("Argument -kk\n") or return undef;
447 $self->{'socketo'}->write("Argument -r\n") or return undef;
448 $self->{'socketo'}->write("Argument $rev\n") or return undef;
449 $self->{'socketo'}->write("Argument --\n") or return undef;
450 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
451 $self->{'socketo'}->write("Directory .\n") or return undef;
452 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
453 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
454 $self->{'socketo'}->write("co\n") or return undef;
455 $self->{'socketo'}->flush() or return undef;
456 $self->{'lines'} = 0;
457 return 1;
459 sub _line {
460 # Read a line from the server.
461 # ... except that 'line' may be an entire file. ;-)
462 my ($self, $fh) = @_;
463 die "Not in lines" unless defined $self->{'lines'};
465 my $line;
466 my $res=0;
467 while (defined($line = $self->readline())) {
468 # M U gnupg-cvs-rep/AUTHORS
469 # Updated gnupg-cvs-rep/
470 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
471 # /AUTHORS/1.1///T1.1
472 # u=rw,g=rw,o=rw
474 # ok
476 if ($line =~ s/^(?:Created|Updated) //) {
477 $line = $self->readline(); # path
478 $line = $self->readline(); # Entries line
479 my $mode = $self->readline(); chomp $mode;
480 $self->{'mode'} = $mode;
481 defined (my $cnt = $self->readline())
482 or die "EOF from server after 'Changed'\n";
483 chomp $cnt;
484 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
485 $line="";
486 $res = $self->_fetchfile($fh, $cnt);
487 } elsif ($line =~ s/^ //) {
488 print $fh $line;
489 $res += length($line);
490 } elsif ($line =~ /^M\b/) {
491 # output, do nothing
492 } elsif ($line =~ /^Mbinary\b/) {
493 my $cnt;
494 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
495 chomp $cnt;
496 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
497 $line="";
498 $res += $self->_fetchfile($fh, $cnt);
499 } else {
500 chomp $line;
501 if ($line eq "ok") {
502 # print STDERR "S: ok (".length($res).")\n";
503 return $res;
504 } elsif ($line =~ s/^E //) {
505 # print STDERR "S: $line\n";
506 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
507 $line = $self->readline(); # filename
508 $line = $self->readline(); # OK
509 chomp $line;
510 die "Unknown: $line" if $line ne "ok";
511 return -1;
512 } else {
513 die "Unknown: $line\n";
517 return undef;
519 sub file {
520 my ($self,$fn,$rev) = @_;
521 my $res;
523 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
524 DIR => File::Spec->tmpdir(), UNLINK => 1);
526 $self->_file($fn,$rev) and $res = $self->_line($fh);
528 if (!defined $res) {
529 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
530 truncate $fh, 0;
531 $self->conn();
532 $self->_file($fn,$rev) or die "No file command send";
533 $res = $self->_line($fh);
534 die "Retry failed" unless defined $res;
536 close ($fh);
538 return ($name, $res);
540 sub _fetchfile {
541 my ($self, $fh, $cnt) = @_;
542 my $res = 0;
543 my $bufsize = 1024 * 1024;
544 while ($cnt) {
545 if ($bufsize > $cnt) {
546 $bufsize = $cnt;
548 my $buf;
549 my $num = $self->{'socketi'}->read($buf,$bufsize);
550 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
551 print $fh $buf;
552 $res += $num;
553 $cnt -= $num;
555 return $res;
558 sub _scramble {
559 my ($self, $pass) = @_;
560 my $scrambled = "A";
562 return $scrambled unless $pass;
564 my $pass_len = length($pass);
565 my @pass_arr = split("", $pass);
566 my $i;
568 # from cvs/src/scramble.c
569 my @shifts = (
570 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
571 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
572 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
573 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
574 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
575 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
576 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
577 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
578 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
579 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
580 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
581 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
582 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
583 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
584 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
585 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
588 for ($i = 0; $i < $pass_len; $i++) {
589 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
592 return $scrambled;
595 package main;
597 my $cvs = CVSconn->new($opt_d, $cvs_tree);
600 sub pdate($) {
601 my ($d) = @_;
602 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
603 or die "Unparseable date: $d\n";
604 my $y=$1; $y-=1900 if $y>1900;
605 return timegm($6||0,$5,$4,$3,$2-1,$y);
608 sub pmode($) {
609 my ($mode) = @_;
610 my $m = 0;
611 my $mm = 0;
612 my $um = 0;
613 for my $x(split(//,$mode)) {
614 if ($x eq ",") {
615 $m |= $mm&$um;
616 $mm = 0;
617 $um = 0;
618 } elsif ($x eq "u") { $um |= 0700;
619 } elsif ($x eq "g") { $um |= 0070;
620 } elsif ($x eq "o") { $um |= 0007;
621 } elsif ($x eq "r") { $mm |= 0444;
622 } elsif ($x eq "w") { $mm |= 0222;
623 } elsif ($x eq "x") { $mm |= 0111;
624 } elsif ($x eq "=") { # do nothing
625 } else { die "Unknown mode: $mode\n";
628 $m |= $mm&$um;
629 return $m;
632 sub getwd() {
633 my $pwd = `pwd`;
634 chomp $pwd;
635 return $pwd;
638 sub is_sha1 {
639 my $s = shift;
640 return $s =~ /^[a-f0-9]{40}$/;
643 sub get_headref ($) {
644 my $name = shift;
645 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
646 return undef unless $? == 0;
647 chomp $r;
648 return $r;
651 my $user_filename_prepend = '';
652 sub munge_user_filename {
653 my $name = shift;
654 return File::Spec->file_name_is_absolute($name) ?
655 $name :
656 $user_filename_prepend . $name;
659 -d $git_tree
660 or mkdir($git_tree,0777)
661 or die "Could not create $git_tree: $!";
662 if ($git_tree ne '.') {
663 $user_filename_prepend = getwd() . '/';
664 chdir($git_tree);
667 my $last_branch = "";
668 my $orig_branch = "";
669 my %branch_date;
670 my $tip_at_start = undef;
672 my $git_dir = $ENV{"GIT_DIR"} || ".git";
673 $git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
674 $ENV{"GIT_DIR"} = $git_dir;
675 my $orig_git_index;
676 $orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
678 my %index; # holds filenames of one index per branch
680 unless (-d $git_dir) {
681 system(qw(git init));
682 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
683 system(qw(git read-tree --empty));
684 die "Cannot init an empty tree: $?\n" if $?;
686 $last_branch = $opt_o;
687 $orig_branch = "";
688 } else {
689 open(F, "-|", qw(git symbolic-ref HEAD)) or
690 die "Cannot run git symbolic-ref: $!\n";
691 chomp ($last_branch = <F>);
692 $last_branch = basename($last_branch);
693 close(F);
694 unless ($last_branch) {
695 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
696 $last_branch = "master";
698 $orig_branch = $last_branch;
699 $tip_at_start = `git rev-parse --verify HEAD`;
701 # Get the last import timestamps
702 my $fmt = '($ref, $author) = (%(refname), %(author));';
703 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
704 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
705 while (defined(my $entry = <H>)) {
706 my ($ref, $author);
707 eval($entry) || die "cannot eval refs list: $@";
708 my ($head) = ($ref =~ m|^$remote/(.*)|);
709 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
710 $branch_date{$head} = $1;
712 close(H);
713 if (!exists $branch_date{$opt_o}) {
714 die "Branch '$opt_o' does not exist.\n".
715 "Either use the correct '-o branch' option,\n".
716 "or import to a new repository.\n";
720 -d $git_dir
721 or die "Could not create git subdir ($git_dir).\n";
723 # now we read (and possibly save) author-info as well
724 -f "$git_dir/cvs-authors" and
725 read_author_info("$git_dir/cvs-authors");
726 if ($opt_A) {
727 read_author_info(munge_user_filename($opt_A));
728 write_author_info("$git_dir/cvs-authors");
731 # open .git/cvs-revisions, if requested
732 open my $revision_map, '>>', "$git_dir/cvs-revisions"
733 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
734 if defined $opt_R;
738 # run cvsps into a file unless we are getting
739 # it passed as a file via $opt_P
741 my $cvspsfile;
742 unless ($opt_P) {
743 print "Running cvsps...\n" if $opt_v;
744 my $pid = open(CVSPS,"-|");
745 my $cvspsfh;
746 die "Cannot fork: $!\n" unless defined $pid;
747 unless ($pid) {
748 my @opt;
749 @opt = split(/,/,$opt_p) if defined $opt_p;
750 unshift @opt, '-z', $opt_z if defined $opt_z;
751 unshift @opt, '-q' unless defined $opt_v;
752 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
753 push @opt, '--cvs-direct';
755 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
756 die "Could not start cvsps: $!\n";
758 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
759 DIR => File::Spec->tmpdir());
760 while (<CVSPS>) {
761 print $cvspsfh $_;
763 close CVSPS;
764 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
765 close $cvspsfh;
766 } else {
767 $cvspsfile = munge_user_filename($opt_P);
770 open(CVS, "<$cvspsfile") or die $!;
772 ## cvsps output:
773 #---------------------
774 #PatchSet 314
775 #Date: 1999/09/18 13:03:59
776 #Author: wkoch
777 #Branch: STABLE-BRANCH-1-0
778 #Ancestor branch: HEAD
779 #Tag: (none)
780 #Log:
781 # See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
782 #Members:
783 # README:1.57->1.57.2.1
784 # VERSION:1.96->1.96.2.1
786 #---------------------
788 my $state = 0;
790 sub update_index (\@\@) {
791 my $old = shift;
792 my $new = shift;
793 open(my $fh, '|-', qw(git update-index -z --index-info))
794 or die "unable to open git update-index: $!";
795 print $fh
796 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
797 @$old),
798 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
799 @$new)
800 or die "unable to write to git update-index: $!";
801 close $fh
802 or die "unable to write to git update-index: $!";
803 $? and die "git update-index reported error: $?";
806 sub write_tree () {
807 open(my $fh, '-|', qw(git write-tree))
808 or die "unable to open git write-tree: $!";
809 chomp(my $tree = <$fh>);
810 is_sha1($tree)
811 or die "Cannot get tree id ($tree): $!";
812 close($fh)
813 or die "Error running git write-tree: $?\n";
814 print "Tree ID $tree\n" if $opt_v;
815 return $tree;
818 my ($patchset,$date,$author_name,$author_email,$author_tz,$branch,$ancestor,$tag,$logmsg);
819 my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
821 # commits that cvsps cannot place anywhere...
822 $ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
824 sub commit {
825 if ($branch eq $opt_o && !$index{branch} &&
826 !get_headref("$remote/$branch")) {
827 # looks like an initial commit
828 # use the index primed by git init
829 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
830 $index{$branch} = "$git_dir/index";
831 } else {
832 # use an index per branch to speed up
833 # imports of projects with many branches
834 unless ($index{$branch}) {
835 $index{$branch} = tmpnam();
836 $ENV{GIT_INDEX_FILE} = $index{$branch};
837 if ($ancestor) {
838 system("git", "read-tree", "$remote/$ancestor");
839 } else {
840 system("git", "read-tree", "$remote/$branch");
842 die "read-tree failed: $?\n" if $?;
845 $ENV{GIT_INDEX_FILE} = $index{$branch};
847 update_index(@old, @new);
848 @old = @new = ();
849 my $tree = write_tree();
850 my $parent = get_headref("$remote/$last_branch");
851 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
853 my @commit_args;
854 push @commit_args, ("-p", $parent) if $parent;
856 # loose detection of merges
857 # based on the commit msg
858 foreach my $rx (@mergerx) {
859 next unless $logmsg =~ $rx && $1;
860 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
861 if (my $sha1 = get_headref("$remote/$mparent")) {
862 push @commit_args, '-p', "$remote/$mparent";
863 print "Merge parent branch: $mparent\n" if $opt_v;
867 set_timezone($author_tz);
868 # $date is in the seconds since epoch format
869 my $tz_offset = get_tz_offset($date);
870 my $commit_date = "$date $tz_offset";
871 set_timezone('UTC');
872 $ENV{GIT_AUTHOR_NAME} = $author_name;
873 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
874 $ENV{GIT_AUTHOR_DATE} = $commit_date;
875 $ENV{GIT_COMMITTER_NAME} = $author_name;
876 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
877 $ENV{GIT_COMMITTER_DATE} = $commit_date;
878 my $pid = open2(my $commit_read, my $commit_write,
879 'git', 'commit-tree', $tree, @commit_args);
881 # compatibility with git2cvs
882 substr($logmsg,32767) = "" if length($logmsg) > 32767;
883 $logmsg =~ s/[\s\n]+\z//;
885 if (@skipped) {
886 $logmsg .= "\n\n\nSKIPPED:\n\t";
887 $logmsg .= join("\n\t", @skipped) . "\n";
888 @skipped = ();
891 print($commit_write "$logmsg\n") && close($commit_write)
892 or die "Error writing to git commit-tree: $!\n";
894 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
895 chomp(my $cid = <$commit_read>);
896 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
897 print "Commit ID $cid\n" if $opt_v;
898 close($commit_read);
900 waitpid($pid,0);
901 die "Error running git commit-tree: $?\n" if $?;
903 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
904 or die "Cannot write branch $branch for update: $!\n";
906 if ($revision_map) {
907 print $revision_map "@$_ $cid\n" for @commit_revisions;
909 @commit_revisions = ();
911 if ($tag) {
912 my ($xtag) = $tag;
913 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
914 $xtag =~ tr/_/\./ if ( $opt_u );
915 $xtag =~ s/[\/]/$opt_s/g;
917 # See refs.c for these rules.
918 # Tag cannot contain bad chars. (See bad_ref_char in refs.c.)
919 $xtag =~ s/[ ~\^:\\\*\?\[]//g;
920 # Other bad strings for tags:
921 # (See check_refname_component in refs.c.)
922 1 while $xtag =~ s/
923 (?: \.\. # Tag cannot contain '..'.
924 | \@{ # Tag cannot contain '@{'.
925 | ^ - # Tag cannot begin with '-'.
926 | \.lock $ # Tag cannot end with '.lock'.
927 | ^ \. # Tag cannot begin...
928 | \. $ # ...or end with '.'
929 )//xg;
930 # Tag cannot be empty.
931 if ($xtag eq '') {
932 warn("warning: ignoring tag '$tag'",
933 " with invalid tagname\n");
934 return;
937 if (system('git' , 'tag', '-f', $xtag, $cid) != 0) {
938 # We did our best to sanitize the tag, but still failed
939 # for whatever reason. Bail out, and give the user
940 # enough information to understand if/how we should
941 # improve the translation in the future.
942 if ($tag ne $xtag) {
943 print "Translated '$tag' tag to '$xtag'\n";
945 die "Cannot create tag $xtag: $!\n";
948 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
952 my $commitcount = 1;
953 while (<CVS>) {
954 chomp;
955 if ($state == 0 and /^-+$/) {
956 $state = 1;
957 } elsif ($state == 0) {
958 $state = 1;
959 redo;
960 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
961 $patchset = 0+$_;
962 $state=2;
963 } elsif ($state == 2 and s/^Date:\s+//) {
964 $date = pdate($_);
965 unless ($date) {
966 print STDERR "Could not parse date: $_\n";
967 $state=0;
968 next;
970 $state=3;
971 } elsif ($state == 3 and s/^Author:\s+//) {
972 $author_tz = "UTC";
973 s/\s+$//;
974 if (/^(.*?)\s+<(.*)>/) {
975 ($author_name, $author_email) = ($1, $2);
976 } elsif ($conv_author_name{$_}) {
977 $author_name = $conv_author_name{$_};
978 $author_email = $conv_author_email{$_};
979 $author_tz = $conv_author_tz{$_} if ($conv_author_tz{$_});
980 } else {
981 $author_name = $author_email = $_;
983 $state = 4;
984 } elsif ($state == 4 and s/^Branch:\s+//) {
985 s/\s+$//;
986 tr/_/\./ if ( $opt_u );
987 s/[\/]/$opt_s/g;
988 $branch = $_;
989 $state = 5;
990 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
991 s/\s+$//;
992 $ancestor = $_;
993 $ancestor = $opt_o if $ancestor eq "HEAD";
994 $state = 6;
995 } elsif ($state == 5) {
996 $ancestor = undef;
997 $state = 6;
998 redo;
999 } elsif ($state == 6 and s/^Tag:\s+//) {
1000 s/\s+$//;
1001 if ($_ eq "(none)") {
1002 $tag = undef;
1003 } else {
1004 $tag = $_;
1006 $state = 7;
1007 } elsif ($state == 7 and /^Log:/) {
1008 $logmsg = "";
1009 $state = 8;
1010 } elsif ($state == 8 and /^Members:/) {
1011 $branch = $opt_o if $branch eq "HEAD";
1012 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
1013 # skip
1014 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
1015 $state = 11;
1016 next;
1018 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
1019 # skip if the commit is too recent
1020 # given that the cvsps default fuzz is 300s, we give ourselves another
1021 # 300s just in case -- this also prevents skipping commits
1022 # due to server clock drift
1023 print "skip patchset $patchset: $date too recent\n" if $opt_v;
1024 $state = 11;
1025 next;
1027 if (exists $ignorebranch{$branch}) {
1028 print STDERR "Skipping $branch\n";
1029 $state = 11;
1030 next;
1032 if ($ancestor) {
1033 if ($ancestor eq $branch) {
1034 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
1035 $ancestor = $opt_o;
1037 if (defined get_headref("$remote/$branch")) {
1038 print STDERR "Branch $branch already exists!\n";
1039 $state=11;
1040 next;
1042 my $id = get_headref("$remote/$ancestor");
1043 if (!$id) {
1044 print STDERR "Branch $ancestor does not exist!\n";
1045 $ignorebranch{$branch} = 1;
1046 $state=11;
1047 next;
1050 system(qw(git update-ref -m cvsimport),
1051 "$remote/$branch", $id);
1052 if($? != 0) {
1053 print STDERR "Could not create branch $branch\n";
1054 $ignorebranch{$branch} = 1;
1055 $state=11;
1056 next;
1059 $last_branch = $branch if $branch ne $last_branch;
1060 $state = 9;
1061 } elsif ($state == 8) {
1062 $logmsg .= "$_\n";
1063 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1064 # VERSION:1.96->1.96.2.1
1065 my $init = ($2 eq "INITIAL");
1066 my $fn = $1;
1067 my $rev = $3;
1068 $fn =~ s#^/+##;
1069 if ($opt_S && $fn =~ m/$opt_S/) {
1070 print "SKIPPING $fn v $rev\n";
1071 push(@skipped, $fn);
1072 next;
1074 push @commit_revisions, [$fn, $rev];
1075 print "Fetching $fn v $rev\n" if $opt_v;
1076 my ($tmpname, $size) = $cvs->file($fn,$rev);
1077 if ($size == -1) {
1078 push(@old,$fn);
1079 print "Drop $fn\n" if $opt_v;
1080 } else {
1081 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1082 my $pid = open(my $F, '-|');
1083 die $! unless defined $pid;
1084 if (!$pid) {
1085 exec("git", "hash-object", "-w", $tmpname)
1086 or die "Cannot create object: $!\n";
1088 my $sha = <$F>;
1089 chomp $sha;
1090 close $F;
1091 my $mode = pmode($cvs->{'mode'});
1092 push(@new,[$mode, $sha, $fn]); # may be resurrected!
1094 unlink($tmpname);
1095 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1096 my $fn = $1;
1097 my $rev = $2;
1098 $fn =~ s#^/+##;
1099 push @commit_revisions, [$fn, $rev];
1100 push(@old,$fn);
1101 print "Delete $fn\n" if $opt_v;
1102 } elsif ($state == 9 and /^\s*$/) {
1103 $state = 10;
1104 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1105 $commitcount++;
1106 if ($opt_L && $commitcount > $opt_L) {
1107 last;
1109 commit();
1110 if (($commitcount & 1023) == 0) {
1111 system(qw(git repack -a -d));
1113 $state = 1;
1114 } elsif ($state == 11 and /^-+$/) {
1115 $state = 1;
1116 } elsif (/^-+$/) { # end of unknown-line processing
1117 $state = 1;
1118 } elsif ($state != 11) { # ignore stuff when skipping
1119 print STDERR "* UNKNOWN LINE * $_\n";
1122 commit() if $branch and $state != 11;
1124 unless ($opt_P) {
1125 unlink($cvspsfile);
1128 # The heuristic of repacking every 1024 commits can leave a
1129 # lot of unpacked data. If there is more than 1MB worth of
1130 # not-packed objects, repack once more.
1131 my $line = `git count-objects`;
1132 if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1133 my ($n_objects, $kb) = ($1, $2);
1134 1024 < $kb
1135 and system(qw(git repack -a -d));
1138 foreach my $git_index (values %index) {
1139 if ($git_index ne "$git_dir/index") {
1140 unlink($git_index);
1144 if (defined $orig_git_index) {
1145 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1146 } else {
1147 delete $ENV{GIT_INDEX_FILE};
1150 # Now switch back to the branch we were in before all of this happened
1151 if ($orig_branch) {
1152 print "DONE.\n" if $opt_v;
1153 if ($opt_i) {
1154 exit 0;
1156 my $tip_at_end = `git rev-parse --verify HEAD`;
1157 if ($tip_at_start ne $tip_at_end) {
1158 for ($tip_at_start, $tip_at_end) { chomp; }
1159 print "Fetched into the current branch.\n" if $opt_v;
1160 system(qw(git read-tree -u -m),
1161 $tip_at_start, $tip_at_end);
1162 die "Fast-forward update failed: $?\n" if $?;
1164 else {
1165 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1166 die "Could not merge $opt_o into the current branch.\n" if $?;
1168 } else {
1169 $orig_branch = "master";
1170 print "DONE; creating $orig_branch branch\n" if $opt_v;
1171 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1172 unless defined get_headref('refs/heads/master');
1173 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1174 if ($opt_r && $opt_o ne 'HEAD');
1175 system('git', 'update-ref', 'HEAD', "$orig_branch");
1176 unless ($opt_i) {
1177 system(qw(git checkout -f));
1178 die "checkout failed: $?\n" if $?;