Merge branch 'jc/run-receive-hook-cleanup' into maint
[git/mjg.git] / git-cvsimport.perl
blob8d41610bcf260545bcc5b6a8c30b43d427c70491
1 #!/usr/bin/perl
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to aggregate CVS check-ins into related changes.
7 # Fortunately, "cvsps" does that for us; all we have to do is to parse
8 # its output.
10 # Checking out the files is done by a single long-running CVS connection
11 # / server process.
13 # The head revision is on branch "origin" by default.
14 # You can change that with the '-o' option.
16 use 5.008;
17 use strict;
18 use warnings;
19 use Getopt::Long;
20 use File::Spec;
21 use File::Temp qw(tempfile tmpnam);
22 use File::Path qw(mkpath);
23 use File::Basename qw(basename dirname);
24 use Time::Local;
25 use IO::Socket;
26 use IO::Pipe;
27 use POSIX qw(strftime dup2 ENOENT);
28 use IPC::Open2;
30 $SIG{'PIPE'}="IGNORE";
31 $ENV{'TZ'}="UTC";
33 our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
34 my (%conv_author_name, %conv_author_email);
36 sub usage(;$) {
37 my $msg = shift;
38 print(STDERR "Error: $msg\n") if $msg;
39 print STDERR <<END;
40 Usage: git cvsimport # fetch/update GIT from CVS
41 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
42 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
43 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
44 [-r remote] [-R] [CVS_module]
45 END
46 exit(1);
49 sub read_author_info($) {
50 my ($file) = @_;
51 my $user;
52 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
54 while (<$f>) {
55 # Expected format is this:
56 # exon=Andreas Ericsson <ae@op5.se>
57 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
58 $user = $1;
59 $conv_author_name{$user} = $2;
60 $conv_author_email{$user} = $3;
62 # However, we also read from CVSROOT/users format
63 # to ease migration.
64 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
65 my $mapped;
66 ($user, $mapped) = ($1, $3);
67 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
68 $conv_author_name{$user} = $1;
69 $conv_author_email{$user} = $2;
71 elsif ($mapped =~ /^<?(.*)>?$/) {
72 $conv_author_name{$user} = $user;
73 $conv_author_email{$user} = $1;
76 # NEEDSWORK: Maybe warn on unrecognized lines?
78 close ($f);
81 sub write_author_info($) {
82 my ($file) = @_;
83 open my $f, '>', $file or
84 die("Failed to open $file for writing: $!");
86 foreach (keys %conv_author_name) {
87 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
89 close ($f);
92 # convert getopts specs for use by git config
93 my %longmap = (
94 'A:' => 'authors-file',
95 'M:' => 'merge-regex',
96 'P:' => undef,
97 'R' => 'track-revisions',
98 'S:' => 'ignore-paths',
101 sub read_repo_config {
102 # Split the string between characters, unless there is a ':'
103 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
104 my @opts = split(/ *(?!:)/, shift);
105 foreach my $o (@opts) {
106 my $key = $o;
107 $key =~ s/://g;
108 my $arg = 'git config';
109 $arg .= ' --bool' if ($o !~ /:$/);
110 my $ckey = $key;
112 if (exists $longmap{$o}) {
113 # An uppercase option like -R cannot be
114 # expressed in the configuration, as the
115 # variable names are downcased.
116 $ckey = $longmap{$o};
117 next if (! defined $ckey);
118 $ckey =~ s/-//g;
120 chomp(my $tmp = `$arg --get cvsimport.$ckey`);
121 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
122 no strict 'refs';
123 my $opt_name = "opt_" . $key;
124 if (!$$opt_name) {
125 $$opt_name = $tmp;
131 my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
132 read_repo_config($opts);
133 Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
135 # turn the Getopt::Std specification in a Getopt::Long one,
136 # with support for multiple -M options
137 GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
138 or usage();
139 usage if $opt_h;
141 if (@ARGV == 0) {
142 chomp(my $module = `git config --get cvsimport.module`);
143 push(@ARGV, $module) if $? == 0;
145 @ARGV <= 1 or usage("You can't specify more than one CVS module");
147 if ($opt_d) {
148 $ENV{"CVSROOT"} = $opt_d;
149 } elsif (-f 'CVS/Root') {
150 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
151 $opt_d = <$f>;
152 chomp $opt_d;
153 close $f;
154 $ENV{"CVSROOT"} = $opt_d;
155 } elsif ($ENV{"CVSROOT"}) {
156 $opt_d = $ENV{"CVSROOT"};
157 } else {
158 usage("CVSROOT needs to be set");
160 $opt_s ||= "-";
161 $opt_a ||= 0;
163 my $git_tree = $opt_C;
164 $git_tree ||= ".";
166 my $remote;
167 if (defined $opt_r) {
168 $remote = 'refs/remotes/' . $opt_r;
169 $opt_o ||= "master";
170 } else {
171 $opt_o ||= "origin";
172 $remote = 'refs/heads';
175 my $cvs_tree;
176 if ($#ARGV == 0) {
177 $cvs_tree = $ARGV[0];
178 } elsif (-f 'CVS/Repository') {
179 open my $f, '<', 'CVS/Repository' or
180 die 'Failed to open CVS/Repository';
181 $cvs_tree = <$f>;
182 chomp $cvs_tree;
183 close $f;
184 } else {
185 usage("CVS module has to be specified");
188 our @mergerx = ();
189 if ($opt_m) {
190 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
192 if (@opt_M) {
193 push (@mergerx, map { qr/$_/ } @opt_M);
196 # Remember UTC of our starting time
197 # we'll want to avoid importing commits
198 # that are too recent
199 our $starttime = time();
201 select(STDERR); $|=1; select(STDOUT);
204 package CVSconn;
205 # Basic CVS dialog.
206 # We're only interested in connecting and downloading, so ...
208 use File::Spec;
209 use File::Temp qw(tempfile);
210 use POSIX qw(strftime dup2);
212 sub new {
213 my ($what,$repo,$subdir) = @_;
214 $what=ref($what) if ref($what);
216 my $self = {};
217 $self->{'buffer'} = "";
218 bless($self,$what);
220 $repo =~ s#/+$##;
221 $self->{'fullrep'} = $repo;
222 $self->conn();
224 $self->{'subdir'} = $subdir;
225 $self->{'lines'} = undef;
227 return $self;
230 sub find_password_entry {
231 my ($cvspass, @cvsroot) = @_;
232 my ($file, $delim) = @$cvspass;
233 my $pass;
234 local ($_);
236 if (open(my $fh, $file)) {
237 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
238 CVSPASSFILE:
239 while (<$fh>) {
240 chomp;
241 s/^\/\d+\s+//;
242 my ($w, $p) = split($delim,$_,2);
243 for my $cvsroot (@cvsroot) {
244 if ($w eq $cvsroot) {
245 $pass = $p;
246 last CVSPASSFILE;
250 close($fh);
252 return $pass;
255 sub conn {
256 my $self = shift;
257 my $repo = $self->{'fullrep'};
258 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
259 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
261 my ($proxyhost,$proxyport);
262 if ($param && ($param =~ m/proxy=([^;]+)/)) {
263 $proxyhost = $1;
264 # Default proxyport, if not specified, is 8080.
265 $proxyport = 8080;
266 if ($ENV{"CVS_PROXY_PORT"}) {
267 $proxyport = $ENV{"CVS_PROXY_PORT"};
269 if ($param =~ m/proxyport=([^;]+)/) {
270 $proxyport = $1;
273 $repo ||= '/';
275 # if username is not explicit in CVSROOT, then use current user, as cvs would
276 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
277 my $rr2 = "-";
278 unless ($port) {
279 $rr2 = ":pserver:$user\@$serv:$repo";
280 $port=2401;
282 my $rr = ":pserver:$user\@$serv:$port$repo";
284 if ($pass) {
285 $pass = $self->_scramble($pass);
286 } else {
287 my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
288 [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
289 my @loc = ();
290 foreach my $cvspass (@cvspass) {
291 my $p = find_password_entry($cvspass, $rr, $rr2);
292 if ($p) {
293 push @loc, $cvspass->[0];
294 $pass = $p;
298 if (1 < @loc) {
299 die("Multiple cvs password files have ".
300 "entries for CVSROOT $opt_d: @loc");
301 } elsif (!$pass) {
302 $pass = "A";
306 my ($s, $rep);
307 if ($proxyhost) {
309 # Use a HTTP Proxy. Only works for HTTP proxies that
310 # don't require user authentication
312 # See: http://www.ietf.org/rfc/rfc2817.txt
314 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
315 die "Socket to $proxyhost: $!\n" unless defined $s;
316 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
317 or die "Write to $proxyhost: $!\n";
318 $s->flush();
320 $rep = <$s>;
322 # The answer should look like 'HTTP/1.x 2yy ....'
323 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
324 die "Proxy connect: $rep\n";
326 # Skip up to the empty line of the proxy server output
327 # including the response headers.
328 while ($rep = <$s>) {
329 last if (!defined $rep ||
330 $rep eq "\n" ||
331 $rep eq "\r\n");
333 } else {
334 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
335 die "Socket to $serv: $!\n" unless defined $s;
338 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
339 or die "Write to $serv: $!\n";
340 $s->flush();
342 $rep = <$s>;
344 if ($rep ne "I LOVE YOU\n") {
345 $rep="<unknown>" unless $rep;
346 die "AuthReply: $rep\n";
348 $self->{'socketo'} = $s;
349 $self->{'socketi'} = $s;
350 } else { # local or ext: Fork off our own cvs server.
351 my $pr = IO::Pipe->new();
352 my $pw = IO::Pipe->new();
353 my $pid = fork();
354 die "Fork: $!\n" unless defined $pid;
355 my $cvs = 'cvs';
356 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
357 my $rsh = 'rsh';
358 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
360 my @cvs = ($cvs, 'server');
361 my ($local, $user, $host);
362 $local = $repo =~ s/:local://;
363 if (!$local) {
364 $repo =~ s/:ext://;
365 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
366 ($user, $host) = ($1, $2);
368 if (!$local) {
369 if ($user) {
370 unshift @cvs, $rsh, '-l', $user, $host;
371 } else {
372 unshift @cvs, $rsh, $host;
376 unless ($pid) {
377 $pr->writer();
378 $pw->reader();
379 dup2($pw->fileno(),0);
380 dup2($pr->fileno(),1);
381 $pr->close();
382 $pw->close();
383 exec(@cvs);
385 $pw->writer();
386 $pr->reader();
387 $self->{'socketo'} = $pw;
388 $self->{'socketi'} = $pr;
390 $self->{'socketo'}->write("Root $repo\n");
392 # Trial and error says that this probably is the minimum set
393 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
395 $self->{'socketo'}->write("valid-requests\n");
396 $self->{'socketo'}->flush();
398 my $rep=$self->readline();
399 die "Failed to read from server" unless defined $rep;
400 chomp($rep);
401 if ($rep !~ s/^Valid-requests\s*//) {
402 $rep="<unknown>" unless $rep;
403 die "Expected Valid-requests from server, but got: $rep\n";
405 chomp(my $res=$self->readline());
406 die "validReply: $res\n" if $res ne "ok";
408 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
409 $self->{'repo'} = $repo;
412 sub readline {
413 my ($self) = @_;
414 return $self->{'socketi'}->getline();
417 sub _file {
418 # Request a file with a given revision.
419 # Trial and error says this is a good way to do it. :-/
420 my ($self,$fn,$rev) = @_;
421 $self->{'socketo'}->write("Argument -N\n") or return undef;
422 $self->{'socketo'}->write("Argument -P\n") or return undef;
423 # -kk: Linus' version doesn't use it - defaults to off
424 if ($opt_k) {
425 $self->{'socketo'}->write("Argument -kk\n") or return undef;
427 $self->{'socketo'}->write("Argument -r\n") or return undef;
428 $self->{'socketo'}->write("Argument $rev\n") or return undef;
429 $self->{'socketo'}->write("Argument --\n") or return undef;
430 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
431 $self->{'socketo'}->write("Directory .\n") or return undef;
432 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
433 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
434 $self->{'socketo'}->write("co\n") or return undef;
435 $self->{'socketo'}->flush() or return undef;
436 $self->{'lines'} = 0;
437 return 1;
439 sub _line {
440 # Read a line from the server.
441 # ... except that 'line' may be an entire file. ;-)
442 my ($self, $fh) = @_;
443 die "Not in lines" unless defined $self->{'lines'};
445 my $line;
446 my $res=0;
447 while (defined($line = $self->readline())) {
448 # M U gnupg-cvs-rep/AUTHORS
449 # Updated gnupg-cvs-rep/
450 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
451 # /AUTHORS/1.1///T1.1
452 # u=rw,g=rw,o=rw
454 # ok
456 if ($line =~ s/^(?:Created|Updated) //) {
457 $line = $self->readline(); # path
458 $line = $self->readline(); # Entries line
459 my $mode = $self->readline(); chomp $mode;
460 $self->{'mode'} = $mode;
461 defined (my $cnt = $self->readline())
462 or die "EOF from server after 'Changed'\n";
463 chomp $cnt;
464 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
465 $line="";
466 $res = $self->_fetchfile($fh, $cnt);
467 } elsif ($line =~ s/^ //) {
468 print $fh $line;
469 $res += length($line);
470 } elsif ($line =~ /^M\b/) {
471 # output, do nothing
472 } elsif ($line =~ /^Mbinary\b/) {
473 my $cnt;
474 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
475 chomp $cnt;
476 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
477 $line="";
478 $res += $self->_fetchfile($fh, $cnt);
479 } else {
480 chomp $line;
481 if ($line eq "ok") {
482 # print STDERR "S: ok (".length($res).")\n";
483 return $res;
484 } elsif ($line =~ s/^E //) {
485 # print STDERR "S: $line\n";
486 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
487 $line = $self->readline(); # filename
488 $line = $self->readline(); # OK
489 chomp $line;
490 die "Unknown: $line" if $line ne "ok";
491 return -1;
492 } else {
493 die "Unknown: $line\n";
497 return undef;
499 sub file {
500 my ($self,$fn,$rev) = @_;
501 my $res;
503 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
504 DIR => File::Spec->tmpdir(), UNLINK => 1);
506 $self->_file($fn,$rev) and $res = $self->_line($fh);
508 if (!defined $res) {
509 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
510 truncate $fh, 0;
511 $self->conn();
512 $self->_file($fn,$rev) or die "No file command send";
513 $res = $self->_line($fh);
514 die "Retry failed" unless defined $res;
516 close ($fh);
518 return ($name, $res);
520 sub _fetchfile {
521 my ($self, $fh, $cnt) = @_;
522 my $res = 0;
523 my $bufsize = 1024 * 1024;
524 while ($cnt) {
525 if ($bufsize > $cnt) {
526 $bufsize = $cnt;
528 my $buf;
529 my $num = $self->{'socketi'}->read($buf,$bufsize);
530 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
531 print $fh $buf;
532 $res += $num;
533 $cnt -= $num;
535 return $res;
538 sub _scramble {
539 my ($self, $pass) = @_;
540 my $scrambled = "A";
542 return $scrambled unless $pass;
544 my $pass_len = length($pass);
545 my @pass_arr = split("", $pass);
546 my $i;
548 # from cvs/src/scramble.c
549 my @shifts = (
550 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
551 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
552 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
553 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
554 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
555 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
556 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
557 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
558 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
559 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
560 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
561 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
562 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
563 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
564 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
565 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
568 for ($i = 0; $i < $pass_len; $i++) {
569 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
572 return $scrambled;
575 package main;
577 my $cvs = CVSconn->new($opt_d, $cvs_tree);
580 sub pdate($) {
581 my ($d) = @_;
582 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
583 or die "Unparseable date: $d\n";
584 my $y=$1; $y-=1900 if $y>1900;
585 return timegm($6||0,$5,$4,$3,$2-1,$y);
588 sub pmode($) {
589 my ($mode) = @_;
590 my $m = 0;
591 my $mm = 0;
592 my $um = 0;
593 for my $x(split(//,$mode)) {
594 if ($x eq ",") {
595 $m |= $mm&$um;
596 $mm = 0;
597 $um = 0;
598 } elsif ($x eq "u") { $um |= 0700;
599 } elsif ($x eq "g") { $um |= 0070;
600 } elsif ($x eq "o") { $um |= 0007;
601 } elsif ($x eq "r") { $mm |= 0444;
602 } elsif ($x eq "w") { $mm |= 0222;
603 } elsif ($x eq "x") { $mm |= 0111;
604 } elsif ($x eq "=") { # do nothing
605 } else { die "Unknown mode: $mode\n";
608 $m |= $mm&$um;
609 return $m;
612 sub getwd() {
613 my $pwd = `pwd`;
614 chomp $pwd;
615 return $pwd;
618 sub is_sha1 {
619 my $s = shift;
620 return $s =~ /^[a-f0-9]{40}$/;
623 sub get_headref ($) {
624 my $name = shift;
625 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
626 return undef unless $? == 0;
627 chomp $r;
628 return $r;
631 my $user_filename_prepend = '';
632 sub munge_user_filename {
633 my $name = shift;
634 return File::Spec->file_name_is_absolute($name) ?
635 $name :
636 $user_filename_prepend . $name;
639 -d $git_tree
640 or mkdir($git_tree,0777)
641 or die "Could not create $git_tree: $!";
642 if ($git_tree ne '.') {
643 $user_filename_prepend = getwd() . '/';
644 chdir($git_tree);
647 my $last_branch = "";
648 my $orig_branch = "";
649 my %branch_date;
650 my $tip_at_start = undef;
652 my $git_dir = $ENV{"GIT_DIR"} || ".git";
653 $git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
654 $ENV{"GIT_DIR"} = $git_dir;
655 my $orig_git_index;
656 $orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
658 my %index; # holds filenames of one index per branch
660 unless (-d $git_dir) {
661 system(qw(git init));
662 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
663 system(qw(git read-tree --empty));
664 die "Cannot init an empty tree: $?\n" if $?;
666 $last_branch = $opt_o;
667 $orig_branch = "";
668 } else {
669 open(F, "-|", qw(git symbolic-ref HEAD)) or
670 die "Cannot run git symbolic-ref: $!\n";
671 chomp ($last_branch = <F>);
672 $last_branch = basename($last_branch);
673 close(F);
674 unless ($last_branch) {
675 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
676 $last_branch = "master";
678 $orig_branch = $last_branch;
679 $tip_at_start = `git rev-parse --verify HEAD`;
681 # Get the last import timestamps
682 my $fmt = '($ref, $author) = (%(refname), %(author));';
683 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
684 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
685 while (defined(my $entry = <H>)) {
686 my ($ref, $author);
687 eval($entry) || die "cannot eval refs list: $@";
688 my ($head) = ($ref =~ m|^$remote/(.*)|);
689 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
690 $branch_date{$head} = $1;
692 close(H);
693 if (!exists $branch_date{$opt_o}) {
694 die "Branch '$opt_o' does not exist.\n".
695 "Either use the correct '-o branch' option,\n".
696 "or import to a new repository.\n";
700 -d $git_dir
701 or die "Could not create git subdir ($git_dir).\n";
703 # now we read (and possibly save) author-info as well
704 -f "$git_dir/cvs-authors" and
705 read_author_info("$git_dir/cvs-authors");
706 if ($opt_A) {
707 read_author_info(munge_user_filename($opt_A));
708 write_author_info("$git_dir/cvs-authors");
711 # open .git/cvs-revisions, if requested
712 open my $revision_map, '>>', "$git_dir/cvs-revisions"
713 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
714 if defined $opt_R;
718 # run cvsps into a file unless we are getting
719 # it passed as a file via $opt_P
721 my $cvspsfile;
722 unless ($opt_P) {
723 print "Running cvsps...\n" if $opt_v;
724 my $pid = open(CVSPS,"-|");
725 my $cvspsfh;
726 die "Cannot fork: $!\n" unless defined $pid;
727 unless ($pid) {
728 my @opt;
729 @opt = split(/,/,$opt_p) if defined $opt_p;
730 unshift @opt, '-z', $opt_z if defined $opt_z;
731 unshift @opt, '-q' unless defined $opt_v;
732 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
733 push @opt, '--cvs-direct';
735 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
736 die "Could not start cvsps: $!\n";
738 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
739 DIR => File::Spec->tmpdir());
740 while (<CVSPS>) {
741 print $cvspsfh $_;
743 close CVSPS;
744 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
745 close $cvspsfh;
746 } else {
747 $cvspsfile = munge_user_filename($opt_P);
750 open(CVS, "<$cvspsfile") or die $!;
752 ## cvsps output:
753 #---------------------
754 #PatchSet 314
755 #Date: 1999/09/18 13:03:59
756 #Author: wkoch
757 #Branch: STABLE-BRANCH-1-0
758 #Ancestor branch: HEAD
759 #Tag: (none)
760 #Log:
761 # See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
762 #Members:
763 # README:1.57->1.57.2.1
764 # VERSION:1.96->1.96.2.1
766 #---------------------
768 my $state = 0;
770 sub update_index (\@\@) {
771 my $old = shift;
772 my $new = shift;
773 open(my $fh, '|-', qw(git update-index -z --index-info))
774 or die "unable to open git update-index: $!";
775 print $fh
776 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
777 @$old),
778 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
779 @$new)
780 or die "unable to write to git update-index: $!";
781 close $fh
782 or die "unable to write to git update-index: $!";
783 $? and die "git update-index reported error: $?";
786 sub write_tree () {
787 open(my $fh, '-|', qw(git write-tree))
788 or die "unable to open git write-tree: $!";
789 chomp(my $tree = <$fh>);
790 is_sha1($tree)
791 or die "Cannot get tree id ($tree): $!";
792 close($fh)
793 or die "Error running git write-tree: $?\n";
794 print "Tree ID $tree\n" if $opt_v;
795 return $tree;
798 my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
799 my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
801 # commits that cvsps cannot place anywhere...
802 $ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
804 sub commit {
805 if ($branch eq $opt_o && !$index{branch} &&
806 !get_headref("$remote/$branch")) {
807 # looks like an initial commit
808 # use the index primed by git init
809 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
810 $index{$branch} = "$git_dir/index";
811 } else {
812 # use an index per branch to speed up
813 # imports of projects with many branches
814 unless ($index{$branch}) {
815 $index{$branch} = tmpnam();
816 $ENV{GIT_INDEX_FILE} = $index{$branch};
817 if ($ancestor) {
818 system("git", "read-tree", "$remote/$ancestor");
819 } else {
820 system("git", "read-tree", "$remote/$branch");
822 die "read-tree failed: $?\n" if $?;
825 $ENV{GIT_INDEX_FILE} = $index{$branch};
827 update_index(@old, @new);
828 @old = @new = ();
829 my $tree = write_tree();
830 my $parent = get_headref("$remote/$last_branch");
831 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
833 my @commit_args;
834 push @commit_args, ("-p", $parent) if $parent;
836 # loose detection of merges
837 # based on the commit msg
838 foreach my $rx (@mergerx) {
839 next unless $logmsg =~ $rx && $1;
840 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
841 if (my $sha1 = get_headref("$remote/$mparent")) {
842 push @commit_args, '-p', "$remote/$mparent";
843 print "Merge parent branch: $mparent\n" if $opt_v;
847 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
848 $ENV{GIT_AUTHOR_NAME} = $author_name;
849 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
850 $ENV{GIT_AUTHOR_DATE} = $commit_date;
851 $ENV{GIT_COMMITTER_NAME} = $author_name;
852 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
853 $ENV{GIT_COMMITTER_DATE} = $commit_date;
854 my $pid = open2(my $commit_read, my $commit_write,
855 'git', 'commit-tree', $tree, @commit_args);
857 # compatibility with git2cvs
858 substr($logmsg,32767) = "" if length($logmsg) > 32767;
859 $logmsg =~ s/[\s\n]+\z//;
861 if (@skipped) {
862 $logmsg .= "\n\n\nSKIPPED:\n\t";
863 $logmsg .= join("\n\t", @skipped) . "\n";
864 @skipped = ();
867 print($commit_write "$logmsg\n") && close($commit_write)
868 or die "Error writing to git commit-tree: $!\n";
870 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
871 chomp(my $cid = <$commit_read>);
872 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
873 print "Commit ID $cid\n" if $opt_v;
874 close($commit_read);
876 waitpid($pid,0);
877 die "Error running git commit-tree: $?\n" if $?;
879 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
880 or die "Cannot write branch $branch for update: $!\n";
882 if ($revision_map) {
883 print $revision_map "@$_ $cid\n" for @commit_revisions;
885 @commit_revisions = ();
887 if ($tag) {
888 my ($xtag) = $tag;
889 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
890 $xtag =~ tr/_/\./ if ( $opt_u );
891 $xtag =~ s/[\/]/$opt_s/g;
892 $xtag =~ s/\[//g;
894 system('git' , 'tag', '-f', $xtag, $cid) == 0
895 or die "Cannot create tag $xtag: $!\n";
897 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
901 my $commitcount = 1;
902 while (<CVS>) {
903 chomp;
904 if ($state == 0 and /^-+$/) {
905 $state = 1;
906 } elsif ($state == 0) {
907 $state = 1;
908 redo;
909 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
910 $patchset = 0+$_;
911 $state=2;
912 } elsif ($state == 2 and s/^Date:\s+//) {
913 $date = pdate($_);
914 unless ($date) {
915 print STDERR "Could not parse date: $_\n";
916 $state=0;
917 next;
919 $state=3;
920 } elsif ($state == 3 and s/^Author:\s+//) {
921 s/\s+$//;
922 if (/^(.*?)\s+<(.*)>/) {
923 ($author_name, $author_email) = ($1, $2);
924 } elsif ($conv_author_name{$_}) {
925 $author_name = $conv_author_name{$_};
926 $author_email = $conv_author_email{$_};
927 } else {
928 $author_name = $author_email = $_;
930 $state = 4;
931 } elsif ($state == 4 and s/^Branch:\s+//) {
932 s/\s+$//;
933 tr/_/\./ if ( $opt_u );
934 s/[\/]/$opt_s/g;
935 $branch = $_;
936 $state = 5;
937 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
938 s/\s+$//;
939 $ancestor = $_;
940 $ancestor = $opt_o if $ancestor eq "HEAD";
941 $state = 6;
942 } elsif ($state == 5) {
943 $ancestor = undef;
944 $state = 6;
945 redo;
946 } elsif ($state == 6 and s/^Tag:\s+//) {
947 s/\s+$//;
948 if ($_ eq "(none)") {
949 $tag = undef;
950 } else {
951 $tag = $_;
953 $state = 7;
954 } elsif ($state == 7 and /^Log:/) {
955 $logmsg = "";
956 $state = 8;
957 } elsif ($state == 8 and /^Members:/) {
958 $branch = $opt_o if $branch eq "HEAD";
959 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
960 # skip
961 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
962 $state = 11;
963 next;
965 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
966 # skip if the commit is too recent
967 # given that the cvsps default fuzz is 300s, we give ourselves another
968 # 300s just in case -- this also prevents skipping commits
969 # due to server clock drift
970 print "skip patchset $patchset: $date too recent\n" if $opt_v;
971 $state = 11;
972 next;
974 if (exists $ignorebranch{$branch}) {
975 print STDERR "Skipping $branch\n";
976 $state = 11;
977 next;
979 if ($ancestor) {
980 if ($ancestor eq $branch) {
981 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
982 $ancestor = $opt_o;
984 if (defined get_headref("$remote/$branch")) {
985 print STDERR "Branch $branch already exists!\n";
986 $state=11;
987 next;
989 my $id = get_headref("$remote/$ancestor");
990 if (!$id) {
991 print STDERR "Branch $ancestor does not exist!\n";
992 $ignorebranch{$branch} = 1;
993 $state=11;
994 next;
997 system(qw(git update-ref -m cvsimport),
998 "$remote/$branch", $id);
999 if($? != 0) {
1000 print STDERR "Could not create branch $branch\n";
1001 $ignorebranch{$branch} = 1;
1002 $state=11;
1003 next;
1006 $last_branch = $branch if $branch ne $last_branch;
1007 $state = 9;
1008 } elsif ($state == 8) {
1009 $logmsg .= "$_\n";
1010 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1011 # VERSION:1.96->1.96.2.1
1012 my $init = ($2 eq "INITIAL");
1013 my $fn = $1;
1014 my $rev = $3;
1015 $fn =~ s#^/+##;
1016 if ($opt_S && $fn =~ m/$opt_S/) {
1017 print "SKIPPING $fn v $rev\n";
1018 push(@skipped, $fn);
1019 next;
1021 push @commit_revisions, [$fn, $rev];
1022 print "Fetching $fn v $rev\n" if $opt_v;
1023 my ($tmpname, $size) = $cvs->file($fn,$rev);
1024 if ($size == -1) {
1025 push(@old,$fn);
1026 print "Drop $fn\n" if $opt_v;
1027 } else {
1028 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1029 my $pid = open(my $F, '-|');
1030 die $! unless defined $pid;
1031 if (!$pid) {
1032 exec("git", "hash-object", "-w", $tmpname)
1033 or die "Cannot create object: $!\n";
1035 my $sha = <$F>;
1036 chomp $sha;
1037 close $F;
1038 my $mode = pmode($cvs->{'mode'});
1039 push(@new,[$mode, $sha, $fn]); # may be resurrected!
1041 unlink($tmpname);
1042 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1043 my $fn = $1;
1044 my $rev = $2;
1045 $fn =~ s#^/+##;
1046 push @commit_revisions, [$fn, $rev];
1047 push(@old,$fn);
1048 print "Delete $fn\n" if $opt_v;
1049 } elsif ($state == 9 and /^\s*$/) {
1050 $state = 10;
1051 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1052 $commitcount++;
1053 if ($opt_L && $commitcount > $opt_L) {
1054 last;
1056 commit();
1057 if (($commitcount & 1023) == 0) {
1058 system(qw(git repack -a -d));
1060 $state = 1;
1061 } elsif ($state == 11 and /^-+$/) {
1062 $state = 1;
1063 } elsif (/^-+$/) { # end of unknown-line processing
1064 $state = 1;
1065 } elsif ($state != 11) { # ignore stuff when skipping
1066 print STDERR "* UNKNOWN LINE * $_\n";
1069 commit() if $branch and $state != 11;
1071 unless ($opt_P) {
1072 unlink($cvspsfile);
1075 # The heuristic of repacking every 1024 commits can leave a
1076 # lot of unpacked data. If there is more than 1MB worth of
1077 # not-packed objects, repack once more.
1078 my $line = `git count-objects`;
1079 if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1080 my ($n_objects, $kb) = ($1, $2);
1081 1024 < $kb
1082 and system(qw(git repack -a -d));
1085 foreach my $git_index (values %index) {
1086 if ($git_index ne "$git_dir/index") {
1087 unlink($git_index);
1091 if (defined $orig_git_index) {
1092 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1093 } else {
1094 delete $ENV{GIT_INDEX_FILE};
1097 # Now switch back to the branch we were in before all of this happened
1098 if ($orig_branch) {
1099 print "DONE.\n" if $opt_v;
1100 if ($opt_i) {
1101 exit 0;
1103 my $tip_at_end = `git rev-parse --verify HEAD`;
1104 if ($tip_at_start ne $tip_at_end) {
1105 for ($tip_at_start, $tip_at_end) { chomp; }
1106 print "Fetched into the current branch.\n" if $opt_v;
1107 system(qw(git read-tree -u -m),
1108 $tip_at_start, $tip_at_end);
1109 die "Fast-forward update failed: $?\n" if $?;
1111 else {
1112 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1113 die "Could not merge $opt_o into the current branch.\n" if $?;
1115 } else {
1116 $orig_branch = "master";
1117 print "DONE; creating $orig_branch branch\n" if $opt_v;
1118 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1119 unless defined get_headref('refs/heads/master');
1120 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1121 if ($opt_r && $opt_o ne 'HEAD');
1122 system('git', 'update-ref', 'HEAD', "$orig_branch");
1123 unless ($opt_i) {
1124 system(qw(git checkout -f));
1125 die "checkout failed: $?\n" if $?;