fixup.cc5711424b7ae36276a40c06ede5d95f87ca20f0
[git/dscho.git] / git-cvsimport.perl
blobcda946d2674d8ebbf3a5b1adab6bb7b85aad03eb
1 #!/usr/bin/perl -w
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to aggregate CVS check-ins into related changes.
7 # Fortunately, "cvsps" does that for us; all we have to do is to parse
8 # its output.
10 # Checking out the files is done by a single long-running CVS connection
11 # / server process.
13 # The head revision is on branch "origin" by default.
14 # You can change that with the '-o' option.
16 use strict;
17 use warnings;
18 use Getopt::Long;
19 use File::Spec;
20 use File::Temp qw(tempfile tmpnam);
21 use File::Path qw(mkpath);
22 use File::Basename qw(basename dirname);
23 use Time::Local;
24 use IO::Socket;
25 use IO::Pipe;
26 use POSIX qw(strftime dup2 ENOENT);
27 use IPC::Open2;
29 $SIG{'PIPE'}="IGNORE";
30 $ENV{'TZ'}="UTC";
32 our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r);
33 my (%conv_author_name, %conv_author_email);
35 sub usage(;$) {
36 my $msg = shift;
37 print(STDERR "Error: $msg\n") if $msg;
38 print STDERR <<END;
39 Usage: git cvsimport # fetch/update GIT from CVS
40 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
41 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
42 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
43 [-r remote] [CVS_module]
44 END
45 exit(1);
48 sub read_author_info($) {
49 my ($file) = @_;
50 my $user;
51 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
53 while (<$f>) {
54 # Expected format is this:
55 # exon=Andreas Ericsson <ae@op5.se>
56 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
57 $user = $1;
58 $conv_author_name{$user} = $2;
59 $conv_author_email{$user} = $3;
61 # However, we also read from CVSROOT/users format
62 # to ease migration.
63 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
64 my $mapped;
65 ($user, $mapped) = ($1, $3);
66 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
67 $conv_author_name{$user} = $1;
68 $conv_author_email{$user} = $2;
70 elsif ($mapped =~ /^<?(.*)>?$/) {
71 $conv_author_name{$user} = $user;
72 $conv_author_email{$user} = $1;
75 # NEEDSWORK: Maybe warn on unrecognized lines?
77 close ($f);
80 sub write_author_info($) {
81 my ($file) = @_;
82 open my $f, '>', $file or
83 die("Failed to open $file for writing: $!");
85 foreach (keys %conv_author_name) {
86 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
88 close ($f);
91 # convert getopts specs for use by git config
92 sub read_repo_config {
93 # Split the string between characters, unless there is a ':'
94 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
95 my @opts = split(/ *(?!:)/, shift);
96 foreach my $o (@opts) {
97 my $key = $o;
98 $key =~ s/://g;
99 my $arg = 'git config';
100 $arg .= ' --bool' if ($o !~ /:$/);
102 chomp(my $tmp = `$arg --get cvsimport.$key`);
103 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
104 no strict 'refs';
105 my $opt_name = "opt_" . $key;
106 if (!$$opt_name) {
107 $$opt_name = $tmp;
113 my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:";
114 read_repo_config($opts);
115 Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
117 # turn the Getopt::Std specification in a Getopt::Long one,
118 # with support for multiple -M options
119 GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
120 or usage();
121 usage if $opt_h;
123 if (@ARGV == 0) {
124 chomp(my $module = `git config --get cvsimport.module`);
125 push(@ARGV, $module) if $? == 0;
127 @ARGV <= 1 or usage("You can't specify more than one CVS module");
129 if ($opt_d) {
130 $ENV{"CVSROOT"} = $opt_d;
131 } elsif (-f 'CVS/Root') {
132 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
133 $opt_d = <$f>;
134 chomp $opt_d;
135 close $f;
136 $ENV{"CVSROOT"} = $opt_d;
137 } elsif ($ENV{"CVSROOT"}) {
138 $opt_d = $ENV{"CVSROOT"};
139 } else {
140 usage("CVSROOT needs to be set");
142 $opt_s ||= "-";
143 $opt_a ||= 0;
145 my $git_tree = $opt_C;
146 $git_tree ||= ".";
148 my $remote;
149 if (defined $opt_r) {
150 $remote = 'refs/remotes/' . $opt_r;
151 $opt_o ||= "master";
152 } else {
153 $opt_o ||= "origin";
154 $remote = 'refs/heads';
157 my $cvs_tree;
158 if ($#ARGV == 0) {
159 $cvs_tree = $ARGV[0];
160 } elsif (-f 'CVS/Repository') {
161 open my $f, '<', 'CVS/Repository' or
162 die 'Failed to open CVS/Repository';
163 $cvs_tree = <$f>;
164 chomp $cvs_tree;
165 close $f;
166 } else {
167 usage("CVS module has to be specified");
170 our @mergerx = ();
171 if ($opt_m) {
172 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
174 if (@opt_M) {
175 push (@mergerx, map { qr/$_/ } @opt_M);
178 # Remember UTC of our starting time
179 # we'll want to avoid importing commits
180 # that are too recent
181 our $starttime = time();
183 select(STDERR); $|=1; select(STDOUT);
186 package CVSconn;
187 # Basic CVS dialog.
188 # We're only interested in connecting and downloading, so ...
190 use File::Spec;
191 use File::Temp qw(tempfile);
192 use POSIX qw(strftime dup2);
194 sub new {
195 my ($what,$repo,$subdir) = @_;
196 $what=ref($what) if ref($what);
198 my $self = {};
199 $self->{'buffer'} = "";
200 bless($self,$what);
202 $repo =~ s#/+$##;
203 $self->{'fullrep'} = $repo;
204 $self->conn();
206 $self->{'subdir'} = $subdir;
207 $self->{'lines'} = undef;
209 return $self;
212 sub conn {
213 my $self = shift;
214 my $repo = $self->{'fullrep'};
215 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
216 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
218 my ($proxyhost,$proxyport);
219 if ($param && ($param =~ m/proxy=([^;]+)/)) {
220 $proxyhost = $1;
221 # Default proxyport, if not specified, is 8080.
222 $proxyport = 8080;
223 if ($ENV{"CVS_PROXY_PORT"}) {
224 $proxyport = $ENV{"CVS_PROXY_PORT"};
226 if ($param =~ m/proxyport=([^;]+)/) {
227 $proxyport = $1;
230 $repo ||= '/';
232 # if username is not explicit in CVSROOT, then use current user, as cvs would
233 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
234 my $rr2 = "-";
235 unless ($port) {
236 $rr2 = ":pserver:$user\@$serv:$repo";
237 $port=2401;
239 my $rr = ":pserver:$user\@$serv:$port$repo";
241 if ($pass) {
242 $pass = $self->_scramble($pass);
243 } else {
244 open(H,$ENV{'HOME'}."/.cvspass") and do {
245 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
246 while (<H>) {
247 chomp;
248 s/^\/\d+\s+//;
249 my ($w,$p) = split(/\s/,$_,2);
250 if ($w eq $rr or $w eq $rr2) {
251 $pass = $p;
252 last;
256 $pass = "A" unless $pass;
259 my ($s, $rep);
260 if ($proxyhost) {
262 # Use a HTTP Proxy. Only works for HTTP proxies that
263 # don't require user authentication
265 # See: http://www.ietf.org/rfc/rfc2817.txt
267 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
268 die "Socket to $proxyhost: $!\n" unless defined $s;
269 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
270 or die "Write to $proxyhost: $!\n";
271 $s->flush();
273 $rep = <$s>;
275 # The answer should look like 'HTTP/1.x 2yy ....'
276 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
277 die "Proxy connect: $rep\n";
279 # Skip up to the empty line of the proxy server output
280 # including the response headers.
281 while ($rep = <$s>) {
282 last if (!defined $rep ||
283 $rep eq "\n" ||
284 $rep eq "\r\n");
286 } else {
287 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
288 die "Socket to $serv: $!\n" unless defined $s;
291 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
292 or die "Write to $serv: $!\n";
293 $s->flush();
295 $rep = <$s>;
297 if ($rep ne "I LOVE YOU\n") {
298 $rep="<unknown>" unless $rep;
299 die "AuthReply: $rep\n";
301 $self->{'socketo'} = $s;
302 $self->{'socketi'} = $s;
303 } else { # local or ext: Fork off our own cvs server.
304 my $pr = IO::Pipe->new();
305 my $pw = IO::Pipe->new();
306 my $pid = fork();
307 die "Fork: $!\n" unless defined $pid;
308 my $cvs = 'cvs';
309 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
310 my $rsh = 'rsh';
311 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
313 my @cvs = ($cvs, 'server');
314 my ($local, $user, $host);
315 $local = $repo =~ s/:local://;
316 if (!$local) {
317 $repo =~ s/:ext://;
318 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
319 ($user, $host) = ($1, $2);
321 if (!$local) {
322 if ($user) {
323 unshift @cvs, $rsh, '-l', $user, $host;
324 } else {
325 unshift @cvs, $rsh, $host;
329 unless ($pid) {
330 $pr->writer();
331 $pw->reader();
332 dup2($pw->fileno(),0);
333 dup2($pr->fileno(),1);
334 $pr->close();
335 $pw->close();
336 exec(@cvs);
338 $pw->writer();
339 $pr->reader();
340 $self->{'socketo'} = $pw;
341 $self->{'socketi'} = $pr;
343 $self->{'socketo'}->write("Root $repo\n");
345 # Trial and error says that this probably is the minimum set
346 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
348 $self->{'socketo'}->write("valid-requests\n");
349 $self->{'socketo'}->flush();
351 chomp(my $rep=$self->readline());
352 if ($rep !~ s/^Valid-requests\s*//) {
353 $rep="<unknown>" unless $rep;
354 die "Expected Valid-requests from server, but got: $rep\n";
356 chomp(my $res=$self->readline());
357 die "validReply: $res\n" if $res ne "ok";
359 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
360 $self->{'repo'} = $repo;
363 sub readline {
364 my ($self) = @_;
365 return $self->{'socketi'}->getline();
368 sub _file {
369 # Request a file with a given revision.
370 # Trial and error says this is a good way to do it. :-/
371 my ($self,$fn,$rev) = @_;
372 $self->{'socketo'}->write("Argument -N\n") or return undef;
373 $self->{'socketo'}->write("Argument -P\n") or return undef;
374 # -kk: Linus' version doesn't use it - defaults to off
375 if ($opt_k) {
376 $self->{'socketo'}->write("Argument -kk\n") or return undef;
378 $self->{'socketo'}->write("Argument -r\n") or return undef;
379 $self->{'socketo'}->write("Argument $rev\n") or return undef;
380 $self->{'socketo'}->write("Argument --\n") or return undef;
381 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
382 $self->{'socketo'}->write("Directory .\n") or return undef;
383 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
384 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
385 $self->{'socketo'}->write("co\n") or return undef;
386 $self->{'socketo'}->flush() or return undef;
387 $self->{'lines'} = 0;
388 return 1;
390 sub _line {
391 # Read a line from the server.
392 # ... except that 'line' may be an entire file. ;-)
393 my ($self, $fh) = @_;
394 die "Not in lines" unless defined $self->{'lines'};
396 my $line;
397 my $res=0;
398 while (defined($line = $self->readline())) {
399 # M U gnupg-cvs-rep/AUTHORS
400 # Updated gnupg-cvs-rep/
401 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
402 # /AUTHORS/1.1///T1.1
403 # u=rw,g=rw,o=rw
405 # ok
407 if ($line =~ s/^(?:Created|Updated) //) {
408 $line = $self->readline(); # path
409 $line = $self->readline(); # Entries line
410 my $mode = $self->readline(); chomp $mode;
411 $self->{'mode'} = $mode;
412 defined (my $cnt = $self->readline())
413 or die "EOF from server after 'Changed'\n";
414 chomp $cnt;
415 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
416 $line="";
417 $res = $self->_fetchfile($fh, $cnt);
418 } elsif ($line =~ s/^ //) {
419 print $fh $line;
420 $res += length($line);
421 } elsif ($line =~ /^M\b/) {
422 # output, do nothing
423 } elsif ($line =~ /^Mbinary\b/) {
424 my $cnt;
425 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
426 chomp $cnt;
427 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
428 $line="";
429 $res += $self->_fetchfile($fh, $cnt);
430 } else {
431 chomp $line;
432 if ($line eq "ok") {
433 # print STDERR "S: ok (".length($res).")\n";
434 return $res;
435 } elsif ($line =~ s/^E //) {
436 # print STDERR "S: $line\n";
437 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
438 $line = $self->readline(); # filename
439 $line = $self->readline(); # OK
440 chomp $line;
441 die "Unknown: $line" if $line ne "ok";
442 return -1;
443 } else {
444 die "Unknown: $line\n";
448 return undef;
450 sub file {
451 my ($self,$fn,$rev) = @_;
452 my $res;
454 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
455 DIR => File::Spec->tmpdir(), UNLINK => 1);
457 $self->_file($fn,$rev) and $res = $self->_line($fh);
459 if (!defined $res) {
460 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
461 truncate $fh, 0;
462 $self->conn();
463 $self->_file($fn,$rev) or die "No file command send";
464 $res = $self->_line($fh);
465 die "Retry failed" unless defined $res;
467 close ($fh);
469 return ($name, $res);
471 sub _fetchfile {
472 my ($self, $fh, $cnt) = @_;
473 my $res = 0;
474 my $bufsize = 1024 * 1024;
475 while ($cnt) {
476 if ($bufsize > $cnt) {
477 $bufsize = $cnt;
479 my $buf;
480 my $num = $self->{'socketi'}->read($buf,$bufsize);
481 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
482 print $fh $buf;
483 $res += $num;
484 $cnt -= $num;
486 return $res;
489 sub _scramble {
490 my ($self, $pass) = @_;
491 my $scrambled = "A";
493 return $scrambled unless $pass;
495 my $pass_len = length($pass);
496 my @pass_arr = split("", $pass);
497 my $i;
499 # from cvs/src/scramble.c
500 my @shifts = (
501 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
502 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
503 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
504 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
505 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
506 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
507 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
508 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
509 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
510 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
511 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
512 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
513 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
514 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
515 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
516 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
519 for ($i = 0; $i < $pass_len; $i++) {
520 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
523 return $scrambled;
526 package main;
528 my $cvs = CVSconn->new($opt_d, $cvs_tree);
531 sub pdate($) {
532 my ($d) = @_;
533 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
534 or die "Unparseable date: $d\n";
535 my $y=$1; $y-=1900 if $y>1900;
536 return timegm($6||0,$5,$4,$3,$2-1,$y);
539 sub pmode($) {
540 my ($mode) = @_;
541 my $m = 0;
542 my $mm = 0;
543 my $um = 0;
544 for my $x(split(//,$mode)) {
545 if ($x eq ",") {
546 $m |= $mm&$um;
547 $mm = 0;
548 $um = 0;
549 } elsif ($x eq "u") { $um |= 0700;
550 } elsif ($x eq "g") { $um |= 0070;
551 } elsif ($x eq "o") { $um |= 0007;
552 } elsif ($x eq "r") { $mm |= 0444;
553 } elsif ($x eq "w") { $mm |= 0222;
554 } elsif ($x eq "x") { $mm |= 0111;
555 } elsif ($x eq "=") { # do nothing
556 } else { die "Unknown mode: $mode\n";
559 $m |= $mm&$um;
560 return $m;
563 sub getwd() {
564 my $pwd = `pwd`;
565 chomp $pwd;
566 return $pwd;
569 sub is_sha1 {
570 my $s = shift;
571 return $s =~ /^[a-f0-9]{40}$/;
574 sub get_headref ($) {
575 my $name = shift;
576 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
577 return undef unless $? == 0;
578 chomp $r;
579 return $r;
582 my $user_filename_prepend = '';
583 sub munge_user_filename {
584 my $name = shift;
585 return File::Spec->file_name_is_absolute($name) ?
586 $name :
587 $user_filename_prepend . $name;
590 -d $git_tree
591 or mkdir($git_tree,0777)
592 or die "Could not create $git_tree: $!";
593 if ($git_tree ne '.') {
594 $user_filename_prepend = getwd() . '/';
595 chdir($git_tree);
598 my $last_branch = "";
599 my $orig_branch = "";
600 my %branch_date;
601 my $tip_at_start = undef;
603 my $git_dir = $ENV{"GIT_DIR"} || ".git";
604 $git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
605 $ENV{"GIT_DIR"} = $git_dir;
606 my $orig_git_index;
607 $orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
609 my %index; # holds filenames of one index per branch
611 unless (-d $git_dir) {
612 system(qw(git init));
613 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
614 system(qw(git-config core.autocrlf false));
615 die "Cannot set core.autocrlf false" if $?;
616 system(qw(git read-tree));
617 die "Cannot init an empty tree: $?\n" if $?;
619 $last_branch = $opt_o;
620 $orig_branch = "";
621 } else {
622 die "Cannot operate with core.autocrlf other than 'false'"
623 if (`git-config --bool core.autocrlf` =~ /true|input/);
624 open(F, "-|", qw(git symbolic-ref HEAD)) or
625 die "Cannot run git symbolic-ref: $!\n";
626 chomp ($last_branch = <F>);
627 $last_branch = basename($last_branch);
628 close(F);
629 unless ($last_branch) {
630 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
631 $last_branch = "master";
633 $orig_branch = $last_branch;
634 $tip_at_start = `git rev-parse --verify HEAD`;
636 # Get the last import timestamps
637 my $fmt = '($ref, $author) = (%(refname), %(author));';
638 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
639 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
640 while (defined(my $entry = <H>)) {
641 my ($ref, $author);
642 eval($entry) || die "cannot eval refs list: $@";
643 my ($head) = ($ref =~ m|^$remote/(.*)|);
644 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
645 $branch_date{$head} = $1;
647 close(H);
648 if (!exists $branch_date{$opt_o}) {
649 die "Branch '$opt_o' does not exist.\n".
650 "Either use the correct '-o branch' option,\n".
651 "or import to a new repository.\n";
655 -d $git_dir
656 or die "Could not create git subdir ($git_dir).\n";
658 # now we read (and possibly save) author-info as well
659 -f "$git_dir/cvs-authors" and
660 read_author_info("$git_dir/cvs-authors");
661 if ($opt_A) {
662 read_author_info(munge_user_filename($opt_A));
663 write_author_info("$git_dir/cvs-authors");
668 # run cvsps into a file unless we are getting
669 # it passed as a file via $opt_P
671 my $cvspsfile;
672 unless ($opt_P) {
673 print "Running cvsps...\n" if $opt_v;
674 my $pid = open(CVSPS,"-|");
675 my $cvspsfh;
676 die "Cannot fork: $!\n" unless defined $pid;
677 unless ($pid) {
678 my @opt;
679 @opt = split(/,/,$opt_p) if defined $opt_p;
680 unshift @opt, '-z', $opt_z if defined $opt_z;
681 unshift @opt, '-q' unless defined $opt_v;
682 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
683 push @opt, '--cvs-direct';
685 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
686 die "Could not start cvsps: $!\n";
688 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
689 DIR => File::Spec->tmpdir());
690 while (<CVSPS>) {
691 print $cvspsfh $_;
693 close CVSPS;
694 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
695 close $cvspsfh;
696 } else {
697 $cvspsfile = munge_user_filename($opt_P);
700 open(CVS, "<$cvspsfile") or die $!;
702 ## cvsps output:
703 #---------------------
704 #PatchSet 314
705 #Date: 1999/09/18 13:03:59
706 #Author: wkoch
707 #Branch: STABLE-BRANCH-1-0
708 #Ancestor branch: HEAD
709 #Tag: (none)
710 #Log:
711 # See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
712 #Members:
713 # README:1.57->1.57.2.1
714 # VERSION:1.96->1.96.2.1
716 #---------------------
718 my $state = 0;
720 sub update_index (\@\@) {
721 my $old = shift;
722 my $new = shift;
723 open(my $fh, '|-', qw(git update-index -z --index-info))
724 or die "unable to open git update-index: $!";
725 print $fh
726 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
727 @$old),
728 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
729 @$new)
730 or die "unable to write to git update-index: $!";
731 close $fh
732 or die "unable to write to git update-index: $!";
733 $? and die "git update-index reported error: $?";
736 sub write_tree () {
737 open(my $fh, '-|', qw(git write-tree))
738 or die "unable to open git write-tree: $!";
739 chomp(my $tree = <$fh>);
740 is_sha1($tree)
741 or die "Cannot get tree id ($tree): $!";
742 close($fh)
743 or die "Error running git write-tree: $?\n";
744 print "Tree ID $tree\n" if $opt_v;
745 return $tree;
748 my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
749 my (@old,@new,@skipped,%ignorebranch);
751 # commits that cvsps cannot place anywhere...
752 $ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
754 sub commit {
755 if ($branch eq $opt_o && !$index{branch} &&
756 !get_headref("$remote/$branch")) {
757 # looks like an initial commit
758 # use the index primed by git init
759 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
760 $index{$branch} = "$git_dir/index";
761 } else {
762 # use an index per branch to speed up
763 # imports of projects with many branches
764 unless ($index{$branch}) {
765 $index{$branch} = tmpnam();
766 $ENV{GIT_INDEX_FILE} = $index{$branch};
767 if ($ancestor) {
768 system("git", "read-tree", "$remote/$ancestor");
769 } else {
770 system("git", "read-tree", "$remote/$branch");
772 die "read-tree failed: $?\n" if $?;
775 $ENV{GIT_INDEX_FILE} = $index{$branch};
777 update_index(@old, @new);
778 @old = @new = ();
779 my $tree = write_tree();
780 my $parent = get_headref("$remote/$last_branch");
781 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
783 my @commit_args;
784 push @commit_args, ("-p", $parent) if $parent;
786 # loose detection of merges
787 # based on the commit msg
788 foreach my $rx (@mergerx) {
789 next unless $logmsg =~ $rx && $1;
790 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
791 if (my $sha1 = get_headref("$remote/$mparent")) {
792 push @commit_args, '-p', "$remote/$mparent";
793 print "Merge parent branch: $mparent\n" if $opt_v;
797 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
798 $ENV{GIT_AUTHOR_NAME} = $author_name;
799 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
800 $ENV{GIT_AUTHOR_DATE} = $commit_date;
801 $ENV{GIT_COMMITTER_NAME} = $author_name;
802 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
803 $ENV{GIT_COMMITTER_DATE} = $commit_date;
804 my $pid = open2(my $commit_read, my $commit_write,
805 'git', 'commit-tree', $tree, @commit_args);
807 # compatibility with git2cvs
808 substr($logmsg,32767) = "" if length($logmsg) > 32767;
809 $logmsg =~ s/[\s\n]+\z//;
811 if (@skipped) {
812 $logmsg .= "\n\n\nSKIPPED:\n\t";
813 $logmsg .= join("\n\t", @skipped) . "\n";
814 @skipped = ();
817 print($commit_write "$logmsg\n") && close($commit_write)
818 or die "Error writing to git commit-tree: $!\n";
820 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
821 chomp(my $cid = <$commit_read>);
822 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
823 print "Commit ID $cid\n" if $opt_v;
824 close($commit_read);
826 waitpid($pid,0);
827 die "Error running git commit-tree: $?\n" if $?;
829 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
830 or die "Cannot write branch $branch for update: $!\n";
832 if ($tag) {
833 my ($xtag) = $tag;
834 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
835 $xtag =~ tr/_/\./ if ( $opt_u );
836 $xtag =~ s/[\/]/$opt_s/g;
837 $xtag =~ s/\[//g;
839 system('git' , 'tag', '-f', $xtag, $cid) == 0
840 or die "Cannot create tag $xtag: $!\n";
842 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
846 my $commitcount = 1;
847 while (<CVS>) {
848 chomp;
849 if ($state == 0 and /^-+$/) {
850 $state = 1;
851 } elsif ($state == 0) {
852 $state = 1;
853 redo;
854 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
855 $patchset = 0+$_;
856 $state=2;
857 } elsif ($state == 2 and s/^Date:\s+//) {
858 $date = pdate($_);
859 unless ($date) {
860 print STDERR "Could not parse date: $_\n";
861 $state=0;
862 next;
864 $state=3;
865 } elsif ($state == 3 and s/^Author:\s+//) {
866 s/\s+$//;
867 if (/^(.*?)\s+<(.*)>/) {
868 ($author_name, $author_email) = ($1, $2);
869 } elsif ($conv_author_name{$_}) {
870 $author_name = $conv_author_name{$_};
871 $author_email = $conv_author_email{$_};
872 } else {
873 $author_name = $author_email = $_;
875 $state = 4;
876 } elsif ($state == 4 and s/^Branch:\s+//) {
877 s/\s+$//;
878 tr/_/\./ if ( $opt_u );
879 s/[\/]/$opt_s/g;
880 $branch = $_;
881 $state = 5;
882 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
883 s/\s+$//;
884 $ancestor = $_;
885 $ancestor = $opt_o if $ancestor eq "HEAD";
886 $state = 6;
887 } elsif ($state == 5) {
888 $ancestor = undef;
889 $state = 6;
890 redo;
891 } elsif ($state == 6 and s/^Tag:\s+//) {
892 s/\s+$//;
893 if ($_ eq "(none)") {
894 $tag = undef;
895 } else {
896 $tag = $_;
898 $state = 7;
899 } elsif ($state == 7 and /^Log:/) {
900 $logmsg = "";
901 $state = 8;
902 } elsif ($state == 8 and /^Members:/) {
903 $branch = $opt_o if $branch eq "HEAD";
904 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
905 # skip
906 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
907 $state = 11;
908 next;
910 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
911 # skip if the commit is too recent
912 # given that the cvsps default fuzz is 300s, we give ourselves another
913 # 300s just in case -- this also prevents skipping commits
914 # due to server clock drift
915 print "skip patchset $patchset: $date too recent\n" if $opt_v;
916 $state = 11;
917 next;
919 if (exists $ignorebranch{$branch}) {
920 print STDERR "Skipping $branch\n";
921 $state = 11;
922 next;
924 if ($ancestor) {
925 if ($ancestor eq $branch) {
926 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
927 $ancestor = $opt_o;
929 if (defined get_headref("$remote/$branch")) {
930 print STDERR "Branch $branch already exists!\n";
931 $state=11;
932 next;
934 my $id = get_headref("$remote/$ancestor");
935 if (!$id) {
936 print STDERR "Branch $ancestor does not exist!\n";
937 $ignorebranch{$branch} = 1;
938 $state=11;
939 next;
942 system(qw(git update-ref -m cvsimport),
943 "$remote/$branch", $id);
944 if($? != 0) {
945 print STDERR "Could not create branch $branch\n";
946 $ignorebranch{$branch} = 1;
947 $state=11;
948 next;
951 $last_branch = $branch if $branch ne $last_branch;
952 $state = 9;
953 } elsif ($state == 8) {
954 $logmsg .= "$_\n";
955 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
956 # VERSION:1.96->1.96.2.1
957 my $init = ($2 eq "INITIAL");
958 my $fn = $1;
959 my $rev = $3;
960 $fn =~ s#^/+##;
961 if ($opt_S && $fn =~ m/$opt_S/) {
962 print "SKIPPING $fn v $rev\n";
963 push(@skipped, $fn);
964 next;
966 print "Fetching $fn v $rev\n" if $opt_v;
967 my ($tmpname, $size) = $cvs->file($fn,$rev);
968 if ($size == -1) {
969 push(@old,$fn);
970 print "Drop $fn\n" if $opt_v;
971 } else {
972 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
973 my $pid = open(my $F, '-|');
974 die $! unless defined $pid;
975 if (!$pid) {
976 exec("git", "hash-object", "-w", $tmpname)
977 or die "Cannot create object: $!\n";
979 my $sha = <$F>;
980 chomp $sha;
981 close $F;
982 my $mode = pmode($cvs->{'mode'});
983 push(@new,[$mode, $sha, $fn]); # may be resurrected!
985 unlink($tmpname);
986 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
987 my $fn = $1;
988 $fn =~ s#^/+##;
989 push(@old,$fn);
990 print "Delete $fn\n" if $opt_v;
991 } elsif ($state == 9 and /^\s*$/) {
992 $state = 10;
993 } elsif (($state == 9 or $state == 10) and /^-+$/) {
994 $commitcount++;
995 if ($opt_L && $commitcount > $opt_L) {
996 last;
998 commit();
999 if (($commitcount & 1023) == 0) {
1000 system(qw(git repack -a -d));
1002 $state = 1;
1003 } elsif ($state == 11 and /^-+$/) {
1004 $state = 1;
1005 } elsif (/^-+$/) { # end of unknown-line processing
1006 $state = 1;
1007 } elsif ($state != 11) { # ignore stuff when skipping
1008 print STDERR "* UNKNOWN LINE * $_\n";
1011 commit() if $branch and $state != 11;
1013 unless ($opt_P) {
1014 unlink($cvspsfile);
1017 # The heuristic of repacking every 1024 commits can leave a
1018 # lot of unpacked data. If there is more than 1MB worth of
1019 # not-packed objects, repack once more.
1020 my $line = `git count-objects`;
1021 if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1022 my ($n_objects, $kb) = ($1, $2);
1023 1024 < $kb
1024 and system(qw(git repack -a -d));
1027 foreach my $git_index (values %index) {
1028 if ($git_index ne "$git_dir/index") {
1029 unlink($git_index);
1033 if (defined $orig_git_index) {
1034 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1035 } else {
1036 delete $ENV{GIT_INDEX_FILE};
1039 # Now switch back to the branch we were in before all of this happened
1040 if ($orig_branch) {
1041 print "DONE.\n" if $opt_v;
1042 if ($opt_i) {
1043 exit 0;
1045 my $tip_at_end = `git rev-parse --verify HEAD`;
1046 if ($tip_at_start ne $tip_at_end) {
1047 for ($tip_at_start, $tip_at_end) { chomp; }
1048 print "Fetched into the current branch.\n" if $opt_v;
1049 system(qw(git read-tree -u -m),
1050 $tip_at_start, $tip_at_end);
1051 die "Fast-forward update failed: $?\n" if $?;
1053 else {
1054 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1055 die "Could not merge $opt_o into the current branch.\n" if $?;
1057 } else {
1058 $orig_branch = "master";
1059 print "DONE; creating $orig_branch branch\n" if $opt_v;
1060 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1061 unless defined get_headref('refs/heads/master');
1062 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1063 if ($opt_r && $opt_o ne 'HEAD');
1064 system('git', 'update-ref', 'HEAD', "$orig_branch");
1065 unless ($opt_i) {
1066 system(qw(git checkout -f));
1067 die "checkout failed: $?\n" if $?;