submodule: Use cat instead of echo to avoid DOS line-endings
[git/dscho.git] / git-cvsimport.perl
blob8032f23202ff9a1e8e0fd2a6745021fb8f1fcf71
1 #!/usr/bin/perl
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to aggregate CVS check-ins into related changes.
7 # Fortunately, "cvsps" does that for us; all we have to do is to parse
8 # its output.
10 # Checking out the files is done by a single long-running CVS connection
11 # / server process.
13 # The head revision is on branch "origin" by default.
14 # You can change that with the '-o' option.
16 use 5.008;
17 use strict;
18 use warnings;
19 use Getopt::Long;
20 use File::Spec;
21 use File::Temp qw(tempfile tmpnam);
22 use File::Path qw(mkpath);
23 use File::Basename qw(basename dirname);
24 use Time::Local;
25 use IO::Socket;
26 use IO::Pipe;
27 use POSIX qw(strftime dup2 ENOENT);
28 use IPC::Open2;
30 $SIG{'PIPE'}="IGNORE";
31 $ENV{'TZ'}="UTC";
33 our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
34 my (%conv_author_name, %conv_author_email);
36 sub usage(;$) {
37 my $msg = shift;
38 print(STDERR "Error: $msg\n") if $msg;
39 print STDERR <<END;
40 Usage: git cvsimport # fetch/update GIT from CVS
41 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
42 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
43 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
44 [-r remote] [-R] [CVS_module]
45 END
46 exit(1);
49 sub read_author_info($) {
50 my ($file) = @_;
51 my $user;
52 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
54 while (<$f>) {
55 # Expected format is this:
56 # exon=Andreas Ericsson <ae@op5.se>
57 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
58 $user = $1;
59 $conv_author_name{$user} = $2;
60 $conv_author_email{$user} = $3;
62 # However, we also read from CVSROOT/users format
63 # to ease migration.
64 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
65 my $mapped;
66 ($user, $mapped) = ($1, $3);
67 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
68 $conv_author_name{$user} = $1;
69 $conv_author_email{$user} = $2;
71 elsif ($mapped =~ /^<?(.*)>?$/) {
72 $conv_author_name{$user} = $user;
73 $conv_author_email{$user} = $1;
76 # NEEDSWORK: Maybe warn on unrecognized lines?
78 close ($f);
81 sub write_author_info($) {
82 my ($file) = @_;
83 open my $f, '>', $file or
84 die("Failed to open $file for writing: $!");
86 foreach (keys %conv_author_name) {
87 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
89 close ($f);
92 # convert getopts specs for use by git config
93 my %longmap = (
94 'A:' => 'authors-file',
95 'M:' => 'merge-regex',
96 'P:' => undef,
97 'R' => 'track-revisions',
98 'S:' => 'ignore-paths',
101 sub read_repo_config {
102 # Split the string between characters, unless there is a ':'
103 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
104 my @opts = split(/ *(?!:)/, shift);
105 foreach my $o (@opts) {
106 my $key = $o;
107 $key =~ s/://g;
108 my $arg = 'git config';
109 $arg .= ' --bool' if ($o !~ /:$/);
110 my $ckey = $key;
112 if (exists $longmap{$o}) {
113 # An uppercase option like -R cannot be
114 # expressed in the configuration, as the
115 # variable names are downcased.
116 $ckey = $longmap{$o};
117 next if (! defined $ckey);
118 $ckey =~ s/-//g;
120 chomp(my $tmp = `$arg --get cvsimport.$ckey`);
121 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
122 no strict 'refs';
123 my $opt_name = "opt_" . $key;
124 if (!$$opt_name) {
125 $$opt_name = $tmp;
131 my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
132 read_repo_config($opts);
133 Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
135 # turn the Getopt::Std specification in a Getopt::Long one,
136 # with support for multiple -M options
137 GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
138 or usage();
139 usage if $opt_h;
141 if (@ARGV == 0) {
142 chomp(my $module = `git config --get cvsimport.module`);
143 push(@ARGV, $module) if $? == 0;
145 @ARGV <= 1 or usage("You can't specify more than one CVS module");
147 if ($opt_d) {
148 $ENV{"CVSROOT"} = $opt_d;
149 } elsif (-f 'CVS/Root') {
150 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
151 $opt_d = <$f>;
152 chomp $opt_d;
153 close $f;
154 $ENV{"CVSROOT"} = $opt_d;
155 } elsif ($ENV{"CVSROOT"}) {
156 $opt_d = $ENV{"CVSROOT"};
157 } else {
158 usage("CVSROOT needs to be set");
160 $opt_s ||= "-";
161 $opt_a ||= 0;
163 my $git_tree = $opt_C;
164 $git_tree ||= ".";
166 my $remote;
167 if (defined $opt_r) {
168 $remote = 'refs/remotes/' . $opt_r;
169 $opt_o ||= "master";
170 } else {
171 $opt_o ||= "origin";
172 $remote = 'refs/heads';
175 my $cvs_tree;
176 if ($#ARGV == 0) {
177 $cvs_tree = $ARGV[0];
178 } elsif (-f 'CVS/Repository') {
179 open my $f, '<', 'CVS/Repository' or
180 die 'Failed to open CVS/Repository';
181 $cvs_tree = <$f>;
182 chomp $cvs_tree;
183 close $f;
184 } else {
185 usage("CVS module has to be specified");
188 our @mergerx = ();
189 if ($opt_m) {
190 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
192 if (@opt_M) {
193 push (@mergerx, map { qr/$_/ } @opt_M);
196 # Remember UTC of our starting time
197 # we'll want to avoid importing commits
198 # that are too recent
199 our $starttime = time();
201 select(STDERR); $|=1; select(STDOUT);
204 package CVSconn;
205 # Basic CVS dialog.
206 # We're only interested in connecting and downloading, so ...
208 use File::Spec;
209 use File::Temp qw(tempfile);
210 use POSIX qw(strftime dup2);
212 sub new {
213 my ($what,$repo,$subdir) = @_;
214 $what=ref($what) if ref($what);
216 my $self = {};
217 $self->{'buffer'} = "";
218 bless($self,$what);
220 $repo =~ s#/+$##;
221 $self->{'fullrep'} = $repo;
222 $self->conn();
224 $self->{'subdir'} = $subdir;
225 $self->{'lines'} = undef;
227 return $self;
230 sub find_password_entry {
231 my ($cvspass, @cvsroot) = @_;
232 my ($file, $delim) = @$cvspass;
233 my $pass;
234 local ($_);
236 if (open(my $fh, $file)) {
237 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
238 CVSPASSFILE:
239 while (<$fh>) {
240 chomp;
241 s/^\/\d+\s+//;
242 my ($w, $p) = split($delim,$_,2);
243 for my $cvsroot (@cvsroot) {
244 if ($w eq $cvsroot) {
245 $pass = $p;
246 last CVSPASSFILE;
250 close($fh);
252 return $pass;
255 sub conn {
256 my $self = shift;
257 my $repo = $self->{'fullrep'};
258 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
259 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
261 my ($proxyhost,$proxyport);
262 if ($param && ($param =~ m/proxy=([^;]+)/)) {
263 $proxyhost = $1;
264 # Default proxyport, if not specified, is 8080.
265 $proxyport = 8080;
266 if ($ENV{"CVS_PROXY_PORT"}) {
267 $proxyport = $ENV{"CVS_PROXY_PORT"};
269 if ($param =~ m/proxyport=([^;]+)/) {
270 $proxyport = $1;
273 $repo ||= '/';
275 # if username is not explicit in CVSROOT, then use current user, as cvs would
276 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
277 my $rr2 = "-";
278 unless ($port) {
279 $rr2 = ":pserver:$user\@$serv:$repo";
280 $port=2401;
282 my $rr = ":pserver:$user\@$serv:$port$repo";
284 if ($pass) {
285 $pass = $self->_scramble($pass);
286 } else {
287 my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
288 [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
289 my @loc = ();
290 foreach my $cvspass (@cvspass) {
291 my $p = find_password_entry($cvspass, $rr, $rr2);
292 if ($p) {
293 push @loc, $cvspass->[0];
294 $pass = $p;
298 if (1 < @loc) {
299 die("Multiple cvs password files have ".
300 "entries for CVSROOT $opt_d: @loc");
301 } elsif (!$pass) {
302 $pass = "A";
306 my ($s, $rep);
307 if ($proxyhost) {
309 # Use a HTTP Proxy. Only works for HTTP proxies that
310 # don't require user authentication
312 # See: http://www.ietf.org/rfc/rfc2817.txt
314 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
315 die "Socket to $proxyhost: $!\n" unless defined $s;
316 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
317 or die "Write to $proxyhost: $!\n";
318 $s->flush();
320 $rep = <$s>;
322 # The answer should look like 'HTTP/1.x 2yy ....'
323 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
324 die "Proxy connect: $rep\n";
326 # Skip up to the empty line of the proxy server output
327 # including the response headers.
328 while ($rep = <$s>) {
329 last if (!defined $rep ||
330 $rep eq "\n" ||
331 $rep eq "\r\n");
333 } else {
334 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
335 die "Socket to $serv: $!\n" unless defined $s;
338 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
339 or die "Write to $serv: $!\n";
340 $s->flush();
342 $rep = <$s>;
344 if ($rep ne "I LOVE YOU\n") {
345 $rep="<unknown>" unless $rep;
346 die "AuthReply: $rep\n";
348 $self->{'socketo'} = $s;
349 $self->{'socketi'} = $s;
350 } else { # local or ext: Fork off our own cvs server.
351 my $pr = IO::Pipe->new();
352 my $pw = IO::Pipe->new();
353 my $pid = fork();
354 die "Fork: $!\n" unless defined $pid;
355 my $cvs = 'cvs';
356 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
357 my $rsh = 'rsh';
358 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
360 my @cvs = ($cvs, 'server');
361 my ($local, $user, $host);
362 $local = $repo =~ s/:local://;
363 if (!$local) {
364 $repo =~ s/:ext://;
365 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
366 ($user, $host) = ($1, $2);
368 if (!$local) {
369 if ($user) {
370 unshift @cvs, $rsh, '-l', $user, $host;
371 } else {
372 unshift @cvs, $rsh, $host;
376 unless ($pid) {
377 $pr->writer();
378 $pw->reader();
379 dup2($pw->fileno(),0);
380 dup2($pr->fileno(),1);
381 $pr->close();
382 $pw->close();
383 exec(@cvs);
385 $pw->writer();
386 $pr->reader();
387 $self->{'socketo'} = $pw;
388 $self->{'socketi'} = $pr;
390 $self->{'socketo'}->write("Root $repo\n");
392 # Trial and error says that this probably is the minimum set
393 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
395 $self->{'socketo'}->write("valid-requests\n");
396 $self->{'socketo'}->flush();
398 my $rep=$self->readline();
399 die "Failed to read from server" unless defined $rep;
400 chomp($rep);
401 if ($rep !~ s/^Valid-requests\s*//) {
402 $rep="<unknown>" unless $rep;
403 die "Expected Valid-requests from server, but got: $rep\n";
405 chomp(my $res=$self->readline());
406 die "validReply: $res\n" if $res ne "ok";
408 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
409 $self->{'repo'} = $repo;
412 sub readline {
413 my ($self) = @_;
414 return $self->{'socketi'}->getline();
417 sub _file {
418 # Request a file with a given revision.
419 # Trial and error says this is a good way to do it. :-/
420 my ($self,$fn,$rev) = @_;
421 $self->{'socketo'}->write("Argument -N\n") or return undef;
422 $self->{'socketo'}->write("Argument -P\n") or return undef;
423 # -kk: Linus' version doesn't use it - defaults to off
424 if ($opt_k) {
425 $self->{'socketo'}->write("Argument -kk\n") or return undef;
427 $self->{'socketo'}->write("Argument -r\n") or return undef;
428 $self->{'socketo'}->write("Argument $rev\n") or return undef;
429 $self->{'socketo'}->write("Argument --\n") or return undef;
430 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
431 $self->{'socketo'}->write("Directory .\n") or return undef;
432 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
433 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
434 $self->{'socketo'}->write("co\n") or return undef;
435 $self->{'socketo'}->flush() or return undef;
436 $self->{'lines'} = 0;
437 return 1;
439 sub _line {
440 # Read a line from the server.
441 # ... except that 'line' may be an entire file. ;-)
442 my ($self, $fh) = @_;
443 die "Not in lines" unless defined $self->{'lines'};
445 my $line;
446 my $res=0;
447 while (defined($line = $self->readline())) {
448 # M U gnupg-cvs-rep/AUTHORS
449 # Updated gnupg-cvs-rep/
450 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
451 # /AUTHORS/1.1///T1.1
452 # u=rw,g=rw,o=rw
454 # ok
456 if ($line =~ s/^(?:Created|Updated) //) {
457 $line = $self->readline(); # path
458 $line = $self->readline(); # Entries line
459 my $mode = $self->readline(); chomp $mode;
460 $self->{'mode'} = $mode;
461 defined (my $cnt = $self->readline())
462 or die "EOF from server after 'Changed'\n";
463 chomp $cnt;
464 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
465 $line="";
466 $res = $self->_fetchfile($fh, $cnt);
467 } elsif ($line =~ s/^ //) {
468 print $fh $line;
469 $res += length($line);
470 } elsif ($line =~ /^M\b/) {
471 # output, do nothing
472 } elsif ($line =~ /^Mbinary\b/) {
473 my $cnt;
474 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
475 chomp $cnt;
476 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
477 $line="";
478 $res += $self->_fetchfile($fh, $cnt);
479 } else {
480 chomp $line;
481 if ($line eq "ok") {
482 # print STDERR "S: ok (".length($res).")\n";
483 return $res;
484 } elsif ($line =~ s/^E //) {
485 # print STDERR "S: $line\n";
486 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
487 $line = $self->readline(); # filename
488 $line = $self->readline(); # OK
489 chomp $line;
490 die "Unknown: $line" if $line ne "ok";
491 return -1;
492 } else {
493 die "Unknown: $line\n";
497 return undef;
499 sub file {
500 my ($self,$fn,$rev) = @_;
501 my $res;
503 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
504 DIR => File::Spec->tmpdir(), UNLINK => 1);
506 $self->_file($fn,$rev) and $res = $self->_line($fh);
508 if (!defined $res) {
509 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
510 truncate $fh, 0;
511 $self->conn();
512 $self->_file($fn,$rev) or die "No file command send";
513 $res = $self->_line($fh);
514 die "Retry failed" unless defined $res;
516 close ($fh);
518 return ($name, $res);
520 sub _fetchfile {
521 my ($self, $fh, $cnt) = @_;
522 my $res = 0;
523 my $bufsize = 1024 * 1024;
524 while ($cnt) {
525 if ($bufsize > $cnt) {
526 $bufsize = $cnt;
528 my $buf;
529 my $num = $self->{'socketi'}->read($buf,$bufsize);
530 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
531 print $fh $buf;
532 $res += $num;
533 $cnt -= $num;
535 return $res;
538 sub _scramble {
539 my ($self, $pass) = @_;
540 my $scrambled = "A";
542 return $scrambled unless $pass;
544 my $pass_len = length($pass);
545 my @pass_arr = split("", $pass);
546 my $i;
548 # from cvs/src/scramble.c
549 my @shifts = (
550 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
551 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
552 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
553 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
554 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
555 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
556 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
557 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
558 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
559 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
560 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
561 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
562 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
563 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
564 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
565 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
568 for ($i = 0; $i < $pass_len; $i++) {
569 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
572 return $scrambled;
575 package main;
577 my $cvs = CVSconn->new($opt_d, $cvs_tree);
580 sub pdate($) {
581 my ($d) = @_;
582 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
583 or die "Unparseable date: $d\n";
584 my $y=$1; $y-=1900 if $y>1900;
585 return timegm($6||0,$5,$4,$3,$2-1,$y);
588 sub pmode($) {
589 my ($mode) = @_;
590 my $m = 0;
591 my $mm = 0;
592 my $um = 0;
593 for my $x(split(//,$mode)) {
594 if ($x eq ",") {
595 $m |= $mm&$um;
596 $mm = 0;
597 $um = 0;
598 } elsif ($x eq "u") { $um |= 0700;
599 } elsif ($x eq "g") { $um |= 0070;
600 } elsif ($x eq "o") { $um |= 0007;
601 } elsif ($x eq "r") { $mm |= 0444;
602 } elsif ($x eq "w") { $mm |= 0222;
603 } elsif ($x eq "x") { $mm |= 0111;
604 } elsif ($x eq "=") { # do nothing
605 } else { die "Unknown mode: $mode\n";
608 $m |= $mm&$um;
609 return $m;
612 sub getwd() {
613 my $pwd = `pwd`;
614 chomp $pwd;
615 return $pwd;
618 sub is_sha1 {
619 my $s = shift;
620 return $s =~ /^[a-f0-9]{40}$/;
623 sub get_headref ($) {
624 my $name = shift;
625 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
626 return undef unless $? == 0;
627 chomp $r;
628 return $r;
631 my $user_filename_prepend = '';
632 sub munge_user_filename {
633 my $name = shift;
634 return File::Spec->file_name_is_absolute($name) ?
635 $name :
636 $user_filename_prepend . $name;
639 -d $git_tree
640 or mkdir($git_tree,0777)
641 or die "Could not create $git_tree: $!";
642 if ($git_tree ne '.') {
643 $user_filename_prepend = getwd() . '/';
644 chdir($git_tree);
647 my $last_branch = "";
648 my $orig_branch = "";
649 my %branch_date;
650 my $tip_at_start = undef;
652 my $git_dir = $ENV{"GIT_DIR"} || ".git";
653 $git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
654 $ENV{"GIT_DIR"} = $git_dir;
655 my $orig_git_index;
656 $orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
658 my %index; # holds filenames of one index per branch
660 unless (-d $git_dir) {
661 system(qw(git init));
662 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
663 system(qw(git read-tree --empty));
664 die "Cannot init an empty tree: $?\n" if $?;
666 $last_branch = $opt_o;
667 $orig_branch = "";
668 } else {
669 open(F, "-|", qw(git symbolic-ref HEAD)) or
670 die "Cannot run git symbolic-ref: $!\n";
671 chomp ($last_branch = <F>);
672 $last_branch = basename($last_branch);
673 close(F);
674 unless ($last_branch) {
675 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
676 $last_branch = "master";
678 $orig_branch = $last_branch;
679 $tip_at_start = `git rev-parse --verify HEAD`;
681 # Get the last import timestamps
682 my $fmt = '($ref, $author) = (%(refname), %(author));';
683 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
684 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
685 while (defined(my $entry = <H>)) {
686 my ($ref, $author);
687 eval($entry) || die "cannot eval refs list: $@";
688 my ($head) = ($ref =~ m|^$remote/(.*)|);
689 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
690 $branch_date{$head} = $1;
692 close(H);
693 if (!exists $branch_date{$opt_o}) {
694 die "Branch '$opt_o' does not exist.\n".
695 "Either use the correct '-o branch' option,\n".
696 "or import to a new repository.\n";
700 -d $git_dir
701 or die "Could not create git subdir ($git_dir).\n";
703 # now we read (and possibly save) author-info as well
704 -f "$git_dir/cvs-authors" and
705 read_author_info("$git_dir/cvs-authors");
706 if ($opt_A) {
707 read_author_info(munge_user_filename($opt_A));
708 write_author_info("$git_dir/cvs-authors");
711 # open .git/cvs-revisions, if requested
712 open my $revision_map, '>>', "$git_dir/cvs-revisions"
713 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
714 if defined $opt_R;
718 # run cvsps into a file unless we are getting
719 # it passed as a file via $opt_P
721 my $cvspsfile;
722 unless ($opt_P) {
723 print "Running cvsps...\n" if $opt_v;
724 my $pid = open(CVSPS,"-|");
725 my $cvspsfh;
726 die "Cannot fork: $!\n" unless defined $pid;
727 unless ($pid) {
728 my @opt;
729 @opt = split(/,/,$opt_p) if defined $opt_p;
730 unshift @opt, '-z', $opt_z if defined $opt_z;
731 unshift @opt, '-q' unless defined $opt_v;
732 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
733 push @opt, '--cvs-direct';
735 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
736 die "Could not start cvsps: $!\n";
738 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
739 DIR => File::Spec->tmpdir());
740 while (<CVSPS>) {
741 print $cvspsfh $_;
743 close CVSPS;
744 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
745 close $cvspsfh;
746 } else {
747 $cvspsfile = munge_user_filename($opt_P);
750 open(CVS, "<$cvspsfile") or die $!;
752 ## cvsps output:
753 #---------------------
754 #PatchSet 314
755 #Date: 1999/09/18 13:03:59
756 #Author: wkoch
757 #Branch: STABLE-BRANCH-1-0
758 #Ancestor branch: HEAD
759 #Tag: (none)
760 #Log:
761 # See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
762 #Members:
763 # README:1.57->1.57.2.1
764 # VERSION:1.96->1.96.2.1
766 #---------------------
768 my $state = 0;
770 sub update_index (\@\@) {
771 my $old = shift;
772 my $new = shift;
773 open(my $fh, '|-', qw(git update-index -z --index-info))
774 or die "unable to open git update-index: $!";
775 print $fh
776 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
777 @$old),
778 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
779 @$new)
780 or die "unable to write to git update-index: $!";
781 close $fh
782 or die "unable to write to git update-index: $!";
783 $? and die "git update-index reported error: $?";
786 sub write_tree () {
787 open(my $fh, '-|', qw(git write-tree))
788 or die "unable to open git write-tree: $!";
789 chomp(my $tree = <$fh>);
790 is_sha1($tree)
791 or die "Cannot get tree id ($tree): $!";
792 close($fh)
793 or die "Error running git write-tree: $?\n";
794 print "Tree ID $tree\n" if $opt_v;
795 return $tree;
798 my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
799 my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
801 # commits that cvsps cannot place anywhere...
802 $ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
804 sub commit {
805 if ($branch eq $opt_o && !$index{branch} &&
806 !get_headref("$remote/$branch")) {
807 # looks like an initial commit
808 # use the index primed by git init
809 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
810 $index{$branch} = "$git_dir/index";
811 } else {
812 # use an index per branch to speed up
813 # imports of projects with many branches
814 unless ($index{$branch}) {
815 $index{$branch} = tmpnam();
816 $ENV{GIT_INDEX_FILE} = $index{$branch};
817 if ($ancestor) {
818 system("git", "read-tree", "$remote/$ancestor");
819 } else {
820 system("git", "read-tree", "$remote/$branch");
822 die "read-tree failed: $?\n" if $?;
825 $ENV{GIT_INDEX_FILE} = $index{$branch};
827 update_index(@old, @new);
828 @old = @new = ();
829 my $tree = write_tree();
830 my $parent = get_headref("$remote/$last_branch");
831 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
833 my @commit_args;
834 push @commit_args, ("-p", $parent) if $parent;
836 # loose detection of merges
837 # based on the commit msg
838 foreach my $rx (@mergerx) {
839 next unless $logmsg =~ $rx && $1;
840 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
841 if (my $sha1 = get_headref("$remote/$mparent")) {
842 push @commit_args, '-p', "$remote/$mparent";
843 print "Merge parent branch: $mparent\n" if $opt_v;
847 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
848 $ENV{GIT_AUTHOR_NAME} = $author_name;
849 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
850 $ENV{GIT_AUTHOR_DATE} = $commit_date;
851 $ENV{GIT_COMMITTER_NAME} = $author_name;
852 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
853 $ENV{GIT_COMMITTER_DATE} = $commit_date;
854 my $pid = open2(my $commit_read, my $commit_write,
855 'git', 'commit-tree', $tree, @commit_args);
857 # compatibility with git2cvs
858 substr($logmsg,32767) = "" if length($logmsg) > 32767;
859 $logmsg =~ s/[\s\n]+\z//;
861 if (@skipped) {
862 $logmsg .= "\n\n\nSKIPPED:\n\t";
863 $logmsg .= join("\n\t", @skipped) . "\n";
864 @skipped = ();
867 print($commit_write "$logmsg\n") && close($commit_write)
868 or die "Error writing to git commit-tree: $!\n";
870 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
871 chomp(my $cid = <$commit_read>);
872 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
873 print "Commit ID $cid\n" if $opt_v;
874 close($commit_read);
876 waitpid($pid,0);
877 die "Error running git commit-tree: $?\n" if $?;
879 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
880 or die "Cannot write branch $branch for update: $!\n";
882 if ($revision_map) {
883 print $revision_map "@$_ $cid\n" for @commit_revisions;
885 @commit_revisions = ();
887 if ($tag) {
888 my ($xtag) = $tag;
889 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
890 $xtag =~ tr/_/\./ if ( $opt_u );
891 $xtag =~ s/[\/]/$opt_s/g;
893 # See refs.c for these rules.
894 # Tag cannot contain bad chars. (See bad_ref_char in refs.c.)
895 $xtag =~ s/[ ~\^:\\\*\?\[]//g;
896 # Other bad strings for tags:
897 # (See check_refname_component in refs.c.)
898 1 while $xtag =~ s/
899 (?: \.\. # Tag cannot contain '..'.
900 | \@{ # Tag cannot contain '@{'.
901 | ^ - # Tag cannot begin with '-'.
902 | \.lock $ # Tag cannot end with '.lock'.
903 | ^ \. # Tag cannot begin...
904 | \. $ # ...or end with '.'
905 )//xg;
906 # Tag cannot be empty.
907 if ($xtag eq '') {
908 warn("warning: ignoring tag '$tag'",
909 " with invalid tagname\n");
910 return;
913 if (system('git' , 'tag', '-f', $xtag, $cid) != 0) {
914 # We did our best to sanitize the tag, but still failed
915 # for whatever reason. Bail out, and give the user
916 # enough information to understand if/how we should
917 # improve the translation in the future.
918 if ($tag ne $xtag) {
919 print "Translated '$tag' tag to '$xtag'\n";
921 die "Cannot create tag $xtag: $!\n";
924 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
928 my $commitcount = 1;
929 while (<CVS>) {
930 chomp;
931 if ($state == 0 and /^-+$/) {
932 $state = 1;
933 } elsif ($state == 0) {
934 $state = 1;
935 redo;
936 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
937 $patchset = 0+$_;
938 $state=2;
939 } elsif ($state == 2 and s/^Date:\s+//) {
940 $date = pdate($_);
941 unless ($date) {
942 print STDERR "Could not parse date: $_\n";
943 $state=0;
944 next;
946 $state=3;
947 } elsif ($state == 3 and s/^Author:\s+//) {
948 s/\s+$//;
949 if (/^(.*?)\s+<(.*)>/) {
950 ($author_name, $author_email) = ($1, $2);
951 } elsif ($conv_author_name{$_}) {
952 $author_name = $conv_author_name{$_};
953 $author_email = $conv_author_email{$_};
954 } else {
955 $author_name = $author_email = $_;
957 $state = 4;
958 } elsif ($state == 4 and s/^Branch:\s+//) {
959 s/\s+$//;
960 tr/_/\./ if ( $opt_u );
961 s/[\/]/$opt_s/g;
962 $branch = $_;
963 $state = 5;
964 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
965 s/\s+$//;
966 $ancestor = $_;
967 $ancestor = $opt_o if $ancestor eq "HEAD";
968 $state = 6;
969 } elsif ($state == 5) {
970 $ancestor = undef;
971 $state = 6;
972 redo;
973 } elsif ($state == 6 and s/^Tag:\s+//) {
974 s/\s+$//;
975 if ($_ eq "(none)") {
976 $tag = undef;
977 } else {
978 $tag = $_;
980 $state = 7;
981 } elsif ($state == 7 and /^Log:/) {
982 $logmsg = "";
983 $state = 8;
984 } elsif ($state == 8 and /^Members:/) {
985 $branch = $opt_o if $branch eq "HEAD";
986 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
987 # skip
988 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
989 $state = 11;
990 next;
992 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
993 # skip if the commit is too recent
994 # given that the cvsps default fuzz is 300s, we give ourselves another
995 # 300s just in case -- this also prevents skipping commits
996 # due to server clock drift
997 print "skip patchset $patchset: $date too recent\n" if $opt_v;
998 $state = 11;
999 next;
1001 if (exists $ignorebranch{$branch}) {
1002 print STDERR "Skipping $branch\n";
1003 $state = 11;
1004 next;
1006 if ($ancestor) {
1007 if ($ancestor eq $branch) {
1008 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
1009 $ancestor = $opt_o;
1011 if (defined get_headref("$remote/$branch")) {
1012 print STDERR "Branch $branch already exists!\n";
1013 $state=11;
1014 next;
1016 my $id = get_headref("$remote/$ancestor");
1017 if (!$id) {
1018 print STDERR "Branch $ancestor does not exist!\n";
1019 $ignorebranch{$branch} = 1;
1020 $state=11;
1021 next;
1024 system(qw(git update-ref -m cvsimport),
1025 "$remote/$branch", $id);
1026 if($? != 0) {
1027 print STDERR "Could not create branch $branch\n";
1028 $ignorebranch{$branch} = 1;
1029 $state=11;
1030 next;
1033 $last_branch = $branch if $branch ne $last_branch;
1034 $state = 9;
1035 } elsif ($state == 8) {
1036 $logmsg .= "$_\n";
1037 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1038 # VERSION:1.96->1.96.2.1
1039 my $init = ($2 eq "INITIAL");
1040 my $fn = $1;
1041 my $rev = $3;
1042 $fn =~ s#^/+##;
1043 if ($opt_S && $fn =~ m/$opt_S/) {
1044 print "SKIPPING $fn v $rev\n";
1045 push(@skipped, $fn);
1046 next;
1048 push @commit_revisions, [$fn, $rev];
1049 print "Fetching $fn v $rev\n" if $opt_v;
1050 my ($tmpname, $size) = $cvs->file($fn,$rev);
1051 if ($size == -1) {
1052 push(@old,$fn);
1053 print "Drop $fn\n" if $opt_v;
1054 } else {
1055 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1056 my $pid = open(my $F, '-|');
1057 die $! unless defined $pid;
1058 if (!$pid) {
1059 exec("git", "hash-object", "-w", $tmpname)
1060 or die "Cannot create object: $!\n";
1062 my $sha = <$F>;
1063 chomp $sha;
1064 close $F;
1065 my $mode = pmode($cvs->{'mode'});
1066 push(@new,[$mode, $sha, $fn]); # may be resurrected!
1068 unlink($tmpname);
1069 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1070 my $fn = $1;
1071 my $rev = $2;
1072 $fn =~ s#^/+##;
1073 push @commit_revisions, [$fn, $rev];
1074 push(@old,$fn);
1075 print "Delete $fn\n" if $opt_v;
1076 } elsif ($state == 9 and /^\s*$/) {
1077 $state = 10;
1078 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1079 $commitcount++;
1080 if ($opt_L && $commitcount > $opt_L) {
1081 last;
1083 commit();
1084 if (($commitcount & 1023) == 0) {
1085 system(qw(git repack -a -d));
1087 $state = 1;
1088 } elsif ($state == 11 and /^-+$/) {
1089 $state = 1;
1090 } elsif (/^-+$/) { # end of unknown-line processing
1091 $state = 1;
1092 } elsif ($state != 11) { # ignore stuff when skipping
1093 print STDERR "* UNKNOWN LINE * $_\n";
1096 commit() if $branch and $state != 11;
1098 unless ($opt_P) {
1099 unlink($cvspsfile);
1102 # The heuristic of repacking every 1024 commits can leave a
1103 # lot of unpacked data. If there is more than 1MB worth of
1104 # not-packed objects, repack once more.
1105 my $line = `git count-objects`;
1106 if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1107 my ($n_objects, $kb) = ($1, $2);
1108 1024 < $kb
1109 and system(qw(git repack -a -d));
1112 foreach my $git_index (values %index) {
1113 if ($git_index ne "$git_dir/index") {
1114 unlink($git_index);
1118 if (defined $orig_git_index) {
1119 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1120 } else {
1121 delete $ENV{GIT_INDEX_FILE};
1124 # Now switch back to the branch we were in before all of this happened
1125 if ($orig_branch) {
1126 print "DONE.\n" if $opt_v;
1127 if ($opt_i) {
1128 exit 0;
1130 my $tip_at_end = `git rev-parse --verify HEAD`;
1131 if ($tip_at_start ne $tip_at_end) {
1132 for ($tip_at_start, $tip_at_end) { chomp; }
1133 print "Fetched into the current branch.\n" if $opt_v;
1134 system(qw(git read-tree -u -m),
1135 $tip_at_start, $tip_at_end);
1136 die "Fast-forward update failed: $?\n" if $?;
1138 else {
1139 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1140 die "Could not merge $opt_o into the current branch.\n" if $?;
1142 } else {
1143 $orig_branch = "master";
1144 print "DONE; creating $orig_branch branch\n" if $opt_v;
1145 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1146 unless defined get_headref('refs/heads/master');
1147 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1148 if ($opt_r && $opt_o ne 'HEAD');
1149 system('git', 'update-ref', 'HEAD', "$orig_branch");
1150 unless ($opt_i) {
1151 system(qw(git checkout -f));
1152 die "checkout failed: $?\n" if $?;