Merge branch 'esr/maint-doc-fast-import'
[git.git] / git-cvsimport.perl
blob0a31ebd82020f3aca0020d357c2028d5e7b5e37b
1 #!/usr/bin/perl
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to aggregate CVS check-ins into related changes.
7 # Fortunately, "cvsps" does that for us; all we have to do is to parse
8 # its output.
10 # Checking out the files is done by a single long-running CVS connection
11 # / server process.
13 # The head revision is on branch "origin" by default.
14 # You can change that with the '-o' option.
16 use 5.008;
17 use strict;
18 use warnings;
19 use Getopt::Long;
20 use File::Spec;
21 use File::Temp qw(tempfile tmpnam);
22 use File::Path qw(mkpath);
23 use File::Basename qw(basename dirname);
24 use Time::Local;
25 use IO::Socket;
26 use IO::Pipe;
27 use POSIX qw(strftime tzset dup2 ENOENT);
28 use IPC::Open2;
30 $SIG{'PIPE'}="IGNORE";
31 set_timezone('UTC');
33 our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
34 my (%conv_author_name, %conv_author_email, %conv_author_tz);
36 sub usage(;$) {
37 my $msg = shift;
38 print(STDERR "Error: $msg\n") if $msg;
39 print STDERR <<END;
40 Usage: git cvsimport # fetch/update GIT from CVS
41 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
42 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
43 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
44 [-r remote] [-R] [CVS_module]
45 END
46 exit(1);
49 sub read_author_info($) {
50 my ($file) = @_;
51 my $user;
52 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
54 while (<$f>) {
55 # Expected format is this:
56 # exon=Andreas Ericsson <ae@op5.se>
57 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
58 $user = $1;
59 $conv_author_name{$user} = $2;
60 $conv_author_email{$user} = $3;
62 # or with an optional timezone:
63 # spawn=Simon Pawn <spawn@frog-pond.org> America/Chicago
64 elsif (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*(\S+?)\s*$/) {
65 $user = $1;
66 $conv_author_name{$user} = $2;
67 $conv_author_email{$user} = $3;
68 $conv_author_tz{$user} = $4;
70 # However, we also read from CVSROOT/users format
71 # to ease migration.
72 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
73 my $mapped;
74 ($user, $mapped) = ($1, $3);
75 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
76 $conv_author_name{$user} = $1;
77 $conv_author_email{$user} = $2;
79 elsif ($mapped =~ /^<?(.*)>?$/) {
80 $conv_author_name{$user} = $user;
81 $conv_author_email{$user} = $1;
84 # NEEDSWORK: Maybe warn on unrecognized lines?
86 close ($f);
89 sub write_author_info($) {
90 my ($file) = @_;
91 open my $f, '>', $file or
92 die("Failed to open $file for writing: $!");
94 foreach (keys %conv_author_name) {
95 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>";
96 print $f " $conv_author_tz{$_}" if ($conv_author_tz{$_});
97 print $f "\n";
99 close ($f);
102 # Versions of perl before 5.10.0 may not automatically check $TZ each
103 # time localtime is run (most platforms will do so only the first time).
104 # We can work around this by using tzset() to update the internal
105 # variable whenever we change the environment.
106 sub set_timezone {
107 $ENV{TZ} = shift;
108 tzset();
111 # convert getopts specs for use by git config
112 my %longmap = (
113 'A:' => 'authors-file',
114 'M:' => 'merge-regex',
115 'P:' => undef,
116 'R' => 'track-revisions',
117 'S:' => 'ignore-paths',
120 sub read_repo_config {
121 # Split the string between characters, unless there is a ':'
122 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
123 my @opts = split(/ *(?!:)/, shift);
124 foreach my $o (@opts) {
125 my $key = $o;
126 $key =~ s/://g;
127 my $arg = 'git config';
128 $arg .= ' --bool' if ($o !~ /:$/);
129 my $ckey = $key;
131 if (exists $longmap{$o}) {
132 # An uppercase option like -R cannot be
133 # expressed in the configuration, as the
134 # variable names are downcased.
135 $ckey = $longmap{$o};
136 next if (! defined $ckey);
137 $ckey =~ s/-//g;
139 chomp(my $tmp = `$arg --get cvsimport.$ckey`);
140 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
141 no strict 'refs';
142 my $opt_name = "opt_" . $key;
143 if (!$$opt_name) {
144 $$opt_name = $tmp;
150 my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
151 read_repo_config($opts);
152 Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
154 # turn the Getopt::Std specification in a Getopt::Long one,
155 # with support for multiple -M options
156 GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
157 or usage();
158 usage if $opt_h;
160 if (@ARGV == 0) {
161 chomp(my $module = `git config --get cvsimport.module`);
162 push(@ARGV, $module) if $? == 0;
164 @ARGV <= 1 or usage("You can't specify more than one CVS module");
166 if ($opt_d) {
167 $ENV{"CVSROOT"} = $opt_d;
168 } elsif (-f 'CVS/Root') {
169 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
170 $opt_d = <$f>;
171 chomp $opt_d;
172 close $f;
173 $ENV{"CVSROOT"} = $opt_d;
174 } elsif ($ENV{"CVSROOT"}) {
175 $opt_d = $ENV{"CVSROOT"};
176 } else {
177 usage("CVSROOT needs to be set");
179 $opt_s ||= "-";
180 $opt_a ||= 0;
182 my $git_tree = $opt_C;
183 $git_tree ||= ".";
185 my $remote;
186 if (defined $opt_r) {
187 $remote = 'refs/remotes/' . $opt_r;
188 $opt_o ||= "master";
189 } else {
190 $opt_o ||= "origin";
191 $remote = 'refs/heads';
194 my $cvs_tree;
195 if ($#ARGV == 0) {
196 $cvs_tree = $ARGV[0];
197 } elsif (-f 'CVS/Repository') {
198 open my $f, '<', 'CVS/Repository' or
199 die 'Failed to open CVS/Repository';
200 $cvs_tree = <$f>;
201 chomp $cvs_tree;
202 close $f;
203 } else {
204 usage("CVS module has to be specified");
207 our @mergerx = ();
208 if ($opt_m) {
209 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
211 if (@opt_M) {
212 push (@mergerx, map { qr/$_/ } @opt_M);
215 # Remember UTC of our starting time
216 # we'll want to avoid importing commits
217 # that are too recent
218 our $starttime = time();
220 select(STDERR); $|=1; select(STDOUT);
223 package CVSconn;
224 # Basic CVS dialog.
225 # We're only interested in connecting and downloading, so ...
227 use File::Spec;
228 use File::Temp qw(tempfile);
229 use POSIX qw(strftime dup2);
231 sub new {
232 my ($what,$repo,$subdir) = @_;
233 $what=ref($what) if ref($what);
235 my $self = {};
236 $self->{'buffer'} = "";
237 bless($self,$what);
239 $repo =~ s#/+$##;
240 $self->{'fullrep'} = $repo;
241 $self->conn();
243 $self->{'subdir'} = $subdir;
244 $self->{'lines'} = undef;
246 return $self;
249 sub find_password_entry {
250 my ($cvspass, @cvsroot) = @_;
251 my ($file, $delim) = @$cvspass;
252 my $pass;
253 local ($_);
255 if (open(my $fh, $file)) {
256 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
257 CVSPASSFILE:
258 while (<$fh>) {
259 chomp;
260 s/^\/\d+\s+//;
261 my ($w, $p) = split($delim,$_,2);
262 for my $cvsroot (@cvsroot) {
263 if ($w eq $cvsroot) {
264 $pass = $p;
265 last CVSPASSFILE;
269 close($fh);
271 return $pass;
274 sub conn {
275 my $self = shift;
276 my $repo = $self->{'fullrep'};
277 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
278 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
280 my ($proxyhost,$proxyport);
281 if ($param && ($param =~ m/proxy=([^;]+)/)) {
282 $proxyhost = $1;
283 # Default proxyport, if not specified, is 8080.
284 $proxyport = 8080;
285 if ($ENV{"CVS_PROXY_PORT"}) {
286 $proxyport = $ENV{"CVS_PROXY_PORT"};
288 if ($param =~ m/proxyport=([^;]+)/) {
289 $proxyport = $1;
292 $repo ||= '/';
294 # if username is not explicit in CVSROOT, then use current user, as cvs would
295 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
296 my $rr2 = "-";
297 unless ($port) {
298 $rr2 = ":pserver:$user\@$serv:$repo";
299 $port=2401;
301 my $rr = ":pserver:$user\@$serv:$port$repo";
303 if ($pass) {
304 $pass = $self->_scramble($pass);
305 } else {
306 my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
307 [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
308 my @loc = ();
309 foreach my $cvspass (@cvspass) {
310 my $p = find_password_entry($cvspass, $rr, $rr2);
311 if ($p) {
312 push @loc, $cvspass->[0];
313 $pass = $p;
317 if (1 < @loc) {
318 die("Multiple cvs password files have ".
319 "entries for CVSROOT $opt_d: @loc");
320 } elsif (!$pass) {
321 $pass = "A";
325 my ($s, $rep);
326 if ($proxyhost) {
328 # Use a HTTP Proxy. Only works for HTTP proxies that
329 # don't require user authentication
331 # See: http://www.ietf.org/rfc/rfc2817.txt
333 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
334 die "Socket to $proxyhost: $!\n" unless defined $s;
335 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
336 or die "Write to $proxyhost: $!\n";
337 $s->flush();
339 $rep = <$s>;
341 # The answer should look like 'HTTP/1.x 2yy ....'
342 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
343 die "Proxy connect: $rep\n";
345 # Skip up to the empty line of the proxy server output
346 # including the response headers.
347 while ($rep = <$s>) {
348 last if (!defined $rep ||
349 $rep eq "\n" ||
350 $rep eq "\r\n");
352 } else {
353 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
354 die "Socket to $serv: $!\n" unless defined $s;
357 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
358 or die "Write to $serv: $!\n";
359 $s->flush();
361 $rep = <$s>;
363 if ($rep ne "I LOVE YOU\n") {
364 $rep="<unknown>" unless $rep;
365 die "AuthReply: $rep\n";
367 $self->{'socketo'} = $s;
368 $self->{'socketi'} = $s;
369 } else { # local or ext: Fork off our own cvs server.
370 my $pr = IO::Pipe->new();
371 my $pw = IO::Pipe->new();
372 my $pid = fork();
373 die "Fork: $!\n" unless defined $pid;
374 my $cvs = 'cvs';
375 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
376 my $rsh = 'rsh';
377 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
379 my @cvs = ($cvs, 'server');
380 my ($local, $user, $host);
381 $local = $repo =~ s/:local://;
382 if (!$local) {
383 $repo =~ s/:ext://;
384 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
385 ($user, $host) = ($1, $2);
387 if (!$local) {
388 if ($user) {
389 unshift @cvs, $rsh, '-l', $user, $host;
390 } else {
391 unshift @cvs, $rsh, $host;
395 unless ($pid) {
396 $pr->writer();
397 $pw->reader();
398 dup2($pw->fileno(),0);
399 dup2($pr->fileno(),1);
400 $pr->close();
401 $pw->close();
402 exec(@cvs);
404 $pw->writer();
405 $pr->reader();
406 $self->{'socketo'} = $pw;
407 $self->{'socketi'} = $pr;
409 $self->{'socketo'}->write("Root $repo\n");
411 # Trial and error says that this probably is the minimum set
412 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
414 $self->{'socketo'}->write("valid-requests\n");
415 $self->{'socketo'}->flush();
417 my $rep=$self->readline();
418 die "Failed to read from server" unless defined $rep;
419 chomp($rep);
420 if ($rep !~ s/^Valid-requests\s*//) {
421 $rep="<unknown>" unless $rep;
422 die "Expected Valid-requests from server, but got: $rep\n";
424 chomp(my $res=$self->readline());
425 die "validReply: $res\n" if $res ne "ok";
427 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
428 $self->{'repo'} = $repo;
431 sub readline {
432 my ($self) = @_;
433 return $self->{'socketi'}->getline();
436 sub _file {
437 # Request a file with a given revision.
438 # Trial and error says this is a good way to do it. :-/
439 my ($self,$fn,$rev) = @_;
440 $self->{'socketo'}->write("Argument -N\n") or return undef;
441 $self->{'socketo'}->write("Argument -P\n") or return undef;
442 # -kk: Linus' version doesn't use it - defaults to off
443 if ($opt_k) {
444 $self->{'socketo'}->write("Argument -kk\n") or return undef;
446 $self->{'socketo'}->write("Argument -r\n") or return undef;
447 $self->{'socketo'}->write("Argument $rev\n") or return undef;
448 $self->{'socketo'}->write("Argument --\n") or return undef;
449 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
450 $self->{'socketo'}->write("Directory .\n") or return undef;
451 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
452 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
453 $self->{'socketo'}->write("co\n") or return undef;
454 $self->{'socketo'}->flush() or return undef;
455 $self->{'lines'} = 0;
456 return 1;
458 sub _line {
459 # Read a line from the server.
460 # ... except that 'line' may be an entire file. ;-)
461 my ($self, $fh) = @_;
462 die "Not in lines" unless defined $self->{'lines'};
464 my $line;
465 my $res=0;
466 while (defined($line = $self->readline())) {
467 # M U gnupg-cvs-rep/AUTHORS
468 # Updated gnupg-cvs-rep/
469 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
470 # /AUTHORS/1.1///T1.1
471 # u=rw,g=rw,o=rw
473 # ok
475 if ($line =~ s/^(?:Created|Updated) //) {
476 $line = $self->readline(); # path
477 $line = $self->readline(); # Entries line
478 my $mode = $self->readline(); chomp $mode;
479 $self->{'mode'} = $mode;
480 defined (my $cnt = $self->readline())
481 or die "EOF from server after 'Changed'\n";
482 chomp $cnt;
483 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
484 $line="";
485 $res = $self->_fetchfile($fh, $cnt);
486 } elsif ($line =~ s/^ //) {
487 print $fh $line;
488 $res += length($line);
489 } elsif ($line =~ /^M\b/) {
490 # output, do nothing
491 } elsif ($line =~ /^Mbinary\b/) {
492 my $cnt;
493 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
494 chomp $cnt;
495 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
496 $line="";
497 $res += $self->_fetchfile($fh, $cnt);
498 } else {
499 chomp $line;
500 if ($line eq "ok") {
501 # print STDERR "S: ok (".length($res).")\n";
502 return $res;
503 } elsif ($line =~ s/^E //) {
504 # print STDERR "S: $line\n";
505 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
506 $line = $self->readline(); # filename
507 $line = $self->readline(); # OK
508 chomp $line;
509 die "Unknown: $line" if $line ne "ok";
510 return -1;
511 } else {
512 die "Unknown: $line\n";
516 return undef;
518 sub file {
519 my ($self,$fn,$rev) = @_;
520 my $res;
522 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
523 DIR => File::Spec->tmpdir(), UNLINK => 1);
525 $self->_file($fn,$rev) and $res = $self->_line($fh);
527 if (!defined $res) {
528 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
529 truncate $fh, 0;
530 $self->conn();
531 $self->_file($fn,$rev) or die "No file command send";
532 $res = $self->_line($fh);
533 die "Retry failed" unless defined $res;
535 close ($fh);
537 return ($name, $res);
539 sub _fetchfile {
540 my ($self, $fh, $cnt) = @_;
541 my $res = 0;
542 my $bufsize = 1024 * 1024;
543 while ($cnt) {
544 if ($bufsize > $cnt) {
545 $bufsize = $cnt;
547 my $buf;
548 my $num = $self->{'socketi'}->read($buf,$bufsize);
549 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
550 print $fh $buf;
551 $res += $num;
552 $cnt -= $num;
554 return $res;
557 sub _scramble {
558 my ($self, $pass) = @_;
559 my $scrambled = "A";
561 return $scrambled unless $pass;
563 my $pass_len = length($pass);
564 my @pass_arr = split("", $pass);
565 my $i;
567 # from cvs/src/scramble.c
568 my @shifts = (
569 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
570 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
571 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
572 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
573 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
574 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
575 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
576 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
577 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
578 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
579 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
580 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
581 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
582 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
583 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
584 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
587 for ($i = 0; $i < $pass_len; $i++) {
588 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
591 return $scrambled;
594 package main;
596 my $cvs = CVSconn->new($opt_d, $cvs_tree);
599 sub pdate($) {
600 my ($d) = @_;
601 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
602 or die "Unparseable date: $d\n";
603 my $y=$1; $y-=1900 if $y>1900;
604 return timegm($6||0,$5,$4,$3,$2-1,$y);
607 sub pmode($) {
608 my ($mode) = @_;
609 my $m = 0;
610 my $mm = 0;
611 my $um = 0;
612 for my $x(split(//,$mode)) {
613 if ($x eq ",") {
614 $m |= $mm&$um;
615 $mm = 0;
616 $um = 0;
617 } elsif ($x eq "u") { $um |= 0700;
618 } elsif ($x eq "g") { $um |= 0070;
619 } elsif ($x eq "o") { $um |= 0007;
620 } elsif ($x eq "r") { $mm |= 0444;
621 } elsif ($x eq "w") { $mm |= 0222;
622 } elsif ($x eq "x") { $mm |= 0111;
623 } elsif ($x eq "=") { # do nothing
624 } else { die "Unknown mode: $mode\n";
627 $m |= $mm&$um;
628 return $m;
631 sub getwd() {
632 my $pwd = `pwd`;
633 chomp $pwd;
634 return $pwd;
637 sub is_sha1 {
638 my $s = shift;
639 return $s =~ /^[a-f0-9]{40}$/;
642 sub get_headref ($) {
643 my $name = shift;
644 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
645 return undef unless $? == 0;
646 chomp $r;
647 return $r;
650 my $user_filename_prepend = '';
651 sub munge_user_filename {
652 my $name = shift;
653 return File::Spec->file_name_is_absolute($name) ?
654 $name :
655 $user_filename_prepend . $name;
658 -d $git_tree
659 or mkdir($git_tree,0777)
660 or die "Could not create $git_tree: $!";
661 if ($git_tree ne '.') {
662 $user_filename_prepend = getwd() . '/';
663 chdir($git_tree);
666 my $last_branch = "";
667 my $orig_branch = "";
668 my %branch_date;
669 my $tip_at_start = undef;
671 my $git_dir = $ENV{"GIT_DIR"} || ".git";
672 $git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
673 $ENV{"GIT_DIR"} = $git_dir;
674 my $orig_git_index;
675 $orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
677 my %index; # holds filenames of one index per branch
679 unless (-d $git_dir) {
680 system(qw(git init));
681 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
682 system(qw(git read-tree --empty));
683 die "Cannot init an empty tree: $?\n" if $?;
685 $last_branch = $opt_o;
686 $orig_branch = "";
687 } else {
688 open(F, "-|", qw(git symbolic-ref HEAD)) or
689 die "Cannot run git symbolic-ref: $!\n";
690 chomp ($last_branch = <F>);
691 $last_branch = basename($last_branch);
692 close(F);
693 unless ($last_branch) {
694 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
695 $last_branch = "master";
697 $orig_branch = $last_branch;
698 $tip_at_start = `git rev-parse --verify HEAD`;
700 # Get the last import timestamps
701 my $fmt = '($ref, $author) = (%(refname), %(author));';
702 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
703 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
704 while (defined(my $entry = <H>)) {
705 my ($ref, $author);
706 eval($entry) || die "cannot eval refs list: $@";
707 my ($head) = ($ref =~ m|^$remote/(.*)|);
708 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
709 $branch_date{$head} = $1;
711 close(H);
712 if (!exists $branch_date{$opt_o}) {
713 die "Branch '$opt_o' does not exist.\n".
714 "Either use the correct '-o branch' option,\n".
715 "or import to a new repository.\n";
719 -d $git_dir
720 or die "Could not create git subdir ($git_dir).\n";
722 # now we read (and possibly save) author-info as well
723 -f "$git_dir/cvs-authors" and
724 read_author_info("$git_dir/cvs-authors");
725 if ($opt_A) {
726 read_author_info(munge_user_filename($opt_A));
727 write_author_info("$git_dir/cvs-authors");
730 # open .git/cvs-revisions, if requested
731 open my $revision_map, '>>', "$git_dir/cvs-revisions"
732 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
733 if defined $opt_R;
737 # run cvsps into a file unless we are getting
738 # it passed as a file via $opt_P
740 my $cvspsfile;
741 unless ($opt_P) {
742 print "Running cvsps...\n" if $opt_v;
743 my $pid = open(CVSPS,"-|");
744 my $cvspsfh;
745 die "Cannot fork: $!\n" unless defined $pid;
746 unless ($pid) {
747 my @opt;
748 @opt = split(/,/,$opt_p) if defined $opt_p;
749 unshift @opt, '-z', $opt_z if defined $opt_z;
750 unshift @opt, '-q' unless defined $opt_v;
751 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
752 push @opt, '--cvs-direct';
754 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
755 die "Could not start cvsps: $!\n";
757 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
758 DIR => File::Spec->tmpdir());
759 while (<CVSPS>) {
760 print $cvspsfh $_;
762 close CVSPS;
763 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
764 close $cvspsfh;
765 } else {
766 $cvspsfile = munge_user_filename($opt_P);
769 open(CVS, "<$cvspsfile") or die $!;
771 ## cvsps output:
772 #---------------------
773 #PatchSet 314
774 #Date: 1999/09/18 13:03:59
775 #Author: wkoch
776 #Branch: STABLE-BRANCH-1-0
777 #Ancestor branch: HEAD
778 #Tag: (none)
779 #Log:
780 # See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
781 #Members:
782 # README:1.57->1.57.2.1
783 # VERSION:1.96->1.96.2.1
785 #---------------------
787 my $state = 0;
789 sub update_index (\@\@) {
790 my $old = shift;
791 my $new = shift;
792 open(my $fh, '|-', qw(git update-index -z --index-info))
793 or die "unable to open git update-index: $!";
794 print $fh
795 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
796 @$old),
797 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
798 @$new)
799 or die "unable to write to git update-index: $!";
800 close $fh
801 or die "unable to write to git update-index: $!";
802 $? and die "git update-index reported error: $?";
805 sub write_tree () {
806 open(my $fh, '-|', qw(git write-tree))
807 or die "unable to open git write-tree: $!";
808 chomp(my $tree = <$fh>);
809 is_sha1($tree)
810 or die "Cannot get tree id ($tree): $!";
811 close($fh)
812 or die "Error running git write-tree: $?\n";
813 print "Tree ID $tree\n" if $opt_v;
814 return $tree;
817 my ($patchset,$date,$author_name,$author_email,$author_tz,$branch,$ancestor,$tag,$logmsg);
818 my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
820 # commits that cvsps cannot place anywhere...
821 $ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
823 sub commit {
824 if ($branch eq $opt_o && !$index{branch} &&
825 !get_headref("$remote/$branch")) {
826 # looks like an initial commit
827 # use the index primed by git init
828 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
829 $index{$branch} = "$git_dir/index";
830 } else {
831 # use an index per branch to speed up
832 # imports of projects with many branches
833 unless ($index{$branch}) {
834 $index{$branch} = tmpnam();
835 $ENV{GIT_INDEX_FILE} = $index{$branch};
836 if ($ancestor) {
837 system("git", "read-tree", "$remote/$ancestor");
838 } else {
839 system("git", "read-tree", "$remote/$branch");
841 die "read-tree failed: $?\n" if $?;
844 $ENV{GIT_INDEX_FILE} = $index{$branch};
846 update_index(@old, @new);
847 @old = @new = ();
848 my $tree = write_tree();
849 my $parent = get_headref("$remote/$last_branch");
850 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
852 my @commit_args;
853 push @commit_args, ("-p", $parent) if $parent;
855 # loose detection of merges
856 # based on the commit msg
857 foreach my $rx (@mergerx) {
858 next unless $logmsg =~ $rx && $1;
859 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
860 if (my $sha1 = get_headref("$remote/$mparent")) {
861 push @commit_args, '-p', "$remote/$mparent";
862 print "Merge parent branch: $mparent\n" if $opt_v;
866 set_timezone($author_tz);
867 my $commit_date = strftime("%s %z", localtime($date));
868 set_timezone('UTC');
869 $ENV{GIT_AUTHOR_NAME} = $author_name;
870 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
871 $ENV{GIT_AUTHOR_DATE} = $commit_date;
872 $ENV{GIT_COMMITTER_NAME} = $author_name;
873 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
874 $ENV{GIT_COMMITTER_DATE} = $commit_date;
875 my $pid = open2(my $commit_read, my $commit_write,
876 'git', 'commit-tree', $tree, @commit_args);
878 # compatibility with git2cvs
879 substr($logmsg,32767) = "" if length($logmsg) > 32767;
880 $logmsg =~ s/[\s\n]+\z//;
882 if (@skipped) {
883 $logmsg .= "\n\n\nSKIPPED:\n\t";
884 $logmsg .= join("\n\t", @skipped) . "\n";
885 @skipped = ();
888 print($commit_write "$logmsg\n") && close($commit_write)
889 or die "Error writing to git commit-tree: $!\n";
891 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
892 chomp(my $cid = <$commit_read>);
893 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
894 print "Commit ID $cid\n" if $opt_v;
895 close($commit_read);
897 waitpid($pid,0);
898 die "Error running git commit-tree: $?\n" if $?;
900 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
901 or die "Cannot write branch $branch for update: $!\n";
903 if ($revision_map) {
904 print $revision_map "@$_ $cid\n" for @commit_revisions;
906 @commit_revisions = ();
908 if ($tag) {
909 my ($xtag) = $tag;
910 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
911 $xtag =~ tr/_/\./ if ( $opt_u );
912 $xtag =~ s/[\/]/$opt_s/g;
914 # See refs.c for these rules.
915 # Tag cannot contain bad chars. (See bad_ref_char in refs.c.)
916 $xtag =~ s/[ ~\^:\\\*\?\[]//g;
917 # Other bad strings for tags:
918 # (See check_refname_component in refs.c.)
919 1 while $xtag =~ s/
920 (?: \.\. # Tag cannot contain '..'.
921 | \@{ # Tag cannot contain '@{'.
922 | ^ - # Tag cannot begin with '-'.
923 | \.lock $ # Tag cannot end with '.lock'.
924 | ^ \. # Tag cannot begin...
925 | \. $ # ...or end with '.'
926 )//xg;
927 # Tag cannot be empty.
928 if ($xtag eq '') {
929 warn("warning: ignoring tag '$tag'",
930 " with invalid tagname\n");
931 return;
934 if (system('git' , 'tag', '-f', $xtag, $cid) != 0) {
935 # We did our best to sanitize the tag, but still failed
936 # for whatever reason. Bail out, and give the user
937 # enough information to understand if/how we should
938 # improve the translation in the future.
939 if ($tag ne $xtag) {
940 print "Translated '$tag' tag to '$xtag'\n";
942 die "Cannot create tag $xtag: $!\n";
945 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
949 my $commitcount = 1;
950 while (<CVS>) {
951 chomp;
952 if ($state == 0 and /^-+$/) {
953 $state = 1;
954 } elsif ($state == 0) {
955 $state = 1;
956 redo;
957 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
958 $patchset = 0+$_;
959 $state=2;
960 } elsif ($state == 2 and s/^Date:\s+//) {
961 $date = pdate($_);
962 unless ($date) {
963 print STDERR "Could not parse date: $_\n";
964 $state=0;
965 next;
967 $state=3;
968 } elsif ($state == 3 and s/^Author:\s+//) {
969 $author_tz = "UTC";
970 s/\s+$//;
971 if (/^(.*?)\s+<(.*)>/) {
972 ($author_name, $author_email) = ($1, $2);
973 } elsif ($conv_author_name{$_}) {
974 $author_name = $conv_author_name{$_};
975 $author_email = $conv_author_email{$_};
976 $author_tz = $conv_author_tz{$_} if ($conv_author_tz{$_});
977 } else {
978 $author_name = $author_email = $_;
980 $state = 4;
981 } elsif ($state == 4 and s/^Branch:\s+//) {
982 s/\s+$//;
983 tr/_/\./ if ( $opt_u );
984 s/[\/]/$opt_s/g;
985 $branch = $_;
986 $state = 5;
987 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
988 s/\s+$//;
989 $ancestor = $_;
990 $ancestor = $opt_o if $ancestor eq "HEAD";
991 $state = 6;
992 } elsif ($state == 5) {
993 $ancestor = undef;
994 $state = 6;
995 redo;
996 } elsif ($state == 6 and s/^Tag:\s+//) {
997 s/\s+$//;
998 if ($_ eq "(none)") {
999 $tag = undef;
1000 } else {
1001 $tag = $_;
1003 $state = 7;
1004 } elsif ($state == 7 and /^Log:/) {
1005 $logmsg = "";
1006 $state = 8;
1007 } elsif ($state == 8 and /^Members:/) {
1008 $branch = $opt_o if $branch eq "HEAD";
1009 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
1010 # skip
1011 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
1012 $state = 11;
1013 next;
1015 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
1016 # skip if the commit is too recent
1017 # given that the cvsps default fuzz is 300s, we give ourselves another
1018 # 300s just in case -- this also prevents skipping commits
1019 # due to server clock drift
1020 print "skip patchset $patchset: $date too recent\n" if $opt_v;
1021 $state = 11;
1022 next;
1024 if (exists $ignorebranch{$branch}) {
1025 print STDERR "Skipping $branch\n";
1026 $state = 11;
1027 next;
1029 if ($ancestor) {
1030 if ($ancestor eq $branch) {
1031 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
1032 $ancestor = $opt_o;
1034 if (defined get_headref("$remote/$branch")) {
1035 print STDERR "Branch $branch already exists!\n";
1036 $state=11;
1037 next;
1039 my $id = get_headref("$remote/$ancestor");
1040 if (!$id) {
1041 print STDERR "Branch $ancestor does not exist!\n";
1042 $ignorebranch{$branch} = 1;
1043 $state=11;
1044 next;
1047 system(qw(git update-ref -m cvsimport),
1048 "$remote/$branch", $id);
1049 if($? != 0) {
1050 print STDERR "Could not create branch $branch\n";
1051 $ignorebranch{$branch} = 1;
1052 $state=11;
1053 next;
1056 $last_branch = $branch if $branch ne $last_branch;
1057 $state = 9;
1058 } elsif ($state == 8) {
1059 $logmsg .= "$_\n";
1060 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1061 # VERSION:1.96->1.96.2.1
1062 my $init = ($2 eq "INITIAL");
1063 my $fn = $1;
1064 my $rev = $3;
1065 $fn =~ s#^/+##;
1066 if ($opt_S && $fn =~ m/$opt_S/) {
1067 print "SKIPPING $fn v $rev\n";
1068 push(@skipped, $fn);
1069 next;
1071 push @commit_revisions, [$fn, $rev];
1072 print "Fetching $fn v $rev\n" if $opt_v;
1073 my ($tmpname, $size) = $cvs->file($fn,$rev);
1074 if ($size == -1) {
1075 push(@old,$fn);
1076 print "Drop $fn\n" if $opt_v;
1077 } else {
1078 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1079 my $pid = open(my $F, '-|');
1080 die $! unless defined $pid;
1081 if (!$pid) {
1082 exec("git", "hash-object", "-w", $tmpname)
1083 or die "Cannot create object: $!\n";
1085 my $sha = <$F>;
1086 chomp $sha;
1087 close $F;
1088 my $mode = pmode($cvs->{'mode'});
1089 push(@new,[$mode, $sha, $fn]); # may be resurrected!
1091 unlink($tmpname);
1092 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1093 my $fn = $1;
1094 my $rev = $2;
1095 $fn =~ s#^/+##;
1096 push @commit_revisions, [$fn, $rev];
1097 push(@old,$fn);
1098 print "Delete $fn\n" if $opt_v;
1099 } elsif ($state == 9 and /^\s*$/) {
1100 $state = 10;
1101 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1102 $commitcount++;
1103 if ($opt_L && $commitcount > $opt_L) {
1104 last;
1106 commit();
1107 if (($commitcount & 1023) == 0) {
1108 system(qw(git repack -a -d));
1110 $state = 1;
1111 } elsif ($state == 11 and /^-+$/) {
1112 $state = 1;
1113 } elsif (/^-+$/) { # end of unknown-line processing
1114 $state = 1;
1115 } elsif ($state != 11) { # ignore stuff when skipping
1116 print STDERR "* UNKNOWN LINE * $_\n";
1119 commit() if $branch and $state != 11;
1121 unless ($opt_P) {
1122 unlink($cvspsfile);
1125 # The heuristic of repacking every 1024 commits can leave a
1126 # lot of unpacked data. If there is more than 1MB worth of
1127 # not-packed objects, repack once more.
1128 my $line = `git count-objects`;
1129 if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1130 my ($n_objects, $kb) = ($1, $2);
1131 1024 < $kb
1132 and system(qw(git repack -a -d));
1135 foreach my $git_index (values %index) {
1136 if ($git_index ne "$git_dir/index") {
1137 unlink($git_index);
1141 if (defined $orig_git_index) {
1142 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1143 } else {
1144 delete $ENV{GIT_INDEX_FILE};
1147 # Now switch back to the branch we were in before all of this happened
1148 if ($orig_branch) {
1149 print "DONE.\n" if $opt_v;
1150 if ($opt_i) {
1151 exit 0;
1153 my $tip_at_end = `git rev-parse --verify HEAD`;
1154 if ($tip_at_start ne $tip_at_end) {
1155 for ($tip_at_start, $tip_at_end) { chomp; }
1156 print "Fetched into the current branch.\n" if $opt_v;
1157 system(qw(git read-tree -u -m),
1158 $tip_at_start, $tip_at_end);
1159 die "Fast-forward update failed: $?\n" if $?;
1161 else {
1162 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1163 die "Could not merge $opt_o into the current branch.\n" if $?;
1165 } else {
1166 $orig_branch = "master";
1167 print "DONE; creating $orig_branch branch\n" if $opt_v;
1168 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1169 unless defined get_headref('refs/heads/master');
1170 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1171 if ($opt_r && $opt_o ne 'HEAD');
1172 system('git', 'update-ref', 'HEAD', "$orig_branch");
1173 unless ($opt_i) {
1174 system(qw(git checkout -f));
1175 die "checkout failed: $?\n" if $?;