vcs-svn: implement text-delta handling
[git/debian.git] / git-cvsimport.perl
blobbbf327f3e891cacdd14c0b18b51702ce14b2b6df
1 #!/usr/bin/perl
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to aggregate CVS check-ins into related changes.
7 # Fortunately, "cvsps" does that for us; all we have to do is to parse
8 # its output.
10 # Checking out the files is done by a single long-running CVS connection
11 # / server process.
13 # The head revision is on branch "origin" by default.
14 # You can change that with the '-o' option.
16 use 5.008;
17 use strict;
18 use warnings;
19 use Getopt::Long;
20 use File::Spec;
21 use File::Temp qw(tempfile tmpnam);
22 use File::Path qw(mkpath);
23 use File::Basename qw(basename dirname);
24 use Time::Local;
25 use IO::Socket;
26 use IO::Pipe;
27 use POSIX qw(strftime dup2 ENOENT);
28 use IPC::Open2;
30 $SIG{'PIPE'}="IGNORE";
31 $ENV{'TZ'}="UTC";
33 our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
34 my (%conv_author_name, %conv_author_email);
36 sub usage(;$) {
37 my $msg = shift;
38 print(STDERR "Error: $msg\n") if $msg;
39 print STDERR <<END;
40 Usage: git cvsimport # fetch/update GIT from CVS
41 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
42 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
43 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
44 [-r remote] [-R] [CVS_module]
45 END
46 exit(1);
49 sub read_author_info($) {
50 my ($file) = @_;
51 my $user;
52 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
54 while (<$f>) {
55 # Expected format is this:
56 # exon=Andreas Ericsson <ae@op5.se>
57 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
58 $user = $1;
59 $conv_author_name{$user} = $2;
60 $conv_author_email{$user} = $3;
62 # However, we also read from CVSROOT/users format
63 # to ease migration.
64 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
65 my $mapped;
66 ($user, $mapped) = ($1, $3);
67 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
68 $conv_author_name{$user} = $1;
69 $conv_author_email{$user} = $2;
71 elsif ($mapped =~ /^<?(.*)>?$/) {
72 $conv_author_name{$user} = $user;
73 $conv_author_email{$user} = $1;
76 # NEEDSWORK: Maybe warn on unrecognized lines?
78 close ($f);
81 sub write_author_info($) {
82 my ($file) = @_;
83 open my $f, '>', $file or
84 die("Failed to open $file for writing: $!");
86 foreach (keys %conv_author_name) {
87 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
89 close ($f);
92 # convert getopts specs for use by git config
93 my %longmap = (
94 'A:' => 'authors-file',
95 'M:' => 'merge-regex',
96 'P:' => undef,
97 'R' => 'track-revisions',
98 'S:' => 'ignore-paths',
101 sub read_repo_config {
102 # Split the string between characters, unless there is a ':'
103 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
104 my @opts = split(/ *(?!:)/, shift);
105 foreach my $o (@opts) {
106 my $key = $o;
107 $key =~ s/://g;
108 my $arg = 'git config';
109 $arg .= ' --bool' if ($o !~ /:$/);
110 my $ckey = $key;
112 if (exists $longmap{$o}) {
113 # An uppercase option like -R cannot be
114 # expressed in the configuration, as the
115 # variable names are downcased.
116 $ckey = $longmap{$o};
117 next if (! defined $ckey);
118 $ckey =~ s/-//g;
120 chomp(my $tmp = `$arg --get cvsimport.$ckey`);
121 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
122 no strict 'refs';
123 my $opt_name = "opt_" . $key;
124 if (!$$opt_name) {
125 $$opt_name = $tmp;
131 my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
132 read_repo_config($opts);
133 Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
135 # turn the Getopt::Std specification in a Getopt::Long one,
136 # with support for multiple -M options
137 GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
138 or usage();
139 usage if $opt_h;
141 if (@ARGV == 0) {
142 chomp(my $module = `git config --get cvsimport.module`);
143 push(@ARGV, $module) if $? == 0;
145 @ARGV <= 1 or usage("You can't specify more than one CVS module");
147 if ($opt_d) {
148 $ENV{"CVSROOT"} = $opt_d;
149 } elsif (-f 'CVS/Root') {
150 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
151 $opt_d = <$f>;
152 chomp $opt_d;
153 close $f;
154 $ENV{"CVSROOT"} = $opt_d;
155 } elsif ($ENV{"CVSROOT"}) {
156 $opt_d = $ENV{"CVSROOT"};
157 } else {
158 usage("CVSROOT needs to be set");
160 $opt_s ||= "-";
161 $opt_a ||= 0;
163 my $git_tree = $opt_C;
164 $git_tree ||= ".";
166 my $remote;
167 if (defined $opt_r) {
168 $remote = 'refs/remotes/' . $opt_r;
169 $opt_o ||= "master";
170 } else {
171 $opt_o ||= "origin";
172 $remote = 'refs/heads';
175 my $cvs_tree;
176 if ($#ARGV == 0) {
177 $cvs_tree = $ARGV[0];
178 } elsif (-f 'CVS/Repository') {
179 open my $f, '<', 'CVS/Repository' or
180 die 'Failed to open CVS/Repository';
181 $cvs_tree = <$f>;
182 chomp $cvs_tree;
183 close $f;
184 } else {
185 usage("CVS module has to be specified");
188 our @mergerx = ();
189 if ($opt_m) {
190 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
192 if (@opt_M) {
193 push (@mergerx, map { qr/$_/ } @opt_M);
196 # Remember UTC of our starting time
197 # we'll want to avoid importing commits
198 # that are too recent
199 our $starttime = time();
201 select(STDERR); $|=1; select(STDOUT);
204 package CVSconn;
205 # Basic CVS dialog.
206 # We're only interested in connecting and downloading, so ...
208 use File::Spec;
209 use File::Temp qw(tempfile);
210 use POSIX qw(strftime dup2);
212 sub new {
213 my ($what,$repo,$subdir) = @_;
214 $what=ref($what) if ref($what);
216 my $self = {};
217 $self->{'buffer'} = "";
218 bless($self,$what);
220 $repo =~ s#/+$##;
221 $self->{'fullrep'} = $repo;
222 $self->conn();
224 $self->{'subdir'} = $subdir;
225 $self->{'lines'} = undef;
227 return $self;
230 sub conn {
231 my $self = shift;
232 my $repo = $self->{'fullrep'};
233 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
234 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
236 my ($proxyhost,$proxyport);
237 if ($param && ($param =~ m/proxy=([^;]+)/)) {
238 $proxyhost = $1;
239 # Default proxyport, if not specified, is 8080.
240 $proxyport = 8080;
241 if ($ENV{"CVS_PROXY_PORT"}) {
242 $proxyport = $ENV{"CVS_PROXY_PORT"};
244 if ($param =~ m/proxyport=([^;]+)/) {
245 $proxyport = $1;
248 $repo ||= '/';
250 # if username is not explicit in CVSROOT, then use current user, as cvs would
251 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
252 my $rr2 = "-";
253 unless ($port) {
254 $rr2 = ":pserver:$user\@$serv:$repo";
255 $port=2401;
257 my $rr = ":pserver:$user\@$serv:$port$repo";
259 if ($pass) {
260 $pass = $self->_scramble($pass);
261 } else {
262 open(H,$ENV{'HOME'}."/.cvspass") and do {
263 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
264 while (<H>) {
265 chomp;
266 s/^\/\d+\s+//;
267 my ($w,$p) = split(/\s/,$_,2);
268 if ($w eq $rr or $w eq $rr2) {
269 $pass = $p;
270 last;
274 $pass = "A" unless $pass;
277 my ($s, $rep);
278 if ($proxyhost) {
280 # Use a HTTP Proxy. Only works for HTTP proxies that
281 # don't require user authentication
283 # See: http://www.ietf.org/rfc/rfc2817.txt
285 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
286 die "Socket to $proxyhost: $!\n" unless defined $s;
287 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
288 or die "Write to $proxyhost: $!\n";
289 $s->flush();
291 $rep = <$s>;
293 # The answer should look like 'HTTP/1.x 2yy ....'
294 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
295 die "Proxy connect: $rep\n";
297 # Skip up to the empty line of the proxy server output
298 # including the response headers.
299 while ($rep = <$s>) {
300 last if (!defined $rep ||
301 $rep eq "\n" ||
302 $rep eq "\r\n");
304 } else {
305 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
306 die "Socket to $serv: $!\n" unless defined $s;
309 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
310 or die "Write to $serv: $!\n";
311 $s->flush();
313 $rep = <$s>;
315 if ($rep ne "I LOVE YOU\n") {
316 $rep="<unknown>" unless $rep;
317 die "AuthReply: $rep\n";
319 $self->{'socketo'} = $s;
320 $self->{'socketi'} = $s;
321 } else { # local or ext: Fork off our own cvs server.
322 my $pr = IO::Pipe->new();
323 my $pw = IO::Pipe->new();
324 my $pid = fork();
325 die "Fork: $!\n" unless defined $pid;
326 my $cvs = 'cvs';
327 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
328 my $rsh = 'rsh';
329 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
331 my @cvs = ($cvs, 'server');
332 my ($local, $user, $host);
333 $local = $repo =~ s/:local://;
334 if (!$local) {
335 $repo =~ s/:ext://;
336 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
337 ($user, $host) = ($1, $2);
339 if (!$local) {
340 if ($user) {
341 unshift @cvs, $rsh, '-l', $user, $host;
342 } else {
343 unshift @cvs, $rsh, $host;
347 unless ($pid) {
348 $pr->writer();
349 $pw->reader();
350 dup2($pw->fileno(),0);
351 dup2($pr->fileno(),1);
352 $pr->close();
353 $pw->close();
354 exec(@cvs);
356 $pw->writer();
357 $pr->reader();
358 $self->{'socketo'} = $pw;
359 $self->{'socketi'} = $pr;
361 $self->{'socketo'}->write("Root $repo\n");
363 # Trial and error says that this probably is the minimum set
364 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
366 $self->{'socketo'}->write("valid-requests\n");
367 $self->{'socketo'}->flush();
369 my $rep=$self->readline();
370 die "Failed to read from server" unless defined $rep;
371 chomp($rep);
372 if ($rep !~ s/^Valid-requests\s*//) {
373 $rep="<unknown>" unless $rep;
374 die "Expected Valid-requests from server, but got: $rep\n";
376 chomp(my $res=$self->readline());
377 die "validReply: $res\n" if $res ne "ok";
379 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
380 $self->{'repo'} = $repo;
383 sub readline {
384 my ($self) = @_;
385 return $self->{'socketi'}->getline();
388 sub _file {
389 # Request a file with a given revision.
390 # Trial and error says this is a good way to do it. :-/
391 my ($self,$fn,$rev) = @_;
392 $self->{'socketo'}->write("Argument -N\n") or return undef;
393 $self->{'socketo'}->write("Argument -P\n") or return undef;
394 # -kk: Linus' version doesn't use it - defaults to off
395 if ($opt_k) {
396 $self->{'socketo'}->write("Argument -kk\n") or return undef;
398 $self->{'socketo'}->write("Argument -r\n") or return undef;
399 $self->{'socketo'}->write("Argument $rev\n") or return undef;
400 $self->{'socketo'}->write("Argument --\n") or return undef;
401 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
402 $self->{'socketo'}->write("Directory .\n") or return undef;
403 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
404 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
405 $self->{'socketo'}->write("co\n") or return undef;
406 $self->{'socketo'}->flush() or return undef;
407 $self->{'lines'} = 0;
408 return 1;
410 sub _line {
411 # Read a line from the server.
412 # ... except that 'line' may be an entire file. ;-)
413 my ($self, $fh) = @_;
414 die "Not in lines" unless defined $self->{'lines'};
416 my $line;
417 my $res=0;
418 while (defined($line = $self->readline())) {
419 # M U gnupg-cvs-rep/AUTHORS
420 # Updated gnupg-cvs-rep/
421 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
422 # /AUTHORS/1.1///T1.1
423 # u=rw,g=rw,o=rw
425 # ok
427 if ($line =~ s/^(?:Created|Updated) //) {
428 $line = $self->readline(); # path
429 $line = $self->readline(); # Entries line
430 my $mode = $self->readline(); chomp $mode;
431 $self->{'mode'} = $mode;
432 defined (my $cnt = $self->readline())
433 or die "EOF from server after 'Changed'\n";
434 chomp $cnt;
435 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
436 $line="";
437 $res = $self->_fetchfile($fh, $cnt);
438 } elsif ($line =~ s/^ //) {
439 print $fh $line;
440 $res += length($line);
441 } elsif ($line =~ /^M\b/) {
442 # output, do nothing
443 } elsif ($line =~ /^Mbinary\b/) {
444 my $cnt;
445 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
446 chomp $cnt;
447 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
448 $line="";
449 $res += $self->_fetchfile($fh, $cnt);
450 } else {
451 chomp $line;
452 if ($line eq "ok") {
453 # print STDERR "S: ok (".length($res).")\n";
454 return $res;
455 } elsif ($line =~ s/^E //) {
456 # print STDERR "S: $line\n";
457 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
458 $line = $self->readline(); # filename
459 $line = $self->readline(); # OK
460 chomp $line;
461 die "Unknown: $line" if $line ne "ok";
462 return -1;
463 } else {
464 die "Unknown: $line\n";
468 return undef;
470 sub file {
471 my ($self,$fn,$rev) = @_;
472 my $res;
474 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
475 DIR => File::Spec->tmpdir(), UNLINK => 1);
477 $self->_file($fn,$rev) and $res = $self->_line($fh);
479 if (!defined $res) {
480 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
481 truncate $fh, 0;
482 $self->conn();
483 $self->_file($fn,$rev) or die "No file command send";
484 $res = $self->_line($fh);
485 die "Retry failed" unless defined $res;
487 close ($fh);
489 return ($name, $res);
491 sub _fetchfile {
492 my ($self, $fh, $cnt) = @_;
493 my $res = 0;
494 my $bufsize = 1024 * 1024;
495 while ($cnt) {
496 if ($bufsize > $cnt) {
497 $bufsize = $cnt;
499 my $buf;
500 my $num = $self->{'socketi'}->read($buf,$bufsize);
501 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
502 print $fh $buf;
503 $res += $num;
504 $cnt -= $num;
506 return $res;
509 sub _scramble {
510 my ($self, $pass) = @_;
511 my $scrambled = "A";
513 return $scrambled unless $pass;
515 my $pass_len = length($pass);
516 my @pass_arr = split("", $pass);
517 my $i;
519 # from cvs/src/scramble.c
520 my @shifts = (
521 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
522 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
523 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
524 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
525 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
526 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
527 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
528 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
529 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
530 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
531 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
532 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
533 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
534 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
535 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
536 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
539 for ($i = 0; $i < $pass_len; $i++) {
540 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
543 return $scrambled;
546 package main;
548 my $cvs = CVSconn->new($opt_d, $cvs_tree);
551 sub pdate($) {
552 my ($d) = @_;
553 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
554 or die "Unparseable date: $d\n";
555 my $y=$1; $y-=1900 if $y>1900;
556 return timegm($6||0,$5,$4,$3,$2-1,$y);
559 sub pmode($) {
560 my ($mode) = @_;
561 my $m = 0;
562 my $mm = 0;
563 my $um = 0;
564 for my $x(split(//,$mode)) {
565 if ($x eq ",") {
566 $m |= $mm&$um;
567 $mm = 0;
568 $um = 0;
569 } elsif ($x eq "u") { $um |= 0700;
570 } elsif ($x eq "g") { $um |= 0070;
571 } elsif ($x eq "o") { $um |= 0007;
572 } elsif ($x eq "r") { $mm |= 0444;
573 } elsif ($x eq "w") { $mm |= 0222;
574 } elsif ($x eq "x") { $mm |= 0111;
575 } elsif ($x eq "=") { # do nothing
576 } else { die "Unknown mode: $mode\n";
579 $m |= $mm&$um;
580 return $m;
583 sub getwd() {
584 my $pwd = `pwd`;
585 chomp $pwd;
586 return $pwd;
589 sub is_sha1 {
590 my $s = shift;
591 return $s =~ /^[a-f0-9]{40}$/;
594 sub get_headref ($) {
595 my $name = shift;
596 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
597 return undef unless $? == 0;
598 chomp $r;
599 return $r;
602 my $user_filename_prepend = '';
603 sub munge_user_filename {
604 my $name = shift;
605 return File::Spec->file_name_is_absolute($name) ?
606 $name :
607 $user_filename_prepend . $name;
610 -d $git_tree
611 or mkdir($git_tree,0777)
612 or die "Could not create $git_tree: $!";
613 if ($git_tree ne '.') {
614 $user_filename_prepend = getwd() . '/';
615 chdir($git_tree);
618 my $last_branch = "";
619 my $orig_branch = "";
620 my %branch_date;
621 my $tip_at_start = undef;
623 my $git_dir = $ENV{"GIT_DIR"} || ".git";
624 $git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
625 $ENV{"GIT_DIR"} = $git_dir;
626 my $orig_git_index;
627 $orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
629 my %index; # holds filenames of one index per branch
631 unless (-d $git_dir) {
632 system(qw(git init));
633 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
634 system(qw(git read-tree --empty));
635 die "Cannot init an empty tree: $?\n" if $?;
637 $last_branch = $opt_o;
638 $orig_branch = "";
639 } else {
640 open(F, "-|", qw(git symbolic-ref HEAD)) or
641 die "Cannot run git symbolic-ref: $!\n";
642 chomp ($last_branch = <F>);
643 $last_branch = basename($last_branch);
644 close(F);
645 unless ($last_branch) {
646 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
647 $last_branch = "master";
649 $orig_branch = $last_branch;
650 $tip_at_start = `git rev-parse --verify HEAD`;
652 # Get the last import timestamps
653 my $fmt = '($ref, $author) = (%(refname), %(author));';
654 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
655 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
656 while (defined(my $entry = <H>)) {
657 my ($ref, $author);
658 eval($entry) || die "cannot eval refs list: $@";
659 my ($head) = ($ref =~ m|^$remote/(.*)|);
660 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
661 $branch_date{$head} = $1;
663 close(H);
664 if (!exists $branch_date{$opt_o}) {
665 die "Branch '$opt_o' does not exist.\n".
666 "Either use the correct '-o branch' option,\n".
667 "or import to a new repository.\n";
671 -d $git_dir
672 or die "Could not create git subdir ($git_dir).\n";
674 # now we read (and possibly save) author-info as well
675 -f "$git_dir/cvs-authors" and
676 read_author_info("$git_dir/cvs-authors");
677 if ($opt_A) {
678 read_author_info(munge_user_filename($opt_A));
679 write_author_info("$git_dir/cvs-authors");
682 # open .git/cvs-revisions, if requested
683 open my $revision_map, '>>', "$git_dir/cvs-revisions"
684 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
685 if defined $opt_R;
689 # run cvsps into a file unless we are getting
690 # it passed as a file via $opt_P
692 my $cvspsfile;
693 unless ($opt_P) {
694 print "Running cvsps...\n" if $opt_v;
695 my $pid = open(CVSPS,"-|");
696 my $cvspsfh;
697 die "Cannot fork: $!\n" unless defined $pid;
698 unless ($pid) {
699 my @opt;
700 @opt = split(/,/,$opt_p) if defined $opt_p;
701 unshift @opt, '-z', $opt_z if defined $opt_z;
702 unshift @opt, '-q' unless defined $opt_v;
703 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
704 push @opt, '--cvs-direct';
706 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
707 die "Could not start cvsps: $!\n";
709 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
710 DIR => File::Spec->tmpdir());
711 while (<CVSPS>) {
712 print $cvspsfh $_;
714 close CVSPS;
715 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
716 close $cvspsfh;
717 } else {
718 $cvspsfile = munge_user_filename($opt_P);
721 open(CVS, "<$cvspsfile") or die $!;
723 ## cvsps output:
724 #---------------------
725 #PatchSet 314
726 #Date: 1999/09/18 13:03:59
727 #Author: wkoch
728 #Branch: STABLE-BRANCH-1-0
729 #Ancestor branch: HEAD
730 #Tag: (none)
731 #Log:
732 # See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
733 #Members:
734 # README:1.57->1.57.2.1
735 # VERSION:1.96->1.96.2.1
737 #---------------------
739 my $state = 0;
741 sub update_index (\@\@) {
742 my $old = shift;
743 my $new = shift;
744 open(my $fh, '|-', qw(git update-index -z --index-info))
745 or die "unable to open git update-index: $!";
746 print $fh
747 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
748 @$old),
749 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
750 @$new)
751 or die "unable to write to git update-index: $!";
752 close $fh
753 or die "unable to write to git update-index: $!";
754 $? and die "git update-index reported error: $?";
757 sub write_tree () {
758 open(my $fh, '-|', qw(git write-tree))
759 or die "unable to open git write-tree: $!";
760 chomp(my $tree = <$fh>);
761 is_sha1($tree)
762 or die "Cannot get tree id ($tree): $!";
763 close($fh)
764 or die "Error running git write-tree: $?\n";
765 print "Tree ID $tree\n" if $opt_v;
766 return $tree;
769 my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
770 my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
772 # commits that cvsps cannot place anywhere...
773 $ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
775 sub commit {
776 if ($branch eq $opt_o && !$index{branch} &&
777 !get_headref("$remote/$branch")) {
778 # looks like an initial commit
779 # use the index primed by git init
780 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
781 $index{$branch} = "$git_dir/index";
782 } else {
783 # use an index per branch to speed up
784 # imports of projects with many branches
785 unless ($index{$branch}) {
786 $index{$branch} = tmpnam();
787 $ENV{GIT_INDEX_FILE} = $index{$branch};
788 if ($ancestor) {
789 system("git", "read-tree", "$remote/$ancestor");
790 } else {
791 system("git", "read-tree", "$remote/$branch");
793 die "read-tree failed: $?\n" if $?;
796 $ENV{GIT_INDEX_FILE} = $index{$branch};
798 update_index(@old, @new);
799 @old = @new = ();
800 my $tree = write_tree();
801 my $parent = get_headref("$remote/$last_branch");
802 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
804 my @commit_args;
805 push @commit_args, ("-p", $parent) if $parent;
807 # loose detection of merges
808 # based on the commit msg
809 foreach my $rx (@mergerx) {
810 next unless $logmsg =~ $rx && $1;
811 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
812 if (my $sha1 = get_headref("$remote/$mparent")) {
813 push @commit_args, '-p', "$remote/$mparent";
814 print "Merge parent branch: $mparent\n" if $opt_v;
818 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
819 $ENV{GIT_AUTHOR_NAME} = $author_name;
820 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
821 $ENV{GIT_AUTHOR_DATE} = $commit_date;
822 $ENV{GIT_COMMITTER_NAME} = $author_name;
823 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
824 $ENV{GIT_COMMITTER_DATE} = $commit_date;
825 my $pid = open2(my $commit_read, my $commit_write,
826 'git', 'commit-tree', $tree, @commit_args);
828 # compatibility with git2cvs
829 substr($logmsg,32767) = "" if length($logmsg) > 32767;
830 $logmsg =~ s/[\s\n]+\z//;
832 if (@skipped) {
833 $logmsg .= "\n\n\nSKIPPED:\n\t";
834 $logmsg .= join("\n\t", @skipped) . "\n";
835 @skipped = ();
838 print($commit_write "$logmsg\n") && close($commit_write)
839 or die "Error writing to git commit-tree: $!\n";
841 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
842 chomp(my $cid = <$commit_read>);
843 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
844 print "Commit ID $cid\n" if $opt_v;
845 close($commit_read);
847 waitpid($pid,0);
848 die "Error running git commit-tree: $?\n" if $?;
850 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
851 or die "Cannot write branch $branch for update: $!\n";
853 if ($revision_map) {
854 print $revision_map "@$_ $cid\n" for @commit_revisions;
856 @commit_revisions = ();
858 if ($tag) {
859 my ($xtag) = $tag;
860 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
861 $xtag =~ tr/_/\./ if ( $opt_u );
862 $xtag =~ s/[\/]/$opt_s/g;
863 $xtag =~ s/\[//g;
865 system('git' , 'tag', '-f', $xtag, $cid) == 0
866 or die "Cannot create tag $xtag: $!\n";
868 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
872 my $commitcount = 1;
873 while (<CVS>) {
874 chomp;
875 if ($state == 0 and /^-+$/) {
876 $state = 1;
877 } elsif ($state == 0) {
878 $state = 1;
879 redo;
880 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
881 $patchset = 0+$_;
882 $state=2;
883 } elsif ($state == 2 and s/^Date:\s+//) {
884 $date = pdate($_);
885 unless ($date) {
886 print STDERR "Could not parse date: $_\n";
887 $state=0;
888 next;
890 $state=3;
891 } elsif ($state == 3 and s/^Author:\s+//) {
892 s/\s+$//;
893 if (/^(.*?)\s+<(.*)>/) {
894 ($author_name, $author_email) = ($1, $2);
895 } elsif ($conv_author_name{$_}) {
896 $author_name = $conv_author_name{$_};
897 $author_email = $conv_author_email{$_};
898 } else {
899 $author_name = $author_email = $_;
901 $state = 4;
902 } elsif ($state == 4 and s/^Branch:\s+//) {
903 s/\s+$//;
904 tr/_/\./ if ( $opt_u );
905 s/[\/]/$opt_s/g;
906 $branch = $_;
907 $state = 5;
908 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
909 s/\s+$//;
910 $ancestor = $_;
911 $ancestor = $opt_o if $ancestor eq "HEAD";
912 $state = 6;
913 } elsif ($state == 5) {
914 $ancestor = undef;
915 $state = 6;
916 redo;
917 } elsif ($state == 6 and s/^Tag:\s+//) {
918 s/\s+$//;
919 if ($_ eq "(none)") {
920 $tag = undef;
921 } else {
922 $tag = $_;
924 $state = 7;
925 } elsif ($state == 7 and /^Log:/) {
926 $logmsg = "";
927 $state = 8;
928 } elsif ($state == 8 and /^Members:/) {
929 $branch = $opt_o if $branch eq "HEAD";
930 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
931 # skip
932 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
933 $state = 11;
934 next;
936 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
937 # skip if the commit is too recent
938 # given that the cvsps default fuzz is 300s, we give ourselves another
939 # 300s just in case -- this also prevents skipping commits
940 # due to server clock drift
941 print "skip patchset $patchset: $date too recent\n" if $opt_v;
942 $state = 11;
943 next;
945 if (exists $ignorebranch{$branch}) {
946 print STDERR "Skipping $branch\n";
947 $state = 11;
948 next;
950 if ($ancestor) {
951 if ($ancestor eq $branch) {
952 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
953 $ancestor = $opt_o;
955 if (defined get_headref("$remote/$branch")) {
956 print STDERR "Branch $branch already exists!\n";
957 $state=11;
958 next;
960 my $id = get_headref("$remote/$ancestor");
961 if (!$id) {
962 print STDERR "Branch $ancestor does not exist!\n";
963 $ignorebranch{$branch} = 1;
964 $state=11;
965 next;
968 system(qw(git update-ref -m cvsimport),
969 "$remote/$branch", $id);
970 if($? != 0) {
971 print STDERR "Could not create branch $branch\n";
972 $ignorebranch{$branch} = 1;
973 $state=11;
974 next;
977 $last_branch = $branch if $branch ne $last_branch;
978 $state = 9;
979 } elsif ($state == 8) {
980 $logmsg .= "$_\n";
981 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
982 # VERSION:1.96->1.96.2.1
983 my $init = ($2 eq "INITIAL");
984 my $fn = $1;
985 my $rev = $3;
986 $fn =~ s#^/+##;
987 if ($opt_S && $fn =~ m/$opt_S/) {
988 print "SKIPPING $fn v $rev\n";
989 push(@skipped, $fn);
990 next;
992 push @commit_revisions, [$fn, $rev];
993 print "Fetching $fn v $rev\n" if $opt_v;
994 my ($tmpname, $size) = $cvs->file($fn,$rev);
995 if ($size == -1) {
996 push(@old,$fn);
997 print "Drop $fn\n" if $opt_v;
998 } else {
999 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1000 my $pid = open(my $F, '-|');
1001 die $! unless defined $pid;
1002 if (!$pid) {
1003 exec("git", "hash-object", "-w", $tmpname)
1004 or die "Cannot create object: $!\n";
1006 my $sha = <$F>;
1007 chomp $sha;
1008 close $F;
1009 my $mode = pmode($cvs->{'mode'});
1010 push(@new,[$mode, $sha, $fn]); # may be resurrected!
1012 unlink($tmpname);
1013 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1014 my $fn = $1;
1015 my $rev = $2;
1016 $fn =~ s#^/+##;
1017 push @commit_revisions, [$fn, $rev];
1018 push(@old,$fn);
1019 print "Delete $fn\n" if $opt_v;
1020 } elsif ($state == 9 and /^\s*$/) {
1021 $state = 10;
1022 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1023 $commitcount++;
1024 if ($opt_L && $commitcount > $opt_L) {
1025 last;
1027 commit();
1028 if (($commitcount & 1023) == 0) {
1029 system(qw(git repack -a -d));
1031 $state = 1;
1032 } elsif ($state == 11 and /^-+$/) {
1033 $state = 1;
1034 } elsif (/^-+$/) { # end of unknown-line processing
1035 $state = 1;
1036 } elsif ($state != 11) { # ignore stuff when skipping
1037 print STDERR "* UNKNOWN LINE * $_\n";
1040 commit() if $branch and $state != 11;
1042 unless ($opt_P) {
1043 unlink($cvspsfile);
1046 # The heuristic of repacking every 1024 commits can leave a
1047 # lot of unpacked data. If there is more than 1MB worth of
1048 # not-packed objects, repack once more.
1049 my $line = `git count-objects`;
1050 if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1051 my ($n_objects, $kb) = ($1, $2);
1052 1024 < $kb
1053 and system(qw(git repack -a -d));
1056 foreach my $git_index (values %index) {
1057 if ($git_index ne "$git_dir/index") {
1058 unlink($git_index);
1062 if (defined $orig_git_index) {
1063 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1064 } else {
1065 delete $ENV{GIT_INDEX_FILE};
1068 # Now switch back to the branch we were in before all of this happened
1069 if ($orig_branch) {
1070 print "DONE.\n" if $opt_v;
1071 if ($opt_i) {
1072 exit 0;
1074 my $tip_at_end = `git rev-parse --verify HEAD`;
1075 if ($tip_at_start ne $tip_at_end) {
1076 for ($tip_at_start, $tip_at_end) { chomp; }
1077 print "Fetched into the current branch.\n" if $opt_v;
1078 system(qw(git read-tree -u -m),
1079 $tip_at_start, $tip_at_end);
1080 die "Fast-forward update failed: $?\n" if $?;
1082 else {
1083 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1084 die "Could not merge $opt_o into the current branch.\n" if $?;
1086 } else {
1087 $orig_branch = "master";
1088 print "DONE; creating $orig_branch branch\n" if $opt_v;
1089 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1090 unless defined get_headref('refs/heads/master');
1091 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1092 if ($opt_r && $opt_o ne 'HEAD');
1093 system('git', 'update-ref', 'HEAD', "$orig_branch");
1094 unless ($opt_i) {
1095 system(qw(git checkout -f));
1096 die "checkout failed: $?\n" if $?;