mailinfo: always clean up rfc822 header folding
[git.git] / git-cvsimport.perl
blob8e683e54783ab6e916bdb8ade56832ac81470648
1 #!/usr/bin/perl
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to aggregate CVS check-ins into related changes.
7 # Fortunately, "cvsps" does that for us; all we have to do is to parse
8 # its output.
10 # Checking out the files is done by a single long-running CVS connection
11 # / server process.
13 # The head revision is on branch "origin" by default.
14 # You can change that with the '-o' option.
16 use 5.008;
17 use strict;
18 use warnings;
19 use Getopt::Long;
20 use File::Spec;
21 use File::Temp qw(tempfile tmpnam);
22 use File::Path qw(mkpath);
23 use File::Basename qw(basename dirname);
24 use Time::Local;
25 use IO::Socket;
26 use IO::Pipe;
27 use POSIX qw(strftime dup2 ENOENT);
28 use IPC::Open2;
30 $SIG{'PIPE'}="IGNORE";
31 $ENV{'TZ'}="UTC";
33 our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
34 my (%conv_author_name, %conv_author_email);
36 sub usage(;$) {
37 my $msg = shift;
38 print(STDERR "Error: $msg\n") if $msg;
39 print STDERR <<END;
40 Usage: git cvsimport # fetch/update GIT from CVS
41 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
42 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
43 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
44 [-r remote] [-R] [CVS_module]
45 END
46 exit(1);
49 sub read_author_info($) {
50 my ($file) = @_;
51 my $user;
52 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
54 while (<$f>) {
55 # Expected format is this:
56 # exon=Andreas Ericsson <ae@op5.se>
57 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
58 $user = $1;
59 $conv_author_name{$user} = $2;
60 $conv_author_email{$user} = $3;
62 # However, we also read from CVSROOT/users format
63 # to ease migration.
64 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
65 my $mapped;
66 ($user, $mapped) = ($1, $3);
67 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
68 $conv_author_name{$user} = $1;
69 $conv_author_email{$user} = $2;
71 elsif ($mapped =~ /^<?(.*)>?$/) {
72 $conv_author_name{$user} = $user;
73 $conv_author_email{$user} = $1;
76 # NEEDSWORK: Maybe warn on unrecognized lines?
78 close ($f);
81 sub write_author_info($) {
82 my ($file) = @_;
83 open my $f, '>', $file or
84 die("Failed to open $file for writing: $!");
86 foreach (keys %conv_author_name) {
87 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
89 close ($f);
92 # convert getopts specs for use by git config
93 my %longmap = (
94 'A:' => 'authors-file',
95 'M:' => 'merge-regex',
96 'P:' => undef,
97 'R' => 'track-revisions',
98 'S:' => 'ignore-paths',
101 sub read_repo_config {
102 # Split the string between characters, unless there is a ':'
103 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
104 my @opts = split(/ *(?!:)/, shift);
105 foreach my $o (@opts) {
106 my $key = $o;
107 $key =~ s/://g;
108 my $arg = 'git config';
109 $arg .= ' --bool' if ($o !~ /:$/);
110 my $ckey = $key;
112 if (exists $longmap{$o}) {
113 # An uppercase option like -R cannot be
114 # expressed in the configuration, as the
115 # variable names are downcased.
116 $ckey = $longmap{$o};
117 next if (! defined $ckey);
118 $ckey =~ s/-//g;
120 chomp(my $tmp = `$arg --get cvsimport.$ckey`);
121 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
122 no strict 'refs';
123 my $opt_name = "opt_" . $key;
124 if (!$$opt_name) {
125 $$opt_name = $tmp;
131 my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
132 read_repo_config($opts);
133 Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
135 # turn the Getopt::Std specification in a Getopt::Long one,
136 # with support for multiple -M options
137 GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
138 or usage();
139 usage if $opt_h;
141 if (@ARGV == 0) {
142 chomp(my $module = `git config --get cvsimport.module`);
143 push(@ARGV, $module) if $? == 0;
145 @ARGV <= 1 or usage("You can't specify more than one CVS module");
147 if ($opt_d) {
148 $ENV{"CVSROOT"} = $opt_d;
149 } elsif (-f 'CVS/Root') {
150 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
151 $opt_d = <$f>;
152 chomp $opt_d;
153 close $f;
154 $ENV{"CVSROOT"} = $opt_d;
155 } elsif ($ENV{"CVSROOT"}) {
156 $opt_d = $ENV{"CVSROOT"};
157 } else {
158 usage("CVSROOT needs to be set");
160 $opt_s ||= "-";
161 $opt_a ||= 0;
163 my $git_tree = $opt_C;
164 $git_tree ||= ".";
166 my $remote;
167 if (defined $opt_r) {
168 $remote = 'refs/remotes/' . $opt_r;
169 $opt_o ||= "master";
170 } else {
171 $opt_o ||= "origin";
172 $remote = 'refs/heads';
175 my $cvs_tree;
176 if ($#ARGV == 0) {
177 $cvs_tree = $ARGV[0];
178 } elsif (-f 'CVS/Repository') {
179 open my $f, '<', 'CVS/Repository' or
180 die 'Failed to open CVS/Repository';
181 $cvs_tree = <$f>;
182 chomp $cvs_tree;
183 close $f;
184 } else {
185 usage("CVS module has to be specified");
188 our @mergerx = ();
189 if ($opt_m) {
190 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
192 if (@opt_M) {
193 push (@mergerx, map { qr/$_/ } @opt_M);
196 # Remember UTC of our starting time
197 # we'll want to avoid importing commits
198 # that are too recent
199 our $starttime = time();
201 select(STDERR); $|=1; select(STDOUT);
204 package CVSconn;
205 # Basic CVS dialog.
206 # We're only interested in connecting and downloading, so ...
208 use File::Spec;
209 use File::Temp qw(tempfile);
210 use POSIX qw(strftime dup2);
212 sub new {
213 my ($what,$repo,$subdir) = @_;
214 $what=ref($what) if ref($what);
216 my $self = {};
217 $self->{'buffer'} = "";
218 bless($self,$what);
220 $repo =~ s#/+$##;
221 $self->{'fullrep'} = $repo;
222 $self->conn();
224 $self->{'subdir'} = $subdir;
225 $self->{'lines'} = undef;
227 return $self;
230 sub conn {
231 my $self = shift;
232 my $repo = $self->{'fullrep'};
233 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
234 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
236 my ($proxyhost,$proxyport);
237 if ($param && ($param =~ m/proxy=([^;]+)/)) {
238 $proxyhost = $1;
239 # Default proxyport, if not specified, is 8080.
240 $proxyport = 8080;
241 if ($ENV{"CVS_PROXY_PORT"}) {
242 $proxyport = $ENV{"CVS_PROXY_PORT"};
244 if ($param =~ m/proxyport=([^;]+)/) {
245 $proxyport = $1;
248 $repo ||= '/';
250 # if username is not explicit in CVSROOT, then use current user, as cvs would
251 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
252 my $rr2 = "-";
253 unless ($port) {
254 $rr2 = ":pserver:$user\@$serv:$repo";
255 $port=2401;
257 my $rr = ":pserver:$user\@$serv:$port$repo";
259 if ($pass) {
260 $pass = $self->_scramble($pass);
261 } else {
262 open(H,$ENV{'HOME'}."/.cvspass") and do {
263 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
264 while (<H>) {
265 chomp;
266 s/^\/\d+\s+//;
267 my ($w,$p) = split(/\s/,$_,2);
268 if ($w eq $rr or $w eq $rr2) {
269 $pass = $p;
270 last;
274 $pass = "A" unless $pass;
277 my ($s, $rep);
278 if ($proxyhost) {
280 # Use a HTTP Proxy. Only works for HTTP proxies that
281 # don't require user authentication
283 # See: http://www.ietf.org/rfc/rfc2817.txt
285 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
286 die "Socket to $proxyhost: $!\n" unless defined $s;
287 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
288 or die "Write to $proxyhost: $!\n";
289 $s->flush();
291 $rep = <$s>;
293 # The answer should look like 'HTTP/1.x 2yy ....'
294 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
295 die "Proxy connect: $rep\n";
297 # Skip up to the empty line of the proxy server output
298 # including the response headers.
299 while ($rep = <$s>) {
300 last if (!defined $rep ||
301 $rep eq "\n" ||
302 $rep eq "\r\n");
304 } else {
305 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
306 die "Socket to $serv: $!\n" unless defined $s;
309 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
310 or die "Write to $serv: $!\n";
311 $s->flush();
313 $rep = <$s>;
315 if ($rep ne "I LOVE YOU\n") {
316 $rep="<unknown>" unless $rep;
317 die "AuthReply: $rep\n";
319 $self->{'socketo'} = $s;
320 $self->{'socketi'} = $s;
321 } else { # local or ext: Fork off our own cvs server.
322 my $pr = IO::Pipe->new();
323 my $pw = IO::Pipe->new();
324 my $pid = fork();
325 die "Fork: $!\n" unless defined $pid;
326 my $cvs = 'cvs';
327 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
328 my $rsh = 'rsh';
329 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
331 my @cvs = ($cvs, 'server');
332 my ($local, $user, $host);
333 $local = $repo =~ s/:local://;
334 if (!$local) {
335 $repo =~ s/:ext://;
336 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
337 ($user, $host) = ($1, $2);
339 if (!$local) {
340 if ($user) {
341 unshift @cvs, $rsh, '-l', $user, $host;
342 } else {
343 unshift @cvs, $rsh, $host;
347 unless ($pid) {
348 $pr->writer();
349 $pw->reader();
350 dup2($pw->fileno(),0);
351 dup2($pr->fileno(),1);
352 $pr->close();
353 $pw->close();
354 exec(@cvs);
356 $pw->writer();
357 $pr->reader();
358 $self->{'socketo'} = $pw;
359 $self->{'socketi'} = $pr;
361 $self->{'socketo'}->write("Root $repo\n");
363 # Trial and error says that this probably is the minimum set
364 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
366 $self->{'socketo'}->write("valid-requests\n");
367 $self->{'socketo'}->flush();
369 chomp(my $rep=$self->readline());
370 if ($rep !~ s/^Valid-requests\s*//) {
371 $rep="<unknown>" unless $rep;
372 die "Expected Valid-requests from server, but got: $rep\n";
374 chomp(my $res=$self->readline());
375 die "validReply: $res\n" if $res ne "ok";
377 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
378 $self->{'repo'} = $repo;
381 sub readline {
382 my ($self) = @_;
383 return $self->{'socketi'}->getline();
386 sub _file {
387 # Request a file with a given revision.
388 # Trial and error says this is a good way to do it. :-/
389 my ($self,$fn,$rev) = @_;
390 $self->{'socketo'}->write("Argument -N\n") or return undef;
391 $self->{'socketo'}->write("Argument -P\n") or return undef;
392 # -kk: Linus' version doesn't use it - defaults to off
393 if ($opt_k) {
394 $self->{'socketo'}->write("Argument -kk\n") or return undef;
396 $self->{'socketo'}->write("Argument -r\n") or return undef;
397 $self->{'socketo'}->write("Argument $rev\n") or return undef;
398 $self->{'socketo'}->write("Argument --\n") or return undef;
399 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
400 $self->{'socketo'}->write("Directory .\n") or return undef;
401 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
402 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
403 $self->{'socketo'}->write("co\n") or return undef;
404 $self->{'socketo'}->flush() or return undef;
405 $self->{'lines'} = 0;
406 return 1;
408 sub _line {
409 # Read a line from the server.
410 # ... except that 'line' may be an entire file. ;-)
411 my ($self, $fh) = @_;
412 die "Not in lines" unless defined $self->{'lines'};
414 my $line;
415 my $res=0;
416 while (defined($line = $self->readline())) {
417 # M U gnupg-cvs-rep/AUTHORS
418 # Updated gnupg-cvs-rep/
419 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
420 # /AUTHORS/1.1///T1.1
421 # u=rw,g=rw,o=rw
423 # ok
425 if ($line =~ s/^(?:Created|Updated) //) {
426 $line = $self->readline(); # path
427 $line = $self->readline(); # Entries line
428 my $mode = $self->readline(); chomp $mode;
429 $self->{'mode'} = $mode;
430 defined (my $cnt = $self->readline())
431 or die "EOF from server after 'Changed'\n";
432 chomp $cnt;
433 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
434 $line="";
435 $res = $self->_fetchfile($fh, $cnt);
436 } elsif ($line =~ s/^ //) {
437 print $fh $line;
438 $res += length($line);
439 } elsif ($line =~ /^M\b/) {
440 # output, do nothing
441 } elsif ($line =~ /^Mbinary\b/) {
442 my $cnt;
443 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
444 chomp $cnt;
445 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
446 $line="";
447 $res += $self->_fetchfile($fh, $cnt);
448 } else {
449 chomp $line;
450 if ($line eq "ok") {
451 # print STDERR "S: ok (".length($res).")\n";
452 return $res;
453 } elsif ($line =~ s/^E //) {
454 # print STDERR "S: $line\n";
455 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
456 $line = $self->readline(); # filename
457 $line = $self->readline(); # OK
458 chomp $line;
459 die "Unknown: $line" if $line ne "ok";
460 return -1;
461 } else {
462 die "Unknown: $line\n";
466 return undef;
468 sub file {
469 my ($self,$fn,$rev) = @_;
470 my $res;
472 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
473 DIR => File::Spec->tmpdir(), UNLINK => 1);
475 $self->_file($fn,$rev) and $res = $self->_line($fh);
477 if (!defined $res) {
478 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
479 truncate $fh, 0;
480 $self->conn();
481 $self->_file($fn,$rev) or die "No file command send";
482 $res = $self->_line($fh);
483 die "Retry failed" unless defined $res;
485 close ($fh);
487 return ($name, $res);
489 sub _fetchfile {
490 my ($self, $fh, $cnt) = @_;
491 my $res = 0;
492 my $bufsize = 1024 * 1024;
493 while ($cnt) {
494 if ($bufsize > $cnt) {
495 $bufsize = $cnt;
497 my $buf;
498 my $num = $self->{'socketi'}->read($buf,$bufsize);
499 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
500 print $fh $buf;
501 $res += $num;
502 $cnt -= $num;
504 return $res;
507 sub _scramble {
508 my ($self, $pass) = @_;
509 my $scrambled = "A";
511 return $scrambled unless $pass;
513 my $pass_len = length($pass);
514 my @pass_arr = split("", $pass);
515 my $i;
517 # from cvs/src/scramble.c
518 my @shifts = (
519 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
520 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
521 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
522 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
523 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
524 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
525 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
526 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
527 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
528 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
529 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
530 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
531 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
532 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
533 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
534 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
537 for ($i = 0; $i < $pass_len; $i++) {
538 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
541 return $scrambled;
544 package main;
546 my $cvs = CVSconn->new($opt_d, $cvs_tree);
549 sub pdate($) {
550 my ($d) = @_;
551 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
552 or die "Unparseable date: $d\n";
553 my $y=$1; $y-=1900 if $y>1900;
554 return timegm($6||0,$5,$4,$3,$2-1,$y);
557 sub pmode($) {
558 my ($mode) = @_;
559 my $m = 0;
560 my $mm = 0;
561 my $um = 0;
562 for my $x(split(//,$mode)) {
563 if ($x eq ",") {
564 $m |= $mm&$um;
565 $mm = 0;
566 $um = 0;
567 } elsif ($x eq "u") { $um |= 0700;
568 } elsif ($x eq "g") { $um |= 0070;
569 } elsif ($x eq "o") { $um |= 0007;
570 } elsif ($x eq "r") { $mm |= 0444;
571 } elsif ($x eq "w") { $mm |= 0222;
572 } elsif ($x eq "x") { $mm |= 0111;
573 } elsif ($x eq "=") { # do nothing
574 } else { die "Unknown mode: $mode\n";
577 $m |= $mm&$um;
578 return $m;
581 sub getwd() {
582 my $pwd = `pwd`;
583 chomp $pwd;
584 return $pwd;
587 sub is_sha1 {
588 my $s = shift;
589 return $s =~ /^[a-f0-9]{40}$/;
592 sub get_headref ($) {
593 my $name = shift;
594 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
595 return undef unless $? == 0;
596 chomp $r;
597 return $r;
600 my $user_filename_prepend = '';
601 sub munge_user_filename {
602 my $name = shift;
603 return File::Spec->file_name_is_absolute($name) ?
604 $name :
605 $user_filename_prepend . $name;
608 -d $git_tree
609 or mkdir($git_tree,0777)
610 or die "Could not create $git_tree: $!";
611 if ($git_tree ne '.') {
612 $user_filename_prepend = getwd() . '/';
613 chdir($git_tree);
616 my $last_branch = "";
617 my $orig_branch = "";
618 my %branch_date;
619 my $tip_at_start = undef;
621 my $git_dir = $ENV{"GIT_DIR"} || ".git";
622 $git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
623 $ENV{"GIT_DIR"} = $git_dir;
624 my $orig_git_index;
625 $orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
627 my %index; # holds filenames of one index per branch
629 unless (-d $git_dir) {
630 system(qw(git init));
631 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
632 system(qw(git read-tree --empty));
633 die "Cannot init an empty tree: $?\n" if $?;
635 $last_branch = $opt_o;
636 $orig_branch = "";
637 } else {
638 open(F, "-|", qw(git symbolic-ref HEAD)) or
639 die "Cannot run git symbolic-ref: $!\n";
640 chomp ($last_branch = <F>);
641 $last_branch = basename($last_branch);
642 close(F);
643 unless ($last_branch) {
644 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
645 $last_branch = "master";
647 $orig_branch = $last_branch;
648 $tip_at_start = `git rev-parse --verify HEAD`;
650 # Get the last import timestamps
651 my $fmt = '($ref, $author) = (%(refname), %(author));';
652 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
653 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
654 while (defined(my $entry = <H>)) {
655 my ($ref, $author);
656 eval($entry) || die "cannot eval refs list: $@";
657 my ($head) = ($ref =~ m|^$remote/(.*)|);
658 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
659 $branch_date{$head} = $1;
661 close(H);
662 if (!exists $branch_date{$opt_o}) {
663 die "Branch '$opt_o' does not exist.\n".
664 "Either use the correct '-o branch' option,\n".
665 "or import to a new repository.\n";
669 -d $git_dir
670 or die "Could not create git subdir ($git_dir).\n";
672 # now we read (and possibly save) author-info as well
673 -f "$git_dir/cvs-authors" and
674 read_author_info("$git_dir/cvs-authors");
675 if ($opt_A) {
676 read_author_info(munge_user_filename($opt_A));
677 write_author_info("$git_dir/cvs-authors");
680 # open .git/cvs-revisions, if requested
681 open my $revision_map, '>>', "$git_dir/cvs-revisions"
682 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
683 if defined $opt_R;
687 # run cvsps into a file unless we are getting
688 # it passed as a file via $opt_P
690 my $cvspsfile;
691 unless ($opt_P) {
692 print "Running cvsps...\n" if $opt_v;
693 my $pid = open(CVSPS,"-|");
694 my $cvspsfh;
695 die "Cannot fork: $!\n" unless defined $pid;
696 unless ($pid) {
697 my @opt;
698 @opt = split(/,/,$opt_p) if defined $opt_p;
699 unshift @opt, '-z', $opt_z if defined $opt_z;
700 unshift @opt, '-q' unless defined $opt_v;
701 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
702 push @opt, '--cvs-direct';
704 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
705 die "Could not start cvsps: $!\n";
707 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
708 DIR => File::Spec->tmpdir());
709 while (<CVSPS>) {
710 print $cvspsfh $_;
712 close CVSPS;
713 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
714 close $cvspsfh;
715 } else {
716 $cvspsfile = munge_user_filename($opt_P);
719 open(CVS, "<$cvspsfile") or die $!;
721 ## cvsps output:
722 #---------------------
723 #PatchSet 314
724 #Date: 1999/09/18 13:03:59
725 #Author: wkoch
726 #Branch: STABLE-BRANCH-1-0
727 #Ancestor branch: HEAD
728 #Tag: (none)
729 #Log:
730 # See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
731 #Members:
732 # README:1.57->1.57.2.1
733 # VERSION:1.96->1.96.2.1
735 #---------------------
737 my $state = 0;
739 sub update_index (\@\@) {
740 my $old = shift;
741 my $new = shift;
742 open(my $fh, '|-', qw(git update-index -z --index-info))
743 or die "unable to open git update-index: $!";
744 print $fh
745 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
746 @$old),
747 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
748 @$new)
749 or die "unable to write to git update-index: $!";
750 close $fh
751 or die "unable to write to git update-index: $!";
752 $? and die "git update-index reported error: $?";
755 sub write_tree () {
756 open(my $fh, '-|', qw(git write-tree))
757 or die "unable to open git write-tree: $!";
758 chomp(my $tree = <$fh>);
759 is_sha1($tree)
760 or die "Cannot get tree id ($tree): $!";
761 close($fh)
762 or die "Error running git write-tree: $?\n";
763 print "Tree ID $tree\n" if $opt_v;
764 return $tree;
767 my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
768 my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
770 # commits that cvsps cannot place anywhere...
771 $ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
773 sub commit {
774 if ($branch eq $opt_o && !$index{branch} &&
775 !get_headref("$remote/$branch")) {
776 # looks like an initial commit
777 # use the index primed by git init
778 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
779 $index{$branch} = "$git_dir/index";
780 } else {
781 # use an index per branch to speed up
782 # imports of projects with many branches
783 unless ($index{$branch}) {
784 $index{$branch} = tmpnam();
785 $ENV{GIT_INDEX_FILE} = $index{$branch};
786 if ($ancestor) {
787 system("git", "read-tree", "$remote/$ancestor");
788 } else {
789 system("git", "read-tree", "$remote/$branch");
791 die "read-tree failed: $?\n" if $?;
794 $ENV{GIT_INDEX_FILE} = $index{$branch};
796 update_index(@old, @new);
797 @old = @new = ();
798 my $tree = write_tree();
799 my $parent = get_headref("$remote/$last_branch");
800 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
802 my @commit_args;
803 push @commit_args, ("-p", $parent) if $parent;
805 # loose detection of merges
806 # based on the commit msg
807 foreach my $rx (@mergerx) {
808 next unless $logmsg =~ $rx && $1;
809 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
810 if (my $sha1 = get_headref("$remote/$mparent")) {
811 push @commit_args, '-p', "$remote/$mparent";
812 print "Merge parent branch: $mparent\n" if $opt_v;
816 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
817 $ENV{GIT_AUTHOR_NAME} = $author_name;
818 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
819 $ENV{GIT_AUTHOR_DATE} = $commit_date;
820 $ENV{GIT_COMMITTER_NAME} = $author_name;
821 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
822 $ENV{GIT_COMMITTER_DATE} = $commit_date;
823 my $pid = open2(my $commit_read, my $commit_write,
824 'git', 'commit-tree', $tree, @commit_args);
826 # compatibility with git2cvs
827 substr($logmsg,32767) = "" if length($logmsg) > 32767;
828 $logmsg =~ s/[\s\n]+\z//;
830 if (@skipped) {
831 $logmsg .= "\n\n\nSKIPPED:\n\t";
832 $logmsg .= join("\n\t", @skipped) . "\n";
833 @skipped = ();
836 print($commit_write "$logmsg\n") && close($commit_write)
837 or die "Error writing to git commit-tree: $!\n";
839 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
840 chomp(my $cid = <$commit_read>);
841 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
842 print "Commit ID $cid\n" if $opt_v;
843 close($commit_read);
845 waitpid($pid,0);
846 die "Error running git commit-tree: $?\n" if $?;
848 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
849 or die "Cannot write branch $branch for update: $!\n";
851 if ($revision_map) {
852 print $revision_map "@$_ $cid\n" for @commit_revisions;
854 @commit_revisions = ();
856 if ($tag) {
857 my ($xtag) = $tag;
858 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
859 $xtag =~ tr/_/\./ if ( $opt_u );
860 $xtag =~ s/[\/]/$opt_s/g;
861 $xtag =~ s/\[//g;
863 system('git' , 'tag', '-f', $xtag, $cid) == 0
864 or die "Cannot create tag $xtag: $!\n";
866 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
870 my $commitcount = 1;
871 while (<CVS>) {
872 chomp;
873 if ($state == 0 and /^-+$/) {
874 $state = 1;
875 } elsif ($state == 0) {
876 $state = 1;
877 redo;
878 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
879 $patchset = 0+$_;
880 $state=2;
881 } elsif ($state == 2 and s/^Date:\s+//) {
882 $date = pdate($_);
883 unless ($date) {
884 print STDERR "Could not parse date: $_\n";
885 $state=0;
886 next;
888 $state=3;
889 } elsif ($state == 3 and s/^Author:\s+//) {
890 s/\s+$//;
891 if (/^(.*?)\s+<(.*)>/) {
892 ($author_name, $author_email) = ($1, $2);
893 } elsif ($conv_author_name{$_}) {
894 $author_name = $conv_author_name{$_};
895 $author_email = $conv_author_email{$_};
896 } else {
897 $author_name = $author_email = $_;
899 $state = 4;
900 } elsif ($state == 4 and s/^Branch:\s+//) {
901 s/\s+$//;
902 tr/_/\./ if ( $opt_u );
903 s/[\/]/$opt_s/g;
904 $branch = $_;
905 $state = 5;
906 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
907 s/\s+$//;
908 $ancestor = $_;
909 $ancestor = $opt_o if $ancestor eq "HEAD";
910 $state = 6;
911 } elsif ($state == 5) {
912 $ancestor = undef;
913 $state = 6;
914 redo;
915 } elsif ($state == 6 and s/^Tag:\s+//) {
916 s/\s+$//;
917 if ($_ eq "(none)") {
918 $tag = undef;
919 } else {
920 $tag = $_;
922 $state = 7;
923 } elsif ($state == 7 and /^Log:/) {
924 $logmsg = "";
925 $state = 8;
926 } elsif ($state == 8 and /^Members:/) {
927 $branch = $opt_o if $branch eq "HEAD";
928 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
929 # skip
930 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
931 $state = 11;
932 next;
934 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
935 # skip if the commit is too recent
936 # given that the cvsps default fuzz is 300s, we give ourselves another
937 # 300s just in case -- this also prevents skipping commits
938 # due to server clock drift
939 print "skip patchset $patchset: $date too recent\n" if $opt_v;
940 $state = 11;
941 next;
943 if (exists $ignorebranch{$branch}) {
944 print STDERR "Skipping $branch\n";
945 $state = 11;
946 next;
948 if ($ancestor) {
949 if ($ancestor eq $branch) {
950 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
951 $ancestor = $opt_o;
953 if (defined get_headref("$remote/$branch")) {
954 print STDERR "Branch $branch already exists!\n";
955 $state=11;
956 next;
958 my $id = get_headref("$remote/$ancestor");
959 if (!$id) {
960 print STDERR "Branch $ancestor does not exist!\n";
961 $ignorebranch{$branch} = 1;
962 $state=11;
963 next;
966 system(qw(git update-ref -m cvsimport),
967 "$remote/$branch", $id);
968 if($? != 0) {
969 print STDERR "Could not create branch $branch\n";
970 $ignorebranch{$branch} = 1;
971 $state=11;
972 next;
975 $last_branch = $branch if $branch ne $last_branch;
976 $state = 9;
977 } elsif ($state == 8) {
978 $logmsg .= "$_\n";
979 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
980 # VERSION:1.96->1.96.2.1
981 my $init = ($2 eq "INITIAL");
982 my $fn = $1;
983 my $rev = $3;
984 $fn =~ s#^/+##;
985 if ($opt_S && $fn =~ m/$opt_S/) {
986 print "SKIPPING $fn v $rev\n";
987 push(@skipped, $fn);
988 next;
990 push @commit_revisions, [$fn, $rev];
991 print "Fetching $fn v $rev\n" if $opt_v;
992 my ($tmpname, $size) = $cvs->file($fn,$rev);
993 if ($size == -1) {
994 push(@old,$fn);
995 print "Drop $fn\n" if $opt_v;
996 } else {
997 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
998 my $pid = open(my $F, '-|');
999 die $! unless defined $pid;
1000 if (!$pid) {
1001 exec("git", "hash-object", "-w", $tmpname)
1002 or die "Cannot create object: $!\n";
1004 my $sha = <$F>;
1005 chomp $sha;
1006 close $F;
1007 my $mode = pmode($cvs->{'mode'});
1008 push(@new,[$mode, $sha, $fn]); # may be resurrected!
1010 unlink($tmpname);
1011 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1012 my $fn = $1;
1013 my $rev = $2;
1014 $fn =~ s#^/+##;
1015 push @commit_revisions, [$fn, $rev];
1016 push(@old,$fn);
1017 print "Delete $fn\n" if $opt_v;
1018 } elsif ($state == 9 and /^\s*$/) {
1019 $state = 10;
1020 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1021 $commitcount++;
1022 if ($opt_L && $commitcount > $opt_L) {
1023 last;
1025 commit();
1026 if (($commitcount & 1023) == 0) {
1027 system(qw(git repack -a -d));
1029 $state = 1;
1030 } elsif ($state == 11 and /^-+$/) {
1031 $state = 1;
1032 } elsif (/^-+$/) { # end of unknown-line processing
1033 $state = 1;
1034 } elsif ($state != 11) { # ignore stuff when skipping
1035 print STDERR "* UNKNOWN LINE * $_\n";
1038 commit() if $branch and $state != 11;
1040 unless ($opt_P) {
1041 unlink($cvspsfile);
1044 # The heuristic of repacking every 1024 commits can leave a
1045 # lot of unpacked data. If there is more than 1MB worth of
1046 # not-packed objects, repack once more.
1047 my $line = `git count-objects`;
1048 if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1049 my ($n_objects, $kb) = ($1, $2);
1050 1024 < $kb
1051 and system(qw(git repack -a -d));
1054 foreach my $git_index (values %index) {
1055 if ($git_index ne "$git_dir/index") {
1056 unlink($git_index);
1060 if (defined $orig_git_index) {
1061 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1062 } else {
1063 delete $ENV{GIT_INDEX_FILE};
1066 # Now switch back to the branch we were in before all of this happened
1067 if ($orig_branch) {
1068 print "DONE.\n" if $opt_v;
1069 if ($opt_i) {
1070 exit 0;
1072 my $tip_at_end = `git rev-parse --verify HEAD`;
1073 if ($tip_at_start ne $tip_at_end) {
1074 for ($tip_at_start, $tip_at_end) { chomp; }
1075 print "Fetched into the current branch.\n" if $opt_v;
1076 system(qw(git read-tree -u -m),
1077 $tip_at_start, $tip_at_end);
1078 die "Fast-forward update failed: $?\n" if $?;
1080 else {
1081 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1082 die "Could not merge $opt_o into the current branch.\n" if $?;
1084 } else {
1085 $orig_branch = "master";
1086 print "DONE; creating $orig_branch branch\n" if $opt_v;
1087 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1088 unless defined get_headref('refs/heads/master');
1089 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1090 if ($opt_r && $opt_o ne 'HEAD');
1091 system('git', 'update-ref', 'HEAD', "$orig_branch");
1092 unless ($opt_i) {
1093 system(qw(git checkout -f));
1094 die "checkout failed: $?\n" if $?;