3 # This tool is copyright (c) 2005, Martin Langhoff.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to walk the output of tla abrowse,
7 # fetch the changesets and apply them.
11 git-archimport -i <archive>/<branch> [<archive>/<branch>]
12 [ <archive>/<branch> ]
14 The script expects you to provide the key roots where it can start the
15 import from an 'initial import' or 'tag' type of Arch commit. It will
16 then follow all the branching and tagging within the provided roots.
18 It will die if it sees branches that have different roots.
22 - keep track of merged patches, and mark a git merge when it happens
23 - smarter rules to parse the archive history "up" and "down"
24 - be able to continue an import where we left off
25 - audit shell-escaping of filenames
29 Add print in front of the shell commands invoked via backticks.
37 use File
::Temp
qw(tempfile);
38 use File
::Path
qw(mkpath);
39 use File
::Basename
qw(basename dirname);
40 use String
::ShellQuote
;
44 use POSIX
qw(strftime dup2);
45 use Data
::Dumper qw
/ Dumper /;
48 $SIG{'PIPE'}="IGNORE";
51 our($opt_h,$opt_v, $opt_T,
56 Usage: ${\basename $0} # fetch/update GIT from Arch
58 [ -C GIT_repository ] [ -t tempdir ]
59 repository/arch-branch [ repository/arch-branch] ...
64 getopts
("ThviC:t:") or usage
();
67 @ARGV >= 1 or usage
();
68 my @arch_roots = @ARGV;
72 $tmp .= '/git-archimport/';
74 my $git_tree = $opt_C;
78 my @psets = (); # the collection
79 my %psets = (); # the collection, by name
81 my %rptags = (); # my reverse private tags
82 # to map a SHA1 to a commitid
84 foreach my $root (@arch_roots) {
85 my ($arepo, $abranch) = split(m!/!, $root);
86 open ABROWSE
, "tla abrowse -f -A $arepo --desc --merges $abranch |"
87 or die "Problems with tla abrowse: $!";
89 my %ps = (); # the current one
96 # first record padded w 8 spaces
99 # store the record we just captured
101 my %temp = %ps; # break references
102 push (@psets, \
%temp);
103 $psets{$temp{id
}} = \
%temp;
107 my ($id, $type) = split(m/\s{3}/, $_);
112 if ($type =~ m/^\(simple changeset\)/) {
114 } elsif ($type eq '(initial import)') {
116 } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
120 warn "Unknown type $type";
126 # 10 leading spaces or more
127 # indicate commit metadata
130 if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
132 my ($date, $authoremail) = split(m/\s{2,}/, $_);
134 $ps{date
} =~ s/\bGMT$//; # strip off trailign GMT
135 if ($ps{date
} =~ m/\b\w+$/) {
136 warn 'Arch dates not in GMT?! - imported dates will be wrong';
139 $authoremail =~ m/^(.+)\s(\S+)$/;
145 } elsif ($lastseen eq 'date') {
146 # the only hint is position
147 # subject is after date
151 } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
153 $lastseen = 'merges';
155 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
156 push (@
{$ps{merges
}}, $_);
158 warn 'more metadata after merges!?';
165 my %temp = %ps; # break references
166 push (@psets, \
%temp);
167 $psets{ $temp{id
} } = \
%temp;
171 } # end foreach $root
173 ## Order patches by time
174 @psets = sort {$a->{date
}.$b->{id
} cmp $b->{date
}.$b->{id
}} @psets;
176 #print Dumper \@psets;
179 ## TODO cleanup irrelevant patches
180 ## and put an initial import
183 unless (-d
'.git') { # initial import
184 if ($psets[0]{type
} eq 'i' || $psets[0]{type
} eq 't') {
185 print "Starting import from $psets[0]{id}\n";
190 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
192 } else { # progressing an import
194 opendir(DIR
, ".git/archimport/tags")
195 || die "can't opendir: $!";
196 while (my $file = readdir(DIR
)) {
197 # skip non-interesting-files
198 next unless -f
".git/archimport/tags/$file";
199 next if $file =~ m/--base-0$/; # don't care for base-0
200 my $sha = ptag
($file);
202 # reconvert the 3rd '--' sequence from the end
204 # $file = reverse $file;
205 # $file =~ s!^(.+?--.+?--.+?--.+?)--(.+)$!$1/$2!;
206 # $file = reverse $file;
207 $rptags{$sha} = $file;
213 foreach my $ps (@psets) {
215 $ps->{branch
} = branchname
($ps->{id
});
218 # ensure we have a clean state
220 if (`git diff-files`) {
221 die "Unclean tree when about to process $ps->{id} " .
222 " - did we fail to commit cleanly before?";
227 # skip commits already in repo
229 if (ptag
($ps->{id
})) {
230 $opt_v && print "Skipping already imported: $ps->{id}\n";
235 # create the branch if needed
237 if ($ps->{type
} eq 'i' && !$import) {
238 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
241 unless ($import) { # skip for import
242 if ( -e
".git/refs/heads/$ps->{branch}") {
243 # we know about this branch
244 `git checkout $ps->{branch}`;
246 # new branch! we need to verify a few things
247 die "Branch on a non-tag!" unless $ps->{type
} eq 't';
248 my $branchpoint = ptag
($ps->{tag
});
249 die "Tagging from unknown id unsupported: $ps->{tag}"
252 # find where we are supposed to branch from
253 `git checkout -b $ps->{branch} $branchpoint`;
255 # If we trust Arch with the fact that this is just
256 # a tag, and it does not affect the state of the tree
257 # then we just tag and move on
258 tag
($ps->{id
}, $branchpoint);
259 ptag
($ps->{id
}, $branchpoint);
260 print " * Tagged $ps->{id} at $branchpoint\n";
267 # Apply the import/changeset/merge into the working tree
269 if ($ps->{type
} eq 'i' || $ps->{type
} eq 't') {
270 apply_import
($ps) or die $!;
272 } elsif ($ps->{type
} eq 's') {
277 # prepare update git's index, based on what arch knows
278 # about the pset, resolve parents, etc
282 my $commitlog = `tla cat-archive-log -A $ps->{repo} $ps->{id}`;
283 die "Error in cat-archive-log: $!" if $?
;
285 # parselog will git-add/rm files
286 # and generally prepare things for the commit
287 # NOTE: parselog will shell-quote filenames!
288 my ($sum, $msg, $add, $del, $mod, $ren) = parselog
($commitlog);
289 my $logmessage = "$sum\n$msg";
292 # imports don't give us good info
293 # on added files. Shame on them
294 if ($ps->{type
} eq 'i' || $ps->{type
} eq 't') {
295 `find . -type f -print0 | grep -zv '^./.git' | xargs -0 -l100 git-update-index --add`;
296 `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-index --remove`;
301 my @slice = splice(@
$add, 0, 100);
302 my $slice = join(' ', @slice);
303 `git-update-index --add $slice`;
304 die "Error in git-update-index --add: $!" if $?
;
308 foreach my $file (@
$del) {
309 unlink $file or die "Problems deleting $file : $!";
312 my @slice = splice(@
$del, 0, 100);
313 my $slice = join(' ', @slice);
314 `git-update-index --remove $slice`;
315 die "Error in git-update-index --remove: $!" if $?
;
318 if (@
$ren) { # renamed
320 die "Odd number of entries in rename!?";
324 my $from = pop @
$ren;
327 unless (-d dirname
($to)) {
328 mkpath
(dirname
($to)); # will die on err
330 #print "moving $from $to";
332 die "Error renaming $from $to : $!" if $?
;
333 `git-update-index --remove $from`;
334 die "Error in git-update-index --remove: $!" if $?
;
335 `git-update-index --add $to`;
336 die "Error in git-update-index --add: $!" if $?
;
340 if (@
$mod) { # must be _after_ renames
342 my @slice = splice(@
$mod, 0, 100);
343 my $slice = join(' ', @slice);
344 `git-update-index $slice`;
345 die "Error in git-update-index: $!" if $?
;
349 # warn "errors when running git-update-index! $!";
350 $tree = `git-write-tree`;
351 die "cannot write tree $!" if $?
;
359 if ( -e
".git/refs/heads/$ps->{branch}") {
360 if (open HEAD
, "<.git/refs/heads/$ps->{branch}") {
366 if ($ps->{type
} eq 's') {
367 warn "Could not find the right head for the branch $ps->{branch}";
373 push @par, find_parents
($ps);
375 my $par = join (' ', @par);
378 # Commit, tag and clean state
381 $ENV{GIT_AUTHOR_NAME
} = $ps->{author
};
382 $ENV{GIT_AUTHOR_EMAIL
} = $ps->{email
};
383 $ENV{GIT_AUTHOR_DATE
} = $ps->{date
};
384 $ENV{GIT_COMMITTER_NAME
} = $ps->{author
};
385 $ENV{GIT_COMMITTER_EMAIL
} = $ps->{email
};
386 $ENV{GIT_COMMITTER_DATE
} = $ps->{date
};
388 my ($pid, $commit_rh, $commit_wh);
389 $commit_rh = 'commit_rh';
390 $commit_wh = 'commit_wh';
392 $pid = open2
(*READER
, *WRITER
, "git-commit-tree $tree $par")
394 print WRITER
$logmessage; # write
396 my $commitid = <READER
>; # read
399 waitpid $pid,0; # close;
401 if (length $commitid != 40) {
402 die "Something went wrong with the commit! $! $commitid";
407 open HEAD
, ">.git/refs/heads/$ps->{branch}";
408 print HEAD
$commitid;
410 unlink ('.git/HEAD');
411 symlink("refs/heads/$ps->{branch}",".git/HEAD");
414 ptag
($ps->{id
}, $commitid); # private tag
415 if ($opt_T || $ps->{type
} eq 't' || $ps->{type
} eq 'i') {
416 tag
($ps->{id
}, $commitid);
418 print " * Committed $ps->{id}\n";
419 print " + tree $tree\n";
420 print " + commit $commitid\n";
421 $opt_v && print " + commit date is $ps->{date} \n";
422 $opt_v && print " + parents: $par \n";
428 my @parts = split(m/--/, $id);
429 return join('--', @parts[0..1]);
434 my $bname = branchname
($ps->{id
});
438 `tla get -s --no-pristine -A $ps->{repo} $ps->{id} $tmp/import`;
439 die "Cannot get import: $!" if $?
;
440 `rsync -v --archive --delete --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/import/* ./`;
441 die "Cannot rsync import:$!" if $?
;
443 `rm -fr $tmp/import`;
444 die "Cannot remove tempdir: $!" if $?
;
456 `tla get-changeset -A $ps->{repo} $ps->{id} $tmp/changeset`;
457 die "Cannot get changeset: $!" if $?
;
460 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
461 # this can be sped up considerably by doing
462 # (find | xargs cat) | patch
463 # but that cna get mucked up by patches
464 # with missing trailing newlines or the standard
465 # 'missing newline' flag in the patch - possibly
466 # produced with an old/buggy diff.
467 # slow and safe, we invoke patch once per patchfile
468 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
469 die "Problem applying patches! $!" if $?
;
472 # apply changed binary files
473 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
474 foreach my $mod (@modified) {
477 $orig =~ s/\.modified$//; # lazy
478 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
479 #print "rsync -p '$mod' '$orig'";
480 `rsync -p $mod ./$orig`;
481 die "Problem applying binary changes! $!" if $?
;
486 `rsync --archive --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/changeset/new-files-archive/* ./`;
488 # deleted files are hinted from the commitlog processing
490 `rm -fr $tmp/changeset`;
495 # A log entry looks like
496 # Revision: moodle-org--moodle--1.3.3--patch-15
497 # Archive: arch-eduforge@catalyst.net.nz--2004
498 # Creator: Penny Leach <penny@catalyst.net.nz>
499 # Date: Wed May 25 14:15:34 NZST 2005
500 # Standard-date: 2005-05-25 02:15:34 GMT
501 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
502 # lang/de/.arch-ids/block_html.php.id
503 # New-directories: lang/de/help/questionnaire
504 # lang/de/help/questionnaire/.arch-ids
505 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
506 # db_sears.sql db/db_sears.sql
507 # Removed-files: lang/be/docs/.arch-ids/release.html.id
508 # lang/be/docs/.arch-ids/releaseold.html.id
509 # Modified-files: admin/cron.php admin/delete.php
510 # admin/editor.html backup/lib.php backup/restore.php
511 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
512 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
515 # Updating yadda tadda tadda madda
520 my (@add, @del, @mod, @ren, @kw, $sum, $msg );
522 if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
524 @add = split(m/\s+/s, $files);
527 if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
529 @del = split(m/\s+/s, $files);
532 if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
534 @mod = split(m/\s+/s, $files);
537 if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
539 @ren = split(m/\s+/s, $files);
543 if ($log =~ m/^Summary:(.+?)$/m ) {
550 if ($log =~ m/\n\n(.+)$/s) {
558 foreach my $ref ( (\
@add, \
@del, \
@mod, \
@ren) ) {
560 while (my $t = pop @
$ref) {
561 next unless length ($t);
562 next if $t =~ m!\{arch\}/!;
563 next if $t =~ m!\.arch-ids/!;
564 next if $t =~ m!\.arch-inventory$!;
565 push (@tmp, shell_quote
($t));
570 #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren];
571 return ($sum, $msg, \
@add, \
@del, \
@mod, \
@ren);
576 my ($tag, $commit) = @_;
578 $tag = shell_quote
($tag);
581 open(C
,">.git/refs/tags/$tag")
582 or die "Cannot create tag $tag: $!\n";
584 or die "Cannot write tag $tag: $!\n";
586 or die "Cannot write tag $tag: $!\n";
587 print " * Created tag ' $tag' on '$commit'\n" if $opt_v;
589 open(C
,"<.git/refs/tags/$tag")
590 or die "Cannot read tag $tag: $!\n";
593 die "Error reading tag $tag: $!\n" unless length $commit == 40;
595 or die "Cannot read tag $tag: $!\n";
600 # write/read a private tag
601 # reads fail softly if the tag isn't there
603 my ($tag, $commit) = @_;
605 $tag = shell_quote
($tag);
607 unless (-d
'.git/archimport/tags') {
608 mkpath
('.git/archimport/tags');
611 if ($commit) { # write
612 open(C
,">.git/archimport/tags/$tag")
613 or die "Cannot create tag $tag: $!\n";
615 or die "Cannot write tag $tag: $!\n";
617 or die "Cannot write tag $tag: $!\n";
618 $rptags{$commit} = $tag
619 unless $tag =~ m/--base-0$/;
621 # if the tag isn't there, return 0
622 unless ( -s
".git/archimport/tags/$tag") {
625 open(C
,"<.git/archimport/tags/$tag")
626 or die "Cannot read tag $tag: $!\n";
629 die "Error reading tag $tag: $!\n" unless length $commit == 40;
631 or die "Cannot read tag $tag: $!\n";
632 unless (defined $rptags{$commit}) {
633 $rptags{$commit} = $tag;
641 # Identify what branches are merging into me
642 # and whether we are fully merged
643 # git-merge-base <headsha> <headsha> should tell
644 # me what the base of the merge should be
648 my %branches; # holds an arrayref per branch
649 # the arrayref contains a list of
650 # merged patches between the base
651 # of the merge and the current head
653 my @parents; # parents found for this commit
655 # simple loop to split the merges
657 foreach my $merge (@
{$ps->{merges
}}) {
658 my $branch = branchname
($merge);
659 unless (defined $branches{$branch} ){
660 $branches{$branch} = [];
662 push @
{$branches{$branch}}, $merge;
666 # foreach branch find a merge base and walk it to the
667 # head where we are, collecting the merged patchsets that
668 # Arch has recorded. Keep that in @have
669 # Compare that with the commits on the other branch
670 # between merge-base and the tip of the branch (@need)
671 # and see if we have a series of consecutive patches
672 # starting from the merge base. The tip of the series
673 # of consecutive patches merged is our new parent for
676 foreach my $branch (keys %branches) {
677 my $mergebase = `git-merge-base $branch $ps->{branch}`;
678 die "Cannot find merge base for $branch and $ps->{branch}" if $?
;
681 # now walk up to the mergepoint collecting what patches we have
682 my $branchtip = git_rev_parse
($ps->{branch
});
683 my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
684 my %have; # collected merges this branch has
685 foreach my $merge (@
{$ps->{merges
}}) {
689 foreach my $par (@ancestors) {
690 $par = commitid2pset
($par);
691 if (defined $par->{merges
}) {
692 foreach my $merge (@
{$par->{merges
}}) {
693 $ancestorshave{$merge}=1;
697 # print "++++ Merges in $ps->{id} are....\n";
698 # my @have = sort keys %have; print Dumper(\@have);
700 # merge what we have with what ancestors have
701 %have = (%have, %ancestorshave);
703 # see what the remote branch has - these are the merges we
704 # will want to have in a consecutive series from the mergebase
705 my $otherbranchtip = git_rev_parse
($branch);
706 my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
708 foreach my $needps (@needraw) { # get the psets
709 $needps = commitid2pset
($needps);
710 # git-rev-list will also
711 # list commits merged in via earlier
712 # merges. we are only interested in commits
713 # from the branch we're looking at
714 if ($branch eq $needps->{branch
}) {
715 push @need, $needps->{id
};
719 # print "++++ Merges from $branch we want are....\n";
720 # print Dumper(\@need);
723 while (my $needed_commit = pop @need) {
724 if ($have{$needed_commit}) {
725 $newparent = $needed_commit;
727 last; # break out of the while
731 push @parents, $newparent;
735 } # end foreach branch
737 # prune redundant parents
739 foreach my $p (@parents) {
742 foreach my $p (@parents) {
743 next unless exists $psets{$p}{merges
};
744 next unless ref $psets{$p}{merges
};
745 my @merges = @
{$psets{$p}{merges
}};
746 foreach my $merge (@merges) {
747 if ($parents{$merge}) {
748 delete $parents{$merge};
752 @parents = keys %parents;
753 @parents = map { " -p " . ptag
($_) } @parents;
759 my $val = `git-rev-parse $name`;
760 die "Error: git-rev-parse $name" if $?
;
765 # resolve a SHA1 to a known patchset
767 my $commitid = shift;
769 my $name = $rptags{$commitid}
770 || die "Cannot find reverse tag mapping for $commitid";
771 # the keys in %rptag are slightly munged; unmunge
772 # reconvert the 3rd '--' sequence from the end
774 $name = reverse $name;
775 $name =~ s!^(.+?--.+?--.+?--.+?)--(.+)$!$1/$2!;
776 $name = reverse $name;
777 my $ps = $psets{$name}
778 || (print Dumper
(sort keys %psets)) && die "Cannot find patchset for $name";