3 # This tool is copyright (c) 2005, Martin Langhoff.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to walk the output of tla abrowse,
7 # fetch the changesets and apply them.
12 git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
14 Imports a project from one or more Arch repositories. It will follow branches
15 and repositories within the namespaces defined by the <archive/branch>
16 parameters suppplied. If it cannot find the remote branch a merge comes from
17 it will just import it as a regular commit. If it can find it, it will mark it
18 as a merge whenever possible.
20 See man (1) git-archimport for more details.
24 - create tag objects instead of ref tags
25 - audit shell-escaping of filenames
26 - hide our private tags somewhere smarter
27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
31 Add print in front of the shell commands invoked via backticks.
35 There are several places where Arch and git terminology are intermixed
36 and potentially confused.
38 The notion of a "branch" in git is approximately equivalent to
39 a "archive/category--branch--version" in Arch. Also, it should be noted
40 that the "--branch" portion of "archive/category--branch--version" is really
41 optional in Arch although not many people (nor tools!) seem to know this.
42 This means that "archive/category--version" is also a valid "branch"
45 We always refer to Arch names by their fully qualified variant (which
46 means the "archive" name is prefixed.
48 For people unfamiliar with Arch, an "archive" is the term for "repository",
49 and can contain multiple, unrelated branches.
57 use File
::Temp
qw(tempfile tempdir);
58 use File
::Path
qw(mkpath);
59 use File
::Basename
qw(basename dirname);
60 use String
::ShellQuote
;
64 use POSIX
qw(strftime dup2);
65 use Data
::Dumper qw
/ Dumper /;
68 $SIG{'PIPE'}="IGNORE";
71 my $git_dir = $ENV{"GIT_DIR"} || ".git";
72 $ENV{"GIT_DIR"} = $git_dir;
73 my $ptag_dir = "$git_dir/archimport/tags";
75 our($opt_h,$opt_v, $opt_T,$opt_t,$opt_o);
79 Usage: ${\basename $0} # fetch/update GIT from Arch
80 [ -o ] [ -h ] [ -v ] [ -T ] [ -t tempdir ]
81 repository/arch-branch [ repository/arch-branch] ...
86 getopts
("Thvt:") or usage
();
89 @ARGV >= 1 or usage
();
90 my @arch_roots = @ARGV;
92 my ($tmpdir, $tmpdirname) = tempdir
('git-archimport-XXXXXX', TMPDIR
=> 1, CLEANUP
=> 1);
93 my $tmp = $opt_t || 1;
94 $tmp = tempdir
('git-archimport-XXXXXX', TMPDIR
=> 1, CLEANUP
=> 1);
95 $opt_v && print "+ Using $tmp as temporary directory\n";
97 my @psets = (); # the collection
98 my %psets = (); # the collection, by name
100 my %rptags = (); # my reverse private tags
101 # to map a SHA1 to a commitid
102 my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
104 foreach my $root (@arch_roots) {
105 my ($arepo, $abranch) = split(m!/!, $root);
106 open ABROWSE
, "tla abrowse -f -A $arepo --desc --merges $abranch |"
107 or die "Problems with tla abrowse: $!";
109 my %ps = (); # the current one
116 # first record padded w 8 spaces
119 # store the record we just captured
121 my %temp = %ps; # break references
122 push (@psets, \
%temp);
123 $psets{$temp{id
}} = \
%temp;
127 my ($id, $type) = split(m/\s{3}/, $_);
132 if ($type =~ m/^\(simple changeset\)/) {
134 } elsif ($type eq '(initial import)') {
136 } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
140 warn "Unknown type $type";
146 # 10 leading spaces or more
147 # indicate commit metadata
150 if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
152 my ($date, $authoremail) = split(m/\s{2,}/, $_);
154 $ps{date
} =~ s/\bGMT$//; # strip off trailign GMT
155 if ($ps{date
} =~ m/\b\w+$/) {
156 warn 'Arch dates not in GMT?! - imported dates will be wrong';
159 $authoremail =~ m/^(.+)\s(\S+)$/;
165 } elsif ($lastseen eq 'date') {
166 # the only hint is position
167 # subject is after date
171 } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
173 $lastseen = 'merges';
175 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
176 push (@
{$ps{merges
}}, $_);
178 warn 'more metadata after merges!?';
185 my %temp = %ps; # break references
186 push (@psets, \
%temp);
187 $psets{ $temp{id
} } = \
%temp;
191 } # end foreach $root
193 ## Order patches by time
194 @psets = sort {$a->{date
}.$b->{id
} cmp $b->{date
}.$b->{id
}} @psets;
196 #print Dumper \@psets;
199 ## TODO cleanup irrelevant patches
200 ## and put an initial import
203 unless (-d
$git_dir) { # initial import
204 if ($psets[0]{type
} eq 'i' || $psets[0]{type
} eq 't') {
205 print "Starting import from $psets[0]{id}\n";
210 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
212 } else { # progressing an import
214 opendir(DIR
, "$git_dir/archimport/tags")
215 || die "can't opendir: $!";
216 while (my $file = readdir(DIR
)) {
217 # skip non-interesting-files
218 next unless -f
"$ptag_dir/$file";
220 # convert first '--' to '/' from old git-archimport to use
221 # as an archivename/c--b--v private tag
225 print STDERR
"converting old tag $oldfile to $file\n";
226 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
228 my $sha = ptag
($file);
230 $rptags{$sha} = $file;
236 # extract the Arch repository name (Arch "archive" in Arch-speak)
237 sub extract_reponame
{
238 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
239 return (split(/\//, $fq_cvbr))[0];
242 sub extract_versionname
{
244 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
248 # convert a fully-qualified revision or version to a unique dirname:
249 # normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
250 # becomes: normalperson@yhbt.net-05,mpd--uclinux--1
252 # the git notion of a branch is closer to
253 # archive/category--branch--version than archive/category--branch, so we
254 # use this to convert to git branch names.
255 # Also, keep archive names but replace '/' with ',' since it won't require
256 # subdirectories, and is safer than swapping '--' which could confuse
257 # reverse-mapping when dealing with bastard branches that
258 # are just archive/category--version (no --branch)
260 my $revision = shift;
261 my $name = extract_versionname
($revision);
266 # old versions of git-archimport just use the <category--branch> part:
267 sub old_style_branchname
{
269 my $ret = safe_pipe_capture
($TLA,'parse-package-name','-p',$id);
274 *git_branchname
= $opt_o ?
*old_style_branchname
: *tree_dirname
;
277 foreach my $ps (@psets) {
278 $ps->{branch
} = git_branchname
($ps->{id
});
281 # ensure we have a clean state
283 if (`git diff-files`) {
284 die "Unclean tree when about to process $ps->{id} " .
285 " - did we fail to commit cleanly before?";
290 # skip commits already in repo
292 if (ptag
($ps->{id
})) {
293 $opt_v && print " * Skipping already imported: $ps->{id}\n";
297 print " * Starting to work on $ps->{id}\n";
300 # create the branch if needed
302 if ($ps->{type
} eq 'i' && !$import) {
303 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
306 unless ($import) { # skip for import
307 if ( -e
"$git_dir/refs/heads/$ps->{branch}") {
308 # we know about this branch
309 `git checkout $ps->{branch}`;
311 # new branch! we need to verify a few things
312 die "Branch on a non-tag!" unless $ps->{type
} eq 't';
313 my $branchpoint = ptag
($ps->{tag
});
314 die "Tagging from unknown id unsupported: $ps->{tag}"
317 # find where we are supposed to branch from
318 `git checkout -b $ps->{branch} $branchpoint`;
320 # If we trust Arch with the fact that this is just
321 # a tag, and it does not affect the state of the tree
322 # then we just tag and move on
323 tag
($ps->{id
}, $branchpoint);
324 ptag
($ps->{id
}, $branchpoint);
325 print " * Tagged $ps->{id} at $branchpoint\n";
332 # Apply the import/changeset/merge into the working tree
334 if ($ps->{type
} eq 'i' || $ps->{type
} eq 't') {
335 apply_import
($ps) or die $!;
337 } elsif ($ps->{type
} eq 's') {
342 # prepare update git's index, based on what arch knows
343 # about the pset, resolve parents, etc
347 my $commitlog = `tla cat-archive-log -A $ps->{repo} $ps->{id}`;
348 die "Error in cat-archive-log: $!" if $?
;
350 # parselog will git-add/rm files
351 # and generally prepare things for the commit
352 # NOTE: parselog will shell-quote filenames!
353 my ($sum, $msg, $add, $del, $mod, $ren) = parselog
($commitlog);
354 my $logmessage = "$sum\n$msg";
357 # imports don't give us good info
358 # on added files. Shame on them
359 if ($ps->{type
} eq 'i' || $ps->{type
} eq 't') {
360 `find . -type f -print0 | grep -zv '^./$git_dir' | xargs -0 -l100 git-update-index --add`;
361 `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-index --remove`;
366 my @slice = splice(@
$add, 0, 100);
367 my $slice = join(' ', @slice);
368 `git-update-index --add $slice`;
369 die "Error in git-update-index --add: $!" if $?
;
373 foreach my $file (@
$del) {
374 unlink $file or die "Problems deleting $file : $!";
377 my @slice = splice(@
$del, 0, 100);
378 my $slice = join(' ', @slice);
379 `git-update-index --remove $slice`;
380 die "Error in git-update-index --remove: $!" if $?
;
383 if (@
$ren) { # renamed
385 die "Odd number of entries in rename!?";
389 my $from = pop @
$ren;
392 unless (-d dirname
($to)) {
393 mkpath
(dirname
($to)); # will die on err
395 #print "moving $from $to";
397 die "Error renaming $from $to : $!" if $?
;
398 `git-update-index --remove $from`;
399 die "Error in git-update-index --remove: $!" if $?
;
400 `git-update-index --add $to`;
401 die "Error in git-update-index --add: $!" if $?
;
405 if (@
$mod) { # must be _after_ renames
407 my @slice = splice(@
$mod, 0, 100);
408 my $slice = join(' ', @slice);
409 `git-update-index $slice`;
410 die "Error in git-update-index: $!" if $?
;
414 # warn "errors when running git-update-index! $!";
415 $tree = `git-write-tree`;
416 die "cannot write tree $!" if $?
;
424 if ( -e
"$git_dir/refs/heads/$ps->{branch}") {
425 if (open HEAD
, "<$git_dir/refs/heads/$ps->{branch}") {
431 if ($ps->{type
} eq 's') {
432 warn "Could not find the right head for the branch $ps->{branch}";
438 push @par, find_parents
($ps);
440 my $par = join (' ', @par);
443 # Commit, tag and clean state
446 $ENV{GIT_AUTHOR_NAME
} = $ps->{author
};
447 $ENV{GIT_AUTHOR_EMAIL
} = $ps->{email
};
448 $ENV{GIT_AUTHOR_DATE
} = $ps->{date
};
449 $ENV{GIT_COMMITTER_NAME
} = $ps->{author
};
450 $ENV{GIT_COMMITTER_EMAIL
} = $ps->{email
};
451 $ENV{GIT_COMMITTER_DATE
} = $ps->{date
};
453 my ($pid, $commit_rh, $commit_wh);
454 $commit_rh = 'commit_rh';
455 $commit_wh = 'commit_wh';
457 $pid = open2
(*READER
, *WRITER
, "git-commit-tree $tree $par")
459 print WRITER
$logmessage; # write
461 my $commitid = <READER
>; # read
464 waitpid $pid,0; # close;
466 if (length $commitid != 40) {
467 die "Something went wrong with the commit! $! $commitid";
472 open HEAD
, ">$git_dir/refs/heads/$ps->{branch}";
473 print HEAD
$commitid;
475 system('git-update-ref', 'HEAD', "$ps->{branch}");
478 ptag
($ps->{id
}, $commitid); # private tag
479 if ($opt_T || $ps->{type
} eq 't' || $ps->{type
} eq 'i') {
480 tag
($ps->{id
}, $commitid);
482 print " * Committed $ps->{id}\n";
483 print " + tree $tree\n";
484 print " + commit $commitid\n";
485 $opt_v && print " + commit date is $ps->{date} \n";
486 $opt_v && print " + parents: $par \n";
491 my $bname = git_branchname
($ps->{id
});
495 `tla get -s --no-pristine -A $ps->{repo} $ps->{id} $tmp/import`;
496 die "Cannot get import: $!" if $?
;
497 `rsync -v --archive --delete --exclude '$git_dir' --exclude '.arch-ids' --exclude '{arch}' $tmp/import/* ./`;
498 die "Cannot rsync import:$!" if $?
;
500 `rm -fr $tmp/import`;
501 die "Cannot remove tempdir: $!" if $?
;
513 `tla get-changeset -A $ps->{repo} $ps->{id} $tmp/changeset`;
514 die "Cannot get changeset: $!" if $?
;
517 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
518 # this can be sped up considerably by doing
519 # (find | xargs cat) | patch
520 # but that cna get mucked up by patches
521 # with missing trailing newlines or the standard
522 # 'missing newline' flag in the patch - possibly
523 # produced with an old/buggy diff.
524 # slow and safe, we invoke patch once per patchfile
525 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
526 die "Problem applying patches! $!" if $?
;
529 # apply changed binary files
530 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
531 foreach my $mod (@modified) {
534 $orig =~ s/\.modified$//; # lazy
535 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
536 #print "rsync -p '$mod' '$orig'";
537 `rsync -p $mod ./$orig`;
538 die "Problem applying binary changes! $!" if $?
;
543 `rsync --archive --exclude '$git_dir' --exclude '.arch-ids' --exclude '{arch}' $tmp/changeset/new-files-archive/* ./`;
545 # deleted files are hinted from the commitlog processing
547 `rm -fr $tmp/changeset`;
552 # A log entry looks like
553 # Revision: moodle-org--moodle--1.3.3--patch-15
554 # Archive: arch-eduforge@catalyst.net.nz--2004
555 # Creator: Penny Leach <penny@catalyst.net.nz>
556 # Date: Wed May 25 14:15:34 NZST 2005
557 # Standard-date: 2005-05-25 02:15:34 GMT
558 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
559 # lang/de/.arch-ids/block_html.php.id
560 # New-directories: lang/de/help/questionnaire
561 # lang/de/help/questionnaire/.arch-ids
562 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
563 # db_sears.sql db/db_sears.sql
564 # Removed-files: lang/be/docs/.arch-ids/release.html.id
565 # lang/be/docs/.arch-ids/releaseold.html.id
566 # Modified-files: admin/cron.php admin/delete.php
567 # admin/editor.html backup/lib.php backup/restore.php
568 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
569 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
572 # Updating yadda tadda tadda madda
577 my (@add, @del, @mod, @ren, @kw, $sum, $msg );
579 if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
581 @add = split(m/\s+/s, $files);
584 if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
586 @del = split(m/\s+/s, $files);
589 if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
591 @mod = split(m/\s+/s, $files);
594 if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
596 @ren = split(m/\s+/s, $files);
600 if ($log =~ m/^Summary:(.+?)$/m ) {
607 if ($log =~ m/\n\n(.+)$/s) {
615 foreach my $ref ( (\
@add, \
@del, \
@mod, \
@ren) ) {
617 while (my $t = pop @
$ref) {
618 next unless length ($t);
619 next if $t =~ m!\{arch\}/!;
620 next if $t =~ m!\.arch-ids/!;
621 next if $t =~ m!\.arch-inventory$!;
622 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
623 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
625 $t = `tla escape --unescaped '$t'`;
627 push (@tmp, shell_quote
($t));
632 #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren];
633 return ($sum, $msg, \
@add, \
@del, \
@mod, \
@ren);
638 my ($tag, $commit) = @_;
643 # don't use subdirs for tags yet, it could screw up other porcelains
648 open(C
,">","$git_dir/refs/tags/$tag")
649 or die "Cannot create tag $tag: $!\n";
651 or die "Cannot write tag $tag: $!\n";
653 or die "Cannot write tag $tag: $!\n";
654 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
656 open(C
,"<","$git_dir/refs/tags/$tag")
657 or die "Cannot read tag $tag: $!\n";
660 die "Error reading tag $tag: $!\n" unless length $commit == 40;
662 or die "Cannot read tag $tag: $!\n";
667 # write/read a private tag
668 # reads fail softly if the tag isn't there
670 my ($tag, $commit) = @_;
672 # don't use subdirs for tags yet, it could screw up other porcelains
675 my $tag_file = "$ptag_dir/$tag";
676 my $tag_branch_dir = dirname
($tag_file);
677 mkpath
($tag_branch_dir) unless (-d
$tag_branch_dir);
679 if ($commit) { # write
680 open(C
,">",$tag_file)
681 or die "Cannot create tag $tag: $!\n";
683 or die "Cannot write tag $tag: $!\n";
685 or die "Cannot write tag $tag: $!\n";
686 $rptags{$commit} = $tag
687 unless $tag =~ m/--base-0$/;
689 # if the tag isn't there, return 0
690 unless ( -s
$tag_file) {
693 open(C
,"<",$tag_file)
694 or die "Cannot read tag $tag: $!\n";
697 die "Error reading tag $tag: $!\n" unless length $commit == 40;
699 or die "Cannot read tag $tag: $!\n";
700 unless (defined $rptags{$commit}) {
701 $rptags{$commit} = $tag;
709 # Identify what branches are merging into me
710 # and whether we are fully merged
711 # git-merge-base <headsha> <headsha> should tell
712 # me what the base of the merge should be
716 my %branches; # holds an arrayref per branch
717 # the arrayref contains a list of
718 # merged patches between the base
719 # of the merge and the current head
721 my @parents; # parents found for this commit
723 # simple loop to split the merges
725 foreach my $merge (@
{$ps->{merges
}}) {
726 my $branch = git_branchname
($merge);
727 unless (defined $branches{$branch} ){
728 $branches{$branch} = [];
730 push @
{$branches{$branch}}, $merge;
734 # foreach branch find a merge base and walk it to the
735 # head where we are, collecting the merged patchsets that
736 # Arch has recorded. Keep that in @have
737 # Compare that with the commits on the other branch
738 # between merge-base and the tip of the branch (@need)
739 # and see if we have a series of consecutive patches
740 # starting from the merge base. The tip of the series
741 # of consecutive patches merged is our new parent for
744 foreach my $branch (keys %branches) {
746 # check that we actually know about the branch
747 next unless -e
"$git_dir/refs/heads/$branch";
749 my $mergebase = `git-merge-base $branch $ps->{branch}`;
751 # Don't die here, Arch supports one-way cherry-picking
752 # between branches with no common base (or any relationship
754 warn "Cannot find merge base for $branch and $ps->{branch}";
759 # now walk up to the mergepoint collecting what patches we have
760 my $branchtip = git_rev_parse
($ps->{branch
});
761 my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
762 my %have; # collected merges this branch has
763 foreach my $merge (@
{$ps->{merges
}}) {
767 foreach my $par (@ancestors) {
768 $par = commitid2pset
($par);
769 if (defined $par->{merges
}) {
770 foreach my $merge (@
{$par->{merges
}}) {
771 $ancestorshave{$merge}=1;
775 # print "++++ Merges in $ps->{id} are....\n";
776 # my @have = sort keys %have; print Dumper(\@have);
778 # merge what we have with what ancestors have
779 %have = (%have, %ancestorshave);
781 # see what the remote branch has - these are the merges we
782 # will want to have in a consecutive series from the mergebase
783 my $otherbranchtip = git_rev_parse
($branch);
784 my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
786 foreach my $needps (@needraw) { # get the psets
787 $needps = commitid2pset
($needps);
788 # git-rev-list will also
789 # list commits merged in via earlier
790 # merges. we are only interested in commits
791 # from the branch we're looking at
792 if ($branch eq $needps->{branch
}) {
793 push @need, $needps->{id
};
797 # print "++++ Merges from $branch we want are....\n";
798 # print Dumper(\@need);
801 while (my $needed_commit = pop @need) {
802 if ($have{$needed_commit}) {
803 $newparent = $needed_commit;
805 last; # break out of the while
809 push @parents, $newparent;
813 } # end foreach branch
815 # prune redundant parents
817 foreach my $p (@parents) {
820 foreach my $p (@parents) {
821 next unless exists $psets{$p}{merges
};
822 next unless ref $psets{$p}{merges
};
823 my @merges = @
{$psets{$p}{merges
}};
824 foreach my $merge (@merges) {
825 if ($parents{$merge}) {
826 delete $parents{$merge};
830 @parents = keys %parents;
831 @parents = map { " -p " . ptag
($_) } @parents;
837 my $val = `git-rev-parse $name`;
838 die "Error: git-rev-parse $name" if $?
;
843 # resolve a SHA1 to a known patchset
845 my $commitid = shift;
847 my $name = $rptags{$commitid}
848 || die "Cannot find reverse tag mapping for $commitid";
850 my $ps = $psets{$name}
851 || (print Dumper
(sort keys %psets)) && die "Cannot find patchset for $name";
855 # an alterative to `command` that allows input to be passed as an array
856 # to work around shell problems with weird characters in arguments
857 sub safe_pipe_capture
{
859 if (my $pid = open my $child, '-|') {
860 @output = (<$child>);
861 close $child or die join(' ',@_).": $! $?";
863 exec(@_) or die $?
; # exec() can fail the executable can't be found
865 return wantarray ?
@output : join('',@output);