archimport: add -D <depth> and -a switch
[git/mingw.git] / git-archimport.perl
blob396874080a8c61ca7262185b1b1309a3cd56de86
1 #!/usr/bin/perl -w
3 # This tool is copyright (c) 2005, Martin Langhoff.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to walk the output of tla abrowse,
7 # fetch the changesets and apply them.
10 =head1 Invocation
12 git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
14 Imports a project from one or more Arch repositories. It will follow branches
15 and repositories within the namespaces defined by the <archive/branch>
16 parameters suppplied. If it cannot find the remote branch a merge comes from
17 it will just import it as a regular commit. If it can find it, it will mark it
18 as a merge whenever possible.
20 See man (1) git-archimport for more details.
22 =head1 TODO
24 - create tag objects instead of ref tags
25 - audit shell-escaping of filenames
26 - hide our private tags somewhere smarter
27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
29 =head1 Devel tricks
31 Add print in front of the shell commands invoked via backticks.
33 =head1 Devel Notes
35 There are several places where Arch and git terminology are intermixed
36 and potentially confused.
38 The notion of a "branch" in git is approximately equivalent to
39 a "archive/category--branch--version" in Arch. Also, it should be noted
40 that the "--branch" portion of "archive/category--branch--version" is really
41 optional in Arch although not many people (nor tools!) seem to know this.
42 This means that "archive/category--version" is also a valid "branch"
43 in git terms.
45 We always refer to Arch names by their fully qualified variant (which
46 means the "archive" name is prefixed.
48 For people unfamiliar with Arch, an "archive" is the term for "repository",
49 and can contain multiple, unrelated branches.
51 =cut
53 use strict;
54 use warnings;
55 use Getopt::Std;
56 use File::Temp qw(tempdir);
57 use File::Path qw(mkpath rmtree);
58 use File::Basename qw(basename dirname);
59 use Data::Dumper qw/ Dumper /;
60 use IPC::Open2;
62 $SIG{'PIPE'}="IGNORE";
63 $ENV{'TZ'}="UTC";
65 my $git_dir = $ENV{"GIT_DIR"} || ".git";
66 $ENV{"GIT_DIR"} = $git_dir;
67 my $ptag_dir = "$git_dir/archimport/tags";
69 our($opt_h,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
71 sub usage() {
72 print STDERR <<END;
73 Usage: ${\basename $0} # fetch/update GIT from Arch
74 [ -o ] [ -h ] [ -v ] [ -T ] [ -a ] [ -D depth ] [ -t tempdir ]
75 repository/arch-branch [ repository/arch-branch] ...
76 END
77 exit(1);
80 getopts("Thvat:D:") or usage();
81 usage if $opt_h;
83 @ARGV >= 1 or usage();
84 # $arch_branches:
85 # values associated with keys:
86 # =1 - Arch version / git 'branch' detected via abrowse on a limit
87 # >1 - Arch version / git 'branch' of an auxilliary branch we've merged
88 my %arch_branches = map { $_ => 1 } @ARGV;
90 $ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
91 my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
92 $opt_v && print "+ Using $tmp as temporary directory\n";
94 my %reachable = (); # Arch repositories we can access
95 my %unreachable = (); # Arch repositories we can't access :<
96 my @psets = (); # the collection
97 my %psets = (); # the collection, by name
99 my %rptags = (); # my reverse private tags
100 # to map a SHA1 to a commitid
101 my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
103 sub do_abrowse {
104 my $stage = shift;
105 while (my ($limit, $level) = each %arch_branches) {
106 next unless $level == $stage;
108 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
109 or die "Problems with tla abrowse: $!";
111 my %ps = (); # the current one
112 my $lastseen = '';
114 while (<ABROWSE>) {
115 chomp;
117 # first record padded w 8 spaces
118 if (s/^\s{8}\b//) {
119 my ($id, $type) = split(m/\s+/, $_, 2);
121 my %last_ps;
122 # store the record we just captured
123 if (%ps && !exists $psets{ $ps{id} }) {
124 %last_ps = %ps; # break references
125 push (@psets, \%last_ps);
126 $psets{ $last_ps{id} } = \%last_ps;
129 my $branch = extract_versionname($id);
130 %ps = ( id => $id, branch => $branch );
131 if (%last_ps && ($last_ps{branch} eq $branch)) {
132 $ps{parent_id} = $last_ps{id};
135 $arch_branches{$branch} = 1;
136 $lastseen = 'id';
138 # deal with types (should work with baz or tla):
139 if ($type =~ m/\(.*changeset\)/) {
140 $ps{type} = 's';
141 } elsif ($type =~ /\(.*import\)/) {
142 $ps{type} = 'i';
143 } elsif ($type =~ m/\(tag.*\)/) {
144 $ps{type} = 't';
145 # read which revision we've tagged when we parse the log
146 #$ps{tag} = $1;
147 } else {
148 warn "Unknown type $type";
151 $arch_branches{$branch} = 1;
152 $lastseen = 'id';
153 } elsif (s/^\s{10}//) {
154 # 10 leading spaces or more
155 # indicate commit metadata
157 # date
158 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
159 $ps{date} = $1;
160 $lastseen = 'date';
161 } elsif ($_ eq 'merges in:') {
162 $ps{merges} = [];
163 $lastseen = 'merges';
164 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
165 my $id = $_;
166 push (@{$ps{merges}}, $id);
168 # aggressive branch finding:
169 if ($opt_D) {
170 my $branch = extract_versionname($id);
171 my $repo = extract_reponame($branch);
173 if (archive_reachable($repo) &&
174 !defined $arch_branches{$branch}) {
175 $arch_branches{$branch} = $stage + 1;
178 } else {
179 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
184 if (%ps && !exists $psets{ $ps{id} }) {
185 my %temp = %ps; # break references
186 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
187 $temp{parent_id} = $psets[$#psets]{id};
189 push (@psets, \%temp);
190 $psets{ $temp{id} } = \%temp;
193 close ABROWSE or die "$TLA abrowse failed on $limit\n";
195 } # end foreach $root
197 do_abrowse(1);
198 my $depth = 2;
199 $opt_D ||= 0;
200 while ($depth <= $opt_D) {
201 do_abrowse($depth);
202 $depth++;
205 ## Order patches by time
206 # FIXME see if we can find a more optimal way to do this by graphing
207 # the ancestry data and walking it, that way we won't have to rely on
208 # client-supplied dates
209 @psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
211 #print Dumper \@psets;
214 ## TODO cleanup irrelevant patches
215 ## and put an initial import
216 ## or a full tag
217 my $import = 0;
218 unless (-d $git_dir) { # initial import
219 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
220 print "Starting import from $psets[0]{id}\n";
221 `git-init-db`;
222 die $! if $?;
223 $import = 1;
224 } else {
225 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
227 } else { # progressing an import
228 # load the rptags
229 opendir(DIR, $ptag_dir)
230 || die "can't opendir: $!";
231 while (my $file = readdir(DIR)) {
232 # skip non-interesting-files
233 next unless -f "$ptag_dir/$file";
235 # convert first '--' to '/' from old git-archimport to use
236 # as an archivename/c--b--v private tag
237 if ($file !~ m!,!) {
238 my $oldfile = $file;
239 $file =~ s!--!,!;
240 print STDERR "converting old tag $oldfile to $file\n";
241 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
243 my $sha = ptag($file);
244 chomp $sha;
245 $rptags{$sha} = $file;
247 closedir DIR;
250 # process patchsets
251 # extract the Arch repository name (Arch "archive" in Arch-speak)
252 sub extract_reponame {
253 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
254 return (split(/\//, $fq_cvbr))[0];
257 sub extract_versionname {
258 my $name = shift;
259 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
260 return $name;
263 # convert a fully-qualified revision or version to a unique dirname:
264 # normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
265 # becomes: normalperson@yhbt.net-05,mpd--uclinux--1
267 # the git notion of a branch is closer to
268 # archive/category--branch--version than archive/category--branch, so we
269 # use this to convert to git branch names.
270 # Also, keep archive names but replace '/' with ',' since it won't require
271 # subdirectories, and is safer than swapping '--' which could confuse
272 # reverse-mapping when dealing with bastard branches that
273 # are just archive/category--version (no --branch)
274 sub tree_dirname {
275 my $revision = shift;
276 my $name = extract_versionname($revision);
277 $name =~ s#/#,#;
278 return $name;
281 # old versions of git-archimport just use the <category--branch> part:
282 sub old_style_branchname {
283 my $id = shift;
284 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
285 chomp $ret;
286 return $ret;
289 *git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
291 # process patchsets
292 foreach my $ps (@psets) {
293 $ps->{branch} = git_branchname($ps->{id});
296 # ensure we have a clean state
298 if (`git-diff-files`) {
299 die "Unclean tree when about to process $ps->{id} " .
300 " - did we fail to commit cleanly before?";
302 die $! if $?;
305 # skip commits already in repo
307 if (ptag($ps->{id})) {
308 $opt_v && print " * Skipping already imported: $ps->{id}\n";
309 next;
312 print " * Starting to work on $ps->{id}\n";
315 # create the branch if needed
317 if ($ps->{type} eq 'i' && !$import) {
318 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
321 unless ($import) { # skip for import
322 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
323 # we know about this branch
324 system('git-checkout',$ps->{branch});
325 } else {
326 # new branch! we need to verify a few things
327 die "Branch on a non-tag!" unless $ps->{type} eq 't';
328 my $branchpoint = ptag($ps->{tag});
329 die "Tagging from unknown id unsupported: $ps->{tag}"
330 unless $branchpoint;
332 # find where we are supposed to branch from
333 system('git-checkout','-b',$ps->{branch},$branchpoint);
335 # If we trust Arch with the fact that this is just
336 # a tag, and it does not affect the state of the tree
337 # then we just tag and move on
338 tag($ps->{id}, $branchpoint);
339 ptag($ps->{id}, $branchpoint);
340 print " * Tagged $ps->{id} at $branchpoint\n";
341 next;
343 die $! if $?;
347 # Apply the import/changeset/merge into the working tree
349 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
350 apply_import($ps) or die $!;
351 $import=0;
352 } elsif ($ps->{type} eq 's') {
353 apply_cset($ps);
357 # prepare update git's index, based on what arch knows
358 # about the pset, resolve parents, etc
360 my $tree;
362 my $commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
363 die "Error in cat-archive-log: $!" if $?;
365 # parselog will git-add/rm files
366 # and generally prepare things for the commit
367 # NOTE: parselog will shell-quote filenames!
368 my ($sum, $msg, $add, $del, $mod, $ren) = parselog($commitlog);
369 my $logmessage = "$sum\n$msg";
372 # imports don't give us good info
373 # on added files. Shame on them
374 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
375 `find . -type f -print0 | grep -zv '^./$git_dir' | xargs -0 -l100 git-update-index --add`;
376 `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-index --remove`;
379 if (@$add) {
380 while (@$add) {
381 my @slice = splice(@$add, 0, 100);
382 my $slice = join(' ', @slice);
383 `git-update-index --add $slice`;
384 die "Error in git-update-index --add: $!" if $?;
387 if (@$del) {
388 foreach my $file (@$del) {
389 unlink $file or die "Problems deleting $file : $!";
391 while (@$del) {
392 my @slice = splice(@$del, 0, 100);
393 my $slice = join(' ', @slice);
394 `git-update-index --remove $slice`;
395 die "Error in git-update-index --remove: $!" if $?;
398 if (@$ren) { # renamed
399 if (@$ren % 2) {
400 die "Odd number of entries in rename!?";
403 while (@$ren) {
404 my $from = pop @$ren;
405 my $to = pop @$ren;
407 unless (-d dirname($to)) {
408 mkpath(dirname($to)); # will die on err
410 #print "moving $from $to";
411 `mv $from $to`;
412 die "Error renaming $from $to : $!" if $?;
413 `git-update-index --remove $from`;
414 die "Error in git-update-index --remove: $!" if $?;
415 `git-update-index --add $to`;
416 die "Error in git-update-index --add: $!" if $?;
420 if (@$mod) { # must be _after_ renames
421 while (@$mod) {
422 my @slice = splice(@$mod, 0, 100);
423 my $slice = join(' ', @slice);
424 `git-update-index $slice`;
425 die "Error in git-update-index: $!" if $?;
429 # warn "errors when running git-update-index! $!";
430 $tree = `git-write-tree`;
431 die "cannot write tree $!" if $?;
432 chomp $tree;
436 # Who's your daddy?
438 my @par;
439 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
440 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
441 my $p = <HEAD>;
442 close HEAD;
443 chomp $p;
444 push @par, '-p', $p;
445 } else {
446 if ($ps->{type} eq 's') {
447 warn "Could not find the right head for the branch $ps->{branch}";
452 if ($ps->{merges}) {
453 push @par, find_parents($ps);
457 # Commit, tag and clean state
459 $ENV{TZ} = 'GMT';
460 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
461 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
462 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
463 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
464 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
465 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
467 my ($pid, $commit_rh, $commit_wh);
468 $commit_rh = 'commit_rh';
469 $commit_wh = 'commit_wh';
471 $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
472 or die $!;
473 print WRITER $logmessage; # write
474 close WRITER;
475 my $commitid = <READER>; # read
476 chomp $commitid;
477 close READER;
478 waitpid $pid,0; # close;
480 if (length $commitid != 40) {
481 die "Something went wrong with the commit! $! $commitid";
484 # Update the branch
486 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
487 print HEAD $commitid;
488 close HEAD;
489 system('git-update-ref', 'HEAD', "$ps->{branch}");
491 # tag accordingly
492 ptag($ps->{id}, $commitid); # private tag
493 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
494 tag($ps->{id}, $commitid);
496 print " * Committed $ps->{id}\n";
497 print " + tree $tree\n";
498 print " + commit $commitid\n";
499 $opt_v && print " + commit date is $ps->{date} \n";
500 $opt_v && print " + parents: ",join(' ',@par),"\n";
503 sub apply_import {
504 my $ps = shift;
505 my $bname = git_branchname($ps->{id});
507 mkpath($tmp);
509 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
510 die "Cannot get import: $!" if $?;
511 system('rsync','-aI','--delete', '--exclude',$git_dir,
512 '--exclude','.arch-ids','--exclude','{arch}',
513 "$tmp/import/", './');
514 die "Cannot rsync import:$!" if $?;
516 rmtree("$tmp/import");
517 die "Cannot remove tempdir: $!" if $?;
520 return 1;
523 sub apply_cset {
524 my $ps = shift;
526 mkpath($tmp);
528 # get the changeset
529 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
530 die "Cannot get changeset: $!" if $?;
532 # apply patches
533 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
534 # this can be sped up considerably by doing
535 # (find | xargs cat) | patch
536 # but that cna get mucked up by patches
537 # with missing trailing newlines or the standard
538 # 'missing newline' flag in the patch - possibly
539 # produced with an old/buggy diff.
540 # slow and safe, we invoke patch once per patchfile
541 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
542 die "Problem applying patches! $!" if $?;
545 # apply changed binary files
546 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
547 foreach my $mod (@modified) {
548 chomp $mod;
549 my $orig = $mod;
550 $orig =~ s/\.modified$//; # lazy
551 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
552 #print "rsync -p '$mod' '$orig'";
553 system('rsync','-p',$mod,"./$orig");
554 die "Problem applying binary changes! $!" if $?;
558 # bring in new files
559 system('rsync','-aI','--exclude',$git_dir,
560 '--exclude','.arch-ids',
561 '--exclude', '{arch}',
562 "$tmp/changeset/new-files-archive/",'./');
564 # deleted files are hinted from the commitlog processing
566 rmtree("$tmp/changeset");
570 # =for reference
571 # A log entry looks like
572 # Revision: moodle-org--moodle--1.3.3--patch-15
573 # Archive: arch-eduforge@catalyst.net.nz--2004
574 # Creator: Penny Leach <penny@catalyst.net.nz>
575 # Date: Wed May 25 14:15:34 NZST 2005
576 # Standard-date: 2005-05-25 02:15:34 GMT
577 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
578 # lang/de/.arch-ids/block_html.php.id
579 # New-directories: lang/de/help/questionnaire
580 # lang/de/help/questionnaire/.arch-ids
581 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
582 # db_sears.sql db/db_sears.sql
583 # Removed-files: lang/be/docs/.arch-ids/release.html.id
584 # lang/be/docs/.arch-ids/releaseold.html.id
585 # Modified-files: admin/cron.php admin/delete.php
586 # admin/editor.html backup/lib.php backup/restore.php
587 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
588 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
589 # Keywords:
591 # Updating yadda tadda tadda madda
592 sub parselog {
593 my $log = shift;
594 #print $log;
596 my (@add, @del, @mod, @ren, @kw, $sum, $msg );
598 if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
599 my $files = $1;
600 @add = split(m/\s+/s, $files);
603 if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
604 my $files = $1;
605 @del = split(m/\s+/s, $files);
608 if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
609 my $files = $1;
610 @mod = split(m/\s+/s, $files);
613 if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
614 my $files = $1;
615 @ren = split(m/\s+/s, $files);
618 $sum ='';
619 if ($log =~ m/^Summary:(.+?)$/m ) {
620 $sum = $1;
621 $sum =~ s/^\s+//;
622 $sum =~ s/\s+$//;
625 $msg = '';
626 if ($log =~ m/\n\n(.+)$/s) {
627 $msg = $1;
628 $msg =~ s/^\s+//;
629 $msg =~ s/\s+$//;
633 # cleanup the arrays
634 foreach my $ref ( (\@add, \@del, \@mod, \@ren) ) {
635 my @tmp = ();
636 while (my $t = pop @$ref) {
637 next unless length ($t);
638 next if $t =~ m!\{arch\}/!;
639 next if $t =~ m!\.arch-ids/!;
640 next if $t =~ m!\.arch-inventory$!;
641 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
642 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
643 if ($t =~ /\\/ ){
644 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
646 push (@tmp, $t);
648 @$ref = @tmp;
651 #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren];
652 return ($sum, $msg, \@add, \@del, \@mod, \@ren);
655 # write/read a tag
656 sub tag {
657 my ($tag, $commit) = @_;
659 if ($opt_o) {
660 $tag =~ s|/|--|g;
661 } else {
662 # don't use subdirs for tags yet, it could screw up other porcelains
663 $tag =~ s|/|,|g;
666 if ($commit) {
667 open(C,">","$git_dir/refs/tags/$tag")
668 or die "Cannot create tag $tag: $!\n";
669 print C "$commit\n"
670 or die "Cannot write tag $tag: $!\n";
671 close(C)
672 or die "Cannot write tag $tag: $!\n";
673 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
674 } else { # read
675 open(C,"<","$git_dir/refs/tags/$tag")
676 or die "Cannot read tag $tag: $!\n";
677 $commit = <C>;
678 chomp $commit;
679 die "Error reading tag $tag: $!\n" unless length $commit == 40;
680 close(C)
681 or die "Cannot read tag $tag: $!\n";
682 return $commit;
686 # write/read a private tag
687 # reads fail softly if the tag isn't there
688 sub ptag {
689 my ($tag, $commit) = @_;
691 # don't use subdirs for tags yet, it could screw up other porcelains
692 $tag =~ s|/|,|g;
694 my $tag_file = "$ptag_dir/$tag";
695 my $tag_branch_dir = dirname($tag_file);
696 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
698 if ($commit) { # write
699 open(C,">",$tag_file)
700 or die "Cannot create tag $tag: $!\n";
701 print C "$commit\n"
702 or die "Cannot write tag $tag: $!\n";
703 close(C)
704 or die "Cannot write tag $tag: $!\n";
705 $rptags{$commit} = $tag
706 unless $tag =~ m/--base-0$/;
707 } else { # read
708 # if the tag isn't there, return 0
709 unless ( -s $tag_file) {
710 return 0;
712 open(C,"<",$tag_file)
713 or die "Cannot read tag $tag: $!\n";
714 $commit = <C>;
715 chomp $commit;
716 die "Error reading tag $tag: $!\n" unless length $commit == 40;
717 close(C)
718 or die "Cannot read tag $tag: $!\n";
719 unless (defined $rptags{$commit}) {
720 $rptags{$commit} = $tag;
722 return $commit;
726 sub find_parents {
728 # Identify what branches are merging into me
729 # and whether we are fully merged
730 # git-merge-base <headsha> <headsha> should tell
731 # me what the base of the merge should be
733 my $ps = shift;
735 my %branches; # holds an arrayref per branch
736 # the arrayref contains a list of
737 # merged patches between the base
738 # of the merge and the current head
740 my @parents; # parents found for this commit
742 # simple loop to split the merges
743 # per branch
744 foreach my $merge (@{$ps->{merges}}) {
745 my $branch = git_branchname($merge);
746 unless (defined $branches{$branch} ){
747 $branches{$branch} = [];
749 push @{$branches{$branch}}, $merge;
753 # foreach branch find a merge base and walk it to the
754 # head where we are, collecting the merged patchsets that
755 # Arch has recorded. Keep that in @have
756 # Compare that with the commits on the other branch
757 # between merge-base and the tip of the branch (@need)
758 # and see if we have a series of consecutive patches
759 # starting from the merge base. The tip of the series
760 # of consecutive patches merged is our new parent for
761 # that branch.
763 foreach my $branch (keys %branches) {
765 # check that we actually know about the branch
766 next unless -e "$git_dir/refs/heads/$branch";
768 my $mergebase = `git-merge-base $branch $ps->{branch}`;
769 if ($?) {
770 # Don't die here, Arch supports one-way cherry-picking
771 # between branches with no common base (or any relationship
772 # at all beforehand)
773 warn "Cannot find merge base for $branch and $ps->{branch}";
774 next;
776 chomp $mergebase;
778 # now walk up to the mergepoint collecting what patches we have
779 my $branchtip = git_rev_parse($ps->{branch});
780 my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
781 my %have; # collected merges this branch has
782 foreach my $merge (@{$ps->{merges}}) {
783 $have{$merge} = 1;
785 my %ancestorshave;
786 foreach my $par (@ancestors) {
787 $par = commitid2pset($par);
788 if (defined $par->{merges}) {
789 foreach my $merge (@{$par->{merges}}) {
790 $ancestorshave{$merge}=1;
794 # print "++++ Merges in $ps->{id} are....\n";
795 # my @have = sort keys %have; print Dumper(\@have);
797 # merge what we have with what ancestors have
798 %have = (%have, %ancestorshave);
800 # see what the remote branch has - these are the merges we
801 # will want to have in a consecutive series from the mergebase
802 my $otherbranchtip = git_rev_parse($branch);
803 my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
804 my @need;
805 foreach my $needps (@needraw) { # get the psets
806 $needps = commitid2pset($needps);
807 # git-rev-list will also
808 # list commits merged in via earlier
809 # merges. we are only interested in commits
810 # from the branch we're looking at
811 if ($branch eq $needps->{branch}) {
812 push @need, $needps->{id};
816 # print "++++ Merges from $branch we want are....\n";
817 # print Dumper(\@need);
819 my $newparent;
820 while (my $needed_commit = pop @need) {
821 if ($have{$needed_commit}) {
822 $newparent = $needed_commit;
823 } else {
824 last; # break out of the while
827 if ($newparent) {
828 push @parents, $newparent;
832 } # end foreach branch
834 # prune redundant parents
835 my %parents;
836 foreach my $p (@parents) {
837 $parents{$p} = 1;
839 foreach my $p (@parents) {
840 next unless exists $psets{$p}{merges};
841 next unless ref $psets{$p}{merges};
842 my @merges = @{$psets{$p}{merges}};
843 foreach my $merge (@merges) {
844 if ($parents{$merge}) {
845 delete $parents{$merge};
850 @parents = ();
851 foreach (keys %parents) {
852 push @parents, '-p', ptag($_);
854 return @parents;
857 sub git_rev_parse {
858 my $name = shift;
859 my $val = `git-rev-parse $name`;
860 die "Error: git-rev-parse $name" if $?;
861 chomp $val;
862 return $val;
865 # resolve a SHA1 to a known patchset
866 sub commitid2pset {
867 my $commitid = shift;
868 chomp $commitid;
869 my $name = $rptags{$commitid}
870 || die "Cannot find reverse tag mapping for $commitid";
871 $name =~ s|,|/|;
872 my $ps = $psets{$name}
873 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
874 return $ps;
878 # an alterative to `command` that allows input to be passed as an array
879 # to work around shell problems with weird characters in arguments
880 sub safe_pipe_capture {
881 my @output;
882 if (my $pid = open my $child, '-|') {
883 @output = (<$child>);
884 close $child or die join(' ',@_).": $! $?";
885 } else {
886 exec(@_) or die $?; # exec() can fail the executable can't be found
888 return wantarray ? @output : join('',@output);
891 # `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
892 sub arch_tree_id {
893 my $dir = shift;
894 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
895 return $ret;
898 sub archive_reachable {
899 my $archive = shift;
900 return 1 if $reachable{$archive};
901 return 0 if $unreachable{$archive};
903 if (system "$TLA whereis-archive $archive >/dev/null") {
904 if ($opt_a && (system($TLA,'register-archive',
905 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
906 $reachable{$archive} = 1;
907 return 1;
909 print STDERR "Archive is unreachable: $archive\n";
910 $unreachable{$archive} = 1;
911 return 0;
912 } else {
913 $reachable{$archive} = 1;
914 return 1;