Merge branch 'ap/combine-diff-coalesce-lost'
[git/jnareb-git.git] / git-archimport.perl
blob9cb123a07df88c975cf1b0d6c5832cb410175dd0
1 #!/usr/bin/perl
3 # This tool is copyright (c) 2005, Martin Langhoff.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to walk the output of tla abrowse,
7 # fetch the changesets and apply them.
10 =head1 Invocation
12 git archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
13 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
15 Imports a project from one or more Arch repositories. It will follow branches
16 and repositories within the namespaces defined by the <archive/branch>
17 parameters supplied. If it cannot find the remote branch a merge comes from
18 it will just import it as a regular commit. If it can find it, it will mark it
19 as a merge whenever possible.
21 See man (1) git-archimport for more details.
23 =head1 TODO
25 - create tag objects instead of ref tags
26 - audit shell-escaping of filenames
27 - hide our private tags somewhere smarter
28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
29 - sort and apply patches by graphing ancestry relations instead of just
30 relying in dates supplied in the changeset itself.
31 tla ancestry-graph -m could be helpful here...
33 =head1 Devel tricks
35 Add print in front of the shell commands invoked via backticks.
37 =head1 Devel Notes
39 There are several places where Arch and git terminology are intermixed
40 and potentially confused.
42 The notion of a "branch" in git is approximately equivalent to
43 a "archive/category--branch--version" in Arch. Also, it should be noted
44 that the "--branch" portion of "archive/category--branch--version" is really
45 optional in Arch although not many people (nor tools!) seem to know this.
46 This means that "archive/category--version" is also a valid "branch"
47 in git terms.
49 We always refer to Arch names by their fully qualified variant (which
50 means the "archive" name is prefixed.
52 For people unfamiliar with Arch, an "archive" is the term for "repository",
53 and can contain multiple, unrelated branches.
55 =cut
57 use 5.008;
58 use strict;
59 use warnings;
60 use Getopt::Std;
61 use File::Temp qw(tempdir);
62 use File::Path qw(mkpath rmtree);
63 use File::Basename qw(basename dirname);
64 use Data::Dumper qw/ Dumper /;
65 use IPC::Open2;
67 $SIG{'PIPE'}="IGNORE";
68 $ENV{'TZ'}="UTC";
70 my $git_dir = $ENV{"GIT_DIR"} || ".git";
71 $ENV{"GIT_DIR"} = $git_dir;
72 my $ptag_dir = "$git_dir/archimport/tags";
74 our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
76 sub usage() {
77 print STDERR <<END;
78 usage: git archimport # fetch/update GIT from Arch
79 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
80 repository/arch-branch [ repository/arch-branch] ...
81 END
82 exit(1);
85 getopts("fThvat:D:") or usage();
86 usage if $opt_h;
88 @ARGV >= 1 or usage();
89 # $arch_branches:
90 # values associated with keys:
91 # =1 - Arch version / git 'branch' detected via abrowse on a limit
92 # >1 - Arch version / git 'branch' of an auxiliary branch we've merged
93 my %arch_branches = map { my $branch = $_; $branch =~ s/:[^:]*$//; $branch => 1 } @ARGV;
95 # $branch_name_map:
96 # maps arch branches to git branch names
97 my %branch_name_map = map { m/^(.*):([^:]*)$/; $1 => $2 } grep { m/:/ } @ARGV;
99 $ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
100 my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
101 $opt_v && print "+ Using $tmp as temporary directory\n";
103 unless (-d $git_dir) { # initial import needs empty directory
104 opendir DIR, '.' or die "Unable to open current directory: $!\n";
105 while (my $entry = readdir DIR) {
106 $entry =~ /^\.\.?$/ or
107 die "Initial import needs an empty current working directory.\n"
109 closedir DIR
112 my $default_archive; # default Arch archive
113 my %reachable = (); # Arch repositories we can access
114 my %unreachable = (); # Arch repositories we can't access :<
115 my @psets = (); # the collection
116 my %psets = (); # the collection, by name
117 my %stats = ( # Track which strategy we used to import:
118 get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
119 simple_changeset => 0, import_or_tag => 0
122 my %rptags = (); # my reverse private tags
123 # to map a SHA1 to a commitid
124 my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
126 sub do_abrowse {
127 my $stage = shift;
128 while (my ($limit, $level) = each %arch_branches) {
129 next unless $level == $stage;
131 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
132 or die "Problems with tla abrowse: $!";
134 my %ps = (); # the current one
135 my $lastseen = '';
137 while (<ABROWSE>) {
138 chomp;
140 # first record padded w 8 spaces
141 if (s/^\s{8}\b//) {
142 my ($id, $type) = split(m/\s+/, $_, 2);
144 my %last_ps;
145 # store the record we just captured
146 if (%ps && !exists $psets{ $ps{id} }) {
147 %last_ps = %ps; # break references
148 push (@psets, \%last_ps);
149 $psets{ $last_ps{id} } = \%last_ps;
152 my $branch = extract_versionname($id);
153 %ps = ( id => $id, branch => $branch );
154 if (%last_ps && ($last_ps{branch} eq $branch)) {
155 $ps{parent_id} = $last_ps{id};
158 $arch_branches{$branch} = 1;
159 $lastseen = 'id';
161 # deal with types (should work with baz or tla):
162 if ($type =~ m/\(.*changeset\)/) {
163 $ps{type} = 's';
164 } elsif ($type =~ /\(.*import\)/) {
165 $ps{type} = 'i';
166 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
167 $ps{type} = 't';
168 # read which revision we've tagged when we parse the log
169 $ps{tag} = $1;
170 } else {
171 warn "Unknown type $type";
174 $arch_branches{$branch} = 1;
175 $lastseen = 'id';
176 } elsif (s/^\s{10}//) {
177 # 10 leading spaces or more
178 # indicate commit metadata
180 # date
181 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
182 $ps{date} = $1;
183 $lastseen = 'date';
184 } elsif ($_ eq 'merges in:') {
185 $ps{merges} = [];
186 $lastseen = 'merges';
187 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
188 my $id = $_;
189 push (@{$ps{merges}}, $id);
191 # aggressive branch finding:
192 if ($opt_D) {
193 my $branch = extract_versionname($id);
194 my $repo = extract_reponame($branch);
196 if (archive_reachable($repo) &&
197 !defined $arch_branches{$branch}) {
198 $arch_branches{$branch} = $stage + 1;
201 } else {
202 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
207 if (%ps && !exists $psets{ $ps{id} }) {
208 my %temp = %ps; # break references
209 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
210 $temp{parent_id} = $psets[$#psets]{id};
212 push (@psets, \%temp);
213 $psets{ $temp{id} } = \%temp;
216 close ABROWSE or die "$TLA abrowse failed on $limit\n";
218 } # end foreach $root
220 do_abrowse(1);
221 my $depth = 2;
222 $opt_D ||= 0;
223 while ($depth <= $opt_D) {
224 do_abrowse($depth);
225 $depth++;
228 ## Order patches by time
229 # FIXME see if we can find a more optimal way to do this by graphing
230 # the ancestry data and walking it, that way we won't have to rely on
231 # client-supplied dates
232 @psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
234 #print Dumper \@psets;
237 ## TODO cleanup irrelevant patches
238 ## and put an initial import
239 ## or a full tag
240 my $import = 0;
241 unless (-d $git_dir) { # initial import
242 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
243 print "Starting import from $psets[0]{id}\n";
244 `git-init`;
245 die $! if $?;
246 $import = 1;
247 } else {
248 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
250 } else { # progressing an import
251 # load the rptags
252 opendir(DIR, $ptag_dir)
253 || die "can't opendir: $!";
254 while (my $file = readdir(DIR)) {
255 # skip non-interesting-files
256 next unless -f "$ptag_dir/$file";
258 # convert first '--' to '/' from old git-archimport to use
259 # as an archivename/c--b--v private tag
260 if ($file !~ m!,!) {
261 my $oldfile = $file;
262 $file =~ s!--!,!;
263 print STDERR "converting old tag $oldfile to $file\n";
264 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
266 my $sha = ptag($file);
267 chomp $sha;
268 $rptags{$sha} = $file;
270 closedir DIR;
273 # process patchsets
274 # extract the Arch repository name (Arch "archive" in Arch-speak)
275 sub extract_reponame {
276 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
277 return (split(/\//, $fq_cvbr))[0];
280 sub extract_versionname {
281 my $name = shift;
282 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
283 return $name;
286 # convert a fully-qualified revision or version to a unique dirname:
287 # normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
288 # becomes: normalperson@yhbt.net-05,mpd--uclinux--1
290 # the git notion of a branch is closer to
291 # archive/category--branch--version than archive/category--branch, so we
292 # use this to convert to git branch names.
293 # Also, keep archive names but replace '/' with ',' since it won't require
294 # subdirectories, and is safer than swapping '--' which could confuse
295 # reverse-mapping when dealing with bastard branches that
296 # are just archive/category--version (no --branch)
297 sub tree_dirname {
298 my $revision = shift;
299 my $name = extract_versionname($revision);
300 $name =~ s#/#,#;
301 return $name;
304 # old versions of git-archimport just use the <category--branch> part:
305 sub old_style_branchname {
306 my $id = shift;
307 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
308 chomp $ret;
309 return $ret;
312 *git_default_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
314 # retrieve default archive, since $branch_name_map keys might not include it
315 sub get_default_archive {
316 if (!defined $default_archive) {
317 $default_archive = safe_pipe_capture($TLA,'my-default-archive');
318 chomp $default_archive;
320 return $default_archive;
323 sub git_branchname {
324 my $revision = shift;
325 my $name = extract_versionname($revision);
327 if (exists $branch_name_map{$name}) {
328 return $branch_name_map{$name};
330 } elsif ($name =~ m#^([^/]*)/(.*)$#
331 && $1 eq get_default_archive()
332 && exists $branch_name_map{$2}) {
333 # the names given in the command-line lacked the archive.
334 return $branch_name_map{$2};
336 } else {
337 return git_default_branchname($revision);
341 sub process_patchset_accurate {
342 my $ps = shift;
344 # switch to that branch if we're not already in that branch:
345 if (-e "$git_dir/refs/heads/$ps->{branch}") {
346 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
348 # remove any old stuff that got leftover:
349 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
350 rmtree(split(/\0/,$rm)) if $rm;
353 # Apply the import/changeset/merge into the working tree
354 my $dir = sync_to_ps($ps);
355 # read the new log entry:
356 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
357 die "Error in cat-log: $!" if $?;
358 chomp @commitlog;
360 # grab variables we want from the log, new fields get added to $ps:
361 # (author, date, email, summary, message body ...)
362 parselog($ps, \@commitlog);
364 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
365 # this should work when importing continuations
366 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
368 # find where we are supposed to branch from
369 if (! -e "$git_dir/refs/heads/$ps->{branch}") {
370 system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
372 # We trust Arch with the fact that this is just a tag,
373 # and it does not affect the state of the tree, so
374 # we just tag and move on. If the user really wants us
375 # to consolidate more branches into one, don't tag because
376 # the tag name would be already taken.
377 tag($ps->{id}, $branchpoint);
378 ptag($ps->{id}, $branchpoint);
379 print " * Tagged $ps->{id} at $branchpoint\n";
381 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
383 # remove any old stuff that got leftover:
384 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
385 rmtree(split(/\0/,$rm)) if $rm;
386 return 0;
387 } else {
388 warn "Tagging from unknown id unsupported\n" if $ps->{tag};
390 # allow multiple bases/imports here since Arch supports cherry-picks
391 # from unrelated trees
394 # update the index with all the changes we got
395 system('git-diff-files --name-only -z | '.
396 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
397 system('git-ls-files --others -z | '.
398 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
399 return 1;
402 # the native changeset processing strategy. This is very fast, but
403 # does not handle permissions or any renames involving directories
404 sub process_patchset_fast {
405 my $ps = shift;
407 # create the branch if needed
409 if ($ps->{type} eq 'i' && !$import) {
410 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
413 unless ($import) { # skip for import
414 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
415 # we know about this branch
416 system('git-checkout',$ps->{branch});
417 } else {
418 # new branch! we need to verify a few things
419 die "Branch on a non-tag!" unless $ps->{type} eq 't';
420 my $branchpoint = ptag($ps->{tag});
421 die "Tagging from unknown id unsupported: $ps->{tag}"
422 unless $branchpoint;
424 # find where we are supposed to branch from
425 if (! -e "$git_dir/refs/heads/$ps->{branch}") {
426 system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
428 # We trust Arch with the fact that this is just a tag,
429 # and it does not affect the state of the tree, so
430 # we just tag and move on. If the user really wants us
431 # to consolidate more branches into one, don't tag because
432 # the tag name would be already taken.
433 tag($ps->{id}, $branchpoint);
434 ptag($ps->{id}, $branchpoint);
435 print " * Tagged $ps->{id} at $branchpoint\n";
437 system('git-checkout',$ps->{branch}) == 0 or die "$! $?\n";
438 return 0;
440 die $! if $?;
444 # Apply the import/changeset/merge into the working tree
446 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
447 apply_import($ps) or die $!;
448 $stats{import_or_tag}++;
449 $import=0;
450 } elsif ($ps->{type} eq 's') {
451 apply_cset($ps);
452 $stats{simple_changeset}++;
456 # prepare update git's index, based on what arch knows
457 # about the pset, resolve parents, etc
460 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
461 die "Error in cat-archive-log: $!" if $?;
463 parselog($ps,\@commitlog);
465 # imports don't give us good info
466 # on added files. Shame on them
467 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
468 system('git-ls-files --deleted -z | '.
469 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
470 system('git-ls-files --others -z | '.
471 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
474 # TODO: handle removed_directories and renamed_directories:
476 if (my $del = $ps->{removed_files}) {
477 unlink @$del;
478 while (@$del) {
479 my @slice = splice(@$del, 0, 100);
480 system('git-update-index','--remove','--',@slice) == 0 or
481 die "Error in git-update-index --remove: $! $?\n";
485 if (my $ren = $ps->{renamed_files}) { # renamed
486 if (@$ren % 2) {
487 die "Odd number of entries in rename!?";
490 while (@$ren) {
491 my $from = shift @$ren;
492 my $to = shift @$ren;
494 unless (-d dirname($to)) {
495 mkpath(dirname($to)); # will die on err
497 # print "moving $from $to";
498 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
499 system('git-update-index','--remove','--',$from) == 0 or
500 die "Error in git-update-index --remove: $! $?\n";
501 system('git-update-index','--add','--',$to) == 0 or
502 die "Error in git-update-index --add: $! $?\n";
506 if (my $add = $ps->{new_files}) {
507 while (@$add) {
508 my @slice = splice(@$add, 0, 100);
509 system('git-update-index','--add','--',@slice) == 0 or
510 die "Error in git-update-index --add: $! $?\n";
514 if (my $mod = $ps->{modified_files}) {
515 while (@$mod) {
516 my @slice = splice(@$mod, 0, 100);
517 system('git-update-index','--',@slice) == 0 or
518 die "Error in git-update-index: $! $?\n";
521 return 1; # we successfully applied the changeset
524 if ($opt_f) {
525 print "Will import patchsets using the fast strategy\n",
526 "Renamed directories and permission changes will be missed\n";
527 *process_patchset = *process_patchset_fast;
528 } else {
529 print "Using the default (accurate) import strategy.\n",
530 "Things may be a bit slow\n";
531 *process_patchset = *process_patchset_accurate;
534 foreach my $ps (@psets) {
535 # process patchsets
536 $ps->{branch} = git_branchname($ps->{id});
539 # ensure we have a clean state
541 if (my $dirty = `git-diff-files`) {
542 die "Unclean tree when about to process $ps->{id} " .
543 " - did we fail to commit cleanly before?\n$dirty";
545 die $! if $?;
548 # skip commits already in repo
550 if (ptag($ps->{id})) {
551 $opt_v && print " * Skipping already imported: $ps->{id}\n";
552 next;
555 print " * Starting to work on $ps->{id}\n";
557 process_patchset($ps) or next;
559 # warn "errors when running git-update-index! $!";
560 my $tree = `git-write-tree`;
561 die "cannot write tree $!" if $?;
562 chomp $tree;
565 # Who's your daddy?
567 my @par;
568 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
569 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
570 my $p = <HEAD>;
571 close HEAD;
572 chomp $p;
573 push @par, '-p', $p;
574 } else {
575 if ($ps->{type} eq 's') {
576 warn "Could not find the right head for the branch $ps->{branch}";
581 if ($ps->{merges}) {
582 push @par, find_parents($ps);
586 # Commit, tag and clean state
588 $ENV{TZ} = 'GMT';
589 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
590 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
591 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
592 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
593 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
594 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
596 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
597 or die $!;
598 print WRITER $ps->{summary},"\n\n";
600 # only print message if it's not empty, to avoid a spurious blank line;
601 # also append an extra newline, so there's a blank line before the
602 # following "git-archimport-id:" line.
603 print WRITER $ps->{message},"\n\n" if ($ps->{message} ne "");
605 # make it easy to backtrack and figure out which Arch revision this was:
606 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
608 close WRITER;
609 my $commitid = <READER>; # read
610 chomp $commitid;
611 close READER;
612 waitpid $pid,0; # close;
614 if (length $commitid != 40) {
615 die "Something went wrong with the commit! $! $commitid";
618 # Update the branch
620 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
621 print HEAD $commitid;
622 close HEAD;
623 system('git-update-ref', 'HEAD', "$ps->{branch}");
625 # tag accordingly
626 ptag($ps->{id}, $commitid); # private tag
627 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
628 tag($ps->{id}, $commitid);
630 print " * Committed $ps->{id}\n";
631 print " + tree $tree\n";
632 print " + commit $commitid\n";
633 $opt_v && print " + commit date is $ps->{date} \n";
634 $opt_v && print " + parents: ",join(' ',@par),"\n";
637 if ($opt_v) {
638 foreach (sort keys %stats) {
639 print" $_: $stats{$_}\n";
642 exit 0;
644 # used by the accurate strategy:
645 sub sync_to_ps {
646 my $ps = shift;
647 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
649 $opt_v && print "sync_to_ps($ps->{id}) method: ";
651 if (-d $tree_dir) {
652 if ($ps->{type} eq 't') {
653 $opt_v && print "get (tag)\n";
654 # looks like a tag-only or (worse,) a mixed tags/changeset branch,
655 # can't rely on replay to work correctly on these
656 rmtree($tree_dir);
657 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
658 $stats{get_tag}++;
659 } else {
660 my $tree_id = arch_tree_id($tree_dir);
661 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
662 # the common case (hopefully)
663 $opt_v && print "replay\n";
664 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
665 $stats{replay}++;
666 } else {
667 # getting one tree is usually faster than getting two trees
668 # and applying the delta ...
669 rmtree($tree_dir);
670 $opt_v && print "apply-delta\n";
671 safe_pipe_capture($TLA,'get','--no-pristine',
672 $ps->{id},$tree_dir);
673 $stats{get_delta}++;
676 } else {
677 # new branch work
678 $opt_v && print "get (new tree)\n";
679 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
680 $stats{get_new}++;
683 # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
684 system('rsync','-aI','--delete','--exclude',$git_dir,
685 # '--exclude','.arch-inventory',
686 '--exclude','.arch-ids','--exclude','{arch}',
687 '--exclude','+*','--exclude',',*',
688 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
689 return $tree_dir;
692 sub apply_import {
693 my $ps = shift;
694 my $bname = git_branchname($ps->{id});
696 mkpath($tmp);
698 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
699 die "Cannot get import: $!" if $?;
700 system('rsync','-aI','--delete', '--exclude',$git_dir,
701 '--exclude','.arch-ids','--exclude','{arch}',
702 "$tmp/import/", './');
703 die "Cannot rsync import:$!" if $?;
705 rmtree("$tmp/import");
706 die "Cannot remove tempdir: $!" if $?;
709 return 1;
712 sub apply_cset {
713 my $ps = shift;
715 mkpath($tmp);
717 # get the changeset
718 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
719 die "Cannot get changeset: $!" if $?;
721 # apply patches
722 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
723 # this can be sped up considerably by doing
724 # (find | xargs cat) | patch
725 # but that can get mucked up by patches
726 # with missing trailing newlines or the standard
727 # 'missing newline' flag in the patch - possibly
728 # produced with an old/buggy diff.
729 # slow and safe, we invoke patch once per patchfile
730 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
731 die "Problem applying patches! $!" if $?;
734 # apply changed binary files
735 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
736 foreach my $mod (@modified) {
737 chomp $mod;
738 my $orig = $mod;
739 $orig =~ s/\.modified$//; # lazy
740 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
741 #print "rsync -p '$mod' '$orig'";
742 system('rsync','-p',$mod,"./$orig");
743 die "Problem applying binary changes! $!" if $?;
747 # bring in new files
748 system('rsync','-aI','--exclude',$git_dir,
749 '--exclude','.arch-ids',
750 '--exclude', '{arch}',
751 "$tmp/changeset/new-files-archive/",'./');
753 # deleted files are hinted from the commitlog processing
755 rmtree("$tmp/changeset");
759 # =for reference
760 # notes: *-files/-directories keys cannot have spaces, they're always
761 # pika-escaped. Everything after the first newline
762 # A log entry looks like:
763 # Revision: moodle-org--moodle--1.3.3--patch-15
764 # Archive: arch-eduforge@catalyst.net.nz--2004
765 # Creator: Penny Leach <penny@catalyst.net.nz>
766 # Date: Wed May 25 14:15:34 NZST 2005
767 # Standard-date: 2005-05-25 02:15:34 GMT
768 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
769 # lang/de/.arch-ids/block_html.php.id
770 # New-directories: lang/de/help/questionnaire
771 # lang/de/help/questionnaire/.arch-ids
772 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
773 # db_sears.sql db/db_sears.sql
774 # Removed-files: lang/be/docs/.arch-ids/release.html.id
775 # lang/be/docs/.arch-ids/releaseold.html.id
776 # Modified-files: admin/cron.php admin/delete.php
777 # admin/editor.html backup/lib.php backup/restore.php
778 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
779 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
780 # summary can be multiline with a leading space just like the above fields
781 # Keywords:
783 # Updating yadda tadda tadda madda
784 sub parselog {
785 my ($ps, $log) = @_;
786 my $key = undef;
788 # headers we want that contain filenames:
789 my %want_headers = (
790 new_files => 1,
791 modified_files => 1,
792 renamed_files => 1,
793 renamed_directories => 1,
794 removed_files => 1,
795 removed_directories => 1,
798 chomp (@$log);
799 while ($_ = shift @$log) {
800 if (/^Continuation-of:\s*(.*)/) {
801 $ps->{tag} = $1;
802 $key = undef;
803 } elsif (/^Summary:\s*(.*)$/ ) {
804 # summary can be multiline as long as it has a leading space.
805 # we squeeze it onto a single line, though.
806 $ps->{summary} = [ $1 ];
807 $key = 'summary';
808 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
809 $ps->{author} = $1;
810 $ps->{email} = $2;
811 $key = undef;
812 # any *-files or *-directories can be read here:
813 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
814 my $val = $2;
815 $key = lc $1;
816 $key =~ tr/-/_/; # too lazy to quote :P
817 if ($want_headers{$key}) {
818 push @{$ps->{$key}}, split(/\s+/, $val);
819 } else {
820 $key = undef;
822 } elsif (/^$/) {
823 last; # remainder of @$log that didn't get shifted off is message
824 } elsif ($key) {
825 if (/^\s+(.*)$/) {
826 if ($key eq 'summary') {
827 push @{$ps->{$key}}, $1;
828 } else { # files/directories:
829 push @{$ps->{$key}}, split(/\s+/, $1);
831 } else {
832 $key = undef;
837 # drop leading empty lines from the log message
838 while (@$log && $log->[0] eq '') {
839 shift @$log;
841 if (exists $ps->{summary} && @{$ps->{summary}}) {
842 $ps->{summary} = join(' ', @{$ps->{summary}});
844 elsif (@$log == 0) {
845 $ps->{summary} = 'empty commit message';
846 } else {
847 $ps->{summary} = $log->[0] . '...';
849 $ps->{message} = join("\n",@$log);
851 # skip Arch control files, unescape pika-escaped files
852 foreach my $k (keys %want_headers) {
853 next unless (defined $ps->{$k});
854 my @tmp = ();
855 foreach my $t (@{$ps->{$k}}) {
856 next unless length ($t);
857 next if $t =~ m!\{arch\}/!;
858 next if $t =~ m!\.arch-ids/!;
859 # should we skip this?
860 next if $t =~ m!\.arch-inventory$!;
861 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
862 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
863 if ($t =~ /\\/ ){
864 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
866 push @tmp, $t;
868 $ps->{$k} = \@tmp;
872 # write/read a tag
873 sub tag {
874 my ($tag, $commit) = @_;
876 if ($opt_o) {
877 $tag =~ s|/|--|g;
878 } else {
879 my $patchname = $tag;
880 $patchname =~ s/.*--//;
881 $tag = git_branchname ($tag) . '--' . $patchname;
884 if ($commit) {
885 open(C,">","$git_dir/refs/tags/$tag")
886 or die "Cannot create tag $tag: $!\n";
887 print C "$commit\n"
888 or die "Cannot write tag $tag: $!\n";
889 close(C)
890 or die "Cannot write tag $tag: $!\n";
891 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
892 } else { # read
893 open(C,"<","$git_dir/refs/tags/$tag")
894 or die "Cannot read tag $tag: $!\n";
895 $commit = <C>;
896 chomp $commit;
897 die "Error reading tag $tag: $!\n" unless length $commit == 40;
898 close(C)
899 or die "Cannot read tag $tag: $!\n";
900 return $commit;
904 # write/read a private tag
905 # reads fail softly if the tag isn't there
906 sub ptag {
907 my ($tag, $commit) = @_;
909 # don't use subdirs for tags yet, it could screw up other porcelains
910 $tag =~ s|/|,|g;
912 my $tag_file = "$ptag_dir/$tag";
913 my $tag_branch_dir = dirname($tag_file);
914 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
916 if ($commit) { # write
917 open(C,">",$tag_file)
918 or die "Cannot create tag $tag: $!\n";
919 print C "$commit\n"
920 or die "Cannot write tag $tag: $!\n";
921 close(C)
922 or die "Cannot write tag $tag: $!\n";
923 $rptags{$commit} = $tag
924 unless $tag =~ m/--base-0$/;
925 } else { # read
926 # if the tag isn't there, return 0
927 unless ( -s $tag_file) {
928 return 0;
930 open(C,"<",$tag_file)
931 or die "Cannot read tag $tag: $!\n";
932 $commit = <C>;
933 chomp $commit;
934 die "Error reading tag $tag: $!\n" unless length $commit == 40;
935 close(C)
936 or die "Cannot read tag $tag: $!\n";
937 unless (defined $rptags{$commit}) {
938 $rptags{$commit} = $tag;
940 return $commit;
944 sub find_parents {
946 # Identify what branches are merging into me
947 # and whether we are fully merged
948 # git-merge-base <headsha> <headsha> should tell
949 # me what the base of the merge should be
951 my $ps = shift;
953 my %branches; # holds an arrayref per branch
954 # the arrayref contains a list of
955 # merged patches between the base
956 # of the merge and the current head
958 my @parents; # parents found for this commit
960 # simple loop to split the merges
961 # per branch
962 foreach my $merge (@{$ps->{merges}}) {
963 my $branch = git_branchname($merge);
964 unless (defined $branches{$branch} ){
965 $branches{$branch} = [];
967 push @{$branches{$branch}}, $merge;
971 # foreach branch find a merge base and walk it to the
972 # head where we are, collecting the merged patchsets that
973 # Arch has recorded. Keep that in @have
974 # Compare that with the commits on the other branch
975 # between merge-base and the tip of the branch (@need)
976 # and see if we have a series of consecutive patches
977 # starting from the merge base. The tip of the series
978 # of consecutive patches merged is our new parent for
979 # that branch.
981 foreach my $branch (keys %branches) {
983 # check that we actually know about the branch
984 next unless -e "$git_dir/refs/heads/$branch";
986 my $mergebase = `git-merge-base $branch $ps->{branch}`;
987 if ($?) {
988 # Don't die here, Arch supports one-way cherry-picking
989 # between branches with no common base (or any relationship
990 # at all beforehand)
991 warn "Cannot find merge base for $branch and $ps->{branch}";
992 next;
994 chomp $mergebase;
996 # now walk up to the mergepoint collecting what patches we have
997 my $branchtip = git_rev_parse($ps->{branch});
998 my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
999 my %have; # collected merges this branch has
1000 foreach my $merge (@{$ps->{merges}}) {
1001 $have{$merge} = 1;
1003 my %ancestorshave;
1004 foreach my $par (@ancestors) {
1005 $par = commitid2pset($par);
1006 if (defined $par->{merges}) {
1007 foreach my $merge (@{$par->{merges}}) {
1008 $ancestorshave{$merge}=1;
1012 # print "++++ Merges in $ps->{id} are....\n";
1013 # my @have = sort keys %have; print Dumper(\@have);
1015 # merge what we have with what ancestors have
1016 %have = (%have, %ancestorshave);
1018 # see what the remote branch has - these are the merges we
1019 # will want to have in a consecutive series from the mergebase
1020 my $otherbranchtip = git_rev_parse($branch);
1021 my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
1022 my @need;
1023 foreach my $needps (@needraw) { # get the psets
1024 $needps = commitid2pset($needps);
1025 # git-rev-list will also
1026 # list commits merged in via earlier
1027 # merges. we are only interested in commits
1028 # from the branch we're looking at
1029 if ($branch eq $needps->{branch}) {
1030 push @need, $needps->{id};
1034 # print "++++ Merges from $branch we want are....\n";
1035 # print Dumper(\@need);
1037 my $newparent;
1038 while (my $needed_commit = pop @need) {
1039 if ($have{$needed_commit}) {
1040 $newparent = $needed_commit;
1041 } else {
1042 last; # break out of the while
1045 if ($newparent) {
1046 push @parents, $newparent;
1050 } # end foreach branch
1052 # prune redundant parents
1053 my %parents;
1054 foreach my $p (@parents) {
1055 $parents{$p} = 1;
1057 foreach my $p (@parents) {
1058 next unless exists $psets{$p}{merges};
1059 next unless ref $psets{$p}{merges};
1060 my @merges = @{$psets{$p}{merges}};
1061 foreach my $merge (@merges) {
1062 if ($parents{$merge}) {
1063 delete $parents{$merge};
1068 @parents = ();
1069 foreach (keys %parents) {
1070 push @parents, '-p', ptag($_);
1072 return @parents;
1075 sub git_rev_parse {
1076 my $name = shift;
1077 my $val = `git-rev-parse $name`;
1078 die "Error: git-rev-parse $name" if $?;
1079 chomp $val;
1080 return $val;
1083 # resolve a SHA1 to a known patchset
1084 sub commitid2pset {
1085 my $commitid = shift;
1086 chomp $commitid;
1087 my $name = $rptags{$commitid}
1088 || die "Cannot find reverse tag mapping for $commitid";
1089 $name =~ s|,|/|;
1090 my $ps = $psets{$name}
1091 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1092 return $ps;
1096 # an alternative to `command` that allows input to be passed as an array
1097 # to work around shell problems with weird characters in arguments
1098 sub safe_pipe_capture {
1099 my @output;
1100 if (my $pid = open my $child, '-|') {
1101 @output = (<$child>);
1102 close $child or die join(' ',@_).": $! $?";
1103 } else {
1104 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
1106 return wantarray ? @output : join('',@output);
1109 # `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1110 sub arch_tree_id {
1111 my $dir = shift;
1112 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1113 return $ret;
1116 sub archive_reachable {
1117 my $archive = shift;
1118 return 1 if $reachable{$archive};
1119 return 0 if $unreachable{$archive};
1121 if (system "$TLA whereis-archive $archive >/dev/null") {
1122 if ($opt_a && (system($TLA,'register-archive',
1123 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1124 $reachable{$archive} = 1;
1125 return 1;
1127 print STDERR "Archive is unreachable: $archive\n";
1128 $unreachable{$archive} = 1;
1129 return 0;
1130 } else {
1131 $reachable{$archive} = 1;
1132 return 1;