sha1write: don't copy full sized buffers
[git/spearce.git] / git-archimport.perl
blob98f3ede566a6cb0c902ce84795f7de8f8afbe633
1 #!/usr/bin/perl -w
3 # This tool is copyright (c) 2005, Martin Langhoff.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to walk the output of tla abrowse,
7 # fetch the changesets and apply them.
10 =head1 Invocation
12 git archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
13 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
15 Imports a project from one or more Arch repositories. It will follow branches
16 and repositories within the namespaces defined by the <archive/branch>
17 parameters supplied. If it cannot find the remote branch a merge comes from
18 it will just import it as a regular commit. If it can find it, it will mark it
19 as a merge whenever possible.
21 See man (1) git-archimport for more details.
23 =head1 TODO
25 - create tag objects instead of ref tags
26 - audit shell-escaping of filenames
27 - hide our private tags somewhere smarter
28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
29 - sort and apply patches by graphing ancestry relations instead of just
30 relying in dates supplied in the changeset itself.
31 tla ancestry-graph -m could be helpful here...
33 =head1 Devel tricks
35 Add print in front of the shell commands invoked via backticks.
37 =head1 Devel Notes
39 There are several places where Arch and git terminology are intermixed
40 and potentially confused.
42 The notion of a "branch" in git is approximately equivalent to
43 a "archive/category--branch--version" in Arch. Also, it should be noted
44 that the "--branch" portion of "archive/category--branch--version" is really
45 optional in Arch although not many people (nor tools!) seem to know this.
46 This means that "archive/category--version" is also a valid "branch"
47 in git terms.
49 We always refer to Arch names by their fully qualified variant (which
50 means the "archive" name is prefixed.
52 For people unfamiliar with Arch, an "archive" is the term for "repository",
53 and can contain multiple, unrelated branches.
55 =cut
57 use strict;
58 use warnings;
59 use Getopt::Std;
60 use File::Temp qw(tempdir);
61 use File::Path qw(mkpath rmtree);
62 use File::Basename qw(basename dirname);
63 use Data::Dumper qw/ Dumper /;
64 use IPC::Open2;
66 $SIG{'PIPE'}="IGNORE";
67 $ENV{'TZ'}="UTC";
69 my $git_dir = $ENV{"GIT_DIR"} || ".git";
70 $ENV{"GIT_DIR"} = $git_dir;
71 my $ptag_dir = "$git_dir/archimport/tags";
73 our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
75 sub usage() {
76 print STDERR <<END;
77 Usage: git archimport # fetch/update GIT from Arch
78 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
79 repository/arch-branch [ repository/arch-branch] ...
80 END
81 exit(1);
84 getopts("fThvat:D:") or usage();
85 usage if $opt_h;
87 @ARGV >= 1 or usage();
88 # $arch_branches:
89 # values associated with keys:
90 # =1 - Arch version / git 'branch' detected via abrowse on a limit
91 # >1 - Arch version / git 'branch' of an auxiliary branch we've merged
92 my %arch_branches = map { my $branch = $_; $branch =~ s/:[^:]*$//; $branch => 1 } @ARGV;
94 # $branch_name_map:
95 # maps arch branches to git branch names
96 my %branch_name_map = map { m/^(.*):([^:]*)$/; $1 => $2 } grep { m/:/ } @ARGV;
98 $ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
99 my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
100 $opt_v && print "+ Using $tmp as temporary directory\n";
102 unless (-d $git_dir) { # initial import needs empty directory
103 opendir DIR, '.' or die "Unable to open current directory: $!\n";
104 while (my $entry = readdir DIR) {
105 $entry =~ /^\.\.?$/ or
106 die "Initial import needs an empty current working directory.\n"
108 closedir DIR
111 my $default_archive; # default Arch archive
112 my %reachable = (); # Arch repositories we can access
113 my %unreachable = (); # Arch repositories we can't access :<
114 my @psets = (); # the collection
115 my %psets = (); # the collection, by name
116 my %stats = ( # Track which strategy we used to import:
117 get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
118 simple_changeset => 0, import_or_tag => 0
121 my %rptags = (); # my reverse private tags
122 # to map a SHA1 to a commitid
123 my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
125 sub do_abrowse {
126 my $stage = shift;
127 while (my ($limit, $level) = each %arch_branches) {
128 next unless $level == $stage;
130 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
131 or die "Problems with tla abrowse: $!";
133 my %ps = (); # the current one
134 my $lastseen = '';
136 while (<ABROWSE>) {
137 chomp;
139 # first record padded w 8 spaces
140 if (s/^\s{8}\b//) {
141 my ($id, $type) = split(m/\s+/, $_, 2);
143 my %last_ps;
144 # store the record we just captured
145 if (%ps && !exists $psets{ $ps{id} }) {
146 %last_ps = %ps; # break references
147 push (@psets, \%last_ps);
148 $psets{ $last_ps{id} } = \%last_ps;
151 my $branch = extract_versionname($id);
152 %ps = ( id => $id, branch => $branch );
153 if (%last_ps && ($last_ps{branch} eq $branch)) {
154 $ps{parent_id} = $last_ps{id};
157 $arch_branches{$branch} = 1;
158 $lastseen = 'id';
160 # deal with types (should work with baz or tla):
161 if ($type =~ m/\(.*changeset\)/) {
162 $ps{type} = 's';
163 } elsif ($type =~ /\(.*import\)/) {
164 $ps{type} = 'i';
165 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
166 $ps{type} = 't';
167 # read which revision we've tagged when we parse the log
168 $ps{tag} = $1;
169 } else {
170 warn "Unknown type $type";
173 $arch_branches{$branch} = 1;
174 $lastseen = 'id';
175 } elsif (s/^\s{10}//) {
176 # 10 leading spaces or more
177 # indicate commit metadata
179 # date
180 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
181 $ps{date} = $1;
182 $lastseen = 'date';
183 } elsif ($_ eq 'merges in:') {
184 $ps{merges} = [];
185 $lastseen = 'merges';
186 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
187 my $id = $_;
188 push (@{$ps{merges}}, $id);
190 # aggressive branch finding:
191 if ($opt_D) {
192 my $branch = extract_versionname($id);
193 my $repo = extract_reponame($branch);
195 if (archive_reachable($repo) &&
196 !defined $arch_branches{$branch}) {
197 $arch_branches{$branch} = $stage + 1;
200 } else {
201 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
206 if (%ps && !exists $psets{ $ps{id} }) {
207 my %temp = %ps; # break references
208 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
209 $temp{parent_id} = $psets[$#psets]{id};
211 push (@psets, \%temp);
212 $psets{ $temp{id} } = \%temp;
215 close ABROWSE or die "$TLA abrowse failed on $limit\n";
217 } # end foreach $root
219 do_abrowse(1);
220 my $depth = 2;
221 $opt_D ||= 0;
222 while ($depth <= $opt_D) {
223 do_abrowse($depth);
224 $depth++;
227 ## Order patches by time
228 # FIXME see if we can find a more optimal way to do this by graphing
229 # the ancestry data and walking it, that way we won't have to rely on
230 # client-supplied dates
231 @psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
233 #print Dumper \@psets;
236 ## TODO cleanup irrelevant patches
237 ## and put an initial import
238 ## or a full tag
239 my $import = 0;
240 unless (-d $git_dir) { # initial import
241 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
242 print "Starting import from $psets[0]{id}\n";
243 `git-init`;
244 die $! if $?;
245 $import = 1;
246 } else {
247 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
249 } else { # progressing an import
250 # load the rptags
251 opendir(DIR, $ptag_dir)
252 || die "can't opendir: $!";
253 while (my $file = readdir(DIR)) {
254 # skip non-interesting-files
255 next unless -f "$ptag_dir/$file";
257 # convert first '--' to '/' from old git-archimport to use
258 # as an archivename/c--b--v private tag
259 if ($file !~ m!,!) {
260 my $oldfile = $file;
261 $file =~ s!--!,!;
262 print STDERR "converting old tag $oldfile to $file\n";
263 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
265 my $sha = ptag($file);
266 chomp $sha;
267 $rptags{$sha} = $file;
269 closedir DIR;
272 # process patchsets
273 # extract the Arch repository name (Arch "archive" in Arch-speak)
274 sub extract_reponame {
275 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
276 return (split(/\//, $fq_cvbr))[0];
279 sub extract_versionname {
280 my $name = shift;
281 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
282 return $name;
285 # convert a fully-qualified revision or version to a unique dirname:
286 # normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
287 # becomes: normalperson@yhbt.net-05,mpd--uclinux--1
289 # the git notion of a branch is closer to
290 # archive/category--branch--version than archive/category--branch, so we
291 # use this to convert to git branch names.
292 # Also, keep archive names but replace '/' with ',' since it won't require
293 # subdirectories, and is safer than swapping '--' which could confuse
294 # reverse-mapping when dealing with bastard branches that
295 # are just archive/category--version (no --branch)
296 sub tree_dirname {
297 my $revision = shift;
298 my $name = extract_versionname($revision);
299 $name =~ s#/#,#;
300 return $name;
303 # old versions of git-archimport just use the <category--branch> part:
304 sub old_style_branchname {
305 my $id = shift;
306 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
307 chomp $ret;
308 return $ret;
311 *git_default_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
313 # retrieve default archive, since $branch_name_map keys might not include it
314 sub get_default_archive {
315 if (!defined $default_archive) {
316 $default_archive = safe_pipe_capture($TLA,'my-default-archive');
317 chomp $default_archive;
319 return $default_archive;
322 sub git_branchname {
323 my $revision = shift;
324 my $name = extract_versionname($revision);
326 if (exists $branch_name_map{$name}) {
327 return $branch_name_map{$name};
329 } elsif ($name =~ m#^([^/]*)/(.*)$#
330 && $1 eq get_default_archive()
331 && exists $branch_name_map{$2}) {
332 # the names given in the command-line lacked the archive.
333 return $branch_name_map{$2};
335 } else {
336 return git_default_branchname($revision);
340 sub process_patchset_accurate {
341 my $ps = shift;
343 # switch to that branch if we're not already in that branch:
344 if (-e "$git_dir/refs/heads/$ps->{branch}") {
345 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
347 # remove any old stuff that got leftover:
348 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
349 rmtree(split(/\0/,$rm)) if $rm;
352 # Apply the import/changeset/merge into the working tree
353 my $dir = sync_to_ps($ps);
354 # read the new log entry:
355 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
356 die "Error in cat-log: $!" if $?;
357 chomp @commitlog;
359 # grab variables we want from the log, new fields get added to $ps:
360 # (author, date, email, summary, message body ...)
361 parselog($ps, \@commitlog);
363 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
364 # this should work when importing continuations
365 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
367 # find where we are supposed to branch from
368 if (! -e "$git_dir/refs/heads/$ps->{branch}") {
369 system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
371 # We trust Arch with the fact that this is just a tag,
372 # and it does not affect the state of the tree, so
373 # we just tag and move on. If the user really wants us
374 # to consolidate more branches into one, don't tag because
375 # the tag name would be already taken.
376 tag($ps->{id}, $branchpoint);
377 ptag($ps->{id}, $branchpoint);
378 print " * Tagged $ps->{id} at $branchpoint\n";
380 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
382 # remove any old stuff that got leftover:
383 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
384 rmtree(split(/\0/,$rm)) if $rm;
385 return 0;
386 } else {
387 warn "Tagging from unknown id unsupported\n" if $ps->{tag};
389 # allow multiple bases/imports here since Arch supports cherry-picks
390 # from unrelated trees
393 # update the index with all the changes we got
394 system('git-diff-files --name-only -z | '.
395 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
396 system('git-ls-files --others -z | '.
397 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
398 return 1;
401 # the native changeset processing strategy. This is very fast, but
402 # does not handle permissions or any renames involving directories
403 sub process_patchset_fast {
404 my $ps = shift;
406 # create the branch if needed
408 if ($ps->{type} eq 'i' && !$import) {
409 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
412 unless ($import) { # skip for import
413 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
414 # we know about this branch
415 system('git-checkout',$ps->{branch});
416 } else {
417 # new branch! we need to verify a few things
418 die "Branch on a non-tag!" unless $ps->{type} eq 't';
419 my $branchpoint = ptag($ps->{tag});
420 die "Tagging from unknown id unsupported: $ps->{tag}"
421 unless $branchpoint;
423 # find where we are supposed to branch from
424 if (! -e "$git_dir/refs/heads/$ps->{branch}") {
425 system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
427 # We trust Arch with the fact that this is just a tag,
428 # and it does not affect the state of the tree, so
429 # we just tag and move on. If the user really wants us
430 # to consolidate more branches into one, don't tag because
431 # the tag name would be already taken.
432 tag($ps->{id}, $branchpoint);
433 ptag($ps->{id}, $branchpoint);
434 print " * Tagged $ps->{id} at $branchpoint\n";
436 system('git-checkout',$ps->{branch}) == 0 or die "$! $?\n";
437 return 0;
439 die $! if $?;
443 # Apply the import/changeset/merge into the working tree
445 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
446 apply_import($ps) or die $!;
447 $stats{import_or_tag}++;
448 $import=0;
449 } elsif ($ps->{type} eq 's') {
450 apply_cset($ps);
451 $stats{simple_changeset}++;
455 # prepare update git's index, based on what arch knows
456 # about the pset, resolve parents, etc
459 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
460 die "Error in cat-archive-log: $!" if $?;
462 parselog($ps,\@commitlog);
464 # imports don't give us good info
465 # on added files. Shame on them
466 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
467 system('git-ls-files --deleted -z | '.
468 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
469 system('git-ls-files --others -z | '.
470 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
473 # TODO: handle removed_directories and renamed_directories:
475 if (my $del = $ps->{removed_files}) {
476 unlink @$del;
477 while (@$del) {
478 my @slice = splice(@$del, 0, 100);
479 system('git-update-index','--remove','--',@slice) == 0 or
480 die "Error in git-update-index --remove: $! $?\n";
484 if (my $ren = $ps->{renamed_files}) { # renamed
485 if (@$ren % 2) {
486 die "Odd number of entries in rename!?";
489 while (@$ren) {
490 my $from = shift @$ren;
491 my $to = shift @$ren;
493 unless (-d dirname($to)) {
494 mkpath(dirname($to)); # will die on err
496 # print "moving $from $to";
497 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
498 system('git-update-index','--remove','--',$from) == 0 or
499 die "Error in git-update-index --remove: $! $?\n";
500 system('git-update-index','--add','--',$to) == 0 or
501 die "Error in git-update-index --add: $! $?\n";
505 if (my $add = $ps->{new_files}) {
506 while (@$add) {
507 my @slice = splice(@$add, 0, 100);
508 system('git-update-index','--add','--',@slice) == 0 or
509 die "Error in git-update-index --add: $! $?\n";
513 if (my $mod = $ps->{modified_files}) {
514 while (@$mod) {
515 my @slice = splice(@$mod, 0, 100);
516 system('git-update-index','--',@slice) == 0 or
517 die "Error in git-update-index: $! $?\n";
520 return 1; # we successfully applied the changeset
523 if ($opt_f) {
524 print "Will import patchsets using the fast strategy\n",
525 "Renamed directories and permission changes will be missed\n";
526 *process_patchset = *process_patchset_fast;
527 } else {
528 print "Using the default (accurate) import strategy.\n",
529 "Things may be a bit slow\n";
530 *process_patchset = *process_patchset_accurate;
533 foreach my $ps (@psets) {
534 # process patchsets
535 $ps->{branch} = git_branchname($ps->{id});
538 # ensure we have a clean state
540 if (my $dirty = `git-diff-files`) {
541 die "Unclean tree when about to process $ps->{id} " .
542 " - did we fail to commit cleanly before?\n$dirty";
544 die $! if $?;
547 # skip commits already in repo
549 if (ptag($ps->{id})) {
550 $opt_v && print " * Skipping already imported: $ps->{id}\n";
551 next;
554 print " * Starting to work on $ps->{id}\n";
556 process_patchset($ps) or next;
558 # warn "errors when running git-update-index! $!";
559 my $tree = `git-write-tree`;
560 die "cannot write tree $!" if $?;
561 chomp $tree;
564 # Who's your daddy?
566 my @par;
567 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
568 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
569 my $p = <HEAD>;
570 close HEAD;
571 chomp $p;
572 push @par, '-p', $p;
573 } else {
574 if ($ps->{type} eq 's') {
575 warn "Could not find the right head for the branch $ps->{branch}";
580 if ($ps->{merges}) {
581 push @par, find_parents($ps);
585 # Commit, tag and clean state
587 $ENV{TZ} = 'GMT';
588 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
589 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
590 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
591 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
592 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
593 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
595 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
596 or die $!;
597 print WRITER $ps->{summary},"\n\n";
599 # only print message if it's not empty, to avoid a spurious blank line;
600 # also append an extra newline, so there's a blank line before the
601 # following "git-archimport-id:" line.
602 print WRITER $ps->{message},"\n\n" if ($ps->{message} ne "");
604 # make it easy to backtrack and figure out which Arch revision this was:
605 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
607 close WRITER;
608 my $commitid = <READER>; # read
609 chomp $commitid;
610 close READER;
611 waitpid $pid,0; # close;
613 if (length $commitid != 40) {
614 die "Something went wrong with the commit! $! $commitid";
617 # Update the branch
619 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
620 print HEAD $commitid;
621 close HEAD;
622 system('git-update-ref', 'HEAD', "$ps->{branch}");
624 # tag accordingly
625 ptag($ps->{id}, $commitid); # private tag
626 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
627 tag($ps->{id}, $commitid);
629 print " * Committed $ps->{id}\n";
630 print " + tree $tree\n";
631 print " + commit $commitid\n";
632 $opt_v && print " + commit date is $ps->{date} \n";
633 $opt_v && print " + parents: ",join(' ',@par),"\n";
636 if ($opt_v) {
637 foreach (sort keys %stats) {
638 print" $_: $stats{$_}\n";
641 exit 0;
643 # used by the accurate strategy:
644 sub sync_to_ps {
645 my $ps = shift;
646 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
648 $opt_v && print "sync_to_ps($ps->{id}) method: ";
650 if (-d $tree_dir) {
651 if ($ps->{type} eq 't') {
652 $opt_v && print "get (tag)\n";
653 # looks like a tag-only or (worse,) a mixed tags/changeset branch,
654 # can't rely on replay to work correctly on these
655 rmtree($tree_dir);
656 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
657 $stats{get_tag}++;
658 } else {
659 my $tree_id = arch_tree_id($tree_dir);
660 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
661 # the common case (hopefully)
662 $opt_v && print "replay\n";
663 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
664 $stats{replay}++;
665 } else {
666 # getting one tree is usually faster than getting two trees
667 # and applying the delta ...
668 rmtree($tree_dir);
669 $opt_v && print "apply-delta\n";
670 safe_pipe_capture($TLA,'get','--no-pristine',
671 $ps->{id},$tree_dir);
672 $stats{get_delta}++;
675 } else {
676 # new branch work
677 $opt_v && print "get (new tree)\n";
678 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
679 $stats{get_new}++;
682 # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
683 system('rsync','-aI','--delete','--exclude',$git_dir,
684 # '--exclude','.arch-inventory',
685 '--exclude','.arch-ids','--exclude','{arch}',
686 '--exclude','+*','--exclude',',*',
687 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
688 return $tree_dir;
691 sub apply_import {
692 my $ps = shift;
693 my $bname = git_branchname($ps->{id});
695 mkpath($tmp);
697 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
698 die "Cannot get import: $!" if $?;
699 system('rsync','-aI','--delete', '--exclude',$git_dir,
700 '--exclude','.arch-ids','--exclude','{arch}',
701 "$tmp/import/", './');
702 die "Cannot rsync import:$!" if $?;
704 rmtree("$tmp/import");
705 die "Cannot remove tempdir: $!" if $?;
708 return 1;
711 sub apply_cset {
712 my $ps = shift;
714 mkpath($tmp);
716 # get the changeset
717 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
718 die "Cannot get changeset: $!" if $?;
720 # apply patches
721 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
722 # this can be sped up considerably by doing
723 # (find | xargs cat) | patch
724 # but that can get mucked up by patches
725 # with missing trailing newlines or the standard
726 # 'missing newline' flag in the patch - possibly
727 # produced with an old/buggy diff.
728 # slow and safe, we invoke patch once per patchfile
729 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
730 die "Problem applying patches! $!" if $?;
733 # apply changed binary files
734 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
735 foreach my $mod (@modified) {
736 chomp $mod;
737 my $orig = $mod;
738 $orig =~ s/\.modified$//; # lazy
739 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
740 #print "rsync -p '$mod' '$orig'";
741 system('rsync','-p',$mod,"./$orig");
742 die "Problem applying binary changes! $!" if $?;
746 # bring in new files
747 system('rsync','-aI','--exclude',$git_dir,
748 '--exclude','.arch-ids',
749 '--exclude', '{arch}',
750 "$tmp/changeset/new-files-archive/",'./');
752 # deleted files are hinted from the commitlog processing
754 rmtree("$tmp/changeset");
758 # =for reference
759 # notes: *-files/-directories keys cannot have spaces, they're always
760 # pika-escaped. Everything after the first newline
761 # A log entry looks like:
762 # Revision: moodle-org--moodle--1.3.3--patch-15
763 # Archive: arch-eduforge@catalyst.net.nz--2004
764 # Creator: Penny Leach <penny@catalyst.net.nz>
765 # Date: Wed May 25 14:15:34 NZST 2005
766 # Standard-date: 2005-05-25 02:15:34 GMT
767 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
768 # lang/de/.arch-ids/block_html.php.id
769 # New-directories: lang/de/help/questionnaire
770 # lang/de/help/questionnaire/.arch-ids
771 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
772 # db_sears.sql db/db_sears.sql
773 # Removed-files: lang/be/docs/.arch-ids/release.html.id
774 # lang/be/docs/.arch-ids/releaseold.html.id
775 # Modified-files: admin/cron.php admin/delete.php
776 # admin/editor.html backup/lib.php backup/restore.php
777 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
778 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
779 # summary can be multiline with a leading space just like the above fields
780 # Keywords:
782 # Updating yadda tadda tadda madda
783 sub parselog {
784 my ($ps, $log) = @_;
785 my $key = undef;
787 # headers we want that contain filenames:
788 my %want_headers = (
789 new_files => 1,
790 modified_files => 1,
791 renamed_files => 1,
792 renamed_directories => 1,
793 removed_files => 1,
794 removed_directories => 1,
797 chomp (@$log);
798 while ($_ = shift @$log) {
799 if (/^Continuation-of:\s*(.*)/) {
800 $ps->{tag} = $1;
801 $key = undef;
802 } elsif (/^Summary:\s*(.*)$/ ) {
803 # summary can be multiline as long as it has a leading space.
804 # we squeeze it onto a single line, though.
805 $ps->{summary} = [ $1 ];
806 $key = 'summary';
807 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
808 $ps->{author} = $1;
809 $ps->{email} = $2;
810 $key = undef;
811 # any *-files or *-directories can be read here:
812 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
813 my $val = $2;
814 $key = lc $1;
815 $key =~ tr/-/_/; # too lazy to quote :P
816 if ($want_headers{$key}) {
817 push @{$ps->{$key}}, split(/\s+/, $val);
818 } else {
819 $key = undef;
821 } elsif (/^$/) {
822 last; # remainder of @$log that didn't get shifted off is message
823 } elsif ($key) {
824 if (/^\s+(.*)$/) {
825 if ($key eq 'summary') {
826 push @{$ps->{$key}}, $1;
827 } else { # files/directories:
828 push @{$ps->{$key}}, split(/\s+/, $1);
830 } else {
831 $key = undef;
836 # drop leading empty lines from the log message
837 while (@$log && $log->[0] eq '') {
838 shift @$log;
840 if (exists $ps->{summary} && @{$ps->{summary}}) {
841 $ps->{summary} = join(' ', @{$ps->{summary}});
843 elsif (@$log == 0) {
844 $ps->{summary} = 'empty commit message';
845 } else {
846 $ps->{summary} = $log->[0] . '...';
848 $ps->{message} = join("\n",@$log);
850 # skip Arch control files, unescape pika-escaped files
851 foreach my $k (keys %want_headers) {
852 next unless (defined $ps->{$k});
853 my @tmp = ();
854 foreach my $t (@{$ps->{$k}}) {
855 next unless length ($t);
856 next if $t =~ m!\{arch\}/!;
857 next if $t =~ m!\.arch-ids/!;
858 # should we skip this?
859 next if $t =~ m!\.arch-inventory$!;
860 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
861 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
862 if ($t =~ /\\/ ){
863 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
865 push @tmp, $t;
867 $ps->{$k} = \@tmp;
871 # write/read a tag
872 sub tag {
873 my ($tag, $commit) = @_;
875 if ($opt_o) {
876 $tag =~ s|/|--|g;
877 } else {
878 my $patchname = $tag;
879 $patchname =~ s/.*--//;
880 $tag = git_branchname ($tag) . '--' . $patchname;
883 if ($commit) {
884 open(C,">","$git_dir/refs/tags/$tag")
885 or die "Cannot create tag $tag: $!\n";
886 print C "$commit\n"
887 or die "Cannot write tag $tag: $!\n";
888 close(C)
889 or die "Cannot write tag $tag: $!\n";
890 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
891 } else { # read
892 open(C,"<","$git_dir/refs/tags/$tag")
893 or die "Cannot read tag $tag: $!\n";
894 $commit = <C>;
895 chomp $commit;
896 die "Error reading tag $tag: $!\n" unless length $commit == 40;
897 close(C)
898 or die "Cannot read tag $tag: $!\n";
899 return $commit;
903 # write/read a private tag
904 # reads fail softly if the tag isn't there
905 sub ptag {
906 my ($tag, $commit) = @_;
908 # don't use subdirs for tags yet, it could screw up other porcelains
909 $tag =~ s|/|,|g;
911 my $tag_file = "$ptag_dir/$tag";
912 my $tag_branch_dir = dirname($tag_file);
913 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
915 if ($commit) { # write
916 open(C,">",$tag_file)
917 or die "Cannot create tag $tag: $!\n";
918 print C "$commit\n"
919 or die "Cannot write tag $tag: $!\n";
920 close(C)
921 or die "Cannot write tag $tag: $!\n";
922 $rptags{$commit} = $tag
923 unless $tag =~ m/--base-0$/;
924 } else { # read
925 # if the tag isn't there, return 0
926 unless ( -s $tag_file) {
927 return 0;
929 open(C,"<",$tag_file)
930 or die "Cannot read tag $tag: $!\n";
931 $commit = <C>;
932 chomp $commit;
933 die "Error reading tag $tag: $!\n" unless length $commit == 40;
934 close(C)
935 or die "Cannot read tag $tag: $!\n";
936 unless (defined $rptags{$commit}) {
937 $rptags{$commit} = $tag;
939 return $commit;
943 sub find_parents {
945 # Identify what branches are merging into me
946 # and whether we are fully merged
947 # git-merge-base <headsha> <headsha> should tell
948 # me what the base of the merge should be
950 my $ps = shift;
952 my %branches; # holds an arrayref per branch
953 # the arrayref contains a list of
954 # merged patches between the base
955 # of the merge and the current head
957 my @parents; # parents found for this commit
959 # simple loop to split the merges
960 # per branch
961 foreach my $merge (@{$ps->{merges}}) {
962 my $branch = git_branchname($merge);
963 unless (defined $branches{$branch} ){
964 $branches{$branch} = [];
966 push @{$branches{$branch}}, $merge;
970 # foreach branch find a merge base and walk it to the
971 # head where we are, collecting the merged patchsets that
972 # Arch has recorded. Keep that in @have
973 # Compare that with the commits on the other branch
974 # between merge-base and the tip of the branch (@need)
975 # and see if we have a series of consecutive patches
976 # starting from the merge base. The tip of the series
977 # of consecutive patches merged is our new parent for
978 # that branch.
980 foreach my $branch (keys %branches) {
982 # check that we actually know about the branch
983 next unless -e "$git_dir/refs/heads/$branch";
985 my $mergebase = `git-merge-base $branch $ps->{branch}`;
986 if ($?) {
987 # Don't die here, Arch supports one-way cherry-picking
988 # between branches with no common base (or any relationship
989 # at all beforehand)
990 warn "Cannot find merge base for $branch and $ps->{branch}";
991 next;
993 chomp $mergebase;
995 # now walk up to the mergepoint collecting what patches we have
996 my $branchtip = git_rev_parse($ps->{branch});
997 my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
998 my %have; # collected merges this branch has
999 foreach my $merge (@{$ps->{merges}}) {
1000 $have{$merge} = 1;
1002 my %ancestorshave;
1003 foreach my $par (@ancestors) {
1004 $par = commitid2pset($par);
1005 if (defined $par->{merges}) {
1006 foreach my $merge (@{$par->{merges}}) {
1007 $ancestorshave{$merge}=1;
1011 # print "++++ Merges in $ps->{id} are....\n";
1012 # my @have = sort keys %have; print Dumper(\@have);
1014 # merge what we have with what ancestors have
1015 %have = (%have, %ancestorshave);
1017 # see what the remote branch has - these are the merges we
1018 # will want to have in a consecutive series from the mergebase
1019 my $otherbranchtip = git_rev_parse($branch);
1020 my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
1021 my @need;
1022 foreach my $needps (@needraw) { # get the psets
1023 $needps = commitid2pset($needps);
1024 # git-rev-list will also
1025 # list commits merged in via earlier
1026 # merges. we are only interested in commits
1027 # from the branch we're looking at
1028 if ($branch eq $needps->{branch}) {
1029 push @need, $needps->{id};
1033 # print "++++ Merges from $branch we want are....\n";
1034 # print Dumper(\@need);
1036 my $newparent;
1037 while (my $needed_commit = pop @need) {
1038 if ($have{$needed_commit}) {
1039 $newparent = $needed_commit;
1040 } else {
1041 last; # break out of the while
1044 if ($newparent) {
1045 push @parents, $newparent;
1049 } # end foreach branch
1051 # prune redundant parents
1052 my %parents;
1053 foreach my $p (@parents) {
1054 $parents{$p} = 1;
1056 foreach my $p (@parents) {
1057 next unless exists $psets{$p}{merges};
1058 next unless ref $psets{$p}{merges};
1059 my @merges = @{$psets{$p}{merges}};
1060 foreach my $merge (@merges) {
1061 if ($parents{$merge}) {
1062 delete $parents{$merge};
1067 @parents = ();
1068 foreach (keys %parents) {
1069 push @parents, '-p', ptag($_);
1071 return @parents;
1074 sub git_rev_parse {
1075 my $name = shift;
1076 my $val = `git-rev-parse $name`;
1077 die "Error: git-rev-parse $name" if $?;
1078 chomp $val;
1079 return $val;
1082 # resolve a SHA1 to a known patchset
1083 sub commitid2pset {
1084 my $commitid = shift;
1085 chomp $commitid;
1086 my $name = $rptags{$commitid}
1087 || die "Cannot find reverse tag mapping for $commitid";
1088 $name =~ s|,|/|;
1089 my $ps = $psets{$name}
1090 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1091 return $ps;
1095 # an alternative to `command` that allows input to be passed as an array
1096 # to work around shell problems with weird characters in arguments
1097 sub safe_pipe_capture {
1098 my @output;
1099 if (my $pid = open my $child, '-|') {
1100 @output = (<$child>);
1101 close $child or die join(' ',@_).": $! $?";
1102 } else {
1103 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
1105 return wantarray ? @output : join('',@output);
1108 # `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1109 sub arch_tree_id {
1110 my $dir = shift;
1111 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1112 return $ret;
1115 sub archive_reachable {
1116 my $archive = shift;
1117 return 1 if $reachable{$archive};
1118 return 0 if $unreachable{$archive};
1120 if (system "$TLA whereis-archive $archive >/dev/null") {
1121 if ($opt_a && (system($TLA,'register-archive',
1122 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1123 $reachable{$archive} = 1;
1124 return 1;
1126 print STDERR "Archive is unreachable: $archive\n";
1127 $unreachable{$archive} = 1;
1128 return 0;
1129 } else {
1130 $reachable{$archive} = 1;
1131 return 1;