pack-check.c::verify_packfile(): don't run SHA-1 update on huge data
[git/fastimport.git] / git-archimport.perl
blobada60ec240a8b3eb4ef6efea6b6223fc8523705f
1 #!/usr/bin/perl -w
3 # This tool is copyright (c) 2005, Martin Langhoff.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to walk the output of tla abrowse,
7 # fetch the changesets and apply them.
10 =head1 Invocation
12 git-archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
13 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
15 Imports a project from one or more Arch repositories. It will follow branches
16 and repositories within the namespaces defined by the <archive/branch>
17 parameters supplied. If it cannot find the remote branch a merge comes from
18 it will just import it as a regular commit. If it can find it, it will mark it
19 as a merge whenever possible.
21 See man (1) git-archimport for more details.
23 =head1 TODO
25 - create tag objects instead of ref tags
26 - audit shell-escaping of filenames
27 - hide our private tags somewhere smarter
28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
29 - sort and apply patches by graphing ancestry relations instead of just
30 relying in dates supplied in the changeset itself.
31 tla ancestry-graph -m could be helpful here...
33 =head1 Devel tricks
35 Add print in front of the shell commands invoked via backticks.
37 =head1 Devel Notes
39 There are several places where Arch and git terminology are intermixed
40 and potentially confused.
42 The notion of a "branch" in git is approximately equivalent to
43 a "archive/category--branch--version" in Arch. Also, it should be noted
44 that the "--branch" portion of "archive/category--branch--version" is really
45 optional in Arch although not many people (nor tools!) seem to know this.
46 This means that "archive/category--version" is also a valid "branch"
47 in git terms.
49 We always refer to Arch names by their fully qualified variant (which
50 means the "archive" name is prefixed.
52 For people unfamiliar with Arch, an "archive" is the term for "repository",
53 and can contain multiple, unrelated branches.
55 =cut
57 use strict;
58 use warnings;
59 use Getopt::Std;
60 use File::Temp qw(tempdir);
61 use File::Path qw(mkpath rmtree);
62 use File::Basename qw(basename dirname);
63 use Data::Dumper qw/ Dumper /;
64 use IPC::Open2;
66 $SIG{'PIPE'}="IGNORE";
67 $ENV{'TZ'}="UTC";
69 my $git_dir = $ENV{"GIT_DIR"} || ".git";
70 $ENV{"GIT_DIR"} = $git_dir;
71 my $ptag_dir = "$git_dir/archimport/tags";
73 our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
75 sub usage() {
76 print STDERR <<END;
77 Usage: ${\basename $0} # fetch/update GIT from Arch
78 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
79 repository/arch-branch [ repository/arch-branch] ...
80 END
81 exit(1);
84 getopts("fThvat:D:") or usage();
85 usage if $opt_h;
87 @ARGV >= 1 or usage();
88 # $arch_branches:
89 # values associated with keys:
90 # =1 - Arch version / git 'branch' detected via abrowse on a limit
91 # >1 - Arch version / git 'branch' of an auxiliary branch we've merged
92 my %arch_branches = map { $_ => 1 } @ARGV;
94 $ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
95 my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
96 $opt_v && print "+ Using $tmp as temporary directory\n";
98 my %reachable = (); # Arch repositories we can access
99 my %unreachable = (); # Arch repositories we can't access :<
100 my @psets = (); # the collection
101 my %psets = (); # the collection, by name
102 my %stats = ( # Track which strategy we used to import:
103 get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
104 simple_changeset => 0, import_or_tag => 0
107 my %rptags = (); # my reverse private tags
108 # to map a SHA1 to a commitid
109 my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
111 sub do_abrowse {
112 my $stage = shift;
113 while (my ($limit, $level) = each %arch_branches) {
114 next unless $level == $stage;
116 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
117 or die "Problems with tla abrowse: $!";
119 my %ps = (); # the current one
120 my $lastseen = '';
122 while (<ABROWSE>) {
123 chomp;
125 # first record padded w 8 spaces
126 if (s/^\s{8}\b//) {
127 my ($id, $type) = split(m/\s+/, $_, 2);
129 my %last_ps;
130 # store the record we just captured
131 if (%ps && !exists $psets{ $ps{id} }) {
132 %last_ps = %ps; # break references
133 push (@psets, \%last_ps);
134 $psets{ $last_ps{id} } = \%last_ps;
137 my $branch = extract_versionname($id);
138 %ps = ( id => $id, branch => $branch );
139 if (%last_ps && ($last_ps{branch} eq $branch)) {
140 $ps{parent_id} = $last_ps{id};
143 $arch_branches{$branch} = 1;
144 $lastseen = 'id';
146 # deal with types (should work with baz or tla):
147 if ($type =~ m/\(.*changeset\)/) {
148 $ps{type} = 's';
149 } elsif ($type =~ /\(.*import\)/) {
150 $ps{type} = 'i';
151 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
152 $ps{type} = 't';
153 # read which revision we've tagged when we parse the log
154 $ps{tag} = $1;
155 } else {
156 warn "Unknown type $type";
159 $arch_branches{$branch} = 1;
160 $lastseen = 'id';
161 } elsif (s/^\s{10}//) {
162 # 10 leading spaces or more
163 # indicate commit metadata
165 # date
166 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
167 $ps{date} = $1;
168 $lastseen = 'date';
169 } elsif ($_ eq 'merges in:') {
170 $ps{merges} = [];
171 $lastseen = 'merges';
172 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
173 my $id = $_;
174 push (@{$ps{merges}}, $id);
176 # aggressive branch finding:
177 if ($opt_D) {
178 my $branch = extract_versionname($id);
179 my $repo = extract_reponame($branch);
181 if (archive_reachable($repo) &&
182 !defined $arch_branches{$branch}) {
183 $arch_branches{$branch} = $stage + 1;
186 } else {
187 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
192 if (%ps && !exists $psets{ $ps{id} }) {
193 my %temp = %ps; # break references
194 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
195 $temp{parent_id} = $psets[$#psets]{id};
197 push (@psets, \%temp);
198 $psets{ $temp{id} } = \%temp;
201 close ABROWSE or die "$TLA abrowse failed on $limit\n";
203 } # end foreach $root
205 do_abrowse(1);
206 my $depth = 2;
207 $opt_D ||= 0;
208 while ($depth <= $opt_D) {
209 do_abrowse($depth);
210 $depth++;
213 ## Order patches by time
214 # FIXME see if we can find a more optimal way to do this by graphing
215 # the ancestry data and walking it, that way we won't have to rely on
216 # client-supplied dates
217 @psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
219 #print Dumper \@psets;
222 ## TODO cleanup irrelevant patches
223 ## and put an initial import
224 ## or a full tag
225 my $import = 0;
226 unless (-d $git_dir) { # initial import
227 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
228 print "Starting import from $psets[0]{id}\n";
229 `git-init-db`;
230 die $! if $?;
231 $import = 1;
232 } else {
233 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
235 } else { # progressing an import
236 # load the rptags
237 opendir(DIR, $ptag_dir)
238 || die "can't opendir: $!";
239 while (my $file = readdir(DIR)) {
240 # skip non-interesting-files
241 next unless -f "$ptag_dir/$file";
243 # convert first '--' to '/' from old git-archimport to use
244 # as an archivename/c--b--v private tag
245 if ($file !~ m!,!) {
246 my $oldfile = $file;
247 $file =~ s!--!,!;
248 print STDERR "converting old tag $oldfile to $file\n";
249 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
251 my $sha = ptag($file);
252 chomp $sha;
253 $rptags{$sha} = $file;
255 closedir DIR;
258 # process patchsets
259 # extract the Arch repository name (Arch "archive" in Arch-speak)
260 sub extract_reponame {
261 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
262 return (split(/\//, $fq_cvbr))[0];
265 sub extract_versionname {
266 my $name = shift;
267 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
268 return $name;
271 # convert a fully-qualified revision or version to a unique dirname:
272 # normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
273 # becomes: normalperson@yhbt.net-05,mpd--uclinux--1
275 # the git notion of a branch is closer to
276 # archive/category--branch--version than archive/category--branch, so we
277 # use this to convert to git branch names.
278 # Also, keep archive names but replace '/' with ',' since it won't require
279 # subdirectories, and is safer than swapping '--' which could confuse
280 # reverse-mapping when dealing with bastard branches that
281 # are just archive/category--version (no --branch)
282 sub tree_dirname {
283 my $revision = shift;
284 my $name = extract_versionname($revision);
285 $name =~ s#/#,#;
286 return $name;
289 # old versions of git-archimport just use the <category--branch> part:
290 sub old_style_branchname {
291 my $id = shift;
292 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
293 chomp $ret;
294 return $ret;
297 *git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
299 sub process_patchset_accurate {
300 my $ps = shift;
302 # switch to that branch if we're not already in that branch:
303 if (-e "$git_dir/refs/heads/$ps->{branch}") {
304 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
306 # remove any old stuff that got leftover:
307 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
308 rmtree(split(/\0/,$rm)) if $rm;
311 # Apply the import/changeset/merge into the working tree
312 my $dir = sync_to_ps($ps);
313 # read the new log entry:
314 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
315 die "Error in cat-log: $!" if $?;
316 chomp @commitlog;
318 # grab variables we want from the log, new fields get added to $ps:
319 # (author, date, email, summary, message body ...)
320 parselog($ps, \@commitlog);
322 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
323 # this should work when importing continuations
324 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
326 # find where we are supposed to branch from
327 system('git-checkout','-f','-b',$ps->{branch},
328 $branchpoint) == 0 or die "$! $?\n";
330 # remove any old stuff that got leftover:
331 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
332 rmtree(split(/\0/,$rm)) if $rm;
334 # If we trust Arch with the fact that this is just
335 # a tag, and it does not affect the state of the tree
336 # then we just tag and move on
337 tag($ps->{id}, $branchpoint);
338 ptag($ps->{id}, $branchpoint);
339 print " * Tagged $ps->{id} at $branchpoint\n";
340 return 0;
341 } else {
342 warn "Tagging from unknown id unsupported\n" if $ps->{tag};
344 # allow multiple bases/imports here since Arch supports cherry-picks
345 # from unrelated trees
348 # update the index with all the changes we got
349 system('git-diff-files --name-only -z | '.
350 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
351 system('git-ls-files --others -z | '.
352 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
353 return 1;
356 # the native changeset processing strategy. This is very fast, but
357 # does not handle permissions or any renames involving directories
358 sub process_patchset_fast {
359 my $ps = shift;
361 # create the branch if needed
363 if ($ps->{type} eq 'i' && !$import) {
364 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
367 unless ($import) { # skip for import
368 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
369 # we know about this branch
370 system('git-checkout',$ps->{branch});
371 } else {
372 # new branch! we need to verify a few things
373 die "Branch on a non-tag!" unless $ps->{type} eq 't';
374 my $branchpoint = ptag($ps->{tag});
375 die "Tagging from unknown id unsupported: $ps->{tag}"
376 unless $branchpoint;
378 # find where we are supposed to branch from
379 system('git-checkout','-b',$ps->{branch},$branchpoint);
381 # If we trust Arch with the fact that this is just
382 # a tag, and it does not affect the state of the tree
383 # then we just tag and move on
384 tag($ps->{id}, $branchpoint);
385 ptag($ps->{id}, $branchpoint);
386 print " * Tagged $ps->{id} at $branchpoint\n";
387 return 0;
389 die $! if $?;
393 # Apply the import/changeset/merge into the working tree
395 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
396 apply_import($ps) or die $!;
397 $stats{import_or_tag}++;
398 $import=0;
399 } elsif ($ps->{type} eq 's') {
400 apply_cset($ps);
401 $stats{simple_changeset}++;
405 # prepare update git's index, based on what arch knows
406 # about the pset, resolve parents, etc
409 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
410 die "Error in cat-archive-log: $!" if $?;
412 parselog($ps,\@commitlog);
414 # imports don't give us good info
415 # on added files. Shame on them
416 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
417 system('git-ls-files --deleted -z | '.
418 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
419 system('git-ls-files --others -z | '.
420 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
423 # TODO: handle removed_directories and renamed_directories:
425 if (my $del = $ps->{removed_files}) {
426 unlink @$del;
427 while (@$del) {
428 my @slice = splice(@$del, 0, 100);
429 system('git-update-index','--remove','--',@slice) == 0 or
430 die "Error in git-update-index --remove: $! $?\n";
434 if (my $ren = $ps->{renamed_files}) { # renamed
435 if (@$ren % 2) {
436 die "Odd number of entries in rename!?";
439 while (@$ren) {
440 my $from = shift @$ren;
441 my $to = shift @$ren;
443 unless (-d dirname($to)) {
444 mkpath(dirname($to)); # will die on err
446 # print "moving $from $to";
447 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
448 system('git-update-index','--remove','--',$from) == 0 or
449 die "Error in git-update-index --remove: $! $?\n";
450 system('git-update-index','--add','--',$to) == 0 or
451 die "Error in git-update-index --add: $! $?\n";
455 if (my $add = $ps->{new_files}) {
456 while (@$add) {
457 my @slice = splice(@$add, 0, 100);
458 system('git-update-index','--add','--',@slice) == 0 or
459 die "Error in git-update-index --add: $! $?\n";
463 if (my $mod = $ps->{modified_files}) {
464 while (@$mod) {
465 my @slice = splice(@$mod, 0, 100);
466 system('git-update-index','--',@slice) == 0 or
467 die "Error in git-update-index: $! $?\n";
470 return 1; # we successfully applied the changeset
473 if ($opt_f) {
474 print "Will import patchsets using the fast strategy\n",
475 "Renamed directories and permission changes will be missed\n";
476 *process_patchset = *process_patchset_fast;
477 } else {
478 print "Using the default (accurate) import strategy.\n",
479 "Things may be a bit slow\n";
480 *process_patchset = *process_patchset_accurate;
483 foreach my $ps (@psets) {
484 # process patchsets
485 $ps->{branch} = git_branchname($ps->{id});
488 # ensure we have a clean state
490 if (my $dirty = `git-diff-files`) {
491 die "Unclean tree when about to process $ps->{id} " .
492 " - did we fail to commit cleanly before?\n$dirty";
494 die $! if $?;
497 # skip commits already in repo
499 if (ptag($ps->{id})) {
500 $opt_v && print " * Skipping already imported: $ps->{id}\n";
501 next;
504 print " * Starting to work on $ps->{id}\n";
506 process_patchset($ps) or next;
508 # warn "errors when running git-update-index! $!";
509 my $tree = `git-write-tree`;
510 die "cannot write tree $!" if $?;
511 chomp $tree;
514 # Who's your daddy?
516 my @par;
517 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
518 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
519 my $p = <HEAD>;
520 close HEAD;
521 chomp $p;
522 push @par, '-p', $p;
523 } else {
524 if ($ps->{type} eq 's') {
525 warn "Could not find the right head for the branch $ps->{branch}";
530 if ($ps->{merges}) {
531 push @par, find_parents($ps);
535 # Commit, tag and clean state
537 $ENV{TZ} = 'GMT';
538 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
539 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
540 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
541 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
542 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
543 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
545 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
546 or die $!;
547 print WRITER $ps->{summary},"\n";
548 print WRITER $ps->{message},"\n";
550 # make it easy to backtrack and figure out which Arch revision this was:
551 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
553 close WRITER;
554 my $commitid = <READER>; # read
555 chomp $commitid;
556 close READER;
557 waitpid $pid,0; # close;
559 if (length $commitid != 40) {
560 die "Something went wrong with the commit! $! $commitid";
563 # Update the branch
565 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
566 print HEAD $commitid;
567 close HEAD;
568 system('git-update-ref', 'HEAD', "$ps->{branch}");
570 # tag accordingly
571 ptag($ps->{id}, $commitid); # private tag
572 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
573 tag($ps->{id}, $commitid);
575 print " * Committed $ps->{id}\n";
576 print " + tree $tree\n";
577 print " + commit $commitid\n";
578 $opt_v && print " + commit date is $ps->{date} \n";
579 $opt_v && print " + parents: ",join(' ',@par),"\n";
582 if ($opt_v) {
583 foreach (sort keys %stats) {
584 print" $_: $stats{$_}\n";
587 exit 0;
589 # used by the accurate strategy:
590 sub sync_to_ps {
591 my $ps = shift;
592 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
594 $opt_v && print "sync_to_ps($ps->{id}) method: ";
596 if (-d $tree_dir) {
597 if ($ps->{type} eq 't') {
598 $opt_v && print "get (tag)\n";
599 # looks like a tag-only or (worse,) a mixed tags/changeset branch,
600 # can't rely on replay to work correctly on these
601 rmtree($tree_dir);
602 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
603 $stats{get_tag}++;
604 } else {
605 my $tree_id = arch_tree_id($tree_dir);
606 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
607 # the common case (hopefully)
608 $opt_v && print "replay\n";
609 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
610 $stats{replay}++;
611 } else {
612 # getting one tree is usually faster than getting two trees
613 # and applying the delta ...
614 rmtree($tree_dir);
615 $opt_v && print "apply-delta\n";
616 safe_pipe_capture($TLA,'get','--no-pristine',
617 $ps->{id},$tree_dir);
618 $stats{get_delta}++;
621 } else {
622 # new branch work
623 $opt_v && print "get (new tree)\n";
624 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
625 $stats{get_new}++;
628 # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
629 system('rsync','-aI','--delete','--exclude',$git_dir,
630 # '--exclude','.arch-inventory',
631 '--exclude','.arch-ids','--exclude','{arch}',
632 '--exclude','+*','--exclude',',*',
633 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
634 return $tree_dir;
637 sub apply_import {
638 my $ps = shift;
639 my $bname = git_branchname($ps->{id});
641 mkpath($tmp);
643 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
644 die "Cannot get import: $!" if $?;
645 system('rsync','-aI','--delete', '--exclude',$git_dir,
646 '--exclude','.arch-ids','--exclude','{arch}',
647 "$tmp/import/", './');
648 die "Cannot rsync import:$!" if $?;
650 rmtree("$tmp/import");
651 die "Cannot remove tempdir: $!" if $?;
654 return 1;
657 sub apply_cset {
658 my $ps = shift;
660 mkpath($tmp);
662 # get the changeset
663 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
664 die "Cannot get changeset: $!" if $?;
666 # apply patches
667 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
668 # this can be sped up considerably by doing
669 # (find | xargs cat) | patch
670 # but that can get mucked up by patches
671 # with missing trailing newlines or the standard
672 # 'missing newline' flag in the patch - possibly
673 # produced with an old/buggy diff.
674 # slow and safe, we invoke patch once per patchfile
675 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
676 die "Problem applying patches! $!" if $?;
679 # apply changed binary files
680 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
681 foreach my $mod (@modified) {
682 chomp $mod;
683 my $orig = $mod;
684 $orig =~ s/\.modified$//; # lazy
685 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
686 #print "rsync -p '$mod' '$orig'";
687 system('rsync','-p',$mod,"./$orig");
688 die "Problem applying binary changes! $!" if $?;
692 # bring in new files
693 system('rsync','-aI','--exclude',$git_dir,
694 '--exclude','.arch-ids',
695 '--exclude', '{arch}',
696 "$tmp/changeset/new-files-archive/",'./');
698 # deleted files are hinted from the commitlog processing
700 rmtree("$tmp/changeset");
704 # =for reference
705 # notes: *-files/-directories keys cannot have spaces, they're always
706 # pika-escaped. Everything after the first newline
707 # A log entry looks like:
708 # Revision: moodle-org--moodle--1.3.3--patch-15
709 # Archive: arch-eduforge@catalyst.net.nz--2004
710 # Creator: Penny Leach <penny@catalyst.net.nz>
711 # Date: Wed May 25 14:15:34 NZST 2005
712 # Standard-date: 2005-05-25 02:15:34 GMT
713 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
714 # lang/de/.arch-ids/block_html.php.id
715 # New-directories: lang/de/help/questionnaire
716 # lang/de/help/questionnaire/.arch-ids
717 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
718 # db_sears.sql db/db_sears.sql
719 # Removed-files: lang/be/docs/.arch-ids/release.html.id
720 # lang/be/docs/.arch-ids/releaseold.html.id
721 # Modified-files: admin/cron.php admin/delete.php
722 # admin/editor.html backup/lib.php backup/restore.php
723 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
724 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
725 # summary can be multiline with a leading space just like the above fields
726 # Keywords:
728 # Updating yadda tadda tadda madda
729 sub parselog {
730 my ($ps, $log) = @_;
731 my $key = undef;
733 # headers we want that contain filenames:
734 my %want_headers = (
735 new_files => 1,
736 modified_files => 1,
737 renamed_files => 1,
738 renamed_directories => 1,
739 removed_files => 1,
740 removed_directories => 1,
743 chomp (@$log);
744 while ($_ = shift @$log) {
745 if (/^Continuation-of:\s*(.*)/) {
746 $ps->{tag} = $1;
747 $key = undef;
748 } elsif (/^Summary:\s*(.*)$/ ) {
749 # summary can be multiline as long as it has a leading space
750 $ps->{summary} = [ $1 ];
751 $key = 'summary';
752 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
753 $ps->{author} = $1;
754 $ps->{email} = $2;
755 $key = undef;
756 # any *-files or *-directories can be read here:
757 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
758 my $val = $2;
759 $key = lc $1;
760 $key =~ tr/-/_/; # too lazy to quote :P
761 if ($want_headers{$key}) {
762 push @{$ps->{$key}}, split(/\s+/, $val);
763 } else {
764 $key = undef;
766 } elsif (/^$/) {
767 last; # remainder of @$log that didn't get shifted off is message
768 } elsif ($key) {
769 if (/^\s+(.*)$/) {
770 if ($key eq 'summary') {
771 push @{$ps->{$key}}, $1;
772 } else { # files/directories:
773 push @{$ps->{$key}}, split(/\s+/, $1);
775 } else {
776 $key = undef;
781 # post-processing:
782 $ps->{summary} = join("\n",@{$ps->{summary}})."\n";
783 $ps->{message} = join("\n",@$log);
785 # skip Arch control files, unescape pika-escaped files
786 foreach my $k (keys %want_headers) {
787 next unless (defined $ps->{$k});
788 my @tmp = ();
789 foreach my $t (@{$ps->{$k}}) {
790 next unless length ($t);
791 next if $t =~ m!\{arch\}/!;
792 next if $t =~ m!\.arch-ids/!;
793 # should we skip this?
794 next if $t =~ m!\.arch-inventory$!;
795 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
796 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
797 if ($t =~ /\\/ ){
798 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
800 push @tmp, $t;
802 $ps->{$k} = \@tmp;
806 # write/read a tag
807 sub tag {
808 my ($tag, $commit) = @_;
810 if ($opt_o) {
811 $tag =~ s|/|--|g;
812 } else {
813 # don't use subdirs for tags yet, it could screw up other porcelains
814 $tag =~ s|/|,|g;
817 if ($commit) {
818 open(C,">","$git_dir/refs/tags/$tag")
819 or die "Cannot create tag $tag: $!\n";
820 print C "$commit\n"
821 or die "Cannot write tag $tag: $!\n";
822 close(C)
823 or die "Cannot write tag $tag: $!\n";
824 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
825 } else { # read
826 open(C,"<","$git_dir/refs/tags/$tag")
827 or die "Cannot read tag $tag: $!\n";
828 $commit = <C>;
829 chomp $commit;
830 die "Error reading tag $tag: $!\n" unless length $commit == 40;
831 close(C)
832 or die "Cannot read tag $tag: $!\n";
833 return $commit;
837 # write/read a private tag
838 # reads fail softly if the tag isn't there
839 sub ptag {
840 my ($tag, $commit) = @_;
842 # don't use subdirs for tags yet, it could screw up other porcelains
843 $tag =~ s|/|,|g;
845 my $tag_file = "$ptag_dir/$tag";
846 my $tag_branch_dir = dirname($tag_file);
847 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
849 if ($commit) { # write
850 open(C,">",$tag_file)
851 or die "Cannot create tag $tag: $!\n";
852 print C "$commit\n"
853 or die "Cannot write tag $tag: $!\n";
854 close(C)
855 or die "Cannot write tag $tag: $!\n";
856 $rptags{$commit} = $tag
857 unless $tag =~ m/--base-0$/;
858 } else { # read
859 # if the tag isn't there, return 0
860 unless ( -s $tag_file) {
861 return 0;
863 open(C,"<",$tag_file)
864 or die "Cannot read tag $tag: $!\n";
865 $commit = <C>;
866 chomp $commit;
867 die "Error reading tag $tag: $!\n" unless length $commit == 40;
868 close(C)
869 or die "Cannot read tag $tag: $!\n";
870 unless (defined $rptags{$commit}) {
871 $rptags{$commit} = $tag;
873 return $commit;
877 sub find_parents {
879 # Identify what branches are merging into me
880 # and whether we are fully merged
881 # git-merge-base <headsha> <headsha> should tell
882 # me what the base of the merge should be
884 my $ps = shift;
886 my %branches; # holds an arrayref per branch
887 # the arrayref contains a list of
888 # merged patches between the base
889 # of the merge and the current head
891 my @parents; # parents found for this commit
893 # simple loop to split the merges
894 # per branch
895 foreach my $merge (@{$ps->{merges}}) {
896 my $branch = git_branchname($merge);
897 unless (defined $branches{$branch} ){
898 $branches{$branch} = [];
900 push @{$branches{$branch}}, $merge;
904 # foreach branch find a merge base and walk it to the
905 # head where we are, collecting the merged patchsets that
906 # Arch has recorded. Keep that in @have
907 # Compare that with the commits on the other branch
908 # between merge-base and the tip of the branch (@need)
909 # and see if we have a series of consecutive patches
910 # starting from the merge base. The tip of the series
911 # of consecutive patches merged is our new parent for
912 # that branch.
914 foreach my $branch (keys %branches) {
916 # check that we actually know about the branch
917 next unless -e "$git_dir/refs/heads/$branch";
919 my $mergebase = `git-merge-base $branch $ps->{branch}`;
920 if ($?) {
921 # Don't die here, Arch supports one-way cherry-picking
922 # between branches with no common base (or any relationship
923 # at all beforehand)
924 warn "Cannot find merge base for $branch and $ps->{branch}";
925 next;
927 chomp $mergebase;
929 # now walk up to the mergepoint collecting what patches we have
930 my $branchtip = git_rev_parse($ps->{branch});
931 my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
932 my %have; # collected merges this branch has
933 foreach my $merge (@{$ps->{merges}}) {
934 $have{$merge} = 1;
936 my %ancestorshave;
937 foreach my $par (@ancestors) {
938 $par = commitid2pset($par);
939 if (defined $par->{merges}) {
940 foreach my $merge (@{$par->{merges}}) {
941 $ancestorshave{$merge}=1;
945 # print "++++ Merges in $ps->{id} are....\n";
946 # my @have = sort keys %have; print Dumper(\@have);
948 # merge what we have with what ancestors have
949 %have = (%have, %ancestorshave);
951 # see what the remote branch has - these are the merges we
952 # will want to have in a consecutive series from the mergebase
953 my $otherbranchtip = git_rev_parse($branch);
954 my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
955 my @need;
956 foreach my $needps (@needraw) { # get the psets
957 $needps = commitid2pset($needps);
958 # git-rev-list will also
959 # list commits merged in via earlier
960 # merges. we are only interested in commits
961 # from the branch we're looking at
962 if ($branch eq $needps->{branch}) {
963 push @need, $needps->{id};
967 # print "++++ Merges from $branch we want are....\n";
968 # print Dumper(\@need);
970 my $newparent;
971 while (my $needed_commit = pop @need) {
972 if ($have{$needed_commit}) {
973 $newparent = $needed_commit;
974 } else {
975 last; # break out of the while
978 if ($newparent) {
979 push @parents, $newparent;
983 } # end foreach branch
985 # prune redundant parents
986 my %parents;
987 foreach my $p (@parents) {
988 $parents{$p} = 1;
990 foreach my $p (@parents) {
991 next unless exists $psets{$p}{merges};
992 next unless ref $psets{$p}{merges};
993 my @merges = @{$psets{$p}{merges}};
994 foreach my $merge (@merges) {
995 if ($parents{$merge}) {
996 delete $parents{$merge};
1001 @parents = ();
1002 foreach (keys %parents) {
1003 push @parents, '-p', ptag($_);
1005 return @parents;
1008 sub git_rev_parse {
1009 my $name = shift;
1010 my $val = `git-rev-parse $name`;
1011 die "Error: git-rev-parse $name" if $?;
1012 chomp $val;
1013 return $val;
1016 # resolve a SHA1 to a known patchset
1017 sub commitid2pset {
1018 my $commitid = shift;
1019 chomp $commitid;
1020 my $name = $rptags{$commitid}
1021 || die "Cannot find reverse tag mapping for $commitid";
1022 $name =~ s|,|/|;
1023 my $ps = $psets{$name}
1024 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1025 return $ps;
1029 # an alternative to `command` that allows input to be passed as an array
1030 # to work around shell problems with weird characters in arguments
1031 sub safe_pipe_capture {
1032 my @output;
1033 if (my $pid = open my $child, '-|') {
1034 @output = (<$child>);
1035 close $child or die join(' ',@_).": $! $?";
1036 } else {
1037 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
1039 return wantarray ? @output : join('',@output);
1042 # `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1043 sub arch_tree_id {
1044 my $dir = shift;
1045 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1046 return $ret;
1049 sub archive_reachable {
1050 my $archive = shift;
1051 return 1 if $reachable{$archive};
1052 return 0 if $unreachable{$archive};
1054 if (system "$TLA whereis-archive $archive >/dev/null") {
1055 if ($opt_a && (system($TLA,'register-archive',
1056 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1057 $reachable{$archive} = 1;
1058 return 1;
1060 print STDERR "Archive is unreachable: $archive\n";
1061 $unreachable{$archive} = 1;
1062 return 0;
1063 } else {
1064 $reachable{$archive} = 1;
1065 return 1;