3 # This tool is copyright (c) 2005, Martin Langhoff.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to walk the output of tla abrowse,
7 # fetch the changesets and apply them.
11 git-archimport-script -i <archive>/<branch> [<archive>/<branch>]
12 [ <archive>/<branch> ]
14 The script expects you to provide the key roots where it can start the
15 import from an 'initial import' or 'tag' type of Arch commit. It will
16 then follow all the branching and tagging within the provided roots.
18 It will die if it sees branches that have different roots.
22 - keep track of merged patches, and mark a git merge when it happens
23 - smarter rules to parse the archive history "up" and "down"
24 - be able to continue an import where we left off
25 - audit shell-escaping of filenames
29 Add print in front of the shell commands invoked via backticks.
37 use File
::Temp
qw(tempfile);
38 use File
::Path
qw(mkpath);
39 use File
::Basename
qw(basename dirname);
40 use String
::ShellQuote
;
44 use POSIX
qw(strftime dup2);
45 use Data
::Dumper qw
/ Dumper /;
48 $SIG{'PIPE'}="IGNORE";
51 our($opt_h,$opt_v, $opt_T,
56 Usage: ${\basename $0} # fetch/update GIT from Arch
58 [ -C GIT_repository ] [ -t tempdir ]
59 repository/arch-branch [ repository/arch-branch] ...
64 getopts
("hviC:t:") or usage
();
67 @ARGV >= 1 or usage
();
68 my @arch_roots = @ARGV;
72 $tmp .= '/git-archimport/';
74 my $git_tree = $opt_C;
78 my @psets = (); # the collection
80 foreach my $root (@arch_roots) {
81 my ($arepo, $abranch) = split(m!/!, $root);
82 open ABROWSE
, "tla abrowse -f -A $arepo --desc --merges $abranch |"
83 or die "Problems with tla abrowse: $!";
85 my %ps = (); # the current one
92 # first record padded w 8 spaces
95 # store the record we just captured
97 my %temp = %ps; # break references
98 push (@psets, \
%temp);
102 my ($id, $type) = split(m/\s{3}/, $_);
107 if ($type =~ m/^\(simple changeset\)/) {
109 } elsif ($type eq '(initial import)') {
111 } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
115 warn "Unknown type $type";
121 # 10 leading spaces or more
122 # indicate commit metadata
125 if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
127 my ($date, $authoremail) = split(m/\s{2,}/, $_);
129 $ps{date
} =~ s/\bGMT$//; # strip off trailign GMT
130 if ($ps{date
} =~ m/\b\w+$/) {
131 warn 'Arch dates not in GMT?! - imported dates will be wrong';
134 $authoremail =~ m/^(.+)\s(\S+)$/;
140 } elsif ($lastseen eq 'date') {
141 # the only hint is position
142 # subject is after date
146 } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
148 $lastseen = 'merges';
150 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
151 push (@
{$ps{merges
}}, $_);
153 warn 'more metadata after merges!?';
160 my %temp = %ps; # break references
161 push (@psets, \
%temp);
165 } # end foreach $root
167 ## Order patches by time
168 @psets = sort {$a->{date
}.$b->{id
} cmp $b->{date
}.$b->{id
}} @psets;
170 #print Dumper \@psets;
173 ## TODO cleanup irrelevant patches
174 ## and put an initial import
177 unless (-d
'.git') { # initial import
178 if ($psets[0]{type
} eq 'i' || $psets[0]{type
} eq 't') {
179 print "Starting import from $psets[0]{id}\n";
184 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
189 foreach my $ps (@psets) {
191 $ps->{branch
} = branchname
($ps->{id
});
194 # ensure we have a clean state
196 if (`git diff-files`) {
197 die "Unclean tree when about to process $ps->{id} " .
198 " - did we fail to commit cleanly before?";
203 # skip commits already in repo
205 if (ptag
($ps->{id
})) {
206 $opt_v && print "Skipping already imported: $ps->{id}\n";
211 # create the branch if needed
213 if ($ps->{type
} eq 'i' && !$import) {
214 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
217 unless ($import) { # skip for import
218 if ( -e
".git/refs/heads/$ps->{branch}") {
219 # we know about this branch
220 `git checkout $ps->{branch}`;
222 # new branch! we need to verify a few things
223 die "Branch on a non-tag!" unless $ps->{type
} eq 't';
224 my $branchpoint = ptag
($ps->{tag
});
225 die "Tagging from unknown id unsupported: $ps->{tag}"
228 # find where we are supposed to branch from
229 `git checkout -b $ps->{branch} $branchpoint`;
235 # Apply the import/changeset/merge into the working tree
237 if ($ps->{type
} eq 'i' || $ps->{type
} eq 't') {
238 apply_import
($ps) or die $!;
240 } elsif ($ps->{type
} eq 's') {
245 # prepare update git's index, based on what arch knows
246 # about the pset, resolve parents, etc
250 my $commitlog = `tla cat-archive-log -A $ps->{repo} $ps->{id}`;
251 die "Error in cat-archive-log: $!" if $?
;
253 # parselog will git-add/rm files
254 # and generally prepare things for the commit
255 # NOTE: parselog will shell-quote filenames!
256 my ($sum, $msg, $add, $del, $mod, $ren) = parselog
($commitlog);
257 my $logmessage = "$sum\n$msg";
260 # imports don't give us good info
261 # on added files. Shame on them
262 if ($ps->{type
} eq 'i' || $ps->{type
} eq 't') {
263 `find . -type f -print0 | grep -zv '^./.git' | xargs -0 -l100 git-update-cache --add`;
264 `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-cache --remove`;
269 my @slice = splice(@
$add, 0, 100);
270 my $slice = join(' ', @slice);
271 `git-update-cache --add $slice`;
272 die "Error in git-update-cache --add: $!" if $?
;
276 foreach my $file (@
$del) {
277 unlink $file or die "Problems deleting $file : $!";
280 my @slice = splice(@
$del, 0, 100);
281 my $slice = join(' ', @slice);
282 `git-update-cache --remove $slice`;
283 die "Error in git-update-cache --remove: $!" if $?
;
286 if (@
$ren) { # renamed
288 die "Odd number of entries in rename!?";
292 my $from = pop @
$ren;
295 unless (-d dirname
($to)) {
296 mkpath
(dirname
($to)); # will die on err
298 #print "moving $from $to";
300 die "Error renaming $from $to : $!" if $?
;
301 `git-update-cache --remove $from`;
302 die "Error in git-update-cache --remove: $!" if $?
;
303 `git-update-cache --add $to`;
304 die "Error in git-update-cache --add: $!" if $?
;
308 if (@
$mod) { # must be _after_ renames
310 my @slice = splice(@
$mod, 0, 100);
311 my $slice = join(' ', @slice);
312 `git-update-cache $slice`;
313 die "Error in git-update-cache: $!" if $?
;
317 # warn "errors when running git-update-cache! $!";
318 $tree = `git-write-tree`;
319 die "cannot write tree $!" if $?
;
327 if ( -e
".git/refs/heads/$ps->{branch}") {
328 if (open HEAD
, "<.git/refs/heads/$ps->{branch}") {
334 if ($ps->{type
} eq 's') {
335 warn "Could not find the right head for the branch $ps->{branch}";
340 my $par = join (' ', @par);
343 # Commit, tag and clean state
346 $ENV{GIT_AUTHOR_NAME
} = $ps->{author
};
347 $ENV{GIT_AUTHOR_EMAIL
} = $ps->{email
};
348 $ENV{GIT_AUTHOR_DATE
} = $ps->{date
};
349 $ENV{GIT_COMMITTER_NAME
} = $ps->{author
};
350 $ENV{GIT_COMMITTER_EMAIL
} = $ps->{email
};
351 $ENV{GIT_COMMITTER_DATE
} = $ps->{date
};
353 my ($pid, $commit_rh, $commit_wh);
354 $commit_rh = 'commit_rh';
355 $commit_wh = 'commit_wh';
357 $pid = open2
(*READER
, *WRITER
, "git-commit-tree $tree $par")
359 print WRITER
$logmessage; # write
361 my $commitid = <READER
>; # read
364 waitpid $pid,0; # close;
366 if (length $commitid != 40) {
367 die "Something went wrong with the commit! $! $commitid";
372 open HEAD
, ">.git/refs/heads/$ps->{branch}";
373 print HEAD
$commitid;
375 unlink ('.git/HEAD');
376 symlink("refs/heads/$ps->{branch}",".git/HEAD");
379 ptag
($ps->{id
}, $commitid); # private tag
380 if ($opt_T || $ps->{type
} eq 't' || $ps->{type
} eq 'i') {
381 tag
($ps->{id
}, $commitid);
383 print " * Committed $ps->{id}\n";
384 print " + tree $tree\n";
385 print " + commit $commitid\n";
386 # print " + commit date is $ps->{date} \n";
392 my @parts = split(m/--/, $id);
393 return join('--', @parts[0..1]);
398 my $bname = branchname
($ps->{id
});
402 `tla get -s --no-pristine -A $ps->{repo} $ps->{id} $tmp/import`;
403 die "Cannot get import: $!" if $?
;
404 `rsync -v --archive --delete --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/import/* ./`;
405 die "Cannot rsync import:$!" if $?
;
407 `rm -fr $tmp/import`;
408 die "Cannot remove tempdir: $!" if $?
;
420 `tla get-changeset -A $ps->{repo} $ps->{id} $tmp/changeset`;
421 die "Cannot get changeset: $!" if $?
;
424 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
425 # this can be sped up considerably by doing
426 # (find | xargs cat) | patch
427 # but that cna get mucked up by patches
428 # with missing trailing newlines or the standard
429 # 'missing newline' flag in the patch - possibly
430 # produced with an old/buggy diff.
431 # slow and safe, we invoke patch once per patchfile
432 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
433 die "Problem applying patches! $!" if $?
;
436 # apply changed binary files
437 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
438 foreach my $mod (@modified) {
441 $orig =~ s/\.modified$//; # lazy
442 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
443 #print "rsync -p '$mod' '$orig'";
444 `rsync -p $mod ./$orig`;
445 die "Problem applying binary changes! $!" if $?
;
450 `rsync --archive --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/changeset/new-files-archive/* ./`;
452 # deleted files are hinted from the commitlog processing
454 `rm -fr $tmp/changeset`;
459 # A log entry looks like
460 # Revision: moodle-org--moodle--1.3.3--patch-15
461 # Archive: arch-eduforge@catalyst.net.nz--2004
462 # Creator: Penny Leach <penny@catalyst.net.nz>
463 # Date: Wed May 25 14:15:34 NZST 2005
464 # Standard-date: 2005-05-25 02:15:34 GMT
465 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
466 # lang/de/.arch-ids/block_html.php.id
467 # New-directories: lang/de/help/questionnaire
468 # lang/de/help/questionnaire/.arch-ids
469 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
470 # db_sears.sql db/db_sears.sql
471 # Removed-files: lang/be/docs/.arch-ids/release.html.id
472 # lang/be/docs/.arch-ids/releaseold.html.id
473 # Modified-files: admin/cron.php admin/delete.php
474 # admin/editor.html backup/lib.php backup/restore.php
475 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
476 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
479 # Updating yadda tadda tadda madda
484 my (@add, @del, @mod, @ren, @kw, $sum, $msg );
486 if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
488 @add = split(m/\s+/s, $files);
491 if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
493 @del = split(m/\s+/s, $files);
496 if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
498 @mod = split(m/\s+/s, $files);
501 if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
503 @ren = split(m/\s+/s, $files);
507 if ($log =~ m/^Summary:(.+?)$/m ) {
514 if ($log =~ m/\n\n(.+)$/s) {
522 foreach my $ref ( (\
@add, \
@del, \
@mod, \
@ren) ) {
524 while (my $t = pop @
$ref) {
525 next unless length ($t);
526 next if $t =~ m!\{arch\}/!;
527 next if $t =~ m!\.arch-ids/!;
528 next if $t =~ m!\.arch-inventory$!;
529 push (@tmp, shell_quote
($t));
534 #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren];
535 return ($sum, $msg, \
@add, \
@del, \
@mod, \
@ren);
540 my ($tag, $commit) = @_;
542 $tag = shell_quote
($tag);
545 open(C
,">.git/refs/tags/$tag")
546 or die "Cannot create tag $tag: $!\n";
548 or die "Cannot write tag $tag: $!\n";
550 or die "Cannot write tag $tag: $!\n";
551 print "Created tag '$tag' on '$commit'\n" if $opt_v;
553 open(C
,"<.git/refs/tags/$tag")
554 or die "Cannot read tag $tag: $!\n";
557 die "Error reading tag $tag: $!\n" unless length $commit == 40;
559 or die "Cannot read tag $tag: $!\n";
564 # write/read a private tag
565 # reads fail softly if the tag isn't there
567 my ($tag, $commit) = @_;
569 $tag = shell_quote
($tag);
571 unless (-d
'.git/archimport/tags') {
572 mkpath
('.git/archimport/tags');
575 if ($commit) { # write
576 open(C
,">.git/archimport/tags/$tag")
577 or die "Cannot create tag $tag: $!\n";
579 or die "Cannot write tag $tag: $!\n";
581 or die "Cannot write tag $tag: $!\n";
583 # if the tag isn't there, return 0
584 unless ( -s
".git/archimport/tags/$tag") {
587 open(C
,"<.git/archimport/tags/$tag")
588 or die "Cannot read tag $tag: $!\n";
591 die "Error reading tag $tag: $!\n" unless length $commit == 40;
593 or die "Cannot read tag $tag: $!\n";