1 package Git
::SVN
::Fetcher
;
2 use vars qw
/@ISA $_ignore_regex $_preserve_empty_dirs $_placeholder_filename
3 @deleted_gpath %added_placeholder $repo_id/;
8 use File
::Basename qw
/dirname/;
10 use Git qw
/command command_oneline command_noisy command_output_pipe
11 command_input_pipe command_close_pipe
12 command_bidi_pipe command_close_bidi_pipe
/;
14 @ISA = qw(SVN::Delta::Editor);
17 # file baton members: path, mode_a, mode_b, pool, fh, blob, base
19 my ($class, $git_svn, $switch_path) = @_;
20 my $self = SVN
::Delta
::Editor
->new;
22 if (exists $git_svn->{last_commit
}) {
23 $self->{c
} = $git_svn->{last_commit
};
24 $self->{empty_symlinks
} =
25 _mark_empty_symlinks
($git_svn, $switch_path);
28 # some options are read globally, but can be overridden locally
29 # per [svn-remote "..."] section. Command-line options will *NOT*
30 # override options set in an [svn-remote "..."] section
31 $repo_id = $git_svn->{repo_id
};
32 my $k = "svn-remote.$repo_id.ignore-paths";
33 my $v = eval { command_oneline
('config', '--get', $k) };
34 $self->{ignore_regex
} = $v;
36 $k = "svn-remote.$repo_id.preserve-empty-dirs";
37 $v = eval { command_oneline
('config', '--get', '--bool', $k) };
38 if ($v && $v eq 'true') {
39 $_preserve_empty_dirs = 1;
40 $k = "svn-remote.$repo_id.placeholder-filename";
41 $v = eval { command_oneline
('config', '--get', $k) };
42 $_placeholder_filename = $v;
45 # Load the list of placeholder files added during previous invocations.
46 $k = "svn-remote.$repo_id.added-placeholder";
47 $v = eval { command_oneline
('config', '--get-all', $k) };
48 if ($_preserve_empty_dirs && $v) {
49 # command() prints errors to stderr, so we only call it if
50 # command_oneline() succeeded.
51 my @v = command
('config', '--get-all', $k);
52 $added_placeholder{ dirname
($_) } = $_ foreach @v;
56 $self->{dir_prop
} = {};
57 $self->{file_prop
} = {};
58 $self->{absent_dir
} = {};
59 $self->{absent_file
} = {};
60 require Git
::IndexInfo
;
61 $self->{gii
} = $git_svn->tmp_index_do(sub { Git
::IndexInfo
->new });
62 $self->{pathnameencoding
} = Git
::config
('svn.pathnameencoding');
66 # this uses the Ra object, so it must be called before do_{switch,update},
67 # not inside them (when the Git::SVN::Fetcher object is passed) to
69 sub _mark_empty_symlinks
{
70 my ($git_svn, $switch_path) = @_;
71 my $bool = Git
::config_bool
('svn.brokenSymlinkWorkaround');
72 return {} if (!defined($bool)) || (defined($bool) && ! $bool);
75 my ($rev, $cmt) = $git_svn->last_rev_commit;
76 return {} unless ($rev && $cmt);
78 # allow the warning to be printed for each revision we fetch to
79 # ensure the user sees it. The user can also disable the workaround
80 # on the repository even while git svn is running and the next
81 # revision fetched will skip this expensive function.
83 chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
84 my ($ls, $ctx) = command_output_pipe
(qw
/ls-tree -r -z/, $cmt);
86 my $pfx = defined($switch_path) ?
$switch_path : $git_svn->path;
87 $pfx .= '/' if length($pfx);
90 s/\A100644 blob $empty_blob\t//o or next;
91 unless ($printed_warning) {
92 print STDERR
"Scanning for empty symlinks, ",
93 "this may take a while if you have ",
95 "You may disable this with `",
96 "git config svn.brokenSymlinkWorkaround ",
98 "This may be done in a different ",
99 "terminal without restarting ",
101 $printed_warning = 1;
105 $git_svn->ra->get_file($pfx.$path, $rev, undef);
106 if ($props->{'svn:special'}) {
110 command_close_pipe
($ls, $ctx);
114 # returns true if a given path is inside a ".git" directory
116 $_[0] =~ m{(?:^|/)\.git(?:/|$)};
119 # return value: 0 -- don't ignore, 1 -- ignore
120 sub is_path_ignored
{
121 my ($self, $path) = @_;
122 return 1 if in_dot_git
($path);
123 return 1 if defined($self->{ignore_regex
}) &&
124 $path =~ m!$self->{ignore_regex}!;
125 return 0 unless defined($_ignore_regex);
126 return 1 if $path =~ m!$_ignore_regex!o;
131 my ($self, $path) = @_;
132 $self->{path_strip
} = qr/^\Q$path\E(\/|$)/ if length $path;
140 my ($self, $path, $pb, $rev) = @_;
145 my ($self, $path) = @_;
146 if (my $enc = $self->{pathnameencoding
}) {
148 Encode
::from_to
($path, 'UTF-8', $enc);
150 if ($self->{path_strip
}) {
151 $path =~ s!$self->{path_strip}!! or
152 die "Failed to strip path '$path' ($self->{path_strip})\n";
158 my ($self, $path, $rev, $pb) = @_;
159 return undef if $self->is_path_ignored($path);
161 my $gpath = $self->git_path($path);
162 return undef if ($gpath eq '');
164 # remove entire directories.
165 my ($tree) = (command
('ls-tree', '-z', $self->{c
}, "./$gpath")
166 =~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/);
168 my ($ls, $ctx) = command_output_pipe
(qw
/ls
-tree
174 my $rmpath = "$gpath/$_";
175 $self->{gii
}->remove($rmpath);
176 print "\tD\t$rmpath\n" unless $::_q
;
178 print "\tD\t$gpath/\n" unless $::_q
;
179 command_close_pipe
($ls, $ctx);
181 $self->{gii
}->remove($gpath);
182 print "\tD\t$gpath\n" unless $::_q
;
184 # Don't add to @deleted_gpath if we're deleting a placeholder file.
185 push @deleted_gpath, $gpath unless $added_placeholder{dirname
($path)};
186 $self->{empty
}->{$path} = 0;
191 my ($self, $path, $pb, $rev) = @_;
194 goto out
if $self->is_path_ignored($path);
196 my $gpath = $self->git_path($path);
197 ($mode, $blob) = (command
('ls-tree', '-z', $self->{c
}, "./$gpath")
198 =~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/);
199 unless (defined $mode && defined $blob) {
200 die "$path was not found in commit $self->{c} (r$rev)\n";
202 if ($mode eq '100644' && $self->{empty_symlinks
}->{$path}) {
206 { path
=> $path, mode_a
=> $mode, mode_b
=> $mode, blob
=> $blob,
207 pool
=> SVN
::Pool
->new, action
=> 'M' };
211 my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
214 if (!$self->is_path_ignored($path)) {
215 my ($dir, $file) = ($path =~ m
#^(.*?)/?([^/]+)$#);
216 delete $self->{empty
}->{$dir};
219 if ($added_placeholder{$dir}) {
220 # Remove our placeholder file, if we created one.
221 delete_entry
($self, $added_placeholder{$dir})
222 unless $path eq $added_placeholder{$dir};
223 delete $added_placeholder{$dir}
227 { path
=> $path, mode_a
=> $mode, mode_b
=> $mode,
228 pool
=> SVN
::Pool
->new, action
=> 'A' };
232 my ($self, $path, $cp_path, $cp_rev) = @_;
233 goto out
if $self->is_path_ignored($path);
234 my $gpath = $self->git_path($path);
236 my ($ls, $ctx) = command_output_pipe
(qw
/ls
-tree
242 $self->{gii
}->remove($_);
243 print "\tD\t$_\n" unless $::_q
;
244 push @deleted_gpath, $gpath;
246 command_close_pipe
($ls, $ctx);
247 $self->{empty
}->{$path} = 0;
249 my ($dir, $file) = ($path =~ m
#^(.*?)/?([^/]+)$#);
250 delete $self->{empty
}->{$dir};
251 $self->{empty
}->{$path} = 1;
253 if ($added_placeholder{$dir}) {
254 # Remove our placeholder file, if we created one.
255 delete_entry
($self, $added_placeholder{$dir});
256 delete $added_placeholder{$dir}
263 sub change_dir_prop
{
264 my ($self, $db, $prop, $value) = @_;
265 return undef if $self->is_path_ignored($db->{path
});
266 $self->{dir_prop
}->{$db->{path
}} ||= {};
267 $self->{dir_prop
}->{$db->{path
}}->{$prop} = $value;
271 sub absent_directory
{
272 my ($self, $path, $pb) = @_;
273 return undef if $self->is_path_ignored($path);
274 $self->{absent_dir
}->{$pb->{path
}} ||= [];
275 push @
{$self->{absent_dir
}->{$pb->{path
}}}, $path;
280 my ($self, $path, $pb) = @_;
281 return undef if $self->is_path_ignored($path);
282 $self->{absent_file
}->{$pb->{path
}} ||= [];
283 push @
{$self->{absent_file
}->{$pb->{path
}}}, $path;
287 sub change_file_prop
{
288 my ($self, $fb, $prop, $value) = @_;
289 return undef if $self->is_path_ignored($fb->{path
});
290 if ($prop eq 'svn:executable') {
291 if ($fb->{mode_b
} != 120000) {
292 $fb->{mode_b
} = defined $value ?
100755 : 100644;
294 } elsif ($prop eq 'svn:special') {
295 $fb->{mode_b
} = defined $value ?
120000 : 100644;
297 $self->{file_prop
}->{$fb->{path
}} ||= {};
298 $self->{file_prop
}->{$fb->{path
}}->{$prop} = $value;
303 sub apply_textdelta
{
304 my ($self, $fb, $exp) = @_;
305 return undef if $self->is_path_ignored($fb->{path
});
306 my $fh = $::_repository
->temp_acquire('svn_delta');
307 # $fh gets auto-closed() by SVN::TxDelta::apply(),
308 # (but $base does not,) so dup() it for reading in close_file
309 open my $dup, '<&', $fh or croak
$!;
310 my $base = $::_repository
->temp_acquire('git_blob');
313 my ($base_is_link, $size);
315 if ($fb->{mode_a
} eq '120000' &&
316 ! $self->{empty_symlinks
}->{$fb->{path
}}) {
317 print $base 'link ' or die "print $!\n";
321 $size = $::_repository
->cat_blob($fb->{blob
}, $base);
322 die "Failed to read object $fb->{blob}" if ($size < 0);
325 seek $base, 0, 0 or croak
$!;
326 my $got = ::md5sum
($base);
328 my $err = "Checksum mismatch: ".
329 "$fb->{path} $fb->{blob}\n" .
334 "Retrying... (possibly ",
335 "a bad symlink from SVN)\n";
336 $::_repository
->temp_reset($base);
344 seek $base, 0, 0 or croak
$!;
347 [ SVN
::TxDelta
::apply
($base, $dup, undef, $fb->{path
}, $fb->{pool
}) ];
351 my ($self, $fb, $exp) = @_;
352 return undef if $self->is_path_ignored($fb->{path
});
355 my $path = $self->git_path($fb->{path
});
356 if (my $fh = $fb->{fh
}) {
358 seek($fh, 0, 0) or croak
$!;
359 my $got = ::md5sum
($fh);
361 die "Checksum mismatch: $path\n",
362 "expected: $exp\n got: $got\n";
365 if ($fb->{mode_b
} == 120000) {
366 sysseek($fh, 0, 0) or croak
$!;
367 my $rd = sysread($fh, my $buf, 5);
370 croak
"sysread: $!\n";
372 warn "$path has mode 120000",
373 " but it points to nothing\n",
374 "converting to an empty file with mode",
376 $fb->{mode_b
} = '100644';
377 } elsif ($buf ne 'link ') {
378 warn "$path has mode 120000",
379 " but is not a link\n";
381 my $tmp_fh = $::_repository
->temp_acquire(
384 while ($res = sysread($fh, my $str, 1024)) {
385 my $out = syswrite($tmp_fh, $str, $res);
386 defined($out) && $out == $res
388 Git
::temp_path
($tmp_fh),
391 defined $res or croak
$!;
393 ($fh, $tmp_fh) = ($tmp_fh, $fh);
394 Git
::temp_release
($tmp_fh, 1);
398 $hash = $::_repository
->hash_and_insert_object(
399 Git
::temp_path
($fh));
400 $hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
402 Git
::temp_release
($fb->{base
}, 1);
403 Git
::temp_release
($fh, 1);
405 $hash = $fb->{blob
} or die "no blob information\n";
408 $self->{gii
}->update($fb->{mode_b
}, $hash, $path) or croak
$!;
409 print "\t$fb->{action}\t$path\n" if $fb->{action
} && ! $::_q
;
415 $self->{nr
} = $self->{gii
}->{nr
};
417 $self->SUPER::abort_edit
(@_);
423 if ($_preserve_empty_dirs) {
426 # Any entry flagged as empty that also has an associated
427 # dir_prop represents a newly created empty directory.
428 foreach my $i (keys %{$self->{empty
}}) {
429 push @empty_dirs, $i if exists $self->{dir_prop
}->{$i};
432 # Search for directories that have become empty due subsequent
434 push @empty_dirs, $self->find_empty_directories();
436 # Finally, add a placeholder file to each empty directory.
437 $self->add_placeholder_file($_) foreach (@empty_dirs);
439 $self->stash_placeholder_list();
442 $self->{git_commit_ok
} = 1;
443 $self->{nr
} = $self->{gii
}->{nr
};
445 $self->SUPER::close_edit
(@_);
448 sub find_empty_directories
{
451 my %dirs = map { dirname
($_) => 1 } @deleted_gpath;
453 foreach my $dir (sort keys %dirs) {
456 # If there have been any additions to this directory, there is
457 # no reason to check if it is empty.
459 foreach my $t (qw
/dir_prop file_prop/) {
460 foreach my $path (keys %{ $self->{$t} }) {
461 if (exists $self->{$t}->{dirname
($path)}) {
470 # Use `git ls-tree` to get the filenames of this directory
471 # that existed prior to this particular commit.
472 my $ls = command
('ls-tree', '-z', '--name-only',
473 $self->{c
}, "$dir/");
474 my %files = map { $_ => 1 } split(/\0/, $ls);
476 # Remove the filenames that were deleted during this commit.
477 delete $files{$_} foreach (@deleted_gpath);
479 # Report the directory if there are no filenames left.
480 push @empty_dirs, $dir unless (scalar %files);
485 sub add_placeholder_file
{
486 my ($self, $dir) = @_;
487 my $path = "$dir/$_placeholder_filename";
488 my $gpath = $self->git_path($path);
490 my $fh = $::_repository
->temp_acquire($gpath);
491 my $hash = $::_repository
->hash_and_insert_object(Git
::temp_path
($fh));
492 Git
::temp_release
($fh, 1);
493 $self->{gii
}->update('100644', $hash, $gpath) or croak
$!;
495 # The directory should no longer be considered empty.
496 delete $self->{empty
}->{$dir} if exists $self->{empty
}->{$dir};
498 # Keep track of any placeholder files we create.
499 $added_placeholder{$dir} = $path;
502 sub stash_placeholder_list
{
504 my $k = "svn-remote.$repo_id.added-placeholder";
505 my $v = eval { command_oneline
('config', '--get-all', $k) };
506 command_noisy
('config', '--unset-all', $k) if $v;
507 foreach (values %added_placeholder) {
508 command_noisy
('config', '--add', $k, $_);
515 Git
::SVN
::Fetcher
- tree delta consumer
for "git svn fetch"
522 use Git::SVN::Fetcher;
525 my $gs = Git::SVN->find_by_url($url);
526 my $ra = SVN::Ra->new(url => $url);
527 my $editor = Git::SVN::Fetcher->new($gs);
528 my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
530 $reporter->set_path('', $old_rev, 0);
531 $reporter->finish_report;
532 my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });
534 foreach my $path (keys %{$editor->{dir_prop}) {
535 my $props = $editor->{dir_prop}{$path};
536 foreach my $prop (keys %$props) {
537 print "property $prop at $path changed to $props->{$prop}\n";
540 foreach my $path (keys %{$editor->{empty}) {
541 my $action = $editor->{empty}{$path} ? 'added' : 'removed';
542 print "empty directory $path $action\n";
544 foreach my $path (keys %{$editor->{file_prop}) { ... }
545 foreach my $parent (keys %{$editor->{absent_dir}}) {
546 my @children = @{$editor->{abstent_dir}{$parent}};
547 print "cannot fetch directory $parent/$_: not authorized?\n"
550 foreach my $parent (keys %{$editor->{absent_file}) { ... }
554 This is a subclass of C<SVN::Delta::Editor>, which means it implements
555 callbacks to act as a consumer of Subversion tree deltas. This
556 particular implementation of those callbacks is meant to store
557 information about the resulting content which B<git svn fetch> could
558 use to populate new commits and new entries for F<unhandled.log>.
563 =item * Additions, removals, and modifications of files are propagated
564 to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
567 =item * Changes in Subversion path properties are recorded in the
568 C<dir_prop> and C<file_prop> fields (which are hashes).
570 =item * Addition and removal of empty directories are indicated by
571 entries with value 1 and 0 respectively in the C<empty> hash.
573 =item * Paths that are present but cannot be conveyed (presumably due
574 to permissions) are recorded in the C<absent_file> and
575 C<absent_dirs> hashes. For each key, the corresponding value is
576 a list of paths under that directory that were present but
577 could not be conveyed.
581 The interface is unstable. Do not use this module unless you are
586 L<SVN::Delta> from the Subversion perl bindings,
587 the core L<Carp>, L<File::Basename>, and L<IO::File> modules,
588 and git's L<Git> helper module.
590 C<Git::SVN::Fetcher> has not been tested using callers other than
598 =head1 INCOMPATIBILITIES