git-svn: canonicalize newly-minted URLs
[git.git] / perl / Git / SVN / Ra.pm
blob90ec30bfff25c5cb5cd3c171fdc4a81c46e0f156
1 package Git::SVN::Ra;
2 use vars qw/@ISA $config_dir $_ignore_refs_regex $_log_window_size/;
3 use strict;
4 use warnings;
5 use SVN::Client;
6 use Git::SVN::Utils qw(
7 canonicalize_url
8 canonicalize_path
9 add_path_to_url
12 use SVN::Ra;
# Git::SVN::Ra is a subclass of the SVN::Ra binding.
BEGIN {
	@ISA = ('SVN::Ra');
}

# File-scoped state shared by the subs in this file:
#   $ra_invalid    - set once another SVN::Ra connection has been opened;
#                    gs_fetch_loop_common() then recreates the connection
#   $can_do_switch - memoised answer of can_do_switch()
#   %ignored_err   - SVN errors skip_unknown_revs() has already warned about
#   $RA            - the most recently created Git::SVN::Ra object
my $ra_invalid;
my $can_do_switch;
my %ignored_err;
my $RA;
BEGIN {
	# Wrap a handful of simple SVN::Ra methods so that every call runs
	# with its own temporary pool, which is cleared as soon as the
	# underlying call has returned.
	no strict 'refs';
	my @pooled_methods = qw/rev_proplist get_latest_revnum get_uuid
	                        get_repos_root get_file/;
	foreach my $method (@pooled_methods) {
		my $parent = "SUPER::$method";
		*$method = sub {
			my $ra = shift;
			my $tmp_pool = SVN::Pool->new;
			my @results = $ra->$parent(@_, $tmp_pool);
			$tmp_pool->clear;
			return @results if wantarray;
			return $results[0];
		};
	}
}
# Build the list of authentication providers handed to the SVN libraries.
# Returns an array reference. Platform-specific providers, when the
# installed SVN supports them, are placed in front of the generic ones.
sub _auth_providers () {
	my @providers;
	push @providers,
		SVN::Client::get_simple_provider(),
		SVN::Client::get_ssl_server_trust_file_provider(),
		SVN::Client::get_simple_prompt_provider(
			\&Git::SVN::Prompt::simple, 2),
		SVN::Client::get_ssl_client_cert_file_provider(),
		SVN::Client::get_ssl_client_cert_prompt_provider(
			\&Git::SVN::Prompt::ssl_client_cert, 2),
		SVN::Client::get_ssl_client_cert_pw_file_provider(),
		SVN::Client::get_ssl_client_cert_pw_prompt_provider(
			\&Git::SVN::Prompt::ssl_client_cert_pw, 2),
		SVN::Client::get_username_provider(),
		SVN::Client::get_ssl_server_trust_prompt_provider(
			\&Git::SVN::Prompt::ssl_server_trust),
		SVN::Client::get_username_prompt_provider(
			\&Git::SVN::Prompt::username, 2);

	# The platform-specific lookup segfaulted in early 1.6.x releases and
	# does not exist at all in 1.5.x and older, hence the version gate.
	if (::compare_svn_version('1.6.15') >= 0) {
		my $config = SVN::Core::config_get_config($config_dir);
		# config_get_config returns every config file found in
		# ~/.subversion; the provider lookup only wants the one
		# called "config".
		my @lookup_args = ($config->{'config'}, undef);
		my $platform = SVN::Core::auth_get_platform_specific_client_providers(
			@lookup_args);
		# Platform providers go first.
		unshift @providers, @$platform;
	}
	return \@providers;
}
# Constructor: Git::SVN::Ra->new($url)
#
# Returns a connection object for the canonicalized $url. The most recently
# created object is cached in the file-level $RA and handed back again when
# it already points at the same URL.
sub new {
	my ($class, $url) = @_;
	$url = canonicalize_url($url);
	if ($RA && $RA->url eq $url) {
		return $RA;
	}

	::_req_svn();

	SVN::_Core::svn_config_ensure($config_dir, undef);
	my ($baton, $callbacks) =
		SVN::Core::auth_open_helper(_auth_providers());
	my $config = SVN::Core::config_get_config($config_dir);
	$RA = undef;
	my $dont_store_passwords = 1;
	my $conf_t = $config->{'config'};
	{
		# The $SVN::_Core::SVN_CONFIG_* / SVN_AUTH_* variables are each
		# mentioned only once, which would trigger "used only once"
		# warnings; silence that category for this block only.
		no warnings 'once';

		my $store_passwords = SVN::_Core::svn_config_get_bool(
			$conf_t,
			$SVN::_Core::SVN_CONFIG_SECTION_AUTH,
			$SVN::_Core::SVN_CONFIG_OPTION_STORE_PASSWORDS,
			1);
		if ($store_passwords == 0) {
			SVN::_Core::svn_auth_set_parameter($baton,
				$SVN::_Core::SVN_AUTH_PARAM_DONT_STORE_PASSWORDS,
				bless (\$dont_store_passwords, "_p_void"));
		}

		my $store_auth_creds = SVN::_Core::svn_config_get_bool(
			$conf_t,
			$SVN::_Core::SVN_CONFIG_SECTION_AUTH,
			$SVN::_Core::SVN_CONFIG_OPTION_STORE_AUTH_CREDS,
			1);
		if ($store_auth_creds == 0) {
			$Git::SVN::Prompt::_no_auth_cache = 1;
		}
	} # no warnings 'once'

	my $self = SVN::Ra->new(
		url => $url,
		auth => $baton,
		config => $config,
		pool => SVN::Pool->new,
		auth_provider_callbacks => $callbacks,
	);
	$RA = bless $self, $class;

	# Store the URL through the accessor so it is kept canonical.
	$self->url($url);
	$self->{svn_path} = $url;
	$self->{repos_root} = $self->get_repos_root;
	# svn_path is the part of the URL below the repository root.
	$self->{svn_path} =~ s#^\Q$self->{repos_root}\E(/|$)##;
	$self->{cache} = {
		check_path => { r => 0, data => {} },
		get_dir => { r => 0, data => {} },
	};

	return $RA;
}
# Combined accessor for the connection URL.
#   $ra->url        returns the stored URL
#   $ra->url($new)  stores the canonicalized $new and returns nothing
sub url {
	my $self = shift;

	return $self->{url} unless @_;

	my $new_url = shift;
	$self->{url} = canonicalize_url($new_url);
	return;
}
# Return the node kind of $path at revision $r, as reported by
# SVN::Ra::check_path. Answers are cached per path for a single revision;
# asking about a different revision empties the cache first.
sub check_path {
	my ($self, $path, $r) = @_;
	my $cache = $self->{cache}->{check_path};
	my $known = $cache->{data};
	return $known->{$path}
		if $r == $cache->{r} && exists $known->{$path};

	my $pool = SVN::Pool->new;
	my $kind = $self->SUPER::check_path($path, $r, $pool);
	$pool->clear;

	unless ($r == $cache->{r}) {
		# New revision: drop everything remembered for the old one.
		%{$cache->{data}} = ();
		$cache->{r} = $r;
	}
	$cache->{data}->{$path} = $kind;
	return $kind;
}
# Fetch the entries of directory $dir at revision $r.
#
# List context:   (\%dirents, $r, $props)
# Scalar context: \%dirents
#
# %dirents maps each entry name to { kind => ... }. Results are cached per
# directory for a single revision; a different revision empties the cache.
sub get_dir {
	my ($self, $dir, $r) = @_;
	my $cache = $self->{cache}->{get_dir};
	if ($r == $cache->{r}) {
		my $hit = $cache->{data}->{$dir};
		if ($hit) {
			return wantarray ? @$hit : $hit->[0];
		}
	}

	my $pool = SVN::Pool->new;
	my ($d, undef, $props) = $self->SUPER::get_dir($dir, $r, $pool);
	# Copy the entry kinds out before the pool is cleared.
	my %dirents;
	foreach my $name (keys %$d) {
		$dirents{$name} = { kind => $d->{$name}->kind };
	}
	$pool->clear;

	unless ($r == $cache->{r}) {
		%{$cache->{data}} = ();
		$cache->{r} = $r;
	}
	$cache->{data}->{$dir} = [ \%dirents, $r, $props ];
	return wantarray ? (\%dirents, $r, $props) : \%dirents;
}
sub DESTROY {
	# Deliberately a no-op: the object is kept in the file-level $RA,
	# so the real (inherited) DESTROY must not be called.
	return;
}
# get_log(paths, start, end, limit,
#         discover_changed_paths, strict_node_history, receiver)
#
# Wrapper around SVN::Ra::get_log that runs with a temporary pool and hands
# the receiver plain-Perl copies of the changed-path records, with paths
# made relative to this connection's URL instead of the repository root.
sub get_log {
	my ($self, @args) = @_;
	my $pool = SVN::Pool->new;

	# The svn_log_changed_path_t objects given to the receiver are likely
	# to be overwritten even when only references to them are saved, so
	# each structure is duplicated in full. Passing in an external pool
	# (rather than our short-lived one) does not help.
	my $receiver = pop @args;
	my $prefix = "/".$self->{svn_path};
	$prefix =~ s#/+($)##;
	my $prefix_regex = qr#^\Q$prefix\E#;

	my $copying_receiver = sub {
		my $changed = $_[0];
		return $receiver->(@_) unless $changed;

		# Replace the first argument in place with a fresh hash of
		# copies (created on first store below).
		$_[0] = undef;
		foreach my $path (keys %$changed) {
			my $entry = $changed->{$path};
			# Make path relative to our url, not repos_root
			$path =~ s/$prefix_regex//;
			my %copy = map { $_ => $entry->$_; }
				qw/copyfrom_path copyfrom_rev action/;
			if ($copy{'copyfrom_path'}) {
				$copy{'copyfrom_path'} =~ s/$prefix_regex//;
				$copy{'copyfrom_path'} =
					canonicalize_path($copy{'copyfrom_path'});
			}
			$_[0]{$path} = \%copy;
		}
		return $receiver->(@_);
	};
	push @args, $copying_receiver;

	# SVN 1.1.x has no limit parameter, so it is removed from the
	# argument list there. To still honour a positive limit, the
	# receiver is wrapped so that it stops being called once that many
	# entries have been delivered.
	if (::compare_svn_version('1.2.0') <= 0) {
		my $limit = splice(@args, 3, 1);
		if ($limit > 0) {
			my $inner = pop @args;
			push @args, sub { $inner->(@_) if (--$limit >= 0) };
		}
	}

	my $ret = $self->SUPER::get_log(@args, $pool);
	$pool->clear;
	return $ret;
}
# Compare two trees in the repository.
#
#   $url1, $rev1 - first tree
#   $url2, $rev2 - second tree
#
# Runs an SVN client diff between the two into a temporary file and returns
# true when the diff output is empty. Croaks if the temporary file cannot
# be created or closed.
#
# Side effect: sets the file-level $ra_invalid flag, because the diff opens
# a new SVN::Ra connection.
sub trees_match {
	my ($self, $url1, $rev1, $url2, $rev2) = @_;

	# Neither module is loaded by this file, so load them here rather
	# than relying on the calling script having done so.
	require Carp;
	require IO::File;

	my $ctx = SVN::Client->new(auth => _auth_providers());
	my $out = IO::File->new_tmpfile
		or Carp::croak("Unable to create temporary file: $!");

	# older SVN (1.1.x) doesn't take $pool as the last parameter for
	# $ctx->diff(), so we'll create a default one
	my $pool = SVN::Pool->new_default_sub;

	$ra_invalid = 1; # this will open a new SVN::Ra connection to $url1
	$ctx->diff([], $url1, $rev1, $url2, $rev2, 1, 1, 0, $out, $out);
	$out->flush;
	# Element 7 of stat() is the file size: empty output means no diff.
	my $ret = (($out->stat)[7] == 0);
	close $out or Carp::croak("Unable to close temporary file: $!");

	return $ret;
}
# Forward to SVN::Ra::get_commit_editor, adding the two extra lock-related
# arguments (undef, 0) when the installed SVN is 1.2.0 or newer.
sub get_commit_editor {
	my ($self, $log, $cb, $pool) = @_;

	my @lock;
	if (::compare_svn_version('1.2.0') >= 0) {
		@lock = (undef, 0);
	}
	return $self->SUPER::get_commit_editor($log, $cb, @lock, $pool);
}
# Drive $editor with an SVN update report for the path tracked by $gs,
# going from revision $rev_a to $rev_b. When both revisions are equal the
# path is reported as new and any existing index file of $gs is removed
# first. Returns $editor->{git_commit_ok}.
sub gs_do_update {
	my ($self, $rev_a, $rev_b, $gs, $editor) = @_;
	my $new = ($rev_a == $rev_b);
	my $path = $gs->path;

	if ($new && -e $gs->{index}) {
		unlink $gs->{index}
			or die "Couldn't unlink index: $gs->{index}: $!\n";
	}

	my $pool = SVN::Pool->new;
	$editor->set_path_strip($path);

	# The first path component becomes the update target; the rest is
	# walked through the reporter below.
	my ($target, @remaining) = split m#/#, $path;
	$target = '' unless defined $target;
	my $reporter = $self->do_update($rev_b, $target, 1, $editor, $pool);
	my @lock = (::compare_svn_version('1.2.0') >= 0) ? (undef) : ();

	# Since we can't rely on svn_ra_reparent being available, we'll
	# just have to do some magic with set_path to make it so
	# we only want a partial path.
	my $final = join('/', @remaining);
	my $sp = '';
	foreach my $component (@remaining) {
		$reporter->set_path($sp, $rev_b, 0, @lock, $pool);
		$sp .= '/' if length $sp;
		$sp .= $component;
	}
	die "BUG: '$sp' != '$final'\n" if ($sp ne $final);

	$reporter->set_path($sp, $rev_a, $new, @lock, $pool);

	$reporter->finish_report($pool);
	$pool->clear;
	return $editor->{git_commit_ok};
}
# Drive $editor with an SVN switch report from revision $rev_a to $rev_b of
# $url_b, for the path tracked by $gs. Returns $editor->{git_commit_ok}.
#
# This requires SVN 1.4.3 or later (do_switch didn't work before 1.4.3, and
# svn_ra_reparent didn't work before 1.4).
sub gs_do_switch {
	my ($self, $rev_a, $rev_b, $gs, $url_b, $editor) = @_;
	my $path = $gs->path;
	my $pool = SVN::Pool->new;

	my $old_url = $self->url;
	my $full_url = add_path_to_url( $self->url, $path );
	my ($ra, $reparented);

	my $needs_new_connection =
		$old_url =~ m#^svn(\+ssh)?://# ||
		($full_url =~ m#^https?://# &&
		 canonicalize_url($full_url) ne $full_url);

	if ($needs_new_connection) {
		# Drop every reference to the current connection (including
		# the caller's, via $_[0]) and open a new one at $full_url.
		$_[0] = undef;
		$self = undef;
		$RA = undef;
		$ra = Git::SVN::Ra->new($full_url);
		$ra_invalid = 1;
	}
	elsif ($old_url ne $full_url) {
		# Point the existing session at $full_url for the switch.
		SVN::_Ra::svn_ra_reparent(
			$self->{session},
			canonicalize_url($full_url),
			$pool
		);
		$self->url($full_url);
		$reparented = 1;
	}

	$ra ||= $self;
	$url_b = canonicalize_url($url_b);
	my $reporter = $ra->do_switch($rev_b, '', 1, $url_b, $editor, $pool);
	my @lock = (::compare_svn_version('1.2.0') >= 0) ? (undef) : ();
	$reporter->set_path('', $rev_a, 0, @lock, $pool);
	$reporter->finish_report($pool);

	if ($reparented) {
		# Undo the temporary reparent.
		SVN::_Ra::svn_ra_reparent($self->{session}, $old_url, $pool);
		$self->url($old_url);
	}

	$pool->clear;
	return $editor->{git_commit_ok};
}
# Return the longest leading path shared by every object in @$gsv (via
# ->path) and every glob in @$globs (via ->{path}->{left}). $globs may be
# undef. Returns '' when there is no common leading path.
sub longest_common_path {
	my ($gsv, $globs) = @_;
	$globs ||= [];

	# A prefix is common only if every gs and every glob contributes it.
	my $wanted = scalar(@$gsv) + scalar(@$globs);

	# Count, for each cumulative prefix of a path, how many paths have it.
	my %seen;
	my $tally = sub {
		my ($full) = @_;
		my $prefix = '';
		foreach my $part (split m#/#, $full) {
			$prefix .= length($prefix) ? "/$part" : $part;
			$seen{$prefix}++;
		}
	};
	$tally->($_->path) foreach @$gsv;
	$tally->($_->{path}->{left}) foreach @$globs;

	# Longest candidates first; the first one shared by all wins.
	foreach my $candidate (sort {length $b <=> length $a} keys %seen) {
		return $candidate if $seen{$candidate} == $wanted;
	}
	return '';
}
# Fetch revisions $base..$head for all Git::SVN objects in @$gsv and all
# globs in @$globs. The range is processed in windows of $_log_window_size
# revisions: each window's log is read for the longest path common to
# everything being tracked, and every logged revision is then fetched and
# committed for each matching Git::SVN object. Returns nothing when
# $base > $head; otherwise finishes by calling Git::SVN::gc().
sub gs_fetch_loop_common {
	my ($self, $base, $head, $gsv, $globs) = @_;
	return if ($base > $head);

	my $window = $_log_window_size;
	my $min = $base;
	my $max = ($head < $base + $window) ? $head : $base + $window;
	my $longest_path = longest_common_path($gsv, $globs);
	my $ra_url = $self->url;
	my $find_trailing_edge;

	while (1) {
		my %revs;
		my $err;

		# While reading logs, route SVN errors through
		# skip_unknown_revs() and remember the last one seen.
		my $saved_handler = $SVN::Error::handler;
		$SVN::Error::handler = sub {
			($err) = @_;
			skip_unknown_revs($err);
		};

		$self->get_log([$longest_path], $min, $max, 0, 1, 1,
		               sub { $revs{$_[1]} = _cb(@_) });

		if (!$err) {
			$find_trailing_edge = 1;
		}
		else {
			print "Checked through r$max\r";
			if ($find_trailing_edge) {
				# A previous window succeeded and this one
				# failed: look for the last revision before
				# the path went away.
				print STDERR "Path '$longest_path' ",
					     "was probably deleted:\n",
					     $err->expanded_message,
					     "\nWill attempt to follow ",
					     "revisions r$min .. r$max ",
					     "committed before the deletion\n";
				for (my $hi = $max - 1; $hi >= $min; $hi--) {
					my $ok;
					$self->get_log([$longest_path], $min, $hi,
					               0, 1, 1, sub {
						$ok = $_[1];
						$revs{$_[1]} = _cb(@_);
					});
					if ($ok) {
						print STDERR "r$min .. r$ok OK\n";
						last;
					}
				}
				$find_trailing_edge = 0;
			}
		}
		$SVN::Error::handler = $saved_handler;

		my %exists = map { $_->{path} => $_ } @$gsv;
		foreach my $r (sort {$a <=> $b} keys %revs) {
			my ($paths, $logged) = @{$revs{$r}};

			my @candidates =
				$self->match_globs(\%exists, $paths, $globs, $r);
			foreach my $gs (@candidates) {
				next if $gs->rev_map_max >= $r;
				next unless $gs->match_paths($paths, $r);
				$gs->{logged_rev_props} = $logged;
				if (my $last_commit = $gs->last_commit) {
					$gs->assert_index_clean($last_commit);
				}
				my $log_entry = $gs->do_fetch($paths, $r);
				$gs->do_git_commit($log_entry) if $log_entry;
				$Git::SVN::INDEX_FILES{$gs->{index}} = 1;
			}
			foreach my $g (@$globs) {
				my $k = "svn-remote.$g->{remote}." .
				        "$g->{t}-maxRev";
				Git::SVN::tmp_config($k, $r);
			}
			if ($ra_invalid) {
				# Another connection was opened meanwhile;
				# drop ours and reconnect to the same URL.
				$_[0] = undef;
				$self = undef;
				$RA = undef;
				$self = Git::SVN::Ra->new($ra_url);
				$ra_invalid = undef;
			}
		}

		# pre-fill the .rev_db since it'll eventually get filled in
		# with '0' x40 if something new gets committed
		foreach my $gs (@$gsv) {
			next if $gs->rev_map_max >= $max;
			next if defined $gs->rev_map_get($max);
			$gs->rev_map_set($max, 0 x 40);
		}
		foreach my $g (@$globs) {
			my $k = "svn-remote.$g->{remote}.$g->{t}-maxRev";
			Git::SVN::tmp_config($k, $max);
		}

		last if $max >= $head;
		# Advance to the next window, never going past $head.
		$min = $max + 1;
		$max += $window;
		$max = $head if ($max > $head);
	}
	Git::SVN::gc();
}

# Pack one log receiver call into [ $paths, { author, date, log } ].
sub _cb {
	my ($paths, $r, $author, $date, $log) = @_;
	return [ $paths,
	         { author => $author, date => $date, log => $log } ];
}
# List the sub-directories found $depth levels below $left at revision $r.
# Each result is a path relative to $left ("a" for depth 1, "a/b" for
# depth 2, ...). Returns nothing when get_dir fails or does not yield its
# usual three values.
sub get_dir_globbed {
	my ($self, $left, $depth, $r) = @_;

	my @listing = eval { $self->get_dir($left, $r) };
	return unless scalar @listing == 3;

	my $dirents = $listing[0];
	my @subdirs = grep { $dirents->{$_}->{kind} == $SVN::Node::dir }
	              keys %$dirents;
	return @subdirs unless $depth > 1;

	my @finalents;
	foreach my $de (@subdirs) {
		my @below = $self->get_dir_globbed("$left/$de", $depth - 1, $r);
		push @finalents, map { "$de/$_" } @below;
	}
	return @finalents;
}
# Decide whether the ref that glob $g produces for $p should be ignored.
# The ref name is checked against the glob's own ignore_refs_regex first,
# then against the file-level $_ignore_refs_regex.
# return value: 0 -- don't ignore, 1 -- ignore
sub is_ref_ignored {
	my ($g, $p) = @_;
	my $refname = $g->{ref}->full_path($p);

	if (defined($g->{ignore_refs_regex}) &&
	    $refname =~ m!$g->{ignore_refs_regex}!) {
		return 1;
	}
	if (defined($_ignore_refs_regex) &&
	    $refname =~ m!$_ignore_refs_regex!o) {
		return 1;
	}
	return 0;
}
# Update %$exists (path => Git::SVN object) with any new paths that the
# globs in @$globs match among the changed paths %$paths of revision $r,
# then return all the objects in %$exists.
sub match_globs {
	my ($self, $exists, $paths, $globs, $r) = @_;

	foreach my $g (@$globs) {
		my $glob_path = $g->{path};

		# The glob's left side itself was added or replaced.
		if (my $path = $paths->{"/$glob_path->{left}"}) {
			if ($path->{action} =~ /^[AR]$/) {
				get_dir_check($self, $exists, $g, $r);
			}
		}

		foreach (keys %$paths) {
			# An added/replaced path under the left side that
			# does not match the full glob: rescan the directory.
			if (/$g->{path}->{left_regex}/ &&
			    !/$g->{path}->{regex}/) {
				next if $paths->{$_}->{action} !~ /^[AR]$/;
				get_dir_check($self, $exists, $g, $r);
			}
			next unless /$g->{path}->{regex}/;
			my $p = $1;
			my $pathname = $glob_path->full_path($p);
			next if is_ref_ignored($g, $p);
			next if $exists->{$pathname};
			next if ($self->check_path($pathname, $r) !=
			         $SVN::Node::dir);
			$exists->{$pathname} = Git::SVN->init(
				$self->url, $pathname, undef,
				$g->{ref}->full_path($p), 1);
		}

		# One of the left side's parent directories was added or
		# replaced.
		my $parent = '';
		foreach my $component (split m#/#, $glob_path->{left}) {
			$parent .= "/$component";
			my $change = $paths->{$parent};
			next unless ($change &&
			             ($change->{action} =~ /^[AR]$/));
			get_dir_check($self, $exists, $g, $r);
		}
	}
	values %$exists;
}

# Scan the directories matched by glob $g at revision $r and register a
# Git::SVN object in %$exists for each matching path not yet known.
sub get_dir_check {
	my ($self, $exists, $g, $r) = @_;

	my $glob_path = $g->{path};
	my @dirs = $self->get_dir_globbed($glob_path->{left},
	                                  $glob_path->{depth},
	                                  $r);

	foreach my $de (@dirs) {
		my $p = $glob_path->full_path($de);
		next if $exists->{$p};
		next if (length($glob_path->{right}) &&
		         ($self->check_path($p, $r) != $SVN::Node::dir));
		next unless $p =~ /$g->{path}->{regex}/;
		$exists->{$p} = Git::SVN->init($self->url, $p, undef,
		                               $g->{ref}->full_path($de), 1);
	}
}
# Find the shortest URL, starting at the repository root and adding one
# component of svn_path at a time, at which a connection can be opened and
# the log read. Returns that URL, canonicalized. When the connection already
# points at the repository root, its URL is returned unchanged.
sub minimize_url {
	my ($self) = @_;
	return $self->url if ($self->url eq $self->{repos_root});

	my $url = $self->{repos_root};
	my @components = split(m!/!, $self->{svn_path});
	my $c = '';
	my $readable;
	do {
		$url = add_path_to_url($url, $c);
		# Record success explicitly instead of inspecting $@ later.
		$readable = eval {
			my $ra = (ref $self)->new($url);
			my $latest = $ra->get_latest_revnum;
			$ra->get_log("", $latest, 0, 1, 0, 1, sub {});
			1;
		};
		# Test with defined(): a component named "0" is false but is
		# still a real path component that must be tried.
	} while (!$readable && defined($c = shift @components));

	return canonicalize_url($url);
}
# Report whether do_switch works on this connection: 1 if a trial call
# succeeds, 0 if it throws. The trial is made only once; the answer is kept
# in the file-level $can_do_switch.
sub can_do_switch {
	my $self = shift;
	return $can_do_switch if defined $can_do_switch;

	my $pool = SVN::Pool->new;
	my $rep = eval {
		$self->do_switch(1, '', 0, $self->url,
		                 SVN::Delta::Editor->new, $pool);
	};
	if ($@) {
		$can_do_switch = 0;
	}
	else {
		# The trial worked; abandon the report it started.
		$rep->abort_report($pool);
		$can_do_switch = 1;
	}
	$pool->clear;

	return $can_do_switch;
}
# SVN error handler used while reading logs. Three "path not found" style
# error codes are tolerated with a warning (printed once per distinct
# message, revision numbers aside); any other error is fatal.
sub skip_unknown_revs {
	my ($err) = @_;
	my $errno = $err->apr_err();

	# The branch being tracked may not have existed when the repository
	# started, so these codes are not treated as errors:
	#   160013 - svn:// and file://
	#   175002 - http(s)://
	#   175007 - http(s):// (seen on a repo that required authorization)
	# More codes may be discovered later...
	unless (grep { $errno == $_ } 175007, 175002, 160013) {
		die "Error from SVN, ($errno): ", $err->expanded_message,"\n";
	}

	# Revision numbers differ on every call; blank out all digits so the
	# same message is only reported once.
	my $err_key = $err->expanded_message;
	$err_key =~ s/\d+/\0/g;
	$err_key = "$errno\0$err_key";
	return if $ignored_err{$err_key};

	warn "W: Ignoring error from SVN, path probably ",
	     "does not exist: ($errno): ",
	     $err->expanded_message,"\n";
	warn "W: Do not be alarmed at the above message ",
	     "git-svn is just searching aggressively for ",
	     "old history.\n",
	     "This may take a while on large repositories\n";
	$ignored_err{$err_key} = 1;
	return;
}
628 __END__
=head1 NAME

Git::SVN::Ra - Subversion remote access functions for git-svn
632 =head1 SYNOPSIS
634 use Git::SVN::Ra;
636 my $ra = Git::SVN::Ra->new($branchurl);
637 my ($dirents, $fetched_revnum, $props) =
638 $ra->get_dir('.', $SVN::Core::INVALID_REVNUM);
640 =head1 DESCRIPTION
642 This is a wrapper around the L<SVN::Ra> module for use by B<git-svn>.
643 It fills in some default parameters (such as the authentication
644 scheme), smooths over incompatibilities between libsvn versions, adds
645 caching, and implements some functions specific to B<git-svn>.
647 Do not use it unless you are developing git-svn. The interface will
648 change as git-svn evolves.
650 =head1 DEPENDENCIES
652 Subversion perl bindings,
653 L<Git::SVN>.
655 C<Git::SVN::Ra> has not been tested using callers other than
656 B<git-svn> itself.
658 =head1 SEE ALSO
660 L<SVN::Ra>.
662 =head1 INCOMPATIBILITIES
664 None reported.
666 =head1 BUGS
668 None.