1 package File
::Rsync
::Mirror
::Recent
;
5 use File
::Rsync
::Mirror
::Recentfile
;
11 File::Rsync::Mirror::Recent - mirroring via rsync made efficient
15 package File
::Rsync
::Mirror
::Recent
;
17 use File
::Basename
qw(basename dirname fileparse);
18 use File
::Copy
qw(cp);
19 use File
::Path
qw(mkpath);
21 use File
::Rsync
::Mirror
::Recentfile
::FakeBigFloat
qw(:all);
23 use List
::Pairwise
qw(mapp grepp);
24 use List
::Util
qw(first max);
25 use Scalar
::Util
qw(reftype);
30 use version
; our $VERSION = qv
('0.0.1');
34 B<!!!! PRE-ALPHA ALERT !!!!>
36 Nothing in here is believed to be stable, nothing yet intended for
37 public consumption. The plan is to provide a script in one of the next
38 releases that acts as a frontend for all the backend functionality.
39 Option and method names will very likely change.
41 File::Rsync::Mirror::Recent is acting at a higher level than
42 File::Rsync::Mirror::Recentfile. File::Rsync::Mirror::Recent
43 establishes a view on a collection of recentfile objects and provides
44 abstractions spanning multiple intervals associated with those.
46 B<Mostly unimplemented as of yet>. Will need to shift some accessors
47 from recentfile to recent.
51 my $rr = File::Rsync::Mirror::Recent->new
53 ignore_link_stat_errors => 1,
54 localroot => "/home/ftp/pub/PAUSE/authors",
55 remote => "pause.perl.org::authors/RECENT.recent",
72 =head2 my $obj = CLASS->new(%hash)
74 Constructor. On every argument pair the key is a method name and the
75 value is an argument to that method name.
80 my($class, @args) = @_;
81 my $self = bless {}, $class;
83 my($method,$arg) = splice @args, 0, 2;
99 "_max_one_state", # when we have no time left but want
100 # at least get one file per
101 # iteration to avoid procrastination
102 "_principal_recentfile",
105 "_runstatusfile", # frequenty dumps all rfs
106 "_logfilefordone", # turns on _logfile on all DONE
107 # systems (disk intensive)
111 split /\n/, <<'=cut'; push @accessors, grep {s/^=item\s+//} @pod_lines; }
115 =item ignore_link_stat_errors
117 as in F:R:M:Recentfile
121 Option to specify the local principal file for operations with a local
122 collection of recentfiles.
126 as in F:R:M:Recentfile
128 =item max_files_per_connection
130 as in F:R:M:Recentfile
138 XXX: this is (ATM) different from Recentfile!!!
140 =item remote_recentfile
142 Rsync address of the remote C<RECENT.recent> symlink or whichever name
143 the principal remote recentfile has.
147 Things like compress, links, times or checksums. Passed in to the
148 File::Rsync object used to run the mirror.
152 Minimum time before fetching the principal recentfile again.
156 Boolean to turn on a bit verbosity. This is in experimental stage, we
157 will have to decide which output we want when the dust has settled.
163 use accessors
@accessors;
167 =head2 $arrayref = $obj->news ( %options )
169 Testing this ATM with:
171 perl -Ilib bin/rrr-news \
174 -local /home/ftp/pub/PAUSE/authors/RECENT.recent
176 perl -Ilib bin/rrr-news \
180 -localroot /home/ftp/pub/PAUSE/authors/ \
181 -remote pause.perl.org::authors/RECENT.recent
184 Note: all parameters that can be passed to recent_events can also be specified here.
186 Note: all data are kept in memory
191 my($self, %opt) = @_;
192 my $local = $self->local;
194 if (my $remote = $self->remote) {
196 if ($localroot = $self->localroot) {
197 # nice, they know what they are doing
199 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
202 die "Alert: neither local nor remote specified, cannot continue";
205 my $rfs = $self->recentfiles;
210 $locopt{before
} = $before;
212 $locopt{max
} -= scalar @
$ret;
213 last if $locopt{max
} <= 0;
216 my $res = $rf->recent_events(%locopt);
220 if ($opt{max
} && scalar @
$ret > $opt{max
}) {
224 if ( $locopt{info
}{last} && _bigfloatlt
($locopt{info
}{last}{epoch
},$opt{after
}) ) {
227 if ( _bigfloatgt
($opt{after
},$locopt{info
}{first
}{epoch
}) ) {
234 $before = $res->[-1]{epoch
};
235 $before = $opt{before
} if $opt{before
} && _bigfloatlt
($opt{before
},$before);
240 =head2 overview ( %options )
242 returns a small table that summarizes the state of all recentfiles
243 collected in this Recent object.
245 $options{verbose}=1 increases the number of columns displayed.
247 Here is an example output:
249 Ival Cnt Max Min Span Util Cloud
250 1h 47 1225053014.38 1225049650.91 3363.47 93.4% ^ ^
251 6h 324 1225052939.66 1225033394.84 19544.82 90.5% ^ ^
252 1d 437 1225049651.53 1224966402.53 83248.99 96.4% ^ ^
253 1W 1585 1225039015.75 1224435339.46 603676.29 99.8% ^ ^
254 1M 5855 1225017376.65 1222428503.57 2588873.08 99.9% ^ ^
255 1Q 17066 1224578930.40 1216803512.90 7775417.50 100.0% ^ ^
256 1Y 15901 1223966162.56 1216766820.67 7199341.89 22.8% ^ ^
257 Z 9909 1223966162.56 1216766820.67 7199341.89 - ^ ^
259 I<Max> is the name of the interval.
261 I<Cnt> is the number of entries in this recentfile.
263 I<Max> is the highest(first) epoch in this recentfile, rounded.
265 I<Min> is the lowest(last) epoch in thie recentfile, rounded.
267 I<Span> is the timespan currently covered, rounded.
269 I<Util> is I<Span> devided by the designated timespan of this
272 I<Cloud> is ascii art illustrating the sequence of the Max and Min
277 my($self,%options) = @_;
278 my $rfs = $self->recentfiles;
280 RECENTFILE
: for my $rf (@
$rfs) {
281 my $re=$rf->recent_events;
284 my $span = $re->[0]{epoch
}-$re->[-1]{epoch
};
285 my $merged = $rf->merged;
293 $rf->dirtymark ?
sprintf("%.2f",$rf->dirtymark) : "-",
295 ($rf->interval eq "Z"
299 sprintf ("%.2f", $merged->{epoch
} || 0)),
301 sprintf ("%.2f", $re->[0]{epoch
}),
303 sprintf ("%.2f", $re->[-1]{epoch
}),
305 sprintf ("%.2f", $span),
307 ($rf->interval eq "Z"
311 sprintf ("%5.1f%%", 100 * $span / $rf->interval_secs)
314 @rank{mapp
{$b} grepp
{$a =~ /^(Max|Min)$/} @
$rfsummary} = ();
320 @rank{sort {$b <=> $a} keys %rank} = 1..keys %rank;
321 my $maxrank = max
values %rank;
322 for my $rfsummary (@s) {
323 my $string = " " x
$maxrank;
325 for my $ele (qw(Max Min)) {
326 my($r) = mapp
{$b} grepp
{$a eq $ele} @
$rfsummary;
327 push @borders, $rank{$r}-1;
329 for ($borders[0],$borders[1]) {
330 substr($string,$_,1) = "^";
332 push @
$rfsummary, "Cloud", $string;
334 unless ($options{verbose
}) {
335 my %filter = map {($_=>1)} qw(Ival Cnt Max Min Span Util Cloud);
337 $_ = [mapp
{($a,$b)} grepp
{!!$filter{$a}} @
$_];
341 for (my $i = 0; $i <= $#{$s[0]}; $i+=2) {
342 my $maxlength = max
((map { length $_->[$i+1] } @s), length $s[0][$i]);
343 push @sprintf, "%" . $maxlength . "s";
345 my $sprintf = join " ", @sprintf;
347 my $headline = sprintf $sprintf, mapp
{$a} @
{$s[0]};
348 join "", $headline, map { sprintf $sprintf, mapp
{$b} @
$_ } @s;
353 (Private method, not for public use) Keeping track of already handled
354 files. Currently it is a hash, will probably become a database with
360 my($self, $set) = @_;
362 $self->__pathdb ($set);
364 my $pathdb = $self->__pathdb;
365 unless (defined $pathdb) {
366 $self->__pathdb(+{});
368 return $self->__pathdb;
371 =head2 $recentfile = $obj->principal_recentfile ()
373 returns the principal recentfile of this tree.
377 sub principal_recentfile
{
379 my $prince = $self->_principal_recentfile;
380 return $prince if defined $prince;
381 my $local = $self->local;
383 $prince = File
::Rsync
::Mirror
::Recentfile
->new_from_file ($local);
385 if (my $remote = $self->remote) {
387 if ($localroot = $self->localroot) {
388 # nice, they know what they are doing
390 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
392 my $rf0 = $self->_recentfile_object_for_remote;
395 die "Alert: neither local nor remote specified, cannot continue";
398 $self->_principal_recentfile($prince);
402 =head2 $recentfiles_arrayref = $obj->recentfiles ()
404 returns a reference to the complete list of recentfile objects that
405 describe this tree. No guarantee is given that the represented
406 recentfiles exist or have been read. They are just bare objects.
412 my $rfs = $self->_recentfiles;
413 return $rfs if defined $rfs;
414 my $rf0 = $self->principal_recentfile;
415 my $pathdb = $self->_pathdb;
416 $rf0->_pathdb ($pathdb);
417 my $aggregator = $rf0->aggregator;
419 for my $agg (@
$aggregator) {
420 my $nrf = $rf0->_sparse_clone;
421 $nrf->interval ( $agg );
422 $nrf->have_mirrored ( 0 );
423 $nrf->_pathdb ( $pathdb );
426 $self->_recentfiles(\
@rf);
430 =head2 $success = $obj->rmirror ( %options )
432 XXX WORK IN PROGRESS XXX
434 Mirrors all recentfiles of the I<remote> address working through all
435 of them, mirroring their contents.
437 Testing this ATM with:
439 use File::Rsync::Mirror::Recent;
440 my $rrr = File::Rsync::Mirror::Recent->new(
441 ignore_link_stat_errors => 1,
442 localroot => "/home/ftp/pub/PAUSE/authors",
443 remote => "pause.perl.org::authors/RECENT.recent",
444 max_files_per_connection => 5000,
452 _runstatusfile => "recent-rmirror-state.yml",
453 _logfilefordone => "recent-rmirror-donelog.log",
455 $rrr->rmirror ( "skip-deletes" => 1, loop => 1 );
457 And since the above seems to work, I try now without the llop
460 use File::Rsync::Mirror::Recent;
462 for my $t ("authors","modules"){
463 my $rrr = File::Rsync::Mirror::Recent->new(
464 ignore_link_stat_errors => 1,
465 localroot => "/home/ftp/pub/PAUSE/$t",
466 remote => "pause.perl.org::$t/RECENT.recent",
467 max_files_per_connection => 512,
475 _runstatusfile => "recent-rmirror-state-$t.yml",
476 _logfilefordone => "recent-rmirror-donelog-$t.log",
483 $rrr->rmirror ( "skip-deletes" => 1 );
485 warn "sleeping 23\n"; sleep 23;
492 my($self, %options) = @_;
494 # my $rf0 = $self->_recentfile_object_for_remote;
495 my $rfs = $self->recentfiles;
497 my $_once_per_20s = sub {
498 $self->principal_recentfile->seed;
502 # XXX exit gracefully (reminder)
504 my $minimum_time_per_loop = 20; # XXX needs accessor: warning, if
505 # set too low, we do nothing but
506 # mirror the principal!
507 if (my $logfile = $self->_logfilefordone) {
508 for my $i (0..$#$rfs) {
509 $rfs->[$i]->done->_logfile($logfile);
513 my $ttleave = time + $minimum_time_per_loop;
514 RECENTFILE
: for my $i (0..$#$rfs) {
516 if (my $file = $self->_runstatusfile) {
517 $self->_rmirror_runstatusfile ($file, $i, \
%options);
519 if (time > $ttleave){
520 # Must make sure that one file can get fetched in any case
521 $self->_max_one_state(1);
525 $rfs->[$i+1]->done->merge($rf->done);
527 $rf->get_remote_recentfile_as_tempfile;
530 WORKUNIT
: while (time < $ttleave) {
532 $self->_rmirror_sleep_per_connection ($i);
535 $self->_rmirror_mirror ($i, \
%options);
540 $self->_max_one_state(0);
541 if ($rfs->[-1]->uptodate) {
542 $self->_rmirror_cleanup;
543 if ($options{loop}) {
548 my $sleep = $ttleave - time;
550 $self->_rmirror_endofloop_sleep ($sleep);
552 # negative time not invented yet:)
558 sub _rmirror_mirror
{
559 my($self, $i, $options) = @_;
560 my $rfs = $self->recentfiles;
562 my %locopt = %$options;
563 if ($self->_max_one_state) {
566 $locopt{piecemeal
} = 1;
567 $rf->mirror (%locopt);
570 sub _rmirror_sleep_per_connection
{
572 my $rfs = $self->recentfiles;
574 my $sleep = $rf->sleep_per_connection;
575 $sleep = 0.42 unless defined $sleep; # XXX accessor!
576 Time
::HiRes
::sleep $sleep;
577 $rfs->[$i+1]->done->merge($rf->done) if $i < $#$rfs;
580 sub _rmirror_cleanup
{
582 my $pathdb = $self->_pathdb();
583 for my $k (keys %$pathdb) {
584 delete $pathdb->{$k};
586 my $rfs = $self->recentfiles;
587 for my $i (0..$#$rfs-1) {
588 my $thismerged = $rfs->[$i]->merged;
589 my $next = $rfs->[$i+1];
590 my $nextminmax = $next->minmax;
591 # warn "DEBUG: i[$i] nextminmaxmax[$nextminmax->{max}] thismergedepoch[$thismerged->{epoch}]";
592 if (not defined $thismerged->{epoch
} or _bigfloatlt
($nextminmax->{max
},$thismerged->{epoch
})){
594 warn sprintf "DEBUG: %s seeded\n", $next->interval;
599 sub _rmirror_runstatusfile
{
600 my($self, $file, $i, $options) = @_;
601 my $rfs = $self->recentfiles;
608 self
=> [keys %$self], # passing $self leaks, dclone refuses because of globs
610 uptodate
=> {map {($_=>$rfs->[$_]->uptodate)} 0..$#$rfs},
614 sub _rmirror_endofloop_sleep
{
615 my($self, $sleep) = @_;
616 if ($self->verbose) {
619 "Dorm %d (%s secs)\n",
627 # mirrors the recentfile and instantiates the recentfile object
628 sub _recentfile_object_for_remote
{
630 # get the remote recentfile
631 my $rrfile = $self->remote or die "Alert: cannot construct a recentfile object without the 'remote' attribute";
632 my $splitter = qr{(.+)/([^/]*)};
633 my($remoteroot,$rfilename) = $rrfile =~ $splitter;
634 $self->remoteroot($remoteroot);
636 if (!defined $rfilename) {
637 die "Alert: Cannot resolve '$rrfile', does not match $splitter";
638 } elsif (not length $rfilename or $rfilename eq "RECENT.recent") {
639 ($abslfile,$rfilename) = $self->_resolve_rfilename($rfilename);
643 "ignore_link_stat_errors",
645 "max_files_per_connection",
653 $rf0 = File
::Rsync
::Mirror
::Recentfile
->new (map {($_ => $self->$_)} @need_args);
654 $rf0->resolve_recentfilename($rfilename);
655 $abslfile = $rf0->get_remote_recentfile_as_tempfile ();
657 $rf0 = File
::Rsync
::Mirror
::Recentfile
->new_from_file ( $abslfile );
658 for my $override (@need_args) {
659 $rf0->$override ( $self->$override );
665 sub _resolve_rfilename
{
666 my($self, $rfilename) = @_;
667 $rfilename = "RECENT.recent" unless length $rfilename;
668 my $abslfile = undef;
669 if ($rfilename =~ /\.recent$/) {
670 # may be a file *or* a symlink,
671 $abslfile = $self->_fetch_as_tempfile ($rfilename);
672 while (-l
$abslfile) {
673 my $symlink = readlink $abslfile;
674 if ($symlink =~ m
|/|) {
675 die "FIXME: filenames containing '/' not supported, got '$symlink'";
677 my $localrfile = File
::Spec
->catfile($self->localroot, $rfilename);
678 if (-e
$localrfile) {
679 my $old_symlink = readlink $localrfile;
680 if ($old_symlink eq $symlink) {
681 unlink $abslfile or die "Cannot unlink '$abslfile': $!";
683 unlink $localrfile; # may fail
684 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
687 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
689 $abslfile = $self->_fetch_as_tempfile ($symlink);
692 return ($abslfile, $rfilename);
695 # takes a basename, returns an absolute name, does not delete the
696 # file, throws the $fh away. Caller must rename or unlink
697 sub _fetch_as_tempfile
{
698 my($self, $rfile) = @_;
699 my($suffix) = $rfile =~ /(\.[^\.]+)$/;
700 $suffix = "" unless defined $suffix;
701 my $fh = File
::Temp
->new
702 (TEMPLATE
=> sprintf(".FRMRecent-%s-XXXX",
705 DIR
=> $self->localroot,
709 my $rsync = File
::Rsync
->new($self->rsync_options);
712 src
=> join("/",$self->remoteroot,$rfile),
713 dst
=> $fh->filename,
714 ) or die "Could not mirror '$rfile' to $fh\: ".join(" ",$rsync->err);
715 return $fh->filename;
718 =head2 (void) $obj->rmirror_loop
720 (TBD) Run rmirror in an endless loop.
729 =head2 $hash = $obj->verify
731 (TBD) Runs find on the local tree, collects all existing files from
732 recentfiles, compares their names. The returned hash contains the keys
733 C<todelete> and C<toadd>.
748 Please report any bugs or feature requests through the web interface
750 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Rsync-Mirror-Recent>.
751 I will be notified, and then you'll automatically be notified of
752 progress on your bug as I make changes.
756 You can find documentation for this module with the perldoc command.
758 perldoc File::Rsync::Mirror::Recent
760 You can also look for information at:
764 =item * RT: CPAN's request tracker
766 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Rsync-Mirror-Recent>
768 =item * AnnoCPAN: Annotated CPAN documentation
770 L<http://annocpan.org/dist/File-Rsync-Mirror-Recent>
774 L<http://cpanratings.perl.org/d/File-Rsync-Mirror-Recent>
778 L<http://search.cpan.org/dist/File-Rsync-Mirror-Recent>
783 =head1 ACKNOWLEDGEMENTS
785 Thanks to RJBS for module-starter.
787 =head1 COPYRIGHT & LICENSE
789 Copyright 2008 Andreas König.
791 This program is free software; you can redistribute it and/or modify it
792 under the same terms as Perl itself.
797 1; # End of File::Rsync::Mirror::Recent