since we dropped the ttl stuff from uptodate method we need a new spot that triggers...
[rersyncrecent.git] / lib / File / Rsync / Mirror / Recent.pm
blobd748b550ab4722f346ea5139172f0c43494a9037
1 package File::Rsync::Mirror::Recent;
3 # use warnings;
4 use strict;
5 use File::Rsync::Mirror::Recentfile;
7 =encoding utf-8
9 =head1 NAME
11 File::Rsync::Mirror::Recent - mirroring via rsync made efficient
13 =cut
15 package File::Rsync::Mirror::Recent;
17 use File::Basename qw(basename dirname fileparse);
18 use File::Copy qw(cp);
19 use File::Path qw(mkpath);
20 use File::Rsync;
21 use File::Rsync::Mirror::Recentfile::FakeBigFloat qw(:all);
22 use File::Temp;
23 use List::Pairwise qw(mapp grepp);
24 use List::Util qw(first max);
25 use Scalar::Util qw(reftype);
26 use Storable;
27 use Time::HiRes qw();
28 use YAML::Syck;
30 use version; our $VERSION = qv('0.0.1');
32 =head1 SYNOPSIS
34 B<!!!! PRE-ALPHA ALERT !!!!>
36 Nothing in here is believed to be stable, nothing yet intended for
37 public consumption. The plan is to provide a script in one of the next
38 releases that acts as a frontend for all the backend functionality.
39 Option and method names will very likely change.
41 File::Rsync::Mirror::Recent is acting at a higher level than
42 File::Rsync::Mirror::Recentfile. File::Rsync::Mirror::Recent
43 establishes a view on a collection of recentfile objects and provides
44 abstractions spanning multiple intervals associated with those.
46 B<Mostly unimplemented as of yet>. Will need to shift some accessors
47 from recentfile to recent.
49 Reader/mirrorer:
51 my $rr = File::Rsync::Mirror::Recent->new
53 ignore_link_stat_errors => 1,
54 localroot => "/home/ftp/pub/PAUSE/authors",
55 remote => "pause.perl.org::authors/RECENT.recent",
56 rsync_options => {
57 compress => 1,
58 links => 1,
59 times => 1,
60 checksum => 1,
62 verbose => 1,
64 $rr->rmirror;
66 =head1 EXPORT
68 No exports.
70 =head1 CONSTRUCTORS
72 =head2 my $obj = CLASS->new(%hash)
74 Constructor. On every argument pair the key is a method name and the
75 value is an argument to that method name.
77 =cut
79 sub new {
80 my($class, @args) = @_;
81 my $self = bless {}, $class;
82 while (@args) {
83 my($method,$arg) = splice @args, 0, 2;
84 $self->$method($arg);
86 return $self;
89 =head1 ACCESSORS
91 =cut
93 my @accessors;
95 BEGIN {
96 @accessors =
98 "__pathdb",
99 "_max_one_state", # when we have no time left but want
100 # at least get one file per
101 # iteration to avoid procrastination
102 "_principal_recentfile",
103 "_recentfiles",
104 "_rsync",
105 "_runstatusfile", # frequenty dumps all rfs
106 "_logfilefordone", # turns on _logfile on all DONE
107 # systems (disk intensive)
110 my @pod_lines =
111 split /\n/, <<'=cut'; push @accessors, grep {s/^=item\s+//} @pod_lines; }
113 =over 4
115 =item ignore_link_stat_errors
117 as in F:R:M:Recentfile
119 =item local
121 Option to specify the local principal file for operations with a local
122 collection of recentfiles.
124 =item localroot
126 as in F:R:M:Recentfile
128 =item max_files_per_connection
130 as in F:R:M:Recentfile
132 =item remote
136 =item remoteroot
138 XXX: this is (ATM) different from Recentfile!!!
140 =item remote_recentfile
142 Rsync address of the remote C<RECENT.recent> symlink or whichever name
143 the principal remote recentfile has.
145 =item rsync_options
147 Things like compress, links, times or checksums. Passed in to the
148 File::Rsync object used to run the mirror.
150 =item ttl
152 Minimum time before fetching the principal recentfile again.
154 =item verbose
156 Boolean to turn on a bit verbosity. This is in experimental stage, we
157 will have to decide which output we want when the dust has settled.
159 =back
161 =cut
163 use accessors @accessors;
165 =head1 METHODS
167 =head2 $arrayref = $obj->news ( %options )
169 Testing this ATM with:
171 perl -Ilib bin/rrr-news \
172 -after 1217200539 \
173 -max 12 \
174 -local /home/ftp/pub/PAUSE/authors/RECENT.recent
176 perl -Ilib bin/rrr-news \
177 -after 1217200539 \
178 -rsync=compress=1 \
179 -rsync=links=1 \
180 -localroot /home/ftp/pub/PAUSE/authors/ \
181 -remote pause.perl.org::authors/RECENT.recent
182 -verbose
184 Note: all parameters that can be passed to recent_events can also be specified here.
186 Note: all data are kept in memory
188 =cut
190 sub news {
191 my($self, %opt) = @_;
192 my $local = $self->local;
193 unless ($local) {
194 if (my $remote = $self->remote) {
195 my $localroot;
196 if ($localroot = $self->localroot) {
197 # nice, they know what they are doing
198 } else {
199 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
201 } else {
202 die "Alert: neither local nor remote specified, cannot continue";
205 my $rfs = $self->recentfiles;
206 my $ret = [];
207 my $before;
208 for my $rf (@$rfs) {
209 my %locopt = %opt;
210 $locopt{before} = $before;
211 if ($opt{max}) {
212 $locopt{max} -= scalar @$ret;
213 last if $locopt{max} <= 0;
215 $locopt{info} = {};
216 my $res = $rf->recent_events(%locopt);
217 if (@$res){
218 push @$ret, @$res;
220 if ($opt{max} && scalar @$ret > $opt{max}) {
221 last;
223 if ($opt{after}){
224 if ( $locopt{info}{last} && _bigfloatlt($locopt{info}{last}{epoch},$opt{after}) ) {
225 last;
227 if ( _bigfloatgt($opt{after},$locopt{info}{first}{epoch}) ) {
228 last;
231 if (!@$res){
232 next;
234 $before = $res->[-1]{epoch};
235 $before = $opt{before} if $opt{before} && _bigfloatlt($opt{before},$before);
237 $ret;
240 =head2 overview ( %options )
242 returns a small table that summarizes the state of all recentfiles
243 collected in this Recent object.
245 $options{verbose}=1 increases the number of columns displayed.
247 Here is an example output:
249 Ival Cnt Max Min Span Util Cloud
250 1h 47 1225053014.38 1225049650.91 3363.47 93.4% ^ ^
251 6h 324 1225052939.66 1225033394.84 19544.82 90.5% ^ ^
252 1d 437 1225049651.53 1224966402.53 83248.99 96.4% ^ ^
253 1W 1585 1225039015.75 1224435339.46 603676.29 99.8% ^ ^
254 1M 5855 1225017376.65 1222428503.57 2588873.08 99.9% ^ ^
255 1Q 17066 1224578930.40 1216803512.90 7775417.50 100.0% ^ ^
256 1Y 15901 1223966162.56 1216766820.67 7199341.89 22.8% ^ ^
257 Z 9909 1223966162.56 1216766820.67 7199341.89 - ^ ^
259 I<Max> is the name of the interval.
261 I<Cnt> is the number of entries in this recentfile.
263 I<Max> is the highest(first) epoch in this recentfile, rounded.
265 I<Min> is the lowest(last) epoch in thie recentfile, rounded.
267 I<Span> is the timespan currently covered, rounded.
269 I<Util> is I<Span> devided by the designated timespan of this
270 recentfile.
272 I<Cloud> is ascii art illustrating the sequence of the Max and Min
273 timestamps.
275 =cut
276 sub overview {
277 my($self,%options) = @_;
278 my $rfs = $self->recentfiles;
279 my(@s,%rank);
280 RECENTFILE: for my $rf (@$rfs) {
281 my $re=$rf->recent_events;
282 my $rfsummary;
283 if (@$re) {
284 my $span = $re->[0]{epoch}-$re->[-1]{epoch};
285 my $merged = $rf->merged;
286 $rfsummary =
288 "Ival",
289 $rf->interval,
290 "Cnt",
291 scalar @$re,
292 "Dirtymark",
293 $rf->dirtymark ? sprintf("%.2f",$rf->dirtymark) : "-",
294 "Merged",
295 ($rf->interval eq "Z"
299 sprintf ("%.2f", $merged->{epoch} || 0)),
300 "Max",
301 sprintf ("%.2f", $re->[0]{epoch}),
302 "Min",
303 sprintf ("%.2f", $re->[-1]{epoch}),
304 "Span",
305 sprintf ("%.2f", $span),
306 "Util", # u9n:)
307 ($rf->interval eq "Z"
311 sprintf ("%5.1f%%", 100 * $span / $rf->interval_secs)
314 @rank{mapp {$b} grepp {$a =~ /^(Max|Min)$/} @$rfsummary} = ();
315 } else {
316 next RECENTFILE;
318 push @s, $rfsummary;
320 @rank{sort {$b <=> $a} keys %rank} = 1..keys %rank;
321 my $maxrank = max values %rank;
322 for my $rfsummary (@s) {
323 my $string = " " x $maxrank;
324 my @borders;
325 for my $ele (qw(Max Min)) {
326 my($r) = mapp {$b} grepp {$a eq $ele} @$rfsummary;
327 push @borders, $rank{$r}-1;
329 for ($borders[0],$borders[1]) {
330 substr($string,$_,1) = "^";
332 push @$rfsummary, "Cloud", $string;
334 unless ($options{verbose}) {
335 my %filter = map {($_=>1)} qw(Ival Cnt Max Min Span Util Cloud);
336 for (@s) {
337 $_ = [mapp {($a,$b)} grepp {!!$filter{$a}} @$_];
340 my @sprintf;
341 for (my $i = 0; $i <= $#{$s[0]}; $i+=2) {
342 my $maxlength = max ((map { length $_->[$i+1] } @s), length $s[0][$i]);
343 push @sprintf, "%" . $maxlength . "s";
345 my $sprintf = join " ", @sprintf;
346 $sprintf .= "\n";
347 my $headline = sprintf $sprintf, mapp {$a} @{$s[0]};
348 join "", $headline, map { sprintf $sprintf, mapp {$b} @$_ } @s;
351 =head2 _pathdb
353 (Private method, not for public use) Keeping track of already handled
354 files. Currently it is a hash, will probably become a database with
355 its own accessors.
357 =cut
359 sub _pathdb {
360 my($self, $set) = @_;
361 if ($set) {
362 $self->__pathdb ($set);
364 my $pathdb = $self->__pathdb;
365 unless (defined $pathdb) {
366 $self->__pathdb(+{});
368 return $self->__pathdb;
371 =head2 $recentfile = $obj->principal_recentfile ()
373 returns the principal recentfile of this tree.
375 =cut
377 sub principal_recentfile {
378 my($self) = @_;
379 my $prince = $self->_principal_recentfile;
380 return $prince if defined $prince;
381 my $local = $self->local;
382 if ($local) {
383 $prince = File::Rsync::Mirror::Recentfile->new_from_file ($local);
384 } else {
385 if (my $remote = $self->remote) {
386 my $localroot;
387 if ($localroot = $self->localroot) {
388 # nice, they know what they are doing
389 } else {
390 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
392 my $rf0 = $self->_recentfile_object_for_remote;
393 $prince = $rf0;
394 } else {
395 die "Alert: neither local nor remote specified, cannot continue";
398 $self->_principal_recentfile($prince);
399 return $prince;
402 =head2 $recentfiles_arrayref = $obj->recentfiles ()
404 returns a reference to the complete list of recentfile objects that
405 describe this tree. No guarantee is given that the represented
406 recentfiles exist or have been read. They are just bare objects.
408 =cut
410 sub recentfiles {
411 my($self) = @_;
412 my $rfs = $self->_recentfiles;
413 return $rfs if defined $rfs;
414 my $rf0 = $self->principal_recentfile;
415 my $pathdb = $self->_pathdb;
416 $rf0->_pathdb ($pathdb);
417 my $aggregator = $rf0->aggregator;
418 my @rf = $rf0;
419 for my $agg (@$aggregator) {
420 my $nrf = $rf0->_sparse_clone;
421 $nrf->interval ( $agg );
422 $nrf->have_mirrored ( 0 );
423 $nrf->_pathdb ( $pathdb );
424 push @rf, $nrf;
426 $self->_recentfiles(\@rf);
427 return \@rf;
430 =head2 $success = $obj->rmirror ( %options )
432 XXX WORK IN PROGRESS XXX
434 Mirrors all recentfiles of the I<remote> address working through all
435 of them, mirroring their contents.
437 Testing this ATM with:
439 use File::Rsync::Mirror::Recent;
440 my $rrr = File::Rsync::Mirror::Recent->new(
441 ignore_link_stat_errors => 1,
442 localroot => "/home/ftp/pub/PAUSE/authors",
443 remote => "pause.perl.org::authors/RECENT.recent",
444 max_files_per_connection => 5000,
445 rsync_options => {
446 compress => 1,
447 links => 1,
448 times => 1,
449 checksum => 0,
451 verbose => 1,
452 _runstatusfile => "recent-rmirror-state.yml",
453 _logfilefordone => "recent-rmirror-donelog.log",
455 $rrr->rmirror ( "skip-deletes" => 1, loop => 1 );
457 And since the above seems to work, I try now without the llop
458 parameter:
460 use File::Rsync::Mirror::Recent;
461 my @rrr;
462 for my $t ("authors","modules"){
463 my $rrr = File::Rsync::Mirror::Recent->new(
464 ignore_link_stat_errors => 1,
465 localroot => "/home/ftp/pub/PAUSE/$t",
466 remote => "pause.perl.org::$t/RECENT.recent",
467 max_files_per_connection => 512,
468 rsync_options => {
469 compress => 1,
470 links => 1,
471 times => 1,
472 checksum => 0,
474 verbose => 1,
475 _runstatusfile => "recent-rmirror-state-$t.yml",
476 _logfilefordone => "recent-rmirror-donelog-$t.log",
477 ttl => 5,
479 push @rrr, $rrr;
481 while (){
482 for my $rrr (@rrr){
483 $rrr->rmirror ( "skip-deletes" => 1 );
485 warn "sleeping 23\n"; sleep 23;
489 =cut
491 sub rmirror {
492 my($self, %options) = @_;
494 # my $rf0 = $self->_recentfile_object_for_remote;
495 my $rfs = $self->recentfiles;
497 my $_once_per_20s = sub {
498 $self->principal_recentfile->seed;
500 $_once_per_20s->();
501 my $_sigint = sub {
502 # XXX exit gracefully (reminder)
504 my $minimum_time_per_loop = 20; # XXX needs accessor: warning, if
505 # set too low, we do nothing but
506 # mirror the principal!
507 if (my $logfile = $self->_logfilefordone) {
508 for my $i (0..$#$rfs) {
509 $rfs->[$i]->done->_logfile($logfile);
512 LOOP: while () {
513 my $ttleave = time + $minimum_time_per_loop;
514 RECENTFILE: for my $i (0..$#$rfs) {
515 my $rf = $rfs->[$i];
516 if (my $file = $self->_runstatusfile) {
517 $self->_rmirror_runstatusfile ($file, $i, \%options);
519 if (time > $ttleave){
520 # Must make sure that one file can get fetched in any case
521 $self->_max_one_state(1);
523 if ($rf->uptodate){
524 if ($i < $#$rfs){
525 $rfs->[$i+1]->done->merge($rf->done);
527 $rf->get_remote_recentfile_as_tempfile;
528 next RECENTFILE;
529 } else {
530 WORKUNIT: while (time < $ttleave) {
531 if ($rf->uptodate) {
532 $self->_rmirror_sleep_per_connection ($i);
533 next RECENTFILE;
534 } else {
535 $self->_rmirror_mirror ($i, \%options);
540 $self->_max_one_state(0);
541 if ($rfs->[-1]->uptodate) {
542 $self->_rmirror_cleanup;
543 if ($options{loop}) {
544 } else {
545 last LOOP;
548 my $sleep = $ttleave - time;
549 if ($sleep > 0.01) {
550 $self->_rmirror_endofloop_sleep ($sleep);
551 } else {
552 # negative time not invented yet:)
554 $_once_per_20s->();
558 sub _rmirror_mirror {
559 my($self, $i, $options) = @_;
560 my $rfs = $self->recentfiles;
561 my $rf = $rfs->[$i];
562 my %locopt = %$options;
563 if ($self->_max_one_state) {
564 $locopt{max} = 1;
566 $locopt{piecemeal} = 1;
567 $rf->mirror (%locopt);
570 sub _rmirror_sleep_per_connection {
571 my($self, $i) = @_;
572 my $rfs = $self->recentfiles;
573 my $rf = $rfs->[$i];
574 my $sleep = $rf->sleep_per_connection;
575 $sleep = 0.42 unless defined $sleep; # XXX accessor!
576 Time::HiRes::sleep $sleep;
577 $rfs->[$i+1]->done->merge($rf->done) if $i < $#$rfs;
580 sub _rmirror_cleanup {
581 my($self) = @_;
582 my $pathdb = $self->_pathdb();
583 for my $k (keys %$pathdb) {
584 delete $pathdb->{$k};
586 my $rfs = $self->recentfiles;
587 for my $i (0..$#$rfs-1) {
588 my $thismerged = $rfs->[$i]->merged;
589 my $next = $rfs->[$i+1];
590 my $nextminmax = $next->minmax;
591 # warn "DEBUG: i[$i] nextminmaxmax[$nextminmax->{max}] thismergedepoch[$thismerged->{epoch}]";
592 if (not defined $thismerged->{epoch} or _bigfloatlt($nextminmax->{max},$thismerged->{epoch})){
593 $next->seed;
594 warn sprintf "DEBUG: %s seeded\n", $next->interval;
599 sub _rmirror_runstatusfile {
600 my($self, $file, $i, $options) = @_;
601 my $rfs = $self->recentfiles;
602 require YAML::Syck;
603 YAML::Syck::DumpFile
605 $file,
606 {i => $i,
607 options => $options,
608 self => [keys %$self], # passing $self leaks, dclone refuses because of globs
609 time => time,
610 uptodate => {map {($_=>$rfs->[$_]->uptodate)} 0..$#$rfs},
614 sub _rmirror_endofloop_sleep {
615 my($self, $sleep) = @_;
616 if ($self->verbose) {
617 printf STDERR
619 "Dorm %d (%s secs)\n",
620 time,
621 $sleep,
623 sleep $sleep;
627 # mirrors the recentfile and instantiates the recentfile object
628 sub _recentfile_object_for_remote {
629 my($self) = @_;
630 # get the remote recentfile
631 my $rrfile = $self->remote or die "Alert: cannot construct a recentfile object without the 'remote' attribute";
632 my $splitter = qr{(.+)/([^/]*)};
633 my($remoteroot,$rfilename) = $rrfile =~ $splitter;
634 $self->remoteroot($remoteroot);
635 my $abslfile;
636 if (!defined $rfilename) {
637 die "Alert: Cannot resolve '$rrfile', does not match $splitter";
638 } elsif (not length $rfilename or $rfilename eq "RECENT.recent") {
639 ($abslfile,$rfilename) = $self->_resolve_rfilename($rfilename);
641 my @need_args =
643 "ignore_link_stat_errors",
644 "localroot",
645 "max_files_per_connection",
646 "remoteroot",
647 "rsync_options",
648 "verbose",
649 "ttl",
651 my $rf0;
652 unless ($abslfile) {
653 $rf0 = File::Rsync::Mirror::Recentfile->new (map {($_ => $self->$_)} @need_args);
654 $rf0->resolve_recentfilename($rfilename);
655 $abslfile = $rf0->get_remote_recentfile_as_tempfile ();
657 $rf0 = File::Rsync::Mirror::Recentfile->new_from_file ( $abslfile );
658 for my $override (@need_args) {
659 $rf0->$override ( $self->$override );
661 $rf0->is_slave (1);
662 return $rf0;
665 sub _resolve_rfilename {
666 my($self, $rfilename) = @_;
667 $rfilename = "RECENT.recent" unless length $rfilename;
668 my $abslfile = undef;
669 if ($rfilename =~ /\.recent$/) {
670 # may be a file *or* a symlink,
671 $abslfile = $self->_fetch_as_tempfile ($rfilename);
672 while (-l $abslfile) {
673 my $symlink = readlink $abslfile;
674 if ($symlink =~ m|/|) {
675 die "FIXME: filenames containing '/' not supported, got '$symlink'";
677 my $localrfile = File::Spec->catfile($self->localroot, $rfilename);
678 if (-e $localrfile) {
679 my $old_symlink = readlink $localrfile;
680 if ($old_symlink eq $symlink) {
681 unlink $abslfile or die "Cannot unlink '$abslfile': $!";
682 } else {
683 unlink $localrfile; # may fail
684 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
686 } else {
687 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
689 $abslfile = $self->_fetch_as_tempfile ($symlink);
692 return ($abslfile, $rfilename);
695 # takes a basename, returns an absolute name, does not delete the
696 # file, throws the $fh away. Caller must rename or unlink
697 sub _fetch_as_tempfile {
698 my($self, $rfile) = @_;
699 my($suffix) = $rfile =~ /(\.[^\.]+)$/;
700 $suffix = "" unless defined $suffix;
701 my $fh = File::Temp->new
702 (TEMPLATE => sprintf(".FRMRecent-%s-XXXX",
703 $rfile,
705 DIR => $self->localroot,
706 SUFFIX => $suffix,
707 UNLINK => 0,
709 my $rsync = File::Rsync->new($self->rsync_options);
710 $rsync->exec
712 src => join("/",$self->remoteroot,$rfile),
713 dst => $fh->filename,
714 ) or die "Could not mirror '$rfile' to $fh\: ".join(" ",$rsync->err);
715 return $fh->filename;
718 =head2 (void) $obj->rmirror_loop
720 (TBD) Run rmirror in an endless loop.
722 =cut
724 sub rmirror_loop {
725 my($self) = @_;
726 die "FIXME";
729 =head2 $hash = $obj->verify
731 (TBD) Runs find on the local tree, collects all existing files from
732 recentfiles, compares their names. The returned hash contains the keys
733 C<todelete> and C<toadd>.
735 =cut
737 sub verify {
738 my($self) = @_;
739 die "FIXME";
742 =head1 AUTHOR
744 Andreas König
746 =head1 BUGS
748 Please report any bugs or feature requests through the web interface
750 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Rsync-Mirror-Recent>.
751 I will be notified, and then you'll automatically be notified of
752 progress on your bug as I make changes.
754 =head1 SUPPORT
756 You can find documentation for this module with the perldoc command.
758 perldoc File::Rsync::Mirror::Recent
760 You can also look for information at:
762 =over 4
764 =item * RT: CPAN's request tracker
766 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Rsync-Mirror-Recent>
768 =item * AnnoCPAN: Annotated CPAN documentation
770 L<http://annocpan.org/dist/File-Rsync-Mirror-Recent>
772 =item * CPAN Ratings
774 L<http://cpanratings.perl.org/d/File-Rsync-Mirror-Recent>
776 =item * Search CPAN
778 L<http://search.cpan.org/dist/File-Rsync-Mirror-Recent>
780 =back
783 =head1 ACKNOWLEDGEMENTS
785 Thanks to RJBS for module-starter.
787 =head1 COPYRIGHT & LICENSE
789 Copyright 2008 Andreas König.
791 This program is free software; you can redistribute it and/or modify it
792 under the same terms as Perl itself.
795 =cut
797 1; # End of File::Rsync::Mirror::Recent