remove debugging leftover; prepare release
[rersyncrecent.git] / lib / File / Rsync / Mirror / Recent.pm
blob9b0f0fa7bcbb19a87ed473553db00c10abef81e8
1 package File::Rsync::Mirror::Recent;
3 # use warnings;
4 use strict;
5 use File::Rsync::Mirror::Recentfile;
7 =encoding utf-8
9 =head1 NAME
11 File::Rsync::Mirror::Recent - mirroring via rsync made efficient
13 =cut
15 package File::Rsync::Mirror::Recent;
17 use File::Basename qw(basename dirname fileparse);
18 use File::Copy qw(cp);
19 use File::Path qw(mkpath);
20 use File::Rsync;
21 use File::Rsync::Mirror::Recentfile::FakeBigFloat qw(:all);
22 use File::Temp;
23 use List::Pairwise qw(mapp grepp);
24 use List::Util qw(first max);
25 use Scalar::Util qw(reftype);
26 use Storable;
27 use Time::HiRes qw();
28 use YAML::Syck;
30 use version; our $VERSION = qv('0.0.4');
32 =head1 SYNOPSIS
34 B<!!!! PRE-ALPHA ALERT !!!!>
36 Nothing in here is believed to be stable, nothing yet intended for
37 public consumption. The plan is to provide scripts that act as
38 frontends for all the backend functionality. Option and method names
39 may still change.
41 For the rationale see the section BACKGROUND.
43 The documentation in here is normally not needed because the code is
44 meant to be run from several standalone programs. For a quick
45 overview, see the file README.mirrorcpan and the bin/ directory of the
46 distribution. For the architectural ideas see the section THE
47 ARCHITECTURE OF A COLLECTION OF RECENTFILES below.
49 File::Rsync::Mirror::Recent establishes a view on a collection of
50 File::Rsync::Mirror::Recentfile objects and provides abstractions
51 spanning multiple time intervals associated with those.
53 =head1 EXPORT
55 No exports.
57 =head1 CONSTRUCTORS
59 =head2 my $obj = CLASS->new(%hash)
61 Constructor. On every argument pair the key is a method name and the
62 value is an argument to that method name.
64 =cut
# Constructor. Blesses an empty hash into $class and then treats the
# remaining arguments as (method name, value) pairs: every method is
# called on the fresh object with its value, in the order given.
# Returns the new object.
sub new {
    my($class, @args) = @_;
    my $self = bless {}, $class;
    # Walk the list pairwise so the setters run in the caller's order.
    for (my $i = 0; $i < @args; $i += 2) {
        my $method = $args[$i];
        $self->$method($args[$i+1]);
    }
    return $self;
}
76 =head1 ACCESSORS
78 =cut
80 my @accessors;
82 BEGIN {
83 @accessors =
85 "__pathdb",
86 "_dirtymark", # keeps track of the dirtymark of the recentfiles
87 "_logfilefordone", # turns on _logfile on all DONE
88 # systems (disk intensive)
89 "_max_one_state", # when we have no time left but want
90 # at least get one file per
91 # iteration to avoid procrastination
92 "_principal_recentfile",
93 "_recentfiles",
94 "_rsync",
95 "_runstatusfile", # frequently dumps all rfs
98 my @pod_lines =
99 split /\n/, <<'=cut'; push @accessors, grep {s/^=item\s+//} @pod_lines; }
101 =over 4
103 =item ignore_link_stat_errors
105 as in F:R:M:Recentfile
107 =item local
109 Option to specify the local principal file for operations with a local
110 collection of recentfiles.
112 =item localroot
114 as in F:R:M:Recentfile
116 =item max_files_per_connection
118 as in F:R:M:Recentfile
120 =item remote
122 The remote principal recentfile in rsync notation. E.g.
124 pause.perl.org::authors/RECENT.recent
126 =item remoteroot
128 as in F:R:M:Recentfile
130 =item remote_recentfile
132 Rsync address of the remote C<RECENT.recent> symlink or whichever name
133 the principal remote recentfile has.
135 =item rsync_options
137 Things like compress, links, times or checksums. Passed in to the
138 File::Rsync object used to run the mirror.
140 =item tempdir
142 as in F:R:M:Recentfile
144 =item ttl
146 Minimum time before fetching the principal recentfile again.
148 =item _verbose
150 Boolean to turn on a bit of verbosity. Use the method C<verbose> to also
151 set the verbosity of associated Recentfile objects.
153 =back
155 =cut
157 use accessors @accessors;
159 =head1 METHODS
161 =head2 $arrayref = $obj->news ( %options )
163 Test this with:
165 perl -Ilib bin/rrr-news \
166 -after 1217200539 \
167 -max 12 \
168 -local /home/ftp/pub/PAUSE/authors/RECENT.recent
170 perl -Ilib bin/rrr-news \
171 -after 1217200539 \
172 -rsync=compress=1 \
173 -rsync=links=1 \
174 -localroot /home/ftp/pub/PAUSE/authors/ \
175 -remote pause.perl.org::authors/RECENT.recent \
176 -verbose
178 Note: all parameters that can be passed to recent_events can also be specified here.
180 Note: all data are kept in memory
182 =cut
# Collects recent events from the recentfiles of this collection, walking
# them in the order returned by recentfiles(), and returns them as one
# array reference.
#
# %opt is handed on to recent_events() of every recentfile; three keys
# are additionally interpreted here:
#   max    - stop as soon as that many events have been collected
#   after  - stop descending into further recentfiles once the current
#            one reaches back beyond this epoch
#   before - upper bound carried over between recentfiles
#
# Dies if neither 'local' nor 'remote' is set, or if 'remote' is set
# without 'localroot'.
sub news {
    my($self, %opt) = @_;
    unless ($self->local) {
        if ($self->remote) {
            unless ($self->localroot) {
                die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
            }
            # else: nice, they know what they are doing
        } else {
            die "Alert: neither local nor remote specified, cannot continue";
        }
    }
    my $rfs = $self->recentfiles;
    my $ret = [];
    # NOTE(review): $before starts out undefined, so the caller's
    # 'before' option is overridden for the first recentfile and only
    # taken into account from the second one on -- confirm this is
    # intended.
    my $before;
    for my $rf (@$rfs) {
        my %locopt = %opt;
        $locopt{before} = $before;
        if ($opt{max}) {
            # only ask for what is still missing
            $locopt{max} -= scalar @$ret;
            last if $locopt{max} <= 0;
        }
        # presumably recent_events reports the first/last event it has
        # seen through this hash -- verify against Recentfile
        my $info = $locopt{info} = {};
        my $res = $rf->recent_events(%locopt);
        if (@$res){
            push @$ret, @$res;
        }
        if ($opt{max} && scalar @$ret > $opt{max}) {
            last;
        }
        if ($opt{after}){
            if ( $info->{last} && _bigfloatlt($info->{last}{epoch},$opt{after}) ) {
                last;
            }
            # Guard like the 'last' check above: when nothing was
            # reported there is no epoch to compare against.
            if ( $info->{first} && _bigfloatgt($opt{after},$info->{first}{epoch}) ) {
                last;
            }
        }
        if (!@$res){
            next;
        }
        # the next (older) recentfile must only deliver what is older
        # than the oldest event we already have
        $before = $res->[-1]{epoch};
        $before = $opt{before} if $opt{before} && _bigfloatlt($opt{before},$before);
    }
    $ret;
}
234 =head2 overview ( %options )
236 returns a small table that summarizes the state of all recentfiles
237 collected in this Recent object.
239 $options{verbose}=1 increases the number of columns displayed.
241 Here is an example output:
243 Ival Cnt Max Min Span Util Cloud
244 1h 47 1225053014.38 1225049650.91 3363.47 93.4% ^ ^
245 6h 324 1225052939.66 1225033394.84 19544.82 90.5% ^ ^
246 1d 437 1225049651.53 1224966402.53 83248.99 96.4% ^ ^
247 1W 1585 1225039015.75 1224435339.46 603676.29 99.8% ^ ^
248 1M 5855 1225017376.65 1222428503.57 2588873.08 99.9% ^ ^
249 1Q 17066 1224578930.40 1216803512.90 7775417.50 100.0% ^ ^
250 1Y 15901 1223966162.56 1216766820.67 7199341.89 22.8% ^ ^
251 Z 9909 1223966162.56 1216766820.67 7199341.89 - ^ ^
253 I<Ival> is the name of the interval.
255 I<Cnt> is the number of entries in this recentfile.
257 I<Max> is the highest(first) epoch in this recentfile, rounded.
259 I<Min> is the lowest(last) epoch in this recentfile, rounded.
261 I<Span> is the timespan currently covered, rounded.
263 I<Util> is I<Span> divided by the designated timespan of this
264 recentfile.
266 I<Cloud> is ascii art illustrating the sequence of the Max and Min
267 timestamps.
269 =cut
270 sub overview {
271 my($self,%options) = @_;
272 my $rfs = $self->recentfiles;
273 my(@s,%rank);
274 RECENTFILE: for my $rf (@$rfs) {
275 my $re=$rf->recent_events;
276 my $rfsummary;
277 if (@$re) {
278 my $span = $re->[0]{epoch}-$re->[-1]{epoch};
279 my $merged = $rf->merged;
280 $rfsummary =
282 "Ival",
283 $rf->interval,
284 "Cnt",
285 scalar @$re,
286 "Dirtymark",
287 $rf->dirtymark ? sprintf("%.2f",$rf->dirtymark) : "-",
288 "Merged",
289 ($rf->interval eq "Z"
293 sprintf ("%.2f", $merged->{epoch} || 0)),
294 "Max",
295 sprintf ("%.2f", $re->[0]{epoch}),
296 "Min",
297 sprintf ("%.2f", $re->[-1]{epoch}),
298 "Span",
299 sprintf ("%.2f", $span),
300 "Util", # u9n:)
301 ($rf->interval eq "Z"
305 sprintf ("%5.1f%%", 100 * $span / $rf->interval_secs)
308 @rank{mapp {$b} grepp {$a =~ /^(Max|Min)$/} @$rfsummary} = ();
309 } else {
310 next RECENTFILE;
312 push @s, $rfsummary;
314 @rank{sort {$b <=> $a} keys %rank} = 1..keys %rank;
315 my $maxrank = max values %rank;
316 for my $rfsummary (@s) {
317 my $string = " " x $maxrank;
318 my @borders;
319 for my $ele (qw(Max Min)) {
320 my($r) = mapp {$b} grepp {$a eq $ele} @$rfsummary;
321 push @borders, $rank{$r}-1;
323 for ($borders[0],$borders[1]) {
324 substr($string,$_,1) = "^";
326 push @$rfsummary, "Cloud", $string;
328 unless ($options{verbose}) {
329 my %filter = map {($_=>1)} qw(Ival Cnt Max Min Span Util Cloud);
330 for (@s) {
331 $_ = [mapp {($a,$b)} grepp {!!$filter{$a}} @$_];
334 my @sprintf;
335 for (my $i = 0; $i <= $#{$s[0]}; $i+=2) {
336 my $maxlength = max ((map { length $_->[$i+1] } @s), length $s[0][$i]);
337 push @sprintf, "%" . $maxlength . "s";
339 my $sprintf = join " ", @sprintf;
340 $sprintf .= "\n";
341 my $headline = sprintf $sprintf, mapp {$a} @{$s[0]};
342 join "", $headline, map { sprintf $sprintf, mapp {$b} @$_ } @s;
345 =head2 _pathdb
347 Keeping track of already handled files. Currently it is a hash, will
348 probably become a database with its own accessors.
350 =cut
# Getter/setter for the path database stored in the __pathdb accessor.
# A true argument replaces the stored value; if nothing is stored yet an
# empty hash is created on demand. Always returns the stored value.
sub _pathdb {
    my($self, $set) = @_;
    $self->__pathdb ($set) if $set;
    # lazily initialize so that callers can rely on getting a hashref
    $self->__pathdb(+{}) unless defined $self->__pathdb;
    return $self->__pathdb;
}
364 =head2 $recentfile = $obj->principal_recentfile ()
366 returns the principal recentfile object of this tree.
368 =cut
369 # mirrors the recentfile and instantiates the recentfile object
# Mirrors the remote principal recentfile into a tempfile and returns a
# Recentfile object (marked as slave) built from it.
#
# The 'remote' attribute is split at its last slash into remoteroot and
# filename. An empty filename or "RECENT.recent" is resolved through
# _principal_recentfile_fromremote_resosymlink; any other name is
# fetched by a throwaway Recentfile object. Dies when 'remote' is unset
# or cannot be split.
sub _principal_recentfile_fromremote {
    my($self) = @_;
    # get the remote recentfile
    my $rrfile = $self->remote or die "Alert: cannot construct a recentfile object without the 'remote' attribute";
    my $splitter = qr{(.+)/([^/]*)};
    my($remoteroot,$rfilename) = $rrfile =~ $splitter;
    # Validate before touching the object, so that a failed split does
    # not leave remoteroot overwritten with undef.
    unless (defined $rfilename) {
        die "Alert: Cannot resolve '$rrfile', does not match $splitter";
    }
    $self->remoteroot($remoteroot);
    my($abslfile, $fh);
    if (not length $rfilename or $rfilename eq "RECENT.recent") {
        ($abslfile,$rfilename,$fh) = $self->_principal_recentfile_fromremote_resosymlink($rfilename);
    }
    # attributes that are copied from this object to the recentfile
    my @need_args =
        (
         "ignore_link_stat_errors",
         "localroot",
         "max_files_per_connection",
         "remoteroot",
         "rsync_options",
         "tempdir",
         "ttl",
         "verbose",
        );
    my $rf0;
    unless ($abslfile) {
        # no tempfile yet: let a temporary recentfile object fetch it
        $rf0 = File::Rsync::Mirror::Recentfile->new (map {($_ => $self->$_)} @need_args);
        $rf0->split_rfilename($rfilename);
        $abslfile = $rf0->get_remote_recentfile_as_tempfile ();
    }
    $rf0 = File::Rsync::Mirror::Recentfile->new_from_file ( $abslfile );
    $rf0->_current_tempfile ( $abslfile );
    $rf0->_current_tempfile_fh ( $fh );
    $rf0->_use_tempfile (1);
    # the file content must not win over our own settings
    for my $override (@need_args) {
        $rf0->$override ( $self->$override );
    }
    $rf0->is_slave (1);
    return $rf0;
}
# Returns the principal recentfile object, creating and caching it on
# first use: from the file named by 'local' if that is set, otherwise
# from 'remote' (which requires 'localroot'). Dies if neither 'local'
# nor 'remote' is set, or if 'remote' is set without 'localroot'.
sub principal_recentfile {
    my($self) = @_;
    my $cached = $self->_principal_recentfile;
    return $cached if defined $cached;
    my $rf0;
    if (my $local = $self->local) {
        $rf0 = File::Rsync::Mirror::Recentfile->new_from_file ($local);
    } elsif ($self->remote) {
        $self->localroot
            or die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
        $rf0 = $self->_principal_recentfile_fromremote;
    } else {
        die "Alert: neither local nor remote specified, cannot continue";
    }
    $self->_principal_recentfile($rf0);
    return $rf0;
}
434 =head2 $recentfiles_arrayref = $obj->recentfiles ()
436 returns a reference to the complete list of recentfile objects that
437 describe this tree. No guarantee is given that the represented
438 recentfiles exist or have been read. They are just bare objects.
440 =cut
# Returns (and caches) an array reference holding the principal
# recentfile followed by one sparse clone of it per interval listed in
# its aggregator. All of them share this object's pathdb; the clones
# are marked as not yet mirrored.
sub recentfiles {
    my($self) = @_;
    my $cached = $self->_recentfiles;
    return $cached if defined $cached;
    my $principal = $self->principal_recentfile;
    my $pathdb = $self->_pathdb;
    $principal->_pathdb ($pathdb);
    my $intervals = $principal->aggregator;
    my @rf = ($principal);
    for my $interval (@$intervals) {
        my $clone = $principal->_sparse_clone;
        $clone->interval ( $interval );
        $clone->have_mirrored ( 0 );
        $clone->_pathdb ( $pathdb );
        push @rf, $clone;
    }
    $self->_recentfiles(\@rf);
    return \@rf;
}
462 =head2 $success = $obj->rmirror ( %options )
464 Mirrors all recentfiles of the I<remote> address working through all
465 of them, mirroring their contents.
467 Test this with:
469 use File::Rsync::Mirror::Recent;
470 my $rrr = File::Rsync::Mirror::Recent->new(
471 ignore_link_stat_errors => 1,
472 localroot => "/home/ftp/pub/PAUSE/authors",
473 remote => "pause.perl.org::authors/RECENT.recent",
474 max_files_per_connection => 5000,
475 rsync_options => {
476 compress => 1,
477 links => 1,
478 times => 1,
479 checksum => 0,
481 verbose => 1,
482 _runstatusfile => "recent-rmirror-state.yml",
483 _logfilefordone => "recent-rmirror-donelog.log",
485 $rrr->rmirror ( "skip-deletes" => 1, loop => 1 );
487 Or try without the loop parameter and write the loop yourself:
489 use File::Rsync::Mirror::Recent;
490 my @rrr;
491 for my $t ("authors","modules"){
492 my $rrr = File::Rsync::Mirror::Recent->new(
493 ignore_link_stat_errors => 1,
494 localroot => "/home/ftp/pub/PAUSE/$t",
495 remote => "pause.perl.org::$t/RECENT.recent",
496 max_files_per_connection => 512,
497 rsync_options => {
498 compress => 1,
499 links => 1,
500 times => 1,
501 checksum => 0,
503 verbose => 1,
504 _runstatusfile => "recent-rmirror-state-$t.yml",
505 _logfilefordone => "recent-rmirror-donelog-$t.log",
506 ttl => 5,
508 push @rrr, $rrr;
510 while (){
511 for my $rrr (@rrr){
512 $rrr->rmirror ( "skip-deletes" => 1 );
514 warn "sleeping 23\n"; sleep 23;
518 =cut
# Calls seed(1) on every recentfile of this collection.
sub _fullseed {
    my($self) = @_;
    my $rfs = $self->recentfiles;
    for my $rf (@$rfs) {
        $rf->seed(1);
    }
}
# Mirrors the files described by all recentfiles of this collection.
#
# Options are passed on to the mirror() call of the recentfiles (see
# _rmirror_mirror); the option 'loop' is additionally interpreted here:
# when true the method keeps running forever, otherwise it returns once
# the last recentfile reports uptodate.
#
# Every pass through the outer loop is budgeted with at least
# $minimum_time_per_loop seconds; at the end of a pass the principal
# recentfile is seeded again.
sub rmirror {
    my($self, %options) = @_;

    my $rfs = $self->recentfiles;

    my $_every_20_seconds = sub {
        $self->principal_recentfile->seed;
    };
    $_every_20_seconds->();
    # XXX exit gracefully on SIGINT (reminder, no handler installed yet)

    # XXX needs accessor: warning, if set too low, we do nothing but
    # mirror the principal!
    my $minimum_time_per_loop = 20;

    if (my $logfile = $self->_logfilefordone) {
        for my $i (0..$#$rfs) {
            $rfs->[$i]->done->_logfile($logfile);
        }
    }
    # remember the dirtymark we started with so that _rmirror_mirror
    # can detect when it changes
    if (my $dirtymark = $self->principal_recentfile->dirtymark) {
        my $mydm = $self->_dirtymark;
        if (!defined $mydm or $dirtymark ne $mydm) {
            $self->_dirtymark($dirtymark);
        }
    }
  LOOP: while () {
        my $ttleave = time + $minimum_time_per_loop;
      RECENTFILE: for my $i (0..$#$rfs) {
            my $rf = $rfs->[$i];
            if (my $file = $self->_runstatusfile) {
                $self->_rmirror_runstatusfile ($file, $i, \%options);
            }
            if (time > $ttleave){
                # Must make sure that one file can get fetched in any case
                $self->_max_one_state(1);
            }
            if ($rf->seeded) {
                $self->_rmirror_mirror ($i, \%options);
            } elsif ($rf->uptodate){
                if ($i < $#$rfs){
                    $rfs->[$i+1]->done->merge($rf->done);
                }
                # no further seed necessary because "every_20_seconds" does it
                next RECENTFILE;
            }
            # keep working on this recentfile until it is uptodate or
            # the time budget of this pass is used up
          WORKUNIT: while (time < $ttleave) {
                if ($rf->uptodate) {
                    $self->_rmirror_sleep_per_connection ($i);
                    next RECENTFILE;
                }
                $self->_rmirror_mirror ($i, \%options);
            }
        }
        $self->_max_one_state(0);
        if ($rfs->[-1]->uptodate) {
            $self->_rmirror_cleanup;
            last LOOP unless $options{loop};
        }
        my $sleep = $ttleave - time;
        if ($sleep > 0.01) {
            # negative time not invented yet:) so only sleep when
            # there is budget left
            $self->_rmirror_endofloop_sleep ($sleep);
        }
        $_every_20_seconds->();
    }
}
# Runs one piecemeal mirror() call on recentfile number $i, passing on
# the caller's options (limited to max => 1 while _max_one_state is
# set). If the recentfile afterwards carries a dirtymark different from
# the one remembered in this object, the new one is stored and all
# recentfiles are seeded again.
sub _rmirror_mirror {
    my($self, $i, $options) = @_;
    my $rf = $self->recentfiles->[$i];
    my %locopt =
        (
         %$options,
         ($self->_max_one_state ? (max => 1) : ()),
         piecemeal => 1,
        );
    $rf->mirror (%locopt);
    my $dirtymark = $rf->dirtymark or return;
    my $known = $self->_dirtymark;
    return if defined $known and $dirtymark eq $known;
    $self->_dirtymark($dirtymark);
    $self->_fullseed;
}
# Sleeps for the sleep_per_connection of recentfile number $i (0.42
# seconds if that is undefined) and then, unless $i is the last
# recentfile, merges its done-object into the done-object of the next
# recentfile.
sub _rmirror_sleep_per_connection {
    my($self, $i) = @_;
    my $rfs = $self->recentfiles;
    my $rf = $rfs->[$i];
    my $pause = $rf->sleep_per_connection;
    unless (defined $pause) {
        $pause = 0.42; # XXX accessor!
    }
    Time::HiRes::sleep($pause);
    if ($i < $#$rfs) {
        $rfs->[$i+1]->done->merge($rf->done);
    }
}
# Empties the pathdb (in place) and then compares every recentfile with
# its successor: the successor is seeded again when this recentfile has
# no merged epoch, or when the successor's maximum epoch is lower than
# this recentfile's merged epoch.
sub _rmirror_cleanup {
    my($self) = @_;
    my $pathdb = $self->_pathdb();
    %$pathdb = ();
    my $rfs = $self->recentfiles;
    for my $i (0 .. $#$rfs - 1) {
        my $merged = $rfs->[$i]->merged;
        my $successor = $rfs->[$i+1];
        my $minmax = $successor->minmax;
        my $stale = !defined $merged->{epoch}
            || _bigfloatlt($minmax->{max},$merged->{epoch});
        $successor->seed if $stale;
    }
}
# Dumps a small status snapshot as YAML into $file: the index $i of the
# recentfile currently being worked on, the options in effect, the
# attribute names of this object, the current time and the uptodate
# value of every recentfile keyed by its index.
sub _rmirror_runstatusfile {
    my($self, $file, $i, $options) = @_;
    my $rfs = $self->recentfiles;
    require YAML::Syck;
    my %status =
        (
         i => $i,
         options => $options,
         self => [keys %$self], # passing $self leaks, dclone refuses because of globs
         time => time,
         uptodate => {map {($_=>$rfs->[$_]->uptodate)} 0..$#$rfs},
        );
    YAML::Syck::DumpFile ($file, \%status);
}
# Pauses for $sleep seconds at the end of an rmirror loop iteration.
# In verbose mode the current time and the duration are announced on
# STDERR first.
sub _rmirror_endofloop_sleep {
    my($self, $sleep) = @_;
    if ($self->verbose) {
        printf STDERR
            (
             "Dorm %d (%s secs)\n",
             time,
             $sleep,
            );
    }
    # Sleep regardless of verbosity: the pause is what paces the
    # mirroring loop, the message is only a diagnostic.
    sleep $sleep;
}
674 # it returns three things: abslfile, rfilename and fh. But the abslfile
675 # (and the fh) is undef unless the rfilename ends in .recent. A weird
676 # interface, my friend.
# Fetches the remote file $rfilename (default "RECENT.recent") into a
# tempfile if its name ends in ".recent" and, as long as what was
# fetched is a symlink, installs that symlink under localroot and
# fetches its target instead.
#
# Returns (abslfile, rfilename, fh): abslfile and fh describe the last
# tempfile fetched and are undef when rfilename does not end in
# ".recent". Dies when a symlink target contains a slash or when a
# file operation fails.
sub _principal_recentfile_fromremote_resosymlink {
    my($self, $rfilename) = @_;
    # File::Spec is used below but not loaded anywhere in this file
    require File::Spec;
    $rfilename = "RECENT.recent" unless length $rfilename;
    my $abslfile = undef;
    my $fh;
    if ($rfilename =~ /\.recent$/) {
        # may be a file *or* a symlink,
        ($abslfile,$fh) = $self->_fetch_as_tempfile ($rfilename);
        while (-l $abslfile) {
            my $symlink = readlink $abslfile;
            if ($symlink =~ m|/|) {
                die "FIXME: filenames containing '/' not supported, got '$symlink'";
            }
            my $localrfile = File::Spec->catfile($self->localroot, $rfilename);
            if (-e $localrfile) {
                my $old_symlink = readlink $localrfile;
                if ($old_symlink eq $symlink) {
                    # local symlink is already correct, drop the copy
                    unlink $abslfile or die "Cannot unlink '$abslfile': $!";
                } else {
                    unlink $localrfile; # may fail
                    rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
                }
            } else {
                rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
            }
            # continue with what the symlink points to
            ($abslfile,$fh) = $self->_fetch_as_tempfile ($symlink);
        }
    }
    return ($abslfile, $rfilename, $fh);
}
708 # takes a basename, returns the absolute name of the tempfile and the
709 # File::Temp object; does not delete the file. Caller must rename or unlink
711 # XXX needs to activate the fh in the rf0 so that it is able to unlink
712 # the file. I would like that the file is used immediately by $rf0
# Rsyncs the file $rfile (a basename below remoteroot) into a freshly
# created tempfile in tempdir (or localroot when tempdir is false) and
# returns (absolute tempfile name, File::Temp object). The tempfile is
# created with UNLINK => 0, so it is not removed automatically. Unless
# the fetched file is a symlink its mode is set to 0644.
#
# Dies when rsync or chmod fail; confesses with a dump of the
# rsync_options when the File::Rsync object cannot be constructed.
sub _fetch_as_tempfile {
    my($self, $rfile) = @_;
    # keep the suffix of the remote name on the tempfile
    my($suffix) = $rfile =~ /(\.[^\.]+)$/;
    $suffix = "" unless defined $suffix;
    my $fh = File::Temp->new
        (TEMPLATE => sprintf(".FRMRecent-%s-XXXX",
                             $rfile,
                            ),
         DIR => $self->tempdir || $self->localroot,
         SUFFIX => $suffix,
         UNLINK => 0,
        );
    my $rsync;
    unless ($rsync = File::Rsync->new($self->rsync_options)) {
        require Carp;
        Carp::confess(YAML::Syck::Dump($self->rsync_options));
    }
    my $dst = $fh->filename;
    $rsync->exec
        (
         src => join("/",$self->remoteroot,$rfile),
         dst => $dst,
        ) or die "Could not mirror '$rfile' to $fh\: ".join(" ",$rsync->err);
    unless (-l $dst) {
        my $mode = 0644;
        # report the mode in octal; plain interpolation would print 420
        chmod $mode, $dst
            or die sprintf "Could not chmod %04o '%s': %s", $mode, $dst, $!;
    }
    return($dst,$fh);
}
743 =head2 $verbose = $obj->verbose ( $set )
745 Getter/setter method to set verbosity for this object and all
746 associated Recentfile objects.
748 =cut
# Getter/setter for the verbosity. A defined argument is passed on to
# verbose() of every recentfile of this collection and then stored in
# _verbose. Returns the stored value; an undefined stored value is
# replaced by 0 first.
sub verbose {
    my($self,$set) = @_;
    if (defined $set) {
        my $rfs = $self->recentfiles;
        for my $rf (@$rfs) {
            $rf->verbose($set);
        }
        $self->_verbose ($set);
    }
    my $current = $self->_verbose;
    return $current if defined $current;
    $self->_verbose (0);
    return 0;
}
764 =head1 THE ARCHITECTURE OF A COLLECTION OF RECENTFILES
766 The idea is that we want to have a short file that records really
767 recent changes. So that a fresh mirror can be kept fresh as long as
768 the connectivity is given. Then we want longer files that record the
769 history before. So when the mirror falls behind the update period
770 reflected in the shortest file, it can complement the list of recent
771 file events with the next one. And if this is not long enough we want
772 another one, again a bit longer. And we want one that completes the
773 history back to the oldest file. The index files do contain the
774 complete list of current files. The longer a period covered by an
775 index file is gone the less often the index file is updated. For
776 practical reasons adjacent files will often overlap a bit but this is
777 neither necessary nor enforced. That's the basic idea. The following
778 example represents a tree that has a few updates every day:
780 RECENT.recent -> RECENT-1h.yaml
781 RECENT-6h.yaml
782 RECENT-1d.yaml
783 RECENT-1M.yaml
784 RECENT-1W.yaml
785 RECENT-1Q.yaml
786 RECENT-1Y.yaml
787 RECENT-Z.yaml
789 The first file is the principal file, in so far it is the one that is
790 written first after a filesystem change. Usually a symlink links to it
791 with a filename that has the same filenameroot and the suffix
792 C<.recent>. On systems that do not support symlinks there is a plain
793 copy maintained instead.
795 The last file, the Z file, contains the complementary files that are
796 in none of the other files. It never contains C<deletes>. Besides
797 this it serves the role of a recovery mechanism or spill over pond.
798 When things go wrong, it's a valuable controlling instance to hold the
799 differences between the collection of limited interval files and the
800 actual filesystem.
802 =head2 THE INDIVIDUAL RECENTFILE
804 A I<recentfile> consists of a hash that has two keys: C<meta> and
805 C<recent>. The C<meta> part has metadata and the C<recent> part has a
806 list of fileobjects.
808 =head2 THE META PART
810 Here we find things that are pretty much self explaining: all
811 lowercase attributes are accessors and as such explained somewhere
812 above in this manpage. The uppercase attribute C<Producers> contains
813 version information about involved software components. Nothing to
814 worry about as I believe.
816 =head2 THE RECENT PART
818 This is the interesting part. Every entry refers to some filesystem
819 change (with path, epoch, type).
821 The I<epoch> value is the point in time when some change was
822 I<registered> but can be set to arbitrary values. Do not be tempted to
823 believe that the entry has a direct relation to something like
824 modification time or change time on the filesystem level. They are not
825 reflecting release dates. (If you want exact release dates: Barbie is
826 providing a database of them. See
827 http://use.perl.org/~barbie/journal/37907).
829 All these entries can be divided into two types (denoted by the
830 I<type> attribute): C<new>s and C<delete>s. Changes and creations are
831 C<new>s. Deletes are C<delete>s.
833 Besides an I<epoch> and a I<type> attribute we find a third one:
834 I<path>. This path is relative to the directory we find the
835 I<recentfile> in.
837 The order of the entries in the I<recentfile> is by decreasing epoch
838 attribute. These are unique floating point numbers. When the server
839 has ntp running correctly, then the timestamps are usually reflecting
840 a real epoch. If time is running backwards, we trump the system epoch
841 with strictly monotonically increasing floating point timestamps and
842 guarantee they are unique.
844 =head1 CORRUPTION AND RECOVERY
846 If the origin host breaks the promise to deliver consistent and
847 complete I<recentfiles> then the way back to sanity shall be achieved
848 through traditional rsyncing between the hosts. But don't forget to
849 report it as a bug:)
851 =head1 BACKGROUND
853 This is about speeding up rsync operation on large trees. Uses a small
854 metadata cocktail and pull technology.
856 =head2 NON-COMPETITORS
858 File::Mirror JWU/File-Mirror/File-Mirror-0.10.tar.gz only local trees
859 Mirror::YAML ADAMK/Mirror-YAML-0.03.tar.gz some sort of inner circle
860 Net::DownloadMirror KNORR/Net-DownloadMirror-0.04.tar.gz FTP sites and stuff
861 Net::MirrorDir KNORR/Net-MirrorDir-0.05.tar.gz ditto
862 Net::UploadMirror KNORR/Net-UploadMirror-0.06.tar.gz ditto
863 Pushmi::Mirror CLKAO/Pushmi-v1.0.0.tar.gz something SVK
865 rsnapshot www.rsnapshot.org focus on backup
866 csync www.csync.org more like unison
867 multi-rsync sourceforge 167893 lan push to many
869 =head2 COMPETITORS
871 The problem that clusters, ftp mirrors and other replicated
872 datasets like CPAN all share: how to transfer only a minimum
873 amount of data to determine the diff between two hosts.
875 Normally it takes a long time to determine the diff itself before it
876 can be transferred. Known solutions at the time of this writing are
877 csync2, and rsync 3 batch mode.
879 For many years the best solution was csync2 which solves the problem
880 by maintaining a sqlite database on both ends and talking a highly
881 sophisticated protocol to quickly determine which files to send and
882 which to delete at any given point in time. Csync2 is often
883 inconvenient because it is push technology and the act of syncing
884 demands quite an intimate relationship between the sender and the
885 receiver. This is hard to achieve in an environment of loosely coupled
886 sites where the number of sites is large or connections are
887 unreliable or network topology is changing.
889 Rsync 3 batch mode works around these problems by providing rsync-able
890 batch files which allow receiving nodes to replay the history of the
891 other nodes. This reduces the need to have an incestuous relation but
892 it has the disadvantage that these batch files replicate the contents
893 of the involved files. This seems inappropriate when the nodes already
894 have a means of communicating over rsync.
896 rersyncrecent solves this problem with a couple of (usually 2-10)
897 lightweight index files which cover different overlapping time
898 intervals. The master writes these files and the clients/slaves can
899 construct the full tree from the information contained in them. The
900 most recent index file usually covers the last seconds or minutes or
901 hours of the tree and depending on the needs, slaves can rsync every
902 few seconds or minutes and then bring their trees in full sync.
904 The rersyncrecent mode was developed for CPAN but as it is convenient
905 and economic it is also a general purpose solution. I'm looking
906 forward to see a CPAN backbone that is only a few seconds behind
907 PAUSE. And then ... the first FUSE based CPAN filesystem anyone?
909 =head1 FUTURE DIRECTIONS
911 Currently the origin server must keep track of injected and removed
912 files. Should be supported by an inotify-based assistant.
914 Convince other users outside the CPAN like
915 http://fedoraproject.org/wiki/Infrastructure/Mirroring
917 =head1 SEE ALSO
919 L<File::Rsync::Mirror::Recentfile>,
920 L<File::Rsync::Mirror::Recentfile::Done>,
921 L<File::Rsync::Mirror::Recentfile::FakeBigFloat>
923 =head1 BUGS
925 Please report any bugs or feature requests through the web interface
927 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Rsync-Mirror-Recent>.
928 I will be notified, and then you'll automatically be notified of
929 progress on your bug as I make changes.
931 =head1 SUPPORT
933 You can find documentation for this module with the perldoc command.
935 perldoc File::Rsync::Mirror::Recent
937 You can also look for information at:
939 =over 4
941 =item * RT: CPAN's request tracker
943 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Rsync-Mirror-Recent>
945 =item * AnnoCPAN: Annotated CPAN documentation
947 L<http://annocpan.org/dist/File-Rsync-Mirror-Recent>
949 =item * CPAN Ratings
951 L<http://cpanratings.perl.org/d/File-Rsync-Mirror-Recent>
953 =item * Search CPAN
955 L<http://search.cpan.org/dist/File-Rsync-Mirror-Recent>
957 =back
960 =head1 ACKNOWLEDGEMENTS
962 Thanks to RJBS for module-starter.
964 =head1 AUTHOR
966 Andreas König
968 =head1 COPYRIGHT & LICENSE
970 Copyright 2008, 2009 Andreas König.
972 This program is free software; you can redistribute it and/or modify it
973 under the same terms as Perl itself.
976 =cut
978 1; # End of File::Rsync::Mirror::Recent