more pod shuffling
[rersyncrecent.git] / lib / File / Rsync / Mirror / Recent.pm
blobaaf8df2545063f2654c1bdea06004627e1b707e0
1 package File::Rsync::Mirror::Recent;
3 # use warnings;
4 use strict;
5 use File::Rsync::Mirror::Recentfile;
7 =encoding utf-8
9 =head1 NAME
11 File::Rsync::Mirror::Recent - mirroring via rsync made efficient
13 =cut
15 package File::Rsync::Mirror::Recent;
17 use File::Basename qw(basename dirname fileparse);
18 use File::Copy qw(cp);
19 use File::Path qw(mkpath);
20 use File::Rsync;
21 use File::Rsync::Mirror::Recentfile::FakeBigFloat qw(:all);
22 use File::Temp;
23 use List::Pairwise qw(mapp grepp);
24 use List::Util qw(first max);
25 use Scalar::Util qw(reftype);
26 use Storable;
27 use Time::HiRes qw();
28 use YAML::Syck;
30 use version; our $VERSION = qv('0.0.1');
32 =head1 SYNOPSIS
34 B<!!!! PRE-ALPHA ALERT !!!!>
36 Nothing in here is believed to be stable, nothing yet intended for
37 public consumption. The plan is to provide scripts that act as
38 frontends for all the backend functionality. Option and method names
39 may still change.
41 For the rationale see the section BACKGROUND.
43 The documentation in here is normally not needed because the code is
44 meant to be run from several standalone programs. For a quick
45 overview, see the file README.mirrorcpan and the bin/ directory of the
46 distribution. For the architectural ideas see the section THE
47 ARCHITECTURE OF A COLLECTION OF RECENTFILES below.
49 File::Rsync::Mirror::Recent establishes a view on a collection of
50 File::Rsync::Mirror::Recentfile objects and provides abstractions
51 spanning multiple intervals associated with those.
53 =head1 EXPORT
55 No exports.
57 =head1 CONSTRUCTORS
59 =head2 my $obj = CLASS->new(%hash)
61 Constructor. On every argument pair the key is a method name and the
62 value is an argument to that method name.
64 =cut
# Constructor. Blesses an empty hash into $class, then consumes the
# remaining arguments two at a time as (method name, value) and calls
# each method with its value, in the order given.
sub new {
    my $class = shift;
    my @pairs = @_;
    my $object = bless {}, $class;
    # splice returns an empty list once @pairs is exhausted, which makes
    # the list assignment false and ends the loop.
    while (my($setter, $value) = splice @pairs, 0, 2) {
        $object->$setter($value);
    }
    return $object;
}
76 =head1 ACCESSORS
78 =cut
my @accessors;

# The accessor names are assembled at compile time so that the later
# "use accessors @accessors" statement sees the complete list.
BEGIN {
    # Internal accessors:
    #   _max_one_state   when we have no time left but want to get at
    #                    least one file per iteration to avoid
    #                    procrastination
    #   _runstatusfile   frequently dumps all rfs
    #   _logfilefordone  turns on _logfile on all DONE systems
    #                    (disk intensive)
    @accessors = qw(
        __pathdb
        _max_one_state
        _principal_recentfile
        _recentfiles
        _rsync
        _runstatusfile
        _logfilefordone
    );

    # The here-document below swallows the POD up to its "=cut" line; every
    # "=item NAME" line found in it contributes NAME as a further accessor.
    my @documented =
        split /\n/, <<'=cut'; push @accessors, grep {s/^=item\s+//} @documented; }
100 =over 4
102 =item ignore_link_stat_errors
104 as in F:R:M:Recentfile
106 =item local
108 Option to specify the local principal file for operations with a local
109 collection of recentfiles.
111 =item localroot
113 as in F:R:M:Recentfile
115 =item max_files_per_connection
117 as in F:R:M:Recentfile
119 =item remote
123 =item remoteroot
125 XXX: this is (ATM) different from Recentfile!!!
127 =item remote_recentfile
129 Rsync address of the remote C<RECENT.recent> symlink or whichever name
130 the principal remote recentfile has.
132 =item rsync_options
134 Things like compress, links, times or checksums. Passed in to the
135 File::Rsync object used to run the mirror.
137 =item ttl
139 Minimum time before fetching the principal recentfile again.
141 =item verbose
143 Boolean to turn on a bit of verbosity. This is in an experimental stage; we
144 will have to decide which output we want when the dust has settled.
146 =back
148 =cut
150 use accessors @accessors;
152 =head1 METHODS
154 =head2 $arrayref = $obj->news ( %options )
156 Test this with:
158 perl -Ilib bin/rrr-news \
159 -after 1217200539 \
160 -max 12 \
161 -local /home/ftp/pub/PAUSE/authors/RECENT.recent
163 perl -Ilib bin/rrr-news \
164 -after 1217200539 \
165 -rsync=compress=1 \
166 -rsync=links=1 \
167 -localroot /home/ftp/pub/PAUSE/authors/ \
168 -remote pause.perl.org::authors/RECENT.recent \
169 -verbose
171 Note: all parameters that can be passed to recent_events can also be specified here.
173 Note: all data are kept in memory
175 =cut
# Returns an array reference of recent events collected across all
# recentfiles of this collection, walking from the principal recentfile
# to the later ones in the list.
#
# %opt is handed through to recent_events() of every recentfile. Three
# options are additionally interpreted here:
#   max     total number of events wanted; the budget handed to each
#           recentfile is reduced by what has been collected so far
#   after   stop as soon as a recentfile reports events older than this
#   before  upper bound for the epochs; tightened after each recentfile to
#           the oldest epoch seen so far so that later files do not
#           repeat events
#
# Dies when neither 'local' nor 'remote' is set, or when 'remote' is set
# without 'localroot'.
sub news {
    my($self, %opt) = @_;
    unless ($self->local) {
        if ($self->remote) {
            # nice if they also told us the localroot: then they know
            # what they are doing
            $self->localroot
                or die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
        } else {
            die "Alert: neither local nor remote specified, cannot continue";
        }
    }
    my $rfs = $self->recentfiles;
    my $ret = [];
    # Seed the running bound with the caller's 'before'. Starting from
    # undef would overwrite the option in the per-file copy below, so the
    # first recentfile (and any following an empty one) would be queried
    # without the requested upper bound.
    my $before = $opt{before};
    for my $rf (@$rfs) {
        my %locopt = %opt;
        $locopt{before} = $before;
        if ($opt{max}) {
            $locopt{max} -= scalar @$ret;
            last if $locopt{max} <= 0;
        }
        # recent_events is handed this hash and is expected to fill in the
        # 'first' and 'last' entries read below
        $locopt{info} = {};
        my $res = $rf->recent_events(%locopt);
        if (@$res){
            push @$ret, @$res;
        }
        if ($opt{max} && scalar @$ret > $opt{max}) {
            last;
        }
        if ($opt{after}){
            if ( $locopt{info}{last} && _bigfloatlt($locopt{info}{last}{epoch},$opt{after}) ) {
                last;
            }
            if ( _bigfloatgt($opt{after},$locopt{info}{first}{epoch}) ) {
                last;
            }
        }
        if (!@$res){
            next;
        }
        # continue below the oldest event returned so far, but never above
        # the caller's own bound
        $before = $res->[-1]{epoch};
        $before = $opt{before} if $opt{before} && _bigfloatlt($opt{before},$before);
    }
    return $ret;
}
227 =head2 overview ( %options )
229 returns a small table that summarizes the state of all recentfiles
230 collected in this Recent object.
232 $options{verbose}=1 increases the number of columns displayed.
234 Here is an example output:
236 Ival Cnt Max Min Span Util Cloud
237 1h 47 1225053014.38 1225049650.91 3363.47 93.4% ^ ^
238 6h 324 1225052939.66 1225033394.84 19544.82 90.5% ^ ^
239 1d 437 1225049651.53 1224966402.53 83248.99 96.4% ^ ^
240 1W 1585 1225039015.75 1224435339.46 603676.29 99.8% ^ ^
241 1M 5855 1225017376.65 1222428503.57 2588873.08 99.9% ^ ^
242 1Q 17066 1224578930.40 1216803512.90 7775417.50 100.0% ^ ^
243 1Y 15901 1223966162.56 1216766820.67 7199341.89 22.8% ^ ^
244 Z 9909 1223966162.56 1216766820.67 7199341.89 - ^ ^
246 I<Ival> is the name of the interval.
248 I<Cnt> is the number of entries in this recentfile.
250 I<Max> is the highest(first) epoch in this recentfile, rounded.
252 I<Min> is the lowest(last) epoch in this recentfile, rounded.
254 I<Span> is the timespan currently covered, rounded.
256 I<Util> is I<Span> divided by the designated timespan of this
257 recentfile.
259 I<Cloud> is ascii art illustrating the sequence of the Max and Min
260 timestamps.
262 =cut
# Returns a multi-line string: a right-aligned table with one headline and
# one row per recentfile that currently has events. Recentfiles without
# events are left out. When no recentfile has any event an empty string is
# returned instead of deriving a layout from a nonexistent first row.
#
# $options{verbose} true keeps all columns (adds Dirtymark and Merged);
# otherwise only Ival Cnt Max Min Span Util Cloud are shown.
sub overview {
    my($self,%options) = @_;
    my $rfs = $self->recentfiles;
    # @s:    one arrayref per row, a flat list of (column name, value) pairs
    # %rank: formatted Max/Min timestamp => position in the Cloud column
    my(@s,%rank);
  RECENTFILE: for my $rf (@$rfs) {
        my $re=$rf->recent_events;
        next RECENTFILE unless @$re;
        my $span = $re->[0]{epoch}-$re->[-1]{epoch};
        my $merged = $rf->merged;
        my $rfsummary =
            [
             "Ival",
             $rf->interval,
             "Cnt",
             scalar @$re,
             "Dirtymark",
             $rf->dirtymark ? sprintf("%.2f",$rf->dirtymark) : "-",
             "Merged",
             ($rf->interval eq "Z"
              ?
              "-"
              :
              sprintf ("%.2f", $merged->{epoch} || 0)),
             "Max",
             sprintf ("%.2f", $re->[0]{epoch}),
             "Min",
             sprintf ("%.2f", $re->[-1]{epoch}),
             "Span",
             sprintf ("%.2f", $span),
             "Util", # u9n:)
             ($rf->interval eq "Z"
              ?
              "-"
              :
              sprintf ("%5.1f%%", 100 * $span / $rf->interval_secs)
             ),
            ];
        # register every distinct Max/Min value; ranks are assigned below
        @rank{mapp {$b} grepp {$a =~ /^(Max|Min)$/} @$rfsummary} = ();
        push @s, $rfsummary;
    }
    # nothing to tabulate: there is no first row to take the headline from
    return "" unless @s;
    # highest timestamp gets rank 1, the next lower one rank 2, ...
    @rank{sort {$b <=> $a} keys %rank} = 1..keys %rank;
    my $maxrank = max values %rank;
    for my $rfsummary (@s) {
        # one character cell per rank; mark the cells of this row's Max
        # and Min with a caret
        my $string = " " x $maxrank;
        my @borders;
        for my $ele (qw(Max Min)) {
            my($r) = mapp {$b} grepp {$a eq $ele} @$rfsummary;
            push @borders, $rank{$r}-1;
        }
        for ($borders[0],$borders[1]) {
            substr($string,$_,1) = "^";
        }
        push @$rfsummary, "Cloud", $string;
    }
    unless ($options{verbose}) {
        my %filter = map {($_=>1)} qw(Ival Cnt Max Min Span Util Cloud);
        for (@s) {
            $_ = [mapp {($a,$b)} grepp {!!$filter{$a}} @$_];
        }
    }
    # one "%<width>s" per column, wide enough for the headline and all values
    my @sprintf;
    for (my $i = 0; $i <= $#{$s[0]}; $i+=2) {
        my $maxlength = max ((map { length $_->[$i+1] } @s), length $s[0][$i]);
        push @sprintf, "%" . $maxlength . "s";
    }
    my $sprintf = join " ", @sprintf;
    $sprintf .= "\n";
    my $headline = sprintf $sprintf, mapp {$a} @{$s[0]};
    return join "", $headline, map { sprintf $sprintf, mapp {$b} @$_ } @s;
}
338 =head2 _pathdb
340 Keeping track of already handled files. Currently it is a hash, will
341 probably become a database with its own accessors.
343 =cut
# Combined getter/setter for the path database kept in the __pathdb slot.
# A true $set replaces the stored value; if nothing is stored afterwards,
# a fresh empty hash is installed. Always returns the stored value.
sub _pathdb {
    my($self, $set) = @_;
    $self->__pathdb($set) if $set;
    my $current = $self->__pathdb;
    $self->__pathdb(+{}) unless defined $current;
    # read back through the accessor instead of trusting the setter's
    # return value
    return $self->__pathdb;
}
357 =head2 $recentfile = $obj->principal_recentfile ()
359 returns the principal recentfile of this tree.
361 =cut
# Returns the principal recentfile object, constructing and caching it on
# first use: from the file named by 'local' when that is set, otherwise via
# _recentfile_object_for_remote when 'remote' and 'localroot' are set.
# Dies when 'remote' is set without 'localroot' and when neither 'local'
# nor 'remote' is set.
sub principal_recentfile {
    my($self) = @_;
    my $cached = $self->_principal_recentfile;
    return $cached if defined $cached;

    my $principal;
    if (my $local = $self->local) {
        $principal = File::Rsync::Mirror::Recentfile->new_from_file ($local);
    } elsif ($self->remote) {
        # nice if they also told us the localroot: then they know what
        # they are doing
        $self->localroot
            or die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
        $principal = $self->_recentfile_object_for_remote;
    } else {
        die "Alert: neither local nor remote specified, cannot continue";
    }
    $self->_principal_recentfile($principal);
    return $principal;
}
388 =head2 $recentfiles_arrayref = $obj->recentfiles ()
390 returns a reference to the complete list of recentfile objects that
391 describe this tree. No guarantee is given that the represented
392 recentfiles exist or have been read. They are just bare objects.
394 =cut
# Returns (and caches) the array reference of all recentfile objects of
# this collection: the principal recentfile first, followed by one sparse
# clone of it per entry of its aggregator, with the interval set to that
# entry and have_mirrored reset to 0. All objects share this collection's
# path database.
sub recentfiles {
    my($self) = @_;
    my $known = $self->_recentfiles;
    return $known if defined $known;

    my $principal = $self->principal_recentfile;
    my $pathdb = $self->_pathdb;
    $principal->_pathdb ($pathdb);

    my $intervals = $principal->aggregator;
    my @collection = ($principal);
    for my $interval (@$intervals) {
        my $clone = $principal->_sparse_clone;
        $clone->interval ($interval);
        $clone->have_mirrored (0);
        $clone->_pathdb ($pathdb);
        push @collection, $clone;
    }
    $self->_recentfiles(\@collection);
    return \@collection;
}
416 =head2 $success = $obj->rmirror ( %options )
418 Mirrors all recentfiles of the I<remote> address working through all
419 of them, mirroring their contents.
421 Test this with:
423 use File::Rsync::Mirror::Recent;
424 my $rrr = File::Rsync::Mirror::Recent->new(
425 ignore_link_stat_errors => 1,
426 localroot => "/home/ftp/pub/PAUSE/authors",
427 remote => "pause.perl.org::authors/RECENT.recent",
428 max_files_per_connection => 5000,
429 rsync_options => {
430 compress => 1,
431 links => 1,
432 times => 1,
433 checksum => 0,
435 verbose => 1,
436 _runstatusfile => "recent-rmirror-state.yml",
437 _logfilefordone => "recent-rmirror-donelog.log",
439 $rrr->rmirror ( "skip-deletes" => 1, loop => 1 );
441 Or try without the loop parameter and write the loop yourself:
443 use File::Rsync::Mirror::Recent;
444 my @rrr;
445 for my $t ("authors","modules"){
446 my $rrr = File::Rsync::Mirror::Recent->new(
447 ignore_link_stat_errors => 1,
448 localroot => "/home/ftp/pub/PAUSE/$t",
449 remote => "pause.perl.org::$t/RECENT.recent",
450 max_files_per_connection => 512,
451 rsync_options => {
452 compress => 1,
453 links => 1,
454 times => 1,
455 checksum => 0,
457 verbose => 1,
458 _runstatusfile => "recent-rmirror-state-$t.yml",
459 _logfilefordone => "recent-rmirror-donelog-$t.log",
460 ttl => 5,
462 push @rrr, $rrr;
464 while (){
465 for my $rrr (@rrr){
466 $rrr->rmirror ( "skip-deletes" => 1 );
468 warn "sleeping 23\n"; sleep 23;
472 =cut
# Works through all recentfiles of this collection and mirrors what they
# describe, round after round.
#
# Options (passed on to the per-file mirror calls as well):
#   loop   when true, keep going after the last recentfile is up to date;
#          otherwise return at that point
#
# Returns 1 once the last recentfile reports being up to date (only
# reachable without 'loop'). Errors are not caught here.
sub rmirror {
    my($self, %options) = @_;

    # my $rf0 = $self->_recentfile_object_for_remote;
    my $rfs = $self->recentfiles;

    # run once before the first round and again at the end of every round
    my $_every_20_seconds = sub {
        $self->principal_recentfile->seed;
    };
    $_every_20_seconds->();
    # XXX exit gracefully on SIGINT (reminder, not implemented)
    my $minimum_time_per_loop = 20; # XXX needs accessor: warning, if
                                    # set too low, we do nothing but
                                    # mirror the principal!
    if (my $logfile = $self->_logfilefordone) {
        for my $i (0..$#$rfs) {
            $rfs->[$i]->done->_logfile($logfile);
        }
    }
  LOOP: while () {
        my $ttleave = time + $minimum_time_per_loop;
      RECENTFILE: for my $i (0..$#$rfs) {
            my $rf = $rfs->[$i];
            if (my $file = $self->_runstatusfile) {
                $self->_rmirror_runstatusfile ($file, $i, \%options);
            }
            if (time > $ttleave){
                # Must make sure that one file can get fetched in any case
                $self->_max_one_state(1);
            }
            if ($rf->seeded) {
                $self->_rmirror_mirror ($i, \%options);
            } elsif ($rf->uptodate){
                # hand what this recentfile has done over to the next one
                if ($i < $#$rfs){
                    $rfs->[$i+1]->done->merge($rf->done);
                }
                # no further seed necessary because "every_20_seconds" does it
                next RECENTFILE;
            } else {
                # neither seeded nor up to date: keep mirroring this one
                # until it is up to date or the time slice is used up
              WORKUNIT: while (time < $ttleave) {
                    if ($rf->uptodate) {
                        $self->_rmirror_sleep_per_connection ($i);
                        next RECENTFILE;
                    } else {
                        $self->_rmirror_mirror ($i, \%options);
                    }
                }
            }
        }
        $self->_max_one_state(0);
        if ($rfs->[-1]->uptodate) {
            $self->_rmirror_cleanup;
            last LOOP unless $options{loop};
        }
        my $sleep = $ttleave - time;
        if ($sleep > 0.01) {
            $self->_rmirror_endofloop_sleep ($sleep);
        } else {
            # negative time not invented yet:)
        }
        $_every_20_seconds->();
    }
    # Without this the value of the call would be whatever the loop exit
    # happens to leave behind.
    return 1;
}
# Runs one mirror pass on the recentfile at index $i. The caller's options
# (a hash reference, left untouched) are copied; 'piecemeal' is always
# switched on and 'max' is forced to 1 while the _max_one_state flag is
# set. Returns whatever the recentfile's mirror method returns.
sub _rmirror_mirror {
    my($self, $i, $options) = @_;
    my $rf = $self->recentfiles->[$i];
    my %mirror_options =
        (
         %$options,
         ($self->_max_one_state ? (max => 1) : ()),
         piecemeal => 1,
        );
    return $rf->mirror (%mirror_options);
}
# Pauses for the sleep_per_connection of the recentfile at index $i
# (0.42 seconds when that is undefined), then, unless it is the last
# recentfile, merges its done record into the one of the next recentfile.
sub _rmirror_sleep_per_connection {
    my($self, $i) = @_;
    my $rfs = $self->recentfiles;
    my $current = $rfs->[$i];
    my $pause = $current->sleep_per_connection;
    defined $pause or $pause = 0.42; # XXX accessor!
    Time::HiRes::sleep($pause);
    $rfs->[$i+1]->done->merge($current->done) if $i < $#$rfs;
}
# Called at the end of a round once the last recentfile is up to date.
# Empties the shared path database in place and seeds every recentfile
# whose predecessor has no merged epoch yet or has merged beyond that
# recentfile's current maximum; each seeding is reported with a DEBUG
# warning.
sub _rmirror_cleanup {
    my($self) = @_;
    my $pathdb = $self->_pathdb();
    # clear in place: the recentfile objects hold the same hash reference
    %$pathdb = ();
    my $rfs = $self->recentfiles;
    for my $i (0..$#$rfs-1) {
        my $thismerged = $rfs->[$i]->merged;
        my $next = $rfs->[$i+1];
        my $nextminmax = $next->minmax;
        if (not defined $thismerged->{epoch} or _bigfloatlt($nextminmax->{max},$thismerged->{epoch})){
            $next->seed;
            # the values go in as arguments, not into the format string,
            # so that they can never be taken for conversions
            warn sprintf
                (
                 "DEBUG: next iv %s seeded since next-minmax-max[%s]lt this-merged-epoch[%s]\n",
                 $next->interval,
                 (defined $nextminmax->{max} ? $nextminmax->{max} : ""),
                 (defined $thismerged->{epoch} ? $thismerged->{epoch} : ""),
                );
        }
    }
}
# Dumps a snapshot of the running rmirror into the YAML file $file: the
# index $i of the recentfile being worked on, the options, the names of
# the attributes set on this object, the current time and the uptodate
# state of every recentfile keyed by its index.
sub _rmirror_runstatusfile {
    my($self, $file, $i, $options) = @_;
    my $rfs = $self->recentfiles;
    require YAML::Syck;
    my %status =
        (
         i => $i,
         options => $options,
         self => [keys %$self], # passing $self leaks, dclone refuses because of globs
         time => time,
        );
    $status{uptodate} = {map {($_=>$rfs->[$_]->uptodate)} 0..$#$rfs};
    YAML::Syck::DumpFile ($file, \%status);
}
# Sleeps $sleep seconds at the end of an rmirror round. In verbose mode
# the current time and the duration are announced on STDERR first.
sub _rmirror_endofloop_sleep {
    my($self, $sleep) = @_;
    printf STDERR "Dorm %d (%s secs)\n", time, $sleep
        if $self->verbose;
    sleep $sleep;
}
# Mirrors the remote principal recentfile and instantiates a recentfile
# object from the local copy.
#
# The 'remote' attribute is split at its last slash into the remote root
# (stored in 'remoteroot') and the recentfile name. An empty name or the
# name "RECENT.recent" is resolved through _resolve_rfilename; any other
# name is fetched through a temporary Recentfile object. The settings
# listed in @need_args are copied from this object onto the result, which
# is marked as slave.
#
# Dies when 'remote' is not set or contains no slash. In that case
# 'remoteroot' is left as it was.
sub _recentfile_object_for_remote {
    my($self) = @_;
    # get the remote recentfile
    my $rrfile = $self->remote or die "Alert: cannot construct a recentfile object without the 'remote' attribute";
    my $splitter = qr{(.+)/([^/]*)};
    my($remoteroot,$rfilename) = $rrfile =~ $splitter;
    # validate before touching any state: a failed match must not leave
    # remoteroot overwritten with undef
    if (!defined $rfilename) {
        die "Alert: Cannot resolve '$rrfile', does not match $splitter";
    }
    $self->remoteroot($remoteroot);
    my $abslfile;
    if (not length $rfilename or $rfilename eq "RECENT.recent") {
        ($abslfile,$rfilename) = $self->_resolve_rfilename($rfilename);
    }
    my @need_args =
        (
         "ignore_link_stat_errors",
         "localroot",
         "max_files_per_connection",
         "remoteroot",
         "rsync_options",
         "verbose",
         "ttl",
        );
    my $rf0;
    unless ($abslfile) {
        # not resolved above: let a throwaway object with our settings
        # fetch the named recentfile
        $rf0 = File::Rsync::Mirror::Recentfile->new (map {($_ => $self->$_)} @need_args);
        $rf0->resolve_recentfilename($rfilename);
        $abslfile = $rf0->get_remote_recentfile_as_tempfile ();
    }
    $rf0 = File::Rsync::Mirror::Recentfile->new_from_file ( $abslfile );
    # our own settings win over whatever the file brought along
    for my $override (@need_args) {
        $rf0->$override ( $self->$override );
    }
    $rf0->is_slave (1);
    return $rf0;
}
# Resolves a principal recentfile name that may be a symlink on the remote
# side. An empty name defaults to "RECENT.recent". Names not ending in
# ".recent" are not fetched at all.
#
# While the fetched temporary file is a symlink, the local counterpart
# under localroot is brought in line with it (kept if it already points to
# the same target, replaced otherwise) and the link target is fetched
# next. Dies on link targets containing a slash and on failing
# unlink/rename of the temporary file.
#
# Returns the list (path of the fetched temporary file or undef, name).
sub _resolve_rfilename {
    my($self, $rfilename) = @_;
    length $rfilename or $rfilename = "RECENT.recent";
    return (undef, $rfilename) unless $rfilename =~ /\.recent$/;

    # may be a file *or* a symlink,
    my $tmpfile = $self->_fetch_as_tempfile ($rfilename);
    while (-l $tmpfile) {
        my $target = readlink $tmpfile;
        die "FIXME: filenames containing '/' not supported, got '$target'"
            if $target =~ m|/|;
        my $localrfile = File::Spec->catfile($self->localroot, $rfilename);
        my $unchanged = 0;
        if (-e $localrfile) {
            my $old_target = readlink $localrfile;
            if ($old_target eq $target) {
                $unchanged = 1;
            } else {
                unlink $localrfile; # may fail
            }
        }
        if ($unchanged) {
            # local link is current, the fetched copy is not needed
            unlink $tmpfile or die "Cannot unlink '$tmpfile': $!";
        } else {
            rename $tmpfile, $localrfile or die "Cannot rename to '$localrfile': $!";
        }
        $tmpfile = $self->_fetch_as_tempfile ($target);
    }
    return ($tmpfile, $rfilename);
}
# Takes a basename, fetches that file from remoteroot via rsync into a
# temporary file inside localroot and returns the name of that file. The
# file is not deleted automatically and the $fh is thrown away: the caller
# must rename or unlink the returned file.
#
# Dies when rsync fails. As the caller never gets to know the name in
# that case, the temporary file is removed here before dying.
sub _fetch_as_tempfile {
    my($self, $rfile) = @_;
    # keep the suffix of the fetched file on the temporary file
    my($suffix) = $rfile =~ /(\.[^\.]+)$/;
    $suffix = "" unless defined $suffix;
    my $fh = File::Temp->new
        (TEMPLATE => sprintf(".FRMRecent-%s-XXXX",
                             $rfile,
                            ),
         DIR => $self->localroot,
         SUFFIX => $suffix,
         UNLINK => 0,
        );
    my $tmpname = $fh->filename;
    my $rsync = File::Rsync->new($self->rsync_options);
    my $ok = $rsync->exec
        (
         src => join("/",$self->remoteroot,$rfile),
         dst => $tmpname,
        );
    unless ($ok) {
        my $errors = join(" ",$rsync->err);
        unlink $tmpname; # best effort, the error below matters more
        die "Could not mirror '$rfile' to $tmpname: $errors";
    }
    return $tmpname;
}
703 =head1 THE ARCHITECTURE OF A COLLECTION OF RECENTFILES
705 The idea is that we want to have a short file that records really
706 recent changes. So that a fresh mirror can be kept fresh as long as
707 the connectivity is given. Then we want longer files that record the
708 history before. So when the mirror falls behind the update period
709 reflected in the shortest file, it can complement the list of recent
710 file events with the next one. And if this is not long enough we want
711 another one, again a bit longer. And we want one that completes the
712 history back to the oldest file. The index files do contain the
713 complete list of current files. The longer a period covered by an
714 index file is gone the less often the index file is updated. For
715 practical reasons adjacent files will often overlap a bit but this is
716 neither necessary nor enforced. That's the basic idea. The following
717 example represents a tree that has a few updates every day:
719 RECENT.recent -> RECENT-1h.yaml
720 RECENT-6h.yaml
721 RECENT-1d.yaml
722 RECENT-1W.yaml
723 RECENT-1M.yaml
724 RECENT-1Q.yaml
725 RECENT-1Y.yaml
726 RECENT-Z.yaml
728 The first file is the principal file, insofar as it is the one that is
729 written first after a filesystem change. Usually a symlink links to it
730 with a filename that has the same filenameroot and the suffix
731 C<.recent>. On systems that do not support symlinks there is a plain
732 copy maintained instead.
734 The last file, the Z file, contains the complementary files that are
735 in none of the other files. It never contains C<deletes>. Besides
736 this it serves the role of a recovery mechanism or spill over pond.
737 When things go wrong, it's a valuable controlling instance to hold the
738 differences between the collection of limited interval files and the
739 actual filesystem.
741 =head2 THE INDIVIDUAL RECENTFILE
743 A I<recentfile> consists of a hash that has two keys: C<meta> and
744 C<recent>. The C<meta> part has metadata and the C<recent> part has a
745 list of fileobjects.
747 =head2 THE META PART
749 Here we find things that are pretty much self-explanatory: all
750 lowercase attributes are accessors and as such explained somewhere
751 above in this manpage. The uppercase attribute C<Producers> contains
752 version information about involved software components. Nothing to
753 worry about as I believe.
755 =head2 THE RECENT PART
757 This is the interesting part. Every entry refers to some filesystem
758 change (with path, epoch, type).
760 The I<epoch> value is the point in time when some change was
761 I<registered> but can be set to arbitrary values. Do not be tempted to
762 believe that the entry has a direct relation to something like
763 modification time or change time on the filesystem level. They are not
764 reflecting release dates. (If you want exact release dates: Barbie is
765 providing a database of them. See
766 http://use.perl.org/~barbie/journal/37907).
768 All these entries can be divided into two types (denoted by the
769 I<type> attribute): C<new>s and C<delete>s. Changes and creations are
770 C<new>s. Deletes are C<delete>s.
772 Besides an I<epoch> and a I<type> attribute we find a third one:
773 I<path>. This path is relative to the directory we find the
774 I<recentfile> in.
776 The order of the entries in the I<recentfile> is by decreasing epoch
777 attribute. These are unique floating point numbers. When the server
778 has ntp running correctly, then the timestamps are usually reflecting
779 a real epoch. If time is running backwards, we trump the system epoch
780 with strictly monotonically increasing floating point timestamps and
781 guarantee they are unique.
783 =head1 CORRUPTION AND RECOVERY
785 If the origin host breaks the promise to deliver consistent and
786 complete I<recentfiles> then the way back to sanity shall be achieved
787 through traditional rsyncing between the hosts. But don't forget to
788 report it as a bug:)
790 =head1 BACKGROUND
792 This is about speeding up rsync operation on large trees. Uses a small
793 metadata cocktail and pull technology.
795 =head2 NON-COMPETITORS
797 File::Mirror JWU/File-Mirror/File-Mirror-0.10.tar.gz only local trees
798 Mirror::YAML ADAMK/Mirror-YAML-0.03.tar.gz some sort of inner circle
799 Net::DownloadMirror KNORR/Net-DownloadMirror-0.04.tar.gz FTP sites and stuff
800 Net::MirrorDir KNORR/Net-MirrorDir-0.05.tar.gz ditto
801 Net::UploadMirror KNORR/Net-UploadMirror-0.06.tar.gz ditto
802 Pushmi::Mirror CLKAO/Pushmi-v1.0.0.tar.gz something SVK
804 rsnapshot www.rsnapshot.org focus on backup
805 csync www.csync.org more like unison
806 multi-rsync sourceforge 167893 lan push to many
808 =head2 COMPETITORS
810 The problem to solve, which clusters, ftp mirrors and other
811 replicated datasets like CPAN share, is how to transfer only a minimum
812 amount of data to determine the diff between two hosts.
814 Normally it takes a long time to determine the diff itself before it
815 can be transferred. Known solutions at the time of this writing are
816 csync2, and rsync 3 batch mode.
818 For many years the best solution was csync2 which solves the problem
819 by maintaining a sqlite database on both ends and talking a highly
820 sophisticated protocol to quickly determine which files to send and
821 which to delete at any given point in time. Csync2 is often
822 inconvenient because it is push technology and the act of syncing
823 demands quite an intimate relationship between the sender and the
824 receiver. This is hard to achieve in an environment of loosely coupled
825 sites where the number of sites is large or connections are
826 unreliable or network topology is changing.
828 Rsync 3 batch mode works around these problems by providing rsync-able
829 batch files which allow receiving nodes to replay the history of the
830 other nodes. This reduces the need to have an incestuous relation but
831 it has the disadvantage that these batch files replicate the contents
832 of the involved files. This seems inappropriate when the nodes already
833 have a means of communicating over rsync.
835 rersyncrecent solves this problem with a couple of (usually 2-10)
836 index files which cover different overlapping time intervals. The
837 master writes these files and the clients/slaves can construct the
838 full tree from the information contained in them. The most recent
839 index file usually covers the last seconds or minutes or hours of the
840 tree and depending on the needs, slaves can rsync every few seconds or
841 minutes and then bring their trees in full sync.
843 The rersyncrecent mode was developed for CPAN but I hope it is a
844 convenient and economic general purpose solution. I'm looking forward
845 to seeing a CPAN backbone that is only a few seconds behind PAUSE. And
846 then ... the first FUSE based CPAN filesystem anyone?
848 =head1 FUTURE DIRECTIONS
850 Currently the origin server must keep track of injected and removed
851 files. Should be supported by an inotify-based assistant.
853 =head1 SEE ALSO
855 L<File::Rsync::Mirror::Recentfile>,
856 L<File::Rsync::Mirror::Recentfile::Done>,
857 L<File::Rsync::Mirror::Recentfile::FakeBigFloat>
859 =head1 BUGS
861 Please report any bugs or feature requests through the web interface at
863 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Rsync-Mirror-Recent>.
864 I will be notified, and then you'll automatically be notified of
865 progress on your bug as I make changes.
867 =head1 SUPPORT
869 You can find documentation for this module with the perldoc command.
871 perldoc File::Rsync::Mirror::Recent
873 You can also look for information at:
875 =over 4
877 =item * RT: CPAN's request tracker
879 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Rsync-Mirror-Recent>
881 =item * AnnoCPAN: Annotated CPAN documentation
883 L<http://annocpan.org/dist/File-Rsync-Mirror-Recent>
885 =item * CPAN Ratings
887 L<http://cpanratings.perl.org/d/File-Rsync-Mirror-Recent>
889 =item * Search CPAN
891 L<http://search.cpan.org/dist/File-Rsync-Mirror-Recent>
893 =back
896 =head1 ACKNOWLEDGEMENTS
898 Thanks to RJBS for module-starter.
900 =head1 AUTHOR
902 Andreas König
904 =head1 COPYRIGHT & LICENSE
906 Copyright 2008, 2009 Andreas König.
908 This program is free software; you can redistribute it and/or modify it
909 under the same terms as Perl itself.
912 =cut
914 1; # End of File::Rsync::Mirror::Recent