upcoming new release
[rersyncrecent.git] / lib / File / Rsync / Mirror / Recent.pm
blob14d97f31368a2029c202ce95cb5fbe1c4e98b867
1 package File::Rsync::Mirror::Recent;
3 # use warnings;
4 use strict;
5 use File::Rsync::Mirror::Recentfile;
7 =encoding utf-8
9 =head1 NAME
11 File::Rsync::Mirror::Recent - mirroring via rsync made efficient
13 =cut
15 package File::Rsync::Mirror::Recent;
17 use File::Basename qw(basename dirname fileparse);
18 use File::Copy qw(cp);
19 use File::Path qw(mkpath);
20 use File::Rsync;
21 use File::Rsync::Mirror::Recentfile::FakeBigFloat qw(:all);
22 use File::Temp;
23 use List::Pairwise qw(mapp grepp);
24 use List::Util qw(first max);
25 use Scalar::Util qw(reftype);
26 use Storable;
27 use Time::HiRes qw();
28 use YAML::Syck;
30 use version; our $VERSION = qv('0.0.3');
32 =head1 SYNOPSIS
34 B<!!!! PRE-ALPHA ALERT !!!!>
36 Nothing in here is believed to be stable, nothing yet intended for
37 public consumption. The plan is to provide scripts that act as
38 frontends for all the backend functionality. Option and method names
39 may still change.
41 For the rationale see the section BACKGROUND.
43 The documentation in here is normally not needed because the code is
44 meant to be run from several standalone programs. For a quick
45 overview, see the file README.mirrorcpan and the bin/ directory of the
46 distribution. For the architectural ideas see the section THE
47 ARCHITECTURE OF A COLLECTION OF RECENTFILES below.
49 File::Rsync::Mirror::Recent establishes a view on a collection of
50 File::Rsync::Mirror::Recentfile objects and provides abstractions
51 spanning multiple intervals associated with those.
53 =head1 EXPORT
55 No exports.
57 =head1 CONSTRUCTORS
59 =head2 my $obj = CLASS->new(%hash)
61 Constructor. On every argument pair the key is a method name and the
62 value is an argument to that method name.
64 =cut
# Constructor: blesses an empty hash into $class, then walks @args
# pairwise and calls the method named by each key with the value as
# its single argument. Returns the new object.
sub new {
    my($class, @args) = @_;
    my $self = bless {}, $class;
    while (@args) {
        # take two elements at a time: method name, then its argument
        my($method,$arg) = splice @args, 0, 2;
        $self->$method($arg);
    }
    return $self;
}
76 =head1 ACCESSORS
78 =cut
# Names of all accessor methods of this class; consumed by the
# "use accessors @accessors" statement that follows the POD below.
my @accessors;

BEGIN {
    # private accessors that have no POD entry
    @accessors =
        (
         "__pathdb",
         "_max_one_state",      # when we have no time left but want
                                # at least get one file per
                                # iteration to avoid procrastination
         "_principal_recentfile",
         "_recentfiles",
         "_rsync",
         "_runstatusfile",      # frequently dumps all rfs
         "_logfilefordone",     # turns on _logfile on all DONE
                                # systems (disk intensive)
        );

    # The heredoc below is terminated by the "=cut" that ends the
    # following POD section, so the POD text itself is the heredoc body.
    # Every line starting with "=item " has that prefix stripped (the
    # s/// edits the element in place and grep keeps only lines where it
    # matched), and what remains is pushed as an accessor name.
    my @pod_lines =
        split /\n/, <<'=cut'; push @accessors, grep {s/^=item\s+//} @pod_lines; }
100 =over 4
102 =item ignore_link_stat_errors
104 as in F:R:M:Recentfile
106 =item local
108 Option to specify the local principal file for operations with a local
109 collection of recentfiles.
111 =item localroot
113 as in F:R:M:Recentfile
115 =item max_files_per_connection
117 as in F:R:M:Recentfile
119 =item remote
123 =item remoteroot
125 XXX: this is (ATM) different from Recentfile!!!
127 =item remote_recentfile
129 Rsync address of the remote C<RECENT.recent> symlink or whichever name
130 the principal remote recentfile has.
132 =item rsync_options
134 Things like compress, links, times or checksums. Passed in to the
135 File::Rsync object used to run the mirror.
137 =item ttl
139 Minimum time before fetching the principal recentfile again.
141 =item _verbose
143 Boolean to turn on a bit of verbosity. Use the method C<verbose> to also
144 set the verbosity of associated Recentfile objects.
146 =back
148 =cut
150 use accessors @accessors;
152 =head1 METHODS
154 =head2 $arrayref = $obj->news ( %options )
156 Test this with:
158 perl -Ilib bin/rrr-news \
159 -after 1217200539 \
160 -max 12 \
161 -local /home/ftp/pub/PAUSE/authors/RECENT.recent
163 perl -Ilib bin/rrr-news \
164 -after 1217200539 \
165 -rsync=compress=1 \
166 -rsync=links=1 \
167 -localroot /home/ftp/pub/PAUSE/authors/ \
168 -remote pause.perl.org::authors/RECENT.recent \
169 -verbose
171 Note: all parameters that can be passed to recent_events can also be specified here.
173 Note: all data are kept in memory
175 =cut
# Collects events from the recentfiles of this collection, one
# recentfile after the other, and returns them as one arrayref.
#
# %opt is passed through to recent_events() of every recentfile. Three
# keys are also interpreted here:
#   max    - stop as soon as that many events have been collected
#   after  - stop once a recentfile reaches back beyond this epoch
#   before - upper epoch limit; lowered after each recentfile to the
#            epoch of the last event already collected
#
# Dies if neither "local" nor "remote" is set, or if "remote" is set
# without "localroot".
sub news {
    my($self, %opt) = @_;
    my $local = $self->local;
    unless ($local) {
        if (my $remote = $self->remote) {
            unless ($self->localroot) {
                die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
            }
        } else {
            die "Alert: neither local nor remote specified, cannot continue";
        }
    }
    my $rfs = $self->recentfiles;
    my $ret = [];
    # Start the cursor at the caller's own limit. Starting at undef
    # would overwrite $opt{before} below, so the first recentfile would
    # be queried without any upper bound.
    my $before = $opt{before};
    for my $rf (@$rfs) {
        my %locopt = %opt;
        $locopt{before} = $before;
        if ($opt{max}) {
            # only ask for what is still missing
            $locopt{max} -= scalar @$ret;
            last if $locopt{max} <= 0;
        }
        # recent_events reports back through this hash (read below as
        # $locopt{info}{first} and $locopt{info}{last})
        $locopt{info} = {};
        my $res = $rf->recent_events(%locopt);
        if (@$res){
            push @$ret, @$res;
        }
        if ($opt{max} && scalar @$ret > $opt{max}) {
            last;
        }
        if ($opt{after}){
            if ( $locopt{info}{last} && _bigfloatlt($locopt{info}{last}{epoch},$opt{after}) ) {
                last;
            }
            if ( _bigfloatgt($opt{after},$locopt{info}{first}{epoch}) ) {
                last;
            }
        }
        if (!@$res){
            next;
        }
        # continue in the next recentfile below the oldest event seen so
        # far, but never above the caller's own limit
        $before = $res->[-1]{epoch};
        $before = $opt{before} if $opt{before} && _bigfloatlt($opt{before},$before);
    }
    $ret;
}
227 =head2 overview ( %options )
229 returns a small table that summarizes the state of all recentfiles
230 collected in this Recent object.
232 $options{verbose}=1 increases the number of columns displayed.
234 Here is an example output:
236 Ival Cnt Max Min Span Util Cloud
237 1h 47 1225053014.38 1225049650.91 3363.47 93.4% ^ ^
238 6h 324 1225052939.66 1225033394.84 19544.82 90.5% ^ ^
239 1d 437 1225049651.53 1224966402.53 83248.99 96.4% ^ ^
240 1W 1585 1225039015.75 1224435339.46 603676.29 99.8% ^ ^
241 1M 5855 1225017376.65 1222428503.57 2588873.08 99.9% ^ ^
242 1Q 17066 1224578930.40 1216803512.90 7775417.50 100.0% ^ ^
243 1Y 15901 1223966162.56 1216766820.67 7199341.89 22.8% ^ ^
244 Z 9909 1223966162.56 1216766820.67 7199341.89 - ^ ^
246 I<Ival> is the name of the interval.
248 I<Cnt> is the number of entries in this recentfile.
250 I<Max> is the highest(first) epoch in this recentfile, rounded.
252 I<Min> is the lowest(last) epoch in this recentfile, rounded.
254 I<Span> is the timespan currently covered, rounded.
256 I<Util> is I<Span> divided by the designated timespan of this
257 recentfile.
259 I<Cloud> is ascii art illustrating the sequence of the Max and Min
260 timestamps.
262 =cut
# Builds the summary table described in the POD above and returns it as
# one string (headline plus one line per recentfile that has events).
# Recentfiles without events are left out. With $options{verbose} false
# only the columns Ival, Cnt, Max, Min, Span, Util and Cloud are kept.
sub overview {
    my($self,%options) = @_;
    my $rfs = $self->recentfiles;
    # @s:    one summary per recentfile, a flat list of column => value pairs
    # %rank: every distinct Max/Min string, later mapped to its rank
    my(@s,%rank);
  RECENTFILE: for my $rf (@$rfs) {
        my $re=$rf->recent_events;
        my $rfsummary;
        if (@$re) {
            my $span = $re->[0]{epoch}-$re->[-1]{epoch};
            my $merged = $rf->merged;
            $rfsummary =
                [
                 "Ival",
                 $rf->interval,
                 "Cnt",
                 scalar @$re,
                 "Dirtymark",
                 $rf->dirtymark ? sprintf("%.2f",$rf->dirtymark) : "-",
                 "Merged",
                 ($rf->interval eq "Z"
                  ?
                  "-"
                  :
                  sprintf ("%.2f", $merged->{epoch} || 0)),
                 "Max",
                 sprintf ("%.2f", $re->[0]{epoch}),
                 "Min",
                 sprintf ("%.2f", $re->[-1]{epoch}),
                 "Span",
                 sprintf ("%.2f", $span),
                 "Util", # u9n:)
                 ($rf->interval eq "Z"
                  ?
                  "-"
                  :
                  sprintf ("%5.1f%%", 100 * $span / $rf->interval_secs)
                 ),
                ];
            # remember the formatted Max and Min values as hash keys
            @rank{mapp {$b} grepp {$a =~ /^(Max|Min)$/} @$rfsummary} = ();
        } else {
            next RECENTFILE;
        }
        push @s, $rfsummary;
    }
    # number the collected timestamps from 1 (highest) upwards
    @rank{sort {$b <=> $a} keys %rank} = 1..keys %rank;
    my $maxrank = max values %rank;
    for my $rfsummary (@s) {
        # Cloud column: a blank string as wide as the number of distinct
        # timestamps, with "^" at the ranks of this row's Max and Min
        my $string = " " x $maxrank;
        my @borders;
        for my $ele (qw(Max Min)) {
            my($r) = mapp {$b} grepp {$a eq $ele} @$rfsummary;
            push @borders, $rank{$r}-1;
        }
        for ($borders[0],$borders[1]) {
            substr($string,$_,1) = "^";
        }
        push @$rfsummary, "Cloud", $string;
    }
    unless ($options{verbose}) {
        my %filter = map {($_=>1)} qw(Ival Cnt Max Min Span Util Cloud);
        for (@s) {
            $_ = [mapp {($a,$b)} grepp {!!$filter{$a}} @$_];
        }
    }
    # one right-aligned %Ns per column, N being the widest cell or header
    my @sprintf;
    for (my $i = 0; $i <= $#{$s[0]}; $i+=2) {
        my $maxlength = max ((map { length $_->[$i+1] } @s), length $s[0][$i]);
        push @sprintf, "%" . $maxlength . "s";
    }
    my $sprintf = join " ", @sprintf;
    $sprintf .= "\n";
    my $headline = sprintf $sprintf, mapp {$a} @{$s[0]};
    join "", $headline, map { sprintf $sprintf, mapp {$b} @$_ } @s;
}
338 =head2 _pathdb
340 Keeping track of already handled files. Currently it is a hash, will
341 probably become a database with its own accessors.
343 =cut
# Getter/setter on top of the raw __pathdb accessor. A true $set is
# stored first; if nothing is stored afterwards, a fresh empty hashref
# is installed. Always returns the stored value.
sub _pathdb {
    my($self, $set) = @_;
    if ($set) {
        $self->__pathdb ($set);
    }
    my $pathdb = $self->__pathdb;
    unless (defined $pathdb) {
        # lazy initialisation on first access
        $self->__pathdb(+{});
    }
    return $self->__pathdb;
}
357 =head2 $recentfile = $obj->principal_recentfile ()
359 returns the principal recentfile of this tree.
361 =cut
# Returns the principal recentfile object, building and caching it in
# _principal_recentfile on first use. With "local" set it is read from
# that file; otherwise "remote" and "localroot" must both be set and
# the object comes from _recentfile_object_for_remote. Dies if the
# configuration is insufficient.
sub principal_recentfile {
    my($self) = @_;
    my $prince = $self->_principal_recentfile;
    return $prince if defined $prince;
    my $local = $self->local;
    if ($local) {
        $prince = File::Rsync::Mirror::Recentfile->new_from_file ($local);
    } else {
        if (my $remote = $self->remote) {
            unless ($self->localroot) {
                die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
            }
            $prince = $self->_recentfile_object_for_remote;
        } else {
            die "Alert: neither local nor remote specified, cannot continue";
        }
    }
    $self->_principal_recentfile($prince);
    return $prince;
}
388 =head2 $recentfiles_arrayref = $obj->recentfiles ()
390 returns a reference to the complete list of recentfile objects that
391 describe this tree. No guarantee is given that the represented
392 recentfiles exist or have been read. They are just bare objects.
394 =cut
# Returns an arrayref of recentfile objects: the principal recentfile
# followed by one sparse clone of it per entry of its aggregator, each
# with its interval set to that entry. All objects share this object's
# pathdb. The list is built once and cached in _recentfiles.
sub recentfiles {
    my($self) = @_;
    my $rfs = $self->_recentfiles;
    return $rfs if defined $rfs;
    my $rf0 = $self->principal_recentfile;
    my $pathdb = $self->_pathdb;
    $rf0->_pathdb ($pathdb);
    my $aggregator = $rf0->aggregator;
    my @rf = $rf0;
    for my $agg (@$aggregator) {
        my $nrf = $rf0->_sparse_clone;
        $nrf->interval ( $agg );
        $nrf->have_mirrored ( 0 );
        $nrf->_pathdb ( $pathdb );
        push @rf, $nrf;
    }
    $self->_recentfiles(\@rf);
    return \@rf;
}
416 =head2 $success = $obj->rmirror ( %options )
418 Mirrors all recentfiles of the I<remote> address working through all
419 of them, mirroring their contents.
421 Test this with:
423 use File::Rsync::Mirror::Recent;
424 my $rrr = File::Rsync::Mirror::Recent->new(
425 ignore_link_stat_errors => 1,
426 localroot => "/home/ftp/pub/PAUSE/authors",
427 remote => "pause.perl.org::authors/RECENT.recent",
428 max_files_per_connection => 5000,
429 rsync_options => {
430 compress => 1,
431 links => 1,
432 times => 1,
433 checksum => 0,
435 verbose => 1,
436 _runstatusfile => "recent-rmirror-state.yml",
437 _logfilefordone => "recent-rmirror-donelog.log",
439 $rrr->rmirror ( "skip-deletes" => 1, loop => 1 );
441 Or try without the loop parameter and write the loop yourself:
443 use File::Rsync::Mirror::Recent;
444 my @rrr;
445 for my $t ("authors","modules"){
446 my $rrr = File::Rsync::Mirror::Recent->new(
447 ignore_link_stat_errors => 1,
448 localroot => "/home/ftp/pub/PAUSE/$t",
449 remote => "pause.perl.org::$t/RECENT.recent",
450 max_files_per_connection => 512,
451 rsync_options => {
452 compress => 1,
453 links => 1,
454 times => 1,
455 checksum => 0,
457 verbose => 1,
458 _runstatusfile => "recent-rmirror-state-$t.yml",
459 _logfilefordone => "recent-rmirror-donelog-$t.log",
460 ttl => 5,
462 push @rrr, $rrr;
464 while (){
465 for my $rrr (@rrr){
466 $rrr->rmirror ( "skip-deletes" => 1 );
468 warn "sleeping 23\n"; sleep 23;
472 =cut
# Works through all recentfiles, mirroring what each of them lists.
#
# %options is handed to the mirror() call of each recentfile (via
# _rmirror_mirror) and written to the run status file. The key "loop"
# is interpreted here: when true the method never returns; otherwise it
# returns 1 once the last recentfile reports uptodate.
sub rmirror {
    my($self, %options) = @_;

    # my $rf0 = $self->_recentfile_object_for_remote;
    my $rfs = $self->recentfiles;

    # seeds the principal recentfile; called once up front and once at
    # the end of every pass through LOOP
    my $_every_20_seconds = sub {
        $self->principal_recentfile->seed;
    };
    $_every_20_seconds->();
    my $_sigint = sub {
        # XXX exit gracefully (reminder)
    };
    my $minimum_time_per_loop = 20; # XXX needs accessor: warning, if
                                    # set too low, we do nothing but
                                    # mirror the principal!
    if (my $logfile = $self->_logfilefordone) {
        for my $i (0..$#$rfs) {
            $rfs->[$i]->done->_logfile($logfile);
        }
    }
  LOOP: while () {
        my $ttleave = time + $minimum_time_per_loop;
      RECENTFILE: for my $i (0..$#$rfs) {
            my $rf = $rfs->[$i];
            if (my $file = $self->_runstatusfile) {
                $self->_rmirror_runstatusfile ($file, $i, \%options);
            }
            if (time > $ttleave){
                # Must make sure that one file can get fetched in any case
                $self->_max_one_state(1);
            }
            if ($rf->seeded) {
                $self->_rmirror_mirror ($i, \%options);
            } elsif ($rf->uptodate){
                if ($i < $#$rfs){
                    # hand the done-information on to the next recentfile
                    $rfs->[$i+1]->done->merge($rf->done);
                }
                # no further seed necessary because "every_20_seconds" does it
                next RECENTFILE;
            } else {
              WORKUNIT: while (time < $ttleave) {
                    if ($rf->uptodate) {
                        $self->_rmirror_sleep_per_connection ($i);
                        next RECENTFILE;
                    } else {
                        $self->_rmirror_mirror ($i, \%options);
                    }
                }
            }
        }
        $self->_max_one_state(0);
        if ($rfs->[-1]->uptodate) {
            $self->_rmirror_cleanup;
            last LOOP unless $options{loop};
        }
        my $sleep = $ttleave - time;
        if ($sleep > 0.01) {
            $self->_rmirror_endofloop_sleep ($sleep);
        } else {
            # negative time not invented yet:)
        }
        $_every_20_seconds->();
    }
    # only reached when the last recentfile was up to date and no
    # endless loop was requested
    return 1;
}
# Calls mirror() on recentfile number $i with a copy of %$options,
# forcing piecemeal => 1 and, while _max_one_state is true, max => 1.
# The caller's options hash is not modified.
sub _rmirror_mirror {
    my($self, $i, $options) = @_;
    my $rfs = $self->recentfiles;
    my $rf = $rfs->[$i];
    my %locopt = %$options;
    if ($self->_max_one_state) {
        $locopt{max} = 1;
    }
    $locopt{piecemeal} = 1;
    $rf->mirror (%locopt);
}
# Sleeps for the sleep_per_connection of recentfile number $i (0.42
# seconds if that is undefined), then merges its done-information into
# the following recentfile, if there is one.
sub _rmirror_sleep_per_connection {
    my($self, $i) = @_;
    my $rfs = $self->recentfiles;
    my $rf = $rfs->[$i];
    my $sleep = $rf->sleep_per_connection;
    $sleep = 0.42 unless defined $sleep; # XXX accessor!
    Time::HiRes::sleep $sleep;
    $rfs->[$i+1]->done->merge($rf->done) if $i < $#$rfs;
}
# Empties the shared pathdb and then checks every pair of neighbouring
# recentfiles: the later one is seeded when this one has no merged
# epoch, or when the later one's minmax max is lower than this one's
# merged epoch.
sub _rmirror_cleanup {
    my($self) = @_;
    my $pathdb = $self->_pathdb();
    # clear in place so that every holder of this hashref sees it empty
    %$pathdb = ();
    my $rfs = $self->recentfiles;
    for my $i (0..$#$rfs-1) {
        my $thismerged = $rfs->[$i]->merged;
        my $next = $rfs->[$i+1];
        my $nextminmax = $next->minmax;
        # warn "DEBUG: i[$i] nextminmaxmax[$nextminmax->{max}] thismergedepoch[$thismerged->{epoch}]";
        if (not defined $thismerged->{epoch} or _bigfloatlt($nextminmax->{max},$thismerged->{epoch})){
            $next->seed;
            # warn sprintf "DEBUG: next iv %s seeded since next-minmax-max[$nextminmax->{max}]lt this-merged-epoch[$thismerged->{epoch}]\n", $next->interval;
        }
    }
}
# Dumps a snapshot of the current run to the YAML file $file: the index
# $i of the recentfile being worked on, the options, the keys of $self,
# the current time and the uptodate value of every recentfile.
sub _rmirror_runstatusfile {
    my($self, $file, $i, $options) = @_;
    my $rfs = $self->recentfiles;
    require YAML::Syck;
    YAML::Syck::DumpFile
        (
         $file,
         {i => $i,
          options => $options,
          self => [keys %$self], # passing $self leaks, dclone refuses because of globs
          time => time,
          # scalar: an empty-list return must not shift the key/value pairs
          uptodate => {map {($_ => scalar $rfs->[$_]->uptodate)} 0..$#$rfs},
         });
}
# Sleeps $sleep seconds at the end of one pass of the rmirror loop.
# When verbose is on, first reports the current time and the sleep
# duration on STDERR.
sub _rmirror_endofloop_sleep {
    my($self, $sleep) = @_;
    if ($self->verbose) {
        printf STDERR
            (
             "Dorm %d (%s secs)\n",
             time,
             $sleep,
            );
    }
    # sleep whether or not we are verbose: only the message is optional
    sleep $sleep;
}
# mirrors the recentfile and instantiates the recentfile object
#
# Splits the "remote" attribute at its last "/" into a remote root
# (stored in remoteroot) and a file name. An empty name or
# "RECENT.recent" is resolved through _resolve_rfilename; any other
# name is fetched as a tempfile by a throwaway Recentfile object. The
# object read from the fetched file gets the attributes listed in
# @need_args copied from $self and is marked as slave.
# Dies if "remote" is unset or contains no "/".
sub _recentfile_object_for_remote {
    my($self) = @_;
    # get the remote recentfile
    my $rrfile = $self->remote or die "Alert: cannot construct a recentfile object without the 'remote' attribute";
    my $splitter = qr{(.+)/([^/]*)};
    my($remoteroot,$rfilename) = $rrfile =~ $splitter;
    $self->remoteroot($remoteroot);
    my $abslfile;
    if (!defined $rfilename) {
        die "Alert: Cannot resolve '$rrfile', does not match $splitter";
    } elsif (not length $rfilename or $rfilename eq "RECENT.recent") {
        ($abslfile,$rfilename) = $self->_resolve_rfilename($rfilename);
    }
    # attributes handed from this object to the recentfile object
    my @need_args =
        (
         "ignore_link_stat_errors",
         "localroot",
         "max_files_per_connection",
         "remoteroot",
         "rsync_options",
         "verbose",
         "ttl",
        );
    my $rf0;
    unless ($abslfile) {
        # not fetched yet: let a temporary object fetch the file
        $rf0 = File::Rsync::Mirror::Recentfile->new (map {($_ => $self->$_)} @need_args);
        $rf0->resolve_recentfilename($rfilename);
        $abslfile = $rf0->get_remote_recentfile_as_tempfile ();
    }
    $rf0 = File::Rsync::Mirror::Recentfile->new_from_file ( $abslfile );
    for my $override (@need_args) {
        $rf0->$override ( $self->$override );
    }
    $rf0->is_slave (1);
    return $rf0;
}
# Takes the file name part of the remote address (an empty name stands
# for "RECENT.recent") and returns the list ($abslfile, $rfilename).
#
# For names ending in ".recent" the file is fetched as a tempfile. As
# long as what was fetched is a symlink, that symlink is installed as
# $rfilename under localroot (unless an identical one is already there)
# and its target is fetched next. $abslfile is then the absolute name
# of the last fetched tempfile. For all other names $abslfile is undef
# and nothing is fetched.
# Dies if a symlink target contains a "/" or if unlink/rename fail.
sub _resolve_rfilename {
    my($self, $rfilename) = @_;
    # File::Spec is not loaded at the top of this file
    require File::Spec;
    $rfilename = "RECENT.recent" unless length $rfilename;
    my $abslfile = undef;
    if ($rfilename =~ /\.recent$/) {
        # may be a file *or* a symlink,
        $abslfile = $self->_fetch_as_tempfile ($rfilename);
        while (-l $abslfile) {
            my $symlink = readlink $abslfile;
            if ($symlink =~ m|/|) {
                die "FIXME: filenames containing '/' not supported, got '$symlink'";
            }
            my $localrfile = File::Spec->catfile($self->localroot, $rfilename);
            if (-e $localrfile) {
                # readlink yields undef if the local file is no symlink
                my $old_symlink = readlink $localrfile;
                if (defined $old_symlink and $old_symlink eq $symlink) {
                    # local symlink already up to date, drop the fetched copy
                    unlink $abslfile or die "Cannot unlink '$abslfile': $!";
                } else {
                    unlink $localrfile; # may fail
                    rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
                }
            } else {
                rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
            }
            $abslfile = $self->_fetch_as_tempfile ($symlink);
        }
    }
    return ($abslfile, $rfilename);
}
# takes a basename, returns an absolute name, does not delete the
# file, throws the $fh away. Caller must rename or unlink
#
# The tempfile is created in localroot with a name derived from $rfile
# and the same suffix as $rfile; the remote file is then rsynced from
# remoteroot/$rfile onto it using rsync_options. Dies if rsync fails.
sub _fetch_as_tempfile {
    my($self, $rfile) = @_;
    # keep the extension (e.g. ".recent", ".yaml") of the remote name
    my($suffix) = $rfile =~ /(\.[^\.]+)$/;
    $suffix = "" unless defined $suffix;
    my $fh = File::Temp->new
        (TEMPLATE => sprintf(".FRMRecent-%s-XXXX",
                             $rfile,
                            ),
         DIR => $self->localroot,
         SUFFIX => $suffix,
         UNLINK => 0,           # the file must survive $fh
        );
    my $tmpname = $fh->filename;
    my $rsync = File::Rsync->new($self->rsync_options);
    $rsync->exec
        (
         src => join("/",$self->remoteroot,$rfile),
         dst => $tmpname,
        ) or die "Could not mirror '$rfile' to $tmpname\: ".join(" ",$rsync->err);
    return $tmpname;
}
702 =head2 $verbose = $obj->verbose ( $set )
704 Getter/setter method to set verbosity for this object and all
705 associated Recentfile objects.
707 =cut
# Getter/setter for the verbosity. A defined $set is passed to the
# verbose method of every object returned by recentfiles and stored in
# _verbose. Returns the stored value; an undefined value is replaced by
# 0 (and stored) first.
sub verbose {
    my($self,$set) = @_;
    if (defined $set) {
        for ( @{$self->recentfiles} ) { $_->verbose($set) }
        $self->_verbose ($set);
    }
    my $x = $self->_verbose;
    unless (defined $x) {
        $x = 0;
        $self->_verbose ($x);
    }
    return $x;
}
723 =head1 THE ARCHITECTURE OF A COLLECTION OF RECENTFILES
725 The idea is that we want to have a short file that records really
726 recent changes. So that a fresh mirror can be kept fresh as long as
727 the connectivity is given. Then we want longer files that record the
728 history before. So when the mirror falls behind the update period
729 reflected in the shortest file, it can complement the list of recent
730 file events with the next one. And if this is not long enough we want
731 another one, again a bit longer. And we want one that completes the
732 history back to the oldest file. The index files do contain the
733 complete list of current files. The longer a period covered by an
734 index file is gone the less often the index file is updated. For
735 practical reasons adjacent files will often overlap a bit but this is
736 neither necessary nor enforced. That's the basic idea. The following
737 example represents a tree that has a few updates every day:
739 RECENT.recent -> RECENT-1h.yaml
740 RECENT-6h.yaml
741 RECENT-1d.yaml
742 RECENT-1M.yaml
743 RECENT-1W.yaml
744 RECENT-1Q.yaml
745 RECENT-1Y.yaml
746 RECENT-Z.yaml
748 The first file is the principal file, in so far it is the one that is
749 written first after a filesystem change. Usually a symlink links to it
750 with a filename that has the same filenameroot and the suffix
751 C<.recent>. On systems that do not support symlinks there is a plain
752 copy maintained instead.
754 The last file, the Z file, contains the complementary files that are
755 in none of the other files. It never contains C<deletes>. Besides
756 this it serves the role of a recovery mechanism or spill over pond.
757 When things go wrong, it's a valuable controlling instance to hold the
758 differences between the collection of limited interval files and the
759 actual filesystem.
761 =head2 THE INDIVIDUAL RECENTFILE
763 A I<recentfile> consists of a hash that has two keys: C<meta> and
764 C<recent>. The C<meta> part has metadata and the C<recent> part has a
765 list of fileobjects.
767 =head2 THE META PART
769 Here we find things that are pretty much self explaining: all
770 lowercase attributes are accessors and as such explained somewhere
771 above in this manpage. The uppercase attribute C<Producers> contains
772 version information about involved software components. Nothing to
773 worry about as I believe.
775 =head2 THE RECENT PART
777 This is the interesting part. Every entry refers to some filesystem
778 change (with path, epoch, type).
780 The I<epoch> value is the point in time when some change was
781 I<registered> but can be set to arbitrary values. Do not be tempted to
782 believe that the entry has a direct relation to something like
783 modification time or change time on the filesystem level. They are not
784 reflecting release dates. (If you want exact release dates: Barbie is
785 providing a database of them. See
786 http://use.perl.org/~barbie/journal/37907).
788 All these entries can be divided into two types (denoted by the
789 I<type> attribute): C<new>s and C<delete>s. Changes and creations are
790 C<new>s. Deletes are C<delete>s.
792 Besides an I<epoch> and a I<type> attribute we find a third one:
793 I<path>. This path is relative to the directory we find the
794 I<recentfile> in.
796 The order of the entries in the I<recentfile> is by decreasing epoch
797 attribute. These are unique floating point numbers. When the server
798 has ntp running correctly, then the timestamps are usually reflecting
799 a real epoch. If time is running backwards, we trump the system epoch
800 with strictly monotonically increasing floating point timestamps and
801 guarantee they are unique.
803 =head1 CORRUPTION AND RECOVERY
805 If the origin host breaks the promise to deliver consistent and
806 complete I<recentfiles> then the way back to sanity shall be achieved
807 through traditional rsyncing between the hosts. But don't forget to
808 report it as a bug:)
810 =head1 BACKGROUND
812 This is about speeding up rsync operation on large trees. Uses a small
813 metadata cocktail and pull technology.
815 =head2 NON-COMPETITORS
817 File::Mirror JWU/File-Mirror/File-Mirror-0.10.tar.gz only local trees
818 Mirror::YAML ADAMK/Mirror-YAML-0.03.tar.gz some sort of inner circle
819 Net::DownloadMirror KNORR/Net-DownloadMirror-0.04.tar.gz FTP sites and stuff
820 Net::MirrorDir KNORR/Net-MirrorDir-0.05.tar.gz ditto
821 Net::UploadMirror KNORR/Net-UploadMirror-0.06.tar.gz ditto
822 Pushmi::Mirror CLKAO/Pushmi-v1.0.0.tar.gz something SVK
824 rsnapshot www.rsnapshot.org focus on backup
825 csync www.csync.org more like unison
826 multi-rsync sourceforge 167893 lan push to many
828 =head2 COMPETITORS
830 The problem to solve, which clusters, ftp mirrors and otherwise
831 replicated datasets like CPAN all share, is how to transfer only a
832 minimum amount of data to determine the diff between two hosts.
834 Normally it takes a long time to determine the diff itself before it
835 can be transferred. Known solutions at the time of this writing are
836 csync2, and rsync 3 batch mode.
838 For many years the best solution was csync2 which solves the problem
839 by maintaining a sqlite database on both ends and talking a highly
840 sophisticated protocol to quickly determine which files to send and
841 which to delete at any given point in time. Csync2 is often
842 inconvenient because it is push technology and the act of syncing
843 demands quite an intimate relationship between the sender and the
844 receiver. This is hard to achieve in an environment of loosely coupled
845 sites where the number of sites is large or connections are
846 unreliable or network topology is changing.
848 Rsync 3 batch mode works around these problems by providing rsync-able
849 batch files which allow receiving nodes to replay the history of the
850 other nodes. This reduces the need to have an incestuous relation but
851 it has the disadvantage that these batch files replicate the contents
852 of the involved files. This seems inappropriate when the nodes already
853 have a means of communicating over rsync.
855 rersyncrecent solves this problem with a couple of (usually 2-10)
856 index files which cover different overlapping time intervals. The
857 master writes these files and the clients/slaves can construct the
858 full tree from the information contained in them. The most recent
859 index file usually covers the last seconds or minutes or hours of the
860 tree and depending on the needs, slaves can rsync every few seconds or
861 minutes and then bring their trees in full sync.
863 The rersyncrecent mode was developed for CPAN but I hope it is a
864 convenient and economic general purpose solution. I'm looking forward
865 to see a CPAN backbone that is only a few seconds behind PAUSE. And
866 then ... the first FUSE based CPAN filesystem anyone?
868 =head1 FUTURE DIRECTIONS
870 Currently the origin server must keep track of injected and removed
871 files. Should be supported by an inotify-based assistant.
873 =head1 SEE ALSO
875 L<File::Rsync::Mirror::Recentfile>,
876 L<File::Rsync::Mirror::Recentfile::Done>,
877 L<File::Rsync::Mirror::Recentfile::FakeBigFloat>
879 =head1 BUGS
881 Please report any bugs or feature requests through the web interface
883 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Rsync-Mirror-Recent>.
884 I will be notified, and then you'll automatically be notified of
885 progress on your bug as I make changes.
887 =head1 SUPPORT
889 You can find documentation for this module with the perldoc command.
891 perldoc File::Rsync::Mirror::Recent
893 You can also look for information at:
895 =over 4
897 =item * RT: CPAN's request tracker
899 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Rsync-Mirror-Recent>
901 =item * AnnoCPAN: Annotated CPAN documentation
903 L<http://annocpan.org/dist/File-Rsync-Mirror-Recent>
905 =item * CPAN Ratings
907 L<http://cpanratings.perl.org/d/File-Rsync-Mirror-Recent>
909 =item * Search CPAN
911 L<http://search.cpan.org/dist/File-Rsync-Mirror-Recent>
913 =back
916 =head1 ACKNOWLEDGEMENTS
918 Thanks to RJBS for module-starter.
920 =head1 AUTHOR
922 Andreas König
924 =head1 COPYRIGHT & LICENSE
926 Copyright 2008, 2009 Andreas König.
928 This program is free software; you can redistribute it and/or modify it
929 under the same terms as Perl itself.
932 =cut
934 1; # End of File::Rsync::Mirror::Recent