1 package File
::Rsync
::Mirror
::Recent
;
5 use File
::Rsync
::Mirror
::Recentfile
;
11 File::Rsync::Mirror::Recent - mirroring via rsync made efficient
15 package File
::Rsync
::Mirror
::Recent
;
17 use File
::Basename
qw(basename dirname fileparse);
18 use File
::Copy
qw(cp);
19 use File
::Path
qw(mkpath);
21 use File
::Rsync
::Mirror
::Recentfile
::Done
(); # at least needed by thaw()
22 use File
::Rsync
::Mirror
::Recentfile
::FakeBigFloat
qw(:all);
24 use List
::Pairwise
qw(mapp grepp);
25 use List
::Util
qw(first max);
26 use Scalar
::Util
qw(blessed reftype);
31 use version
; our $VERSION = qv
('0.0.8');
35 B<!!!! PRE-ALPHA ALERT !!!!>
37 Nothing in here is believed to be stable, nothing yet intended for
38 public consumption. The plan is to provide scripts that act as
39 frontends for all the backend functionality. Option and method names
42 For the rationale see the section BACKGROUND.
44 The documentation in here is normally not needed because the code is
45 meant to be run from several standalone programs. For a quick
46 overview, see the file README.mirrorcpan and the bin/ directory of the
47 distribution. For the architectural ideas see the section THE
48 ARCHITECTURE OF A COLLECTION OF RECENTFILES below.
50 File::Rsync::Mirror::Recent establishes a view on a collection of
51 File::Rsync::Mirror::Recentfile objects and provides abstractions
52 spanning multiple time intervals associated with those.
60 =head2 my $obj = CLASS->new(%hash)
62 Constructor. On every argument pair the key is a method name and the
63 value is an argument to that method name.
68 my($class, @args) = @_;
69 my $self = bless {}, $class;
71 my($method,$arg) = splice @args, 0, 2;
77 =head2 my $obj = CLASS->thaw($statusfile)
79 Constructor from a statusfile left over from a previous
80 rmirror run. See also C<runstatusfile>.
84 sub _thaw_if_small_enough
{
86 return if -s
$file > 100_000
; # XXX should read and look how
87 # many lines we have for
88 # "reduced_self.-__pathdb"?
89 return $self->thaw($file);
92 my($self, $file) = @_;
93 die "thaw called without statusfile argument" unless defined $file;
94 die "Alert: statusfile '$file' not found" unless -e
$file;
98 while (not mkdir "$file.lock") {
100 Time
::HiRes
::sleep $sleeptime;
101 my $waiting = time - $start;
103 warn "*** waiting ($waiting) for lock ($err) ***";
108 my $serialized = YAML
::Syck
::LoadFile
($file);
109 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
110 warn sprintf "DEBUG: Process $$ reading '$file' (size=$size). It was written %d seconds ago", time-$serialized->{time};
111 my $charged_self = $serialized->{reduced_self
};
112 my $class = blessed
$self;
113 bless $charged_self, $class;
114 my $rfs = $serialized->{reduced_rfs
};
115 my $rfclass = $class . "file"; # "Recent" . "file"
116 my $pathdb = $charged_self->_pathdb;
119 $rf->_pathdb($pathdb);
121 $charged_self->_recentfiles($rfs);
122 $charged_self->_principal_recentfile($rfs->[0]);
123 # die "FIXME: thaw all recentfiles from reduced_rfs into _recentfiles as well, watch out for pathdb and rsync";
124 return $charged_self;
137 "_dirtymark", # keeps track of the dirtymark of the recentfiles
138 "_logfilefordone", # turns on _logfile on all DONE
139 # systems (disk intensive)
140 "_max_one_state", # when we have no time left but want
141 # at least get one file per
142 # iteration to avoid procrastination
143 "_principal_recentfile",
146 "_runstatusfile", # occasionally dumps all rfs
147 "_verbose", # internal variable for verbose setter/getter
148 "_verboselog", # internal variable for verboselog setter/getter
152 split /\n/, <<'=cut'; push @accessors, grep {s/^=item\s+//} @pod_lines; }
156 =item ignore_link_stat_errors
158 as in F:R:M:Recentfile
162 Option to specify the local principal file for operations with a local
163 collection of recentfiles.
167 as in F:R:M:Recentfile
169 =item max_files_per_connection
171 as in F:R:M:Recentfile
175 The remote principal recentfile in rsync notation. E.g.
177 pause.perl.org::authors/RECENT.recent
181 as in F:R:M:Recentfile
183 =item remote_recentfile
185 Rsync address of the remote C<RECENT.recent> symlink or whichever name
186 the principal remote recentfile has.
190 Things like compress, links, times or checksums. Passed in to the
191 File::Rsync object used to run the mirror.
195 as in F:R:M:Recentfile
199 Minimum time before fetching the principal recentfile again.
205 use accessors
@accessors;
209 =head2 $arrayref = $obj->news ( %options )
213 perl -Ilib bin/rrr-news \
216 -local /home/ftp/pub/PAUSE/authors/RECENT.recent
218 perl -Ilib bin/rrr-news \
222 -localroot /home/ftp/pub/PAUSE/authors/ \
223 -remote pause.perl.org::authors/RECENT.recent
226 Note: all parameters that can be passed to recent_events can also be specified here.
228 Note: all data are kept in memory
233 my($self, %opt) = @_;
234 my $local = $self->local;
236 if (my $remote = $self->remote) {
238 if ($localroot = $self->localroot) {
239 # nice, they know what they are doing
241 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
244 die "Alert: neither local nor remote specified, cannot continue";
247 my $rfs = $self->recentfiles;
252 $locopt{before
} = $before;
254 $locopt{max
} -= scalar @
$ret;
255 last if $locopt{max
} <= 0;
258 my $res = $rf->recent_events(%locopt);
262 if ($opt{max
} && scalar @
$ret > $opt{max
}) {
266 if ( $locopt{info
}{last} && _bigfloatlt
($locopt{info
}{last}{epoch
},$opt{after
}) ) {
269 if ( _bigfloatgt
($opt{after
},$locopt{info
}{first
}{epoch
}) ) {
276 $before = $res->[-1]{epoch
};
277 $before = $opt{before
} if $opt{before
} && _bigfloatlt
($opt{before
},$before);
282 =head2 overview ( %options )
284 returns a small table that summarizes the state of all recentfiles
285 collected in this Recent object.
287 $options{verbose}=1 increases the number of columns displayed.
289 Here is an example output:
291 Ival Cnt Max Min Span Util Cloud
292 1h 47 1225053014.38 1225049650.91 3363.47 93.4% ^ ^
293 6h 324 1225052939.66 1225033394.84 19544.82 90.5% ^ ^
294 1d 437 1225049651.53 1224966402.53 83248.99 96.4% ^ ^
295 1W 1585 1225039015.75 1224435339.46 603676.29 99.8% ^ ^
296 1M 5855 1225017376.65 1222428503.57 2588873.08 99.9% ^ ^
297 1Q 17066 1224578930.40 1216803512.90 7775417.50 100.0% ^ ^
298 1Y 15901 1223966162.56 1216766820.67 7199341.89 22.8% ^ ^
299 Z 9909 1223966162.56 1216766820.67 7199341.89 - ^ ^
301 I<Max> is the name of the interval.
303 I<Cnt> is the number of entries in this recentfile.
305 I<Max> is the highest(first) epoch in this recentfile, rounded.
307 I<Min> is the lowest(last) epoch in this recentfile, rounded.
309 I<Span> is the timespan currently covered, rounded.
311 I<Util> is I<Span> devided by the designated timespan of this
314 I<Cloud> is ascii art illustrating the sequence of the Max and Min
319 my($self,%options) = @_;
320 my $rfs = $self->recentfiles;
322 RECENTFILE
: for my $rf (@
$rfs) {
323 my $re=$rf->recent_events;
326 my $span = $re->[0]{epoch
}-$re->[-1]{epoch
};
327 my $merged = $rf->merged;
335 $rf->dirtymark ?
sprintf("%.2f",$rf->dirtymark) : "-",
337 sprintf ("%.2f", $rf->{ORIG
}{Producers
}{time}||0),
339 ($rf->interval eq "Z"
343 sprintf ("%.2f", $merged->{epoch
} || 0)),
345 sprintf ("%.2f", $re->[0]{epoch
}),
347 sprintf ("%.2f", $re->[-1]{epoch
}),
349 sprintf ("%.2f", $span),
351 ($rf->interval eq "Z"
355 sprintf ("%5.1f%%", 100 * $span / $rf->interval_secs)
358 @rank{mapp
{$b} grepp
{$a =~ /^(Max|Min)$/} @
$rfsummary} = ();
364 @rank{sort {$b <=> $a} keys %rank} = 1..keys %rank;
365 my $maxrank = max
values %rank;
366 for my $rfsummary (@s) {
367 my $string = " " x
$maxrank;
369 for my $ele (qw(Max Min)) {
370 my($r) = mapp
{$b} grepp
{$a eq $ele} @
$rfsummary;
371 push @borders, $rank{$r}-1;
373 for ($borders[0],$borders[1]) {
374 substr($string,$_,1) = "^";
376 push @
$rfsummary, "Cloud", $string;
378 unless ($options{verbose
}) {
379 my %filter = map {($_=>1)} qw(Ival Cnt Max Min Span Util Cloud);
381 $_ = [mapp
{($a,$b)} grepp
{!!$filter{$a}} @
$_];
385 for (my $i = 0; $i <= $#{$s[0]}; $i+=2) {
386 my $maxlength = max
((map { length $_->[$i+1] } @s), length $s[0][$i]);
387 push @sprintf, "%" . $maxlength . "s";
389 my $sprintf = join " ", @sprintf;
391 my $headline = sprintf $sprintf, mapp
{$a} @
{$s[0]};
392 join "", $headline, map { sprintf $sprintf, mapp
{$b} @
$_ } @s;
397 Keeping track of already handled files. Currently it is a hash, will
398 probably become a database with its own accessors.
403 my($self, $set) = @_;
405 $self->__pathdb ($set);
407 my $pathdb = $self->__pathdb;
408 unless (defined $pathdb) {
409 $self->__pathdb(+{});
411 return $self->__pathdb;
414 =head2 $recentfile = $obj->principal_recentfile ()
416 returns the principal recentfile object of this tree.
419 # mirrors the recentfile and instantiates the recentfile object
420 sub _principal_recentfile_fromremote
{
422 # get the remote recentfile
423 my $rrfile = $self->remote or die "Alert: cannot construct a recentfile object without the 'remote' attribute";
424 my $splitter = qr{(.+)/([^/]*)};
425 my($remoteroot,$rfilename) = $rrfile =~ $splitter;
426 $self->remoteroot($remoteroot);
428 if (!defined $rfilename) {
429 die "Alert: Cannot resolve '$rrfile', does not match $splitter";
430 } elsif (not length $rfilename or $rfilename eq "RECENT.recent") {
431 ($abslfile,$rfilename,$fh) = $self->_principal_recentfile_fromremote_resosymlink($rfilename);
435 "ignore_link_stat_errors",
437 "max_files_per_connection",
447 $rf0 = File
::Rsync
::Mirror
::Recentfile
->new (map {($_ => $self->$_)} @need_args);
448 $rf0->split_rfilename($rfilename);
449 $abslfile = $rf0->get_remote_recentfile_as_tempfile ();
451 $rf0 = File
::Rsync
::Mirror
::Recentfile
->new_from_file ( $abslfile );
452 $rf0->_current_tempfile ( $abslfile );
453 $rf0->_current_tempfile_fh ( $fh );
454 $rf0->_use_tempfile (1);
455 for my $override (@need_args) {
456 $rf0->$override ( $self->$override );
461 sub principal_recentfile
{
463 my $rf0 = $self->_principal_recentfile;
464 return $rf0 if defined $rf0;
465 my $local = $self->local;
467 $rf0 = File
::Rsync
::Mirror
::Recentfile
->new_from_file ($local);
469 if (my $remote = $self->remote) {
471 if ($localroot = $self->localroot) {
472 # nice, they know what they are doing
474 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
476 $rf0 = $self->_principal_recentfile_fromremote;
478 die "Alert: neither local nor remote specified, cannot continue";
481 $self->_principal_recentfile($rf0);
485 =head2 $recentfiles_arrayref = $obj->recentfiles ()
487 returns a reference to the complete list of recentfile objects that
488 describe this tree. No guarantee is given that the represented
489 recentfiles exist or have been read. They are just bare objects.
495 my $rfs = $self->_recentfiles;
496 return $rfs if defined $rfs;
497 my $rf0 = $self->principal_recentfile;
498 my $pathdb = $self->_pathdb;
499 $rf0->_pathdb ($pathdb);
500 my $aggregator = $rf0->aggregator;
502 for my $agg (@
$aggregator) {
503 my $nrf = $rf0->_sparse_clone;
504 $nrf->interval ( $agg );
505 $nrf->have_mirrored ( 0 );
506 $nrf->_pathdb ( $pathdb );
509 $self->_recentfiles(\
@rf);
513 =head2 $success = $obj->rmirror ( %options )
515 Mirrors all recentfiles of the I<remote> address working through all
516 of them, mirroring their contents.
520 use File::Rsync::Mirror::Recent;
521 my $rrr = File::Rsync::Mirror::Recent->new(
522 ignore_link_stat_errors => 1,
523 localroot => "/home/ftp/pub/PAUSE/authors",
524 remote => "pause.perl.org::authors/RECENT.recent",
525 max_files_per_connection => 5000,
533 _runstatusfile => "recent-rmirror-state.yml",
534 _logfilefordone => "recent-rmirror-donelog.log",
536 $rrr->rmirror ( "skip-deletes" => 1, loop => 1 );
538 Or try without the loop parameter and write the loop yourself:
540 use File::Rsync::Mirror::Recent;
542 for my $t ("authors","modules"){
543 my $rrr = File::Rsync::Mirror::Recent->new(
544 ignore_link_stat_errors => 1,
545 localroot => "/home/ftp/pub/PAUSE/$t",
546 remote => "pause.perl.org::$t/RECENT.recent",
547 max_files_per_connection => 512,
555 _runstatusfile => "recent-rmirror-state-$t.yml",
556 _logfilefordone => "recent-rmirror-donelog-$t.log",
563 $rrr->rmirror ( "skip-deletes" => 1 );
565 warn "sleeping 23\n"; sleep 23;
570 # _alluptodate is unused but at least it worked last time I needed it,
571 # so let us keep it around
574 my $sdm = $self->_dirtymark;
575 return unless defined $sdm;
576 for my $rf (@
{$self->recentfiles}) {
577 return if $rf->seeded;
578 my $rfdm = $rf->dirtymark;
579 return unless defined $rfdm;
580 return unless $rfdm eq $sdm;
581 my $done = $rf->done;
582 return unless defined $done;
583 my $done_intervals = $done->_intervals;
584 return if !defined $done_intervals;
585 # nonono, may be more than one, only covered it must be:
586 # return if @$done_intervals > 1;
587 my $minmax = $rf->minmax;
588 return unless defined $minmax;
589 return unless $done->covered(@
$minmax{qw(max min)});
596 for ( @
{$self->recentfiles} ) { $_->seed(1) }
599 my($self, %options) = @_;
601 my $rfs = $self->recentfiles;
603 $self->principal_recentfile->seed;
605 # XXX exit gracefully (reminder)
608 # XXX needs accessor: warning, if set too low, we do nothing but
609 # mirror the principal!
610 my $minimum_time_per_loop = 20;
612 if (my $logfile = $self->_logfilefordone) {
613 for my $i (0..$#$rfs) {
614 $rfs->[$i]->done->_logfile($logfile);
617 if (my $dirtymark = $self->principal_recentfile->dirtymark) {
618 my $mydm = $self->_dirtymark;
620 $self->_dirtymark($dirtymark);
621 } elsif ($dirtymark ne $mydm) {
622 if ($self->verbose) {
624 if (my $vl = $self->verboselog) {
625 open $fh, ">>", $vl or die "Could not open >> '$vl': $!";
629 print $fh "NewDirtymark: old[$mydm] new[$dirtymark]\n";
631 $self->_dirtymark($dirtymark);
634 my $file = $self->runstatusfile;
635 $self->_rmirror_runstatusfile_write ($file, \
%options);
636 $self->_rmirror_loop($minimum_time_per_loop,\
%options);
640 my($self,$minimum_time_per_loop,$options) = @_;
642 my $ttleave = time + $minimum_time_per_loop;
643 my $file = $self->runstatusfile;
644 my $child = $self->_thaw_if_small_enough ($file);
645 if ($child && $child->recentfiles->[-1]->uptodate) {
646 warn "DEBUG: parent process[$$] about to leave loop";
650 if ($options->{loop}) {
655 if (! defined $pid) {
659 $self = $child || $self->thaw ($file);
660 my $rfs = $self->recentfiles;
661 $self->principal_recentfile->seed;
662 RECENTFILE
: for my $i (0..$#$rfs) {
664 if (time > $ttleave) {
665 # Must make sure that one file can get fetched in any case
666 $self->_max_one_state(1);
669 $self->_rmirror_mirror ($i, $options);
670 } elsif ($rf->uptodate) {
672 $rfs->[$i+1]->done->merge($rf->done);
674 # no further seed necessary because "periodic" does it
677 WORKUNIT
: while (time < $ttleave) {
679 $self->_rmirror_sleep_per_connection ($i);
682 $self->_rmirror_mirror ($i, $options);
685 if ($self->_max_one_state) {
689 $self->_max_one_state(0);
690 $self->_rmirror_runstatusfile_write ($file, $options);
691 if ($rfs->[-1]->uptodate) {
692 $self->_rmirror_cleanup;
693 my $file = $self->runstatusfile;
694 unless ($options->{loop}) {
695 warn "DEBUG: uptodate child process[$$] about to leave loop";
699 } elsif ($options->{loop}) {
700 warn "DEBUG: child process[$$] about to leave loop";
706 my $sleep = $ttleave - time;
708 $self->_rmirror_endofloop_sleep ($sleep);
710 # negative time not invented yet:)
715 sub _rmirror_mirror
{
716 my($self, $i, $options) = @_;
717 my $rfs = $self->recentfiles;
719 my %locopt = %$options;
720 if ($self->_max_one_state) {
723 $locopt{piecemeal
} = 1;
724 $rf->mirror (%locopt);
726 # we limit to 0 for the case that upstream is broken and has
727 # more than one timestamp (happened on PAUSE 200903)
728 if (my $dirtymark = $rf->dirtymark) {
729 my $mydm = $self->_dirtymark;
730 if (!defined $mydm or $dirtymark ne $mydm) {
731 $self->_dirtymark($dirtymark);
738 sub _rmirror_sleep_per_connection
{
740 my $rfs = $self->recentfiles;
742 my $sleep = $rf->sleep_per_connection;
743 $sleep = 0.42 unless defined $sleep;
744 Time
::HiRes
::sleep $sleep;
745 $rfs->[$i+1]->done->merge($rf->done) if $i < $#$rfs;
748 sub _rmirror_cleanup
{
750 my $pathdb = $self->_pathdb();
751 for my $k (keys %$pathdb) {
752 delete $pathdb->{$k};
754 my $rfs = $self->recentfiles;
755 for my $i (0..$#$rfs-1) {
756 my $thismerged = $rfs->[$i]->merged;
757 my $next = $rfs->[$i+1];
758 my $nextminmax = $next->minmax;
759 # warn "DEBUG: i[$i] nextminmaxmax[$nextminmax->{max}] thismergedepoch[$thismerged->{epoch}]";
760 if (not defined $thismerged->{epoch
} or _bigfloatlt
($nextminmax->{max
},$thismerged->{epoch
})){
762 # warn sprintf "DEBUG: next iv %s seeded since next-minmax-max[$nextminmax->{max}]lt this-merged-epoch[$thismerged->{epoch}]\n", $next->interval;
767 =head2 $file = $obj->runstatusfile ($set)
769 Getter/setter for C<_runstatusfile> attribute. Defaults to a temporary
770 file created by C<File::Temp>. A status file is required for
771 C<rmirror> working. Since it may be interesting for debugging
772 purposes, you may want to specify a permanent file for this.
778 $self->_runstatusfile ($set);
780 my $x = $self->_runstatusfile;
781 unless (defined $x) {
783 my $tfile = File
::Temp
->new
785 TEMPLATE
=> "Recent-XXXX",
791 $self->_runstatusfile($tfile->filename);
793 return $self->_runstatusfile;
796 # unused code.... it was an oops, discovered the thaw() method too
797 # late, and starting writing this here....
798 sub _rmirror_runstatusfile_read
{
799 my($self, $file) = @_;
803 # XXX is locking useful here?
804 while (not mkdir "$file.lock") {
805 Time
::HiRes
::sleep 0.2;
806 warn "*** waiting for lock ***" if time - $start >= 3;
808 my $yml = YAML
::Syck
::LoadFile
$file;
809 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
810 my $rself = $yml->{reduced_self
};
811 my $rfs = $yml->{reduced_rfs
};
812 # XXX bring them into self
815 sub _rmirror_runstatusfile_write
{
816 my($self, $file, $options) = @_;
818 while (my($k,$v) = each %$self) {
819 next if $k =~ /^-(_principal_recentfile|_recentfiles)$/;
822 my $rfs = $self->recentfiles;
824 for my $i (0..$#$rfs) {
826 while (my($k,$v) = each %$rf) {
827 next if $k =~ /^-(_current_tempfile_fh|_pathdb|_rsync)$/;
828 $rrfs->[$i]{$k} = $rfs->[$i]{$k};
833 while (not mkdir "$file.lock") {
834 Time
::HiRes
::sleep 0.02;
835 warn "*** waiting for lock ***" if time - $start >= 3;
843 reduced_rfs
=> $rrfs,
844 reduced_self
=> $rself,
846 rename "$file.new", $file or die "Could not rename: $!";
847 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
850 sub _rmirror_endofloop_sleep
{
851 my($self, $sleep) = @_;
852 if ($self->verbose) {
854 if (my $vl = $self->verboselog) {
855 open $fh, ">>", $vl or die "Could not open >> '$vl': $!";
861 "Dorm %d (%s secs)\n",
869 # it returns two things: abslfile and rfilename. But the abslfile is
870 # undef when the rfilename ends in .recent. A weird interface, my
872 sub _principal_recentfile_fromremote_resosymlink
{
873 my($self, $rfilename) = @_;
874 $rfilename = "RECENT.recent" unless length $rfilename;
875 my $abslfile = undef;
877 if ($rfilename =~ /\.recent$/) {
878 # may be a file *or* a symlink,
879 ($abslfile,$fh) = $self->_fetch_as_tempfile ($rfilename);
880 while (-l
$abslfile) {
881 my $symlink = readlink $abslfile;
882 if ($symlink =~ m
|/|) {
883 die "FIXME: filenames containing '/' not supported, got '$symlink'";
885 my $localrfile = File
::Spec
->catfile($self->localroot, $rfilename);
886 if (-e
$localrfile) {
887 my $old_symlink = readlink $localrfile;
888 if ($old_symlink eq $symlink) {
889 unlink $abslfile or die "Cannot unlink '$abslfile': $!";
891 unlink $localrfile; # may fail
892 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
895 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
897 ($abslfile,$fh) = $self->_fetch_as_tempfile ($symlink);
900 return ($abslfile, $rfilename, $fh);
903 # takes a basename, returns an absolute name, does not delete the
904 # file, throws the $fh away. Caller must rename or unlink
906 # XXX needs to activate the fh in the rf0 so that it is able to unlink
907 # the file. I would like that the file is used immediately by $rf0
908 sub _fetch_as_tempfile
{
909 my($self, $rfile) = @_;
910 my($suffix) = $rfile =~ /(\.[^\.]+)$/;
911 $suffix = "" unless defined $suffix;
912 my $fh = File
::Temp
->new
913 (TEMPLATE
=> sprintf(".FRMRecent-%s-XXXX",
916 DIR
=> $self->tempdir || $self->localroot,
921 unless ($rsync = File
::Rsync
->new($self->rsync_options)) {
923 Carp
::confess
(YAML
::Syck
::Dump
($self->rsync_options));
925 my $dst = $fh->filename;
926 local($ENV{LANG
}) = "C";
929 src
=> join("/",$self->remoteroot,$rfile),
931 ) or die "Could not mirror '$rfile' to $fh\: ".join(" ",$rsync->err);
934 chmod $mode, $dst or die "Could not chmod $mode '$dst': $!";
939 =head2 $verbose = $obj->verbose ( $set )
941 Getter/setter method to set verbosity for this F:R:M:Recent object and
942 all associated Recentfile objects.
948 for ( @
{$self->recentfiles} ) { $_->verbose($set) }
949 $self->_verbose ($set);
951 my $x = $self->_verbose;
952 unless (defined $x) {
954 $self->_verbose ($x);
960 =head2 my $vl = $obj->verboselog ( $set )
962 Getter/setter method for the path to the logfile to write verbose
963 progress information to.
965 Note: This is a primitive stop gap solution to get simple verbose
966 logging working. The program still sends error messages to STDERR.
967 Switching to Log4perl or similar is probably the way to go. TBD.
973 for ( @
{$self->recentfiles} ) { $_->verboselog($set) }
974 $self->_verboselog ($set);
976 my $x = $self->_verboselog;
977 unless (defined $x) {
979 $self->_verboselog ($x);
984 =head1 THE ARCHITECTURE OF A COLLECTION OF RECENTFILES
986 The idea is that we want to have a short file that records really
987 recent changes. So that a fresh mirror can be kept fresh as long as
988 the connectivity is given. Then we want longer files that record the
989 history before. So when the mirror falls behind the update period
990 reflected in the shortest file, it can complement the list of recent
991 file events with the next one. And if this is not long enough we want
992 another one, again a bit longer. And we want one that completes the
993 history back to the oldest file. The index files do contain the
994 complete list of current files. The longer a period covered by an
995 index file is gone the less often the index file is updated. For
996 practical reasons adjacent files will often overlap a bit but this is
997 neither necessary nor enforced. That's the basic idea. The following
998 example represents a tree that has a few updates every day:
1000 RECENT.recent -> RECENT-1h.yaml
1010 The first file is the principal file, in so far it is the one that is
1011 written first after a filesystem change. Usually a symlink links to it
1012 with a filename that has the same filenameroot and the suffix
1013 C<.recent>. On systems that do not support symlinks there is a plain
1014 copy maintained instead.
1016 The last file, the Z file, contains the complementary files that are
1017 in none of the other files. It may contain C<delete> events but often
1018 C<delete> events are discarded at the transition to the Z file.
1020 =head2 THE INDIVIDUAL RECENTFILE
1022 A I<recentfile> consists of a hash that has two keys: C<meta> and
1023 C<recent>. The C<meta> part has metadata and the C<recent> part has a
1024 list of fileobjects.
1026 =head2 THE META PART
1028 Here we find things that are pretty much self explaining: all
1029 lowercase attributes are accessors and as such explained in the
1030 manpages. The uppercase attribute C<Producers> contains version
1031 information about involved software components. Nothing to worry about
1034 =head2 THE RECENT PART
1036 This is the interesting part. Every entry refers to some filesystem
1037 change (with path, epoch, type).
1039 The I<epoch> value is the point in time when some change was
1040 I<registered> but can be set to arbitrary values. Do not be tempted to
1041 believe that the entry has a direct relation to something like
1042 modification time or change time on the filesystem level. They are not
1043 reflecting release dates. (If you want exact release dates: Barbie is
1044 providing a database of them. See
1045 http://use.perl.org/~barbie/journal/37907).
1047 All these entries can be devided into two types (denoted by the
1048 I<type> attribute): C<new>s and C<delete>s. Changes and creations are
1049 C<new>s. Deletes are C<delete>s.
1051 Besides an I<epoch> and a I<type> attribute we find a third one:
1052 I<path>. This path is relative to the directory we find the
1055 The order of the entries in the I<recentfile> is by decreasing epoch
1056 attribute. These are unique floating point numbers. When the server
1057 has ntp running correctly, then the timestamps are usually reflecting
1058 a real epoch. If time is running backwards, we trump the system epoch
1059 with strictly monotonically increasing floating point timestamps and
1060 guarantee they are unique.
1062 =head1 CORRUPTION AND RECOVERY
1064 If the origin host breaks the promise to deliver consistent and
1065 complete I<recentfiles> then it must update its C<dirtymark> and all
1066 slaves must discard what they cosider the truth. In the worst case
1067 that something goes wrong despite the dirtymark mechanism the way back
1068 to sanity can always be achieved through traditional rsyncing between
1073 This is about speeding up rsync operation on large trees. Uses a small
1074 metadata cocktail and pull technology.
1076 rersyncrecent solves this problem with a couple of (usually 2-10)
1077 lightweight index files which cover different overlapping time
1078 intervals. The master writes these files and the clients/slaves can
1079 construct the full tree from the information contained in them. The
1080 most recent index file usually covers the last seconds or minutes or
1081 hours of the tree and depending on the needs, slaves can rsync every
1082 few seconds or minutes and then bring their trees in full sync.
1084 The rersyncrecent model was developed for CPAN but as it is both
1085 convenient and economic it is also a general purpose solution. I'm
1086 looking forward to see a CPAN backbone that is only a few seconds
1089 =head2 NON-COMPETITORS
1091 File::Mirror JWU/File-Mirror/File-Mirror-0.10.tar.gz only local trees
1092 Mirror::YAML ADAMK/Mirror-YAML-0.03.tar.gz some sort of inner circle
1093 Net::DownloadMirror KNORR/Net-DownloadMirror-0.04.tar.gz FTP sites and stuff
1094 Net::MirrorDir KNORR/Net-MirrorDir-0.05.tar.gz dito
1095 Net::UploadMirror KNORR/Net-UploadMirror-0.06.tar.gz dito
1096 Pushmi::Mirror CLKAO/Pushmi-v1.0.0.tar.gz something SVK
1098 rsnapshot www.rsnapshot.org focus on backup
1099 csync www.csync.org more like unison
1100 multi-rsync sourceforge 167893 lan push to many
1101 chasm chasmd.org per-directory manifests
1105 The problem to solve which clusters and ftp mirrors and otherwise
1106 replicated datasets like CPAN share: how to transfer only a minimum
1107 amount of data to determine the diff between two hosts.
1109 Normally it takes a long time to determine the diff itself before it
1110 can be transferred. Known solutions at the time of this writing are
1111 csync2, and rsync 3 batch mode.
1113 For many years the best solution was B<csync2> which solves the
1114 problem by maintaining a sqlite database on both ends and talking a
1115 highly sophisticated protocol to quickly determine which files to send
1116 and which to delete at any given point in time. Csync2 is often
1117 inconvenient because it is push technology and the act of syncing
1118 demands quite an intimate relationship between the sender and the
1119 receiver. This is hard to achieve in an environment of loosely coupled
1120 sites where the number of sites is large or connections are unreliable
1121 or network topology is changing.
1123 B<Rsync 3 batch mode> works around these problems by providing
1124 rsync-able batch files which allow receiving nodes to replay the
1125 history of the other nodes. This reduces the need to have an
1126 incestuous relation but it has the disadvantage that these batch files
1127 replicate the contents of the involved files. This seems inappropriate
1128 when the nodes already have a means of communicating over rsync.
1130 =head2 HONORABLE MENTION
1132 B<instantmirror> at https://fedorahosted.org/InstantMirror/ is an
1133 ambitious project that tries to combine various technologies (squid,
1134 bittorrent) to overcome the current slowness with the main focus on
1135 fedora. It's been founded in 2009-03 and at the time of this writing
1136 it is still a bit early to comment on.
1140 If the tree of the master server is changing faster than the bandwidth
1141 permits to mirror then additional protocols may need to be deployed.
1142 Certainly p2p/bittorrent can help in such situations because
1143 downloading sites help each other and bittorrent chunks large files
1146 =head1 FUTURE DIRECTIONS
1148 Currently the origin server must keep track of injected and removed
1149 files. Should be supported by an inotify-based assistant.
1151 Convince other users outside the CPAN like
1152 http://fedoraproject.org/wiki/Infrastructure/Mirroring
1156 L<File::Rsync::Mirror::Recentfile>,
1157 L<File::Rsync::Mirror::Recentfile::Done>,
1158 L<File::Rsync::Mirror::Recentfile::FakeBigFloat>
1162 Please report any bugs or feature requests through the web interface
1164 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Rsync-Mirror-Recent>.
1165 I will be notified, and then you'll automatically be notified of
1166 progress on your bug as I make changes.
1170 You can find documentation for this module with the perldoc command.
1172 perldoc File::Rsync::Mirror::Recent
1174 You can also look for information at:
1178 =item * RT: CPAN's request tracker
1180 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Rsync-Mirror-Recent>
1182 =item * AnnoCPAN: Annotated CPAN documentation
1184 L<http://annocpan.org/dist/File-Rsync-Mirror-Recent>
1186 =item * CPAN Ratings
1188 L<http://cpanratings.perl.org/d/File-Rsync-Mirror-Recent>
1192 L<http://search.cpan.org/dist/File-Rsync-Mirror-Recent>
1197 =head1 ACKNOWLEDGEMENTS
1199 Thanks to RJBS for module-starter.
1205 =head1 COPYRIGHT & LICENSE
1207 Copyright 2008, 2009 Andreas König.
1209 This program is free software; you can redistribute it and/or modify it
1210 under the same terms as Perl itself.
1215 1; # End of File::Rsync::Mirror::Recent