1 package File
::Rsync
::Mirror
::Recent
;
5 use File
::Rsync
::Mirror
::Recentfile
;
11 File::Rsync::Mirror::Recent - mirroring via rsync made efficient
15 package File
::Rsync
::Mirror
::Recent
;
17 use File
::Basename
qw(basename dirname fileparse);
18 use File
::Copy
qw(cp);
19 use File
::Path
qw(mkpath);
21 use File
::Rsync
::Mirror
::Recentfile
::Done
(); # at least needed by thaw()
22 use File
::Rsync
::Mirror
::Recentfile
::FakeBigFloat
qw(:all);
24 use List
::Pairwise
qw(mapp grepp);
25 use List
::Util
qw(first max);
26 use Scalar
::Util
qw(blessed reftype);
31 use version
; our $VERSION = qv
('0.1.1');
35 The documentation in here is normally not needed because the code is
36 considered to be run from several standalone programs. For a quick
37 overview, see the file README.mirrorcpan and the bin/ directory of the
38 distribution. For the architectural ideas see the section THE
39 ARCHITECTURE OF A COLLECTION OF RECENTFILES below.
41 File::Rsync::Mirror::Recent establishes a view on a collection of
42 File::Rsync::Mirror::Recentfile objects and provides abstractions
43 spanning multiple time intervals associated with those.
51 =head2 my $obj = CLASS->new(%hash)
53 Constructor. On every argument pair the key is a method name and the
54 value is an argument to that method name.
59 my($class, @args) = @_;
60 my $self = bless {}, $class;
62 my($method,$arg) = splice @args, 0, 2;
68 =head2 my $obj = CLASS->thaw($statusfile)
70 Constructor from a statusfile left over from a previous
71 rmirror run. See also C<runstatusfile>.
75 sub _thaw_without_pathdb
{
77 open my $fh, $file or die "Can't open '$file': $!";
80 my $tfile = File
::Temp
->new
82 TEMPLATE
=> "Recent-thaw-XXXX",
91 if (/$template_for_eop/) {
94 } elsif (/(\s+)-\s*__pathdb\s*:/) {
96 my $next_attr = sprintf "^%s\\S", " ?" x
length($1);
97 $template_for_eop = qr{$next_attr};
99 print $tfile $_ unless $in_pathdb;
101 close $tfile or die "Could not close: $!";
102 my $return = $self->thaw($tfile->filename);
103 $return->_havelostpathdb(1);
104 unlink $tfile->filename;
108 my($self, $file) = @_;
109 die "thaw called without statusfile argument" unless defined $file;
112 Carp
::confess
("Alert: statusfile '$file' not found");
116 my $sleeptime = 0.02;
117 while (not mkdir "$file.lock") {
119 Time
::HiRes
::sleep $sleeptime;
120 my $waiting = time - $start;
122 warn "*** waiting ($waiting) for lock ($err) ***";
127 my $serialized = YAML
::Syck
::LoadFile
($file);
128 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
129 my $charged_self = $serialized->{reduced_self
};
130 my $class = blessed
$self;
131 bless $charged_self, $class;
132 my $rfs = $serialized->{reduced_rfs
};
133 my $rfclass = $class . "file"; # "Recent" . "file"
134 my $pathdb = $charged_self->_pathdb;
137 $rf->_pathdb($pathdb);
139 $charged_self->_recentfiles($rfs);
140 $charged_self->_principal_recentfile($rfs->[0]);
141 # die "FIXME: thaw all recentfiles from reduced_rfs into _recentfiles as well, watch out for pathdb and rsync";
142 return $charged_self;
155 "_dirtymark", # keeps track of the dirtymark of the recentfiles
156 "_havelostpathdb", # boolean
157 "_have_written_statusfile", # boolean
158 "_logfilefordone", # turns on _logfile on all DONE
159 # systems (disk intensive)
160 "_max_one_state", # when we have no time left but want
161 # at least get one file per
162 # iteration to avoid procrastination
163 "_principal_recentfile",
166 "_runstatusfile", # occasionally dumps all rfs
167 "_verbose", # internal variable for verbose setter/getter
168 "_verboselog", # internal variable for verboselog setter/getter
172 split /\n/, <<'=cut'; push @accessors, grep {s/^=item\s+//} @pod_lines; }
176 =item ignore_link_stat_errors
178 as in F:R:M:Recentfile
182 Option to specify the local principal file for operations with a local
183 collection of recentfiles.
187 as in F:R:M:Recentfile
189 =item max_files_per_connection
191 as in F:R:M:Recentfile
195 The remote principal recentfile in rsync notation. E.g.
197 pause.perl.org::authors/RECENT.recent
201 as in F:R:M:Recentfile
203 =item remote_recentfile
205 Rsync address of the remote C<RECENT.recent> symlink or whichever name
206 the principal remote recentfile has.
210 Things like compress, links, times or checksums. Passed in to the
211 File::Rsync object used to run the mirror.
215 as in F:R:M:Recentfile
219 Minimum time before fetching the principal recentfile again.
225 use accessors
@accessors;
229 =head2 $arrayref = $obj->news ( %options )
233 perl -Ilib bin/rrr-news \
236 -local /home/ftp/pub/PAUSE/authors/RECENT.recent
238 perl -Ilib bin/rrr-news \
242 -localroot /home/ftp/pub/PAUSE/authors/ \
243 -remote pause.perl.org::authors/RECENT.recent
246 All parameters that can be passed to
247 File:Rsync:Mirror:Recentfile::recent_events() can also be specified
250 One additional option is supported. If C<$Options{callback}> is
251 specified, it must be a subref. This sub is called whenever one chunk
252 of events is found. The first argument to the callback is a reference
253 to the currently accumulated array of events.
255 Note: all data are kept in memory.
260 my($self, %opt) = @_;
261 my $local = $self->local;
263 if (my $remote = $self->remote) {
265 if ($localroot = $self->localroot) {
266 # nice, they know what they are doing
268 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
271 die "Alert: neither local nor remote specified, cannot continue";
274 my $rfs = $self->recentfiles;
279 $locopt{before
} = $before;
281 $locopt{max
} -= scalar @
$ret;
282 last if $locopt{max
} <= 0;
285 my $res = $rf->recent_events(%locopt);
289 if ($opt{max
} && scalar @
$ret > $opt{max
}) {
293 if ( $locopt{info
}{last} && _bigfloatlt
($locopt{info
}{last}{epoch
},$opt{after
}) ) {
296 if ( _bigfloatgt
($opt{after
},$locopt{info
}{first
}{epoch
}) ) {
303 $before = $res->[-1]{epoch
};
304 $before = $opt{before
} if $opt{before
} && _bigfloatlt
($opt{before
},$before);
305 if (my $sub = $opt{callback
}) {
312 =head2 overview ( %options )
314 returns a small table that summarizes the state of all recentfiles
315 collected in this Recent object.
317 $options{verbose}=1 increases the number of columns displayed.
319 Here is an example output:
321 Ival Cnt Max Min Span Util Cloud
322 1h 47 1225053014.38 1225049650.91 3363.47 93.4% ^ ^
323 6h 324 1225052939.66 1225033394.84 19544.82 90.5% ^ ^
324 1d 437 1225049651.53 1224966402.53 83248.99 96.4% ^ ^
325 1W 1585 1225039015.75 1224435339.46 603676.29 99.8% ^ ^
326 1M 5855 1225017376.65 1222428503.57 2588873.08 99.9% ^ ^
327 1Q 17066 1224578930.40 1216803512.90 7775417.50 100.0% ^ ^
328 1Y 15901 1223966162.56 1216766820.67 7199341.89 22.8% ^ ^
329 Z 9909 1223966162.56 1216766820.67 7199341.89 - ^ ^
331 I<Max> is the name of the interval.
333 I<Cnt> is the number of entries in this recentfile.
335 I<Max> is the highest(first) epoch in this recentfile, rounded.
337 I<Min> is the lowest(last) epoch in this recentfile, rounded.
339 I<Span> is the timespan currently covered, rounded.
341 I<Util> is I<Span> devided by the designated timespan of this
344 I<Cloud> is ascii art illustrating the sequence of the Max and Min
349 my($self,%options) = @_;
350 my $rfs = $self->recentfiles;
352 RECENTFILE
: for my $rf (@
$rfs) {
353 my $re=$rf->recent_events;
356 my $span = $re->[0]{epoch
}-$re->[-1]{epoch
};
357 my $merged = $rf->merged;
365 $rf->dirtymark ?
sprintf("%.2f",$rf->dirtymark) : "-",
367 sprintf ("%.2f", $rf->{ORIG
}{Producers
}{time}||0),
369 ($rf->interval eq "Z"
373 sprintf ("%.2f", $merged->{epoch
} || 0)),
375 sprintf ("%.2f", $re->[0]{epoch
}),
377 sprintf ("%.2f", $re->[-1]{epoch
}),
379 sprintf ("%.2f", $span),
381 ($rf->interval eq "Z"
385 sprintf ("%5.1f%%", 100 * $span / $rf->interval_secs)
388 @rank{mapp
{$b} grepp
{$a =~ /^(Max|Min)$/} @
$rfsummary} = ();
394 @rank{sort {$b <=> $a} keys %rank} = 1..keys %rank;
395 my $maxrank = max
values %rank;
396 for my $rfsummary (@s) {
397 my $string = " " x
$maxrank;
399 for my $ele (qw(Max Min)) {
400 my($r) = mapp
{$b} grepp
{$a eq $ele} @
$rfsummary;
401 push @borders, $rank{$r}-1;
403 for ($borders[0],$borders[1]) {
404 substr($string,$_,1) = "^";
406 push @
$rfsummary, "Cloud", $string;
408 unless ($options{verbose
}) {
409 my %filter = map {($_=>1)} qw(Ival Cnt Max Min Span Util Cloud);
411 $_ = [mapp
{($a,$b)} grepp
{!!$filter{$a}} @
$_];
415 for (my $i = 0; $i <= $#{$s[0]}; $i+=2) {
416 my $maxlength = max
((map { length $_->[$i+1] } @s), length $s[0][$i]);
417 push @sprintf, "%" . $maxlength . "s";
419 my $sprintf = join " ", @sprintf;
421 my $headline = sprintf $sprintf, mapp
{$a} @
{$s[0]};
422 join "", $headline, map { sprintf $sprintf, mapp
{$b} @
$_ } @s;
427 Keeping track of already handled files. Currently it is a hash, will
428 probably become a database with its own accessors.
433 my($self, $set) = @_;
435 $self->__pathdb ($set);
437 my $pathdb = $self->__pathdb;
438 unless (defined $pathdb) {
439 $self->__pathdb(+{});
441 return $self->__pathdb;
444 =head2 $recentfile = $obj->principal_recentfile ()
446 returns the principal recentfile object of this tree.
449 # mirrors the recentfile and instantiates the recentfile object
450 sub _principal_recentfile_fromremote
{
452 # get the remote recentfile
453 my $rrfile = $self->remote or die "Alert: cannot construct a recentfile object without the 'remote' attribute";
454 my $splitter = qr{(.+)/([^/]*)};
455 my($remoteroot,$rfilename) = $rrfile =~ $splitter;
456 $self->remoteroot($remoteroot);
458 if (!defined $rfilename) {
459 die "Alert: Cannot resolve '$rrfile', does not match $splitter";
460 } elsif (not length $rfilename or $rfilename eq "RECENT.recent") {
461 ($abslfile,$rfilename,$fh) = $self->_principal_recentfile_fromremote_resosymlink($rfilename);
465 "ignore_link_stat_errors",
467 "max_files_per_connection",
477 $rf0 = File
::Rsync
::Mirror
::Recentfile
->new (map {($_ => $self->$_)} @need_args);
478 $rf0->split_rfilename($rfilename);
479 $abslfile = $rf0->get_remote_recentfile_as_tempfile ();
481 $rf0 = File
::Rsync
::Mirror
::Recentfile
->new_from_file ( $abslfile );
482 $rf0->_current_tempfile ( $abslfile );
483 $rf0->_current_tempfile_fh ( $fh );
484 $rf0->_use_tempfile (1);
485 for my $override (@need_args) {
486 $rf0->$override ( $self->$override );
491 sub principal_recentfile
{
493 my $rf0 = $self->_principal_recentfile;
494 return $rf0 if defined $rf0;
495 my $local = $self->local;
497 $rf0 = File
::Rsync
::Mirror
::Recentfile
->new_from_file ($local);
499 if (my $remote = $self->remote) {
501 if ($localroot = $self->localroot) {
502 # nice, they know what they are doing
504 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
506 $rf0 = $self->_principal_recentfile_fromremote;
508 die "Alert: neither local nor remote specified, cannot continue";
511 $self->_principal_recentfile($rf0);
515 =head2 $recentfiles_arrayref = $obj->recentfiles ()
517 returns a reference to the complete list of recentfile objects that
518 describe this tree. No guarantee is given that the represented
519 recentfiles exist or have been read. They are just bare objects.
525 my $rfs = $self->_recentfiles;
526 return $rfs if defined $rfs;
527 my $rf0 = $self->principal_recentfile;
528 my $pathdb = $self->_pathdb;
529 $rf0->_pathdb ($pathdb);
530 my $aggregator = $rf0->aggregator;
532 for my $agg (@
$aggregator) {
533 my $nrf = $rf0->_sparse_clone;
534 $nrf->interval ( $agg );
535 $nrf->have_mirrored ( 0 );
536 $nrf->_pathdb ( $pathdb );
539 $self->_recentfiles(\
@rf);
543 =head2 $success = $obj->rmirror ( %options )
545 Mirrors all recentfiles of the I<remote> address working through all
546 of them, mirroring their contents.
550 use File::Rsync::Mirror::Recent;
551 my $rrr = File::Rsync::Mirror::Recent->new(
552 ignore_link_stat_errors => 1,
553 localroot => "/home/ftp/pub/PAUSE/authors",
554 remote => "pause.perl.org::authors/RECENT.recent",
555 max_files_per_connection => 5000,
563 _runstatusfile => "recent-rmirror-state.yml",
564 _logfilefordone => "recent-rmirror-donelog.log",
566 $rrr->rmirror ( "skip-deletes" => 1, loop => 1 );
568 Or try without the loop parameter and write the loop yourself:
570 use File::Rsync::Mirror::Recent;
572 for my $t ("authors","modules"){
573 my $rrr = File::Rsync::Mirror::Recent->new(
574 ignore_link_stat_errors => 1,
575 localroot => "/home/ftp/pub/PAUSE/$t",
576 remote => "pause.perl.org::$t/RECENT.recent",
577 max_files_per_connection => 512,
585 _runstatusfile => "recent-rmirror-state-$t.yml",
586 _logfilefordone => "recent-rmirror-donelog-$t.log",
593 $rrr->rmirror ( "skip-deletes" => 1 );
595 warn "sleeping 23\n"; sleep 23;
600 # _alluptodate is unused but at least it worked last time I needed it,
601 # so let us keep it around
604 my $sdm = $self->_dirtymark;
605 return unless defined $sdm;
606 for my $rf (@
{$self->recentfiles}) {
607 return if $rf->seeded;
608 my $rfdm = $rf->dirtymark;
609 return unless defined $rfdm;
610 return unless $rfdm eq $sdm;
611 my $done = $rf->done;
612 return unless defined $done;
613 my $done_intervals = $done->_intervals;
614 return if !defined $done_intervals;
615 # nonono, may be more than one, only covered it must be:
616 # return if @$done_intervals > 1;
617 my $minmax = $rf->minmax;
618 return unless defined $minmax;
619 return unless $done->covered(@
$minmax{qw(max min)});
626 for ( @
{$self->recentfiles} ) { $_->seed(1) }
629 my($self, %options) = @_;
631 my $rfs = $self->recentfiles;
633 $self->principal_recentfile->seed;
635 # XXX exit gracefully (reminder)
638 # XXX needs accessor: warning, if set too low, we do nothing but
639 # mirror the principal!
640 my $minimum_time_per_loop = 20;
642 if (my $logfile = $self->_logfilefordone) {
643 for my $i (0..$#$rfs) {
644 $rfs->[$i]->done->_logfile($logfile);
647 if (my $dirtymark = $self->principal_recentfile->dirtymark) {
648 my $mydm = $self->_dirtymark;
650 $self->_dirtymark($dirtymark);
651 } elsif ($dirtymark ne $mydm) {
652 if ($self->verbose) {
654 if (my $vl = $self->verboselog) {
655 open $fh, ">>", $vl or die "Could not open >> '$vl': $!";
659 print $fh "NewDirtymark: old[$mydm] new[$dirtymark]\n";
661 $self->_dirtymark($dirtymark);
664 my $rstfile = $self->runstatusfile;
665 unless ($self->_have_written_statusfile) {
666 $self->_rmirror_runstatusfile_write ($rstfile, \
%options);
667 $self->_have_written_statusfile(1);
669 $self->_rmirror_loop($minimum_time_per_loop,\
%options);
673 my($self,$minimum_time_per_loop,$options) = @_;
675 my $ttleave = time + $minimum_time_per_loop;
676 my $rstfile = $self->runstatusfile;
677 my $otherproc = $self->_thaw_without_pathdb ($rstfile);
679 if (! defined $pid) {
680 warn "Contention: $!";
686 $self = $self->thaw ($rstfile);
687 my $rfs = $self->recentfiles;
688 $self->principal_recentfile->seed;
689 RECENTFILE
: for my $i (0..$#$rfs) {
691 if (time > $ttleave) {
692 # Must make sure that one file can get fetched in any case
693 $self->_max_one_state(1);
696 $self->_rmirror_mirror ($i, $options);
697 } elsif ($rf->uptodate) {
699 $rfs->[$i+1]->done->merge($rf->done);
701 # no further seed necessary because "periodic" does it
704 WORKUNIT
: while (time < $ttleave) {
706 $self->_rmirror_sleep_per_connection ($i);
709 $self->_rmirror_mirror ($i, $options);
712 if ($self->_max_one_state) {
716 $self->_max_one_state(0);
718 if ($rfs->[-1]->uptodate) {
719 $self->_rmirror_cleanup;
721 unless ($options->{loop}) {
724 $self->_rmirror_runstatusfile_write ($rstfile, $options);
729 $otherproc = $self->_thaw_without_pathdb ($rstfile);
730 if (!$options->{loop} && $otherproc && $otherproc->recentfiles->[-1]->uptodate) {
733 my $sleep = $ttleave - time;
735 $self->_rmirror_endofloop_sleep ($sleep);
737 # negative time not invented yet:)
742 sub _rmirror_mirror
{
743 my($self, $i, $options) = @_;
744 my $rfs = $self->recentfiles;
746 my %locopt = %$options;
747 if ($self->_max_one_state) {
750 $locopt{piecemeal
} = 1;
751 $rf->mirror (%locopt);
753 # we limit to 0 for the case that upstream is broken and has
754 # more than one timestamp (happened on PAUSE 200903)
755 if (my $dirtymark = $rf->dirtymark) {
756 my $mydm = $self->_dirtymark;
757 if (!defined $mydm or $dirtymark ne $mydm) {
758 $self->_dirtymark($dirtymark);
765 sub _rmirror_sleep_per_connection
{
767 my $rfs = $self->recentfiles;
769 my $sleep = $rf->sleep_per_connection;
770 $sleep = 0.42 unless defined $sleep;
771 Time
::HiRes
::sleep $sleep;
772 $rfs->[$i+1]->done->merge($rf->done) if $i < $#$rfs;
775 sub _rmirror_cleanup
{
777 my $pathdb = $self->_pathdb();
778 for my $k (keys %$pathdb) {
779 delete $pathdb->{$k};
781 my $rfs = $self->recentfiles;
782 for my $i (0..$#$rfs-1) {
783 my $thismerged = $rfs->[$i]->merged;
784 my $next = $rfs->[$i+1];
785 my $nextminmax = $next->minmax;
786 if (not defined $thismerged->{epoch
} or _bigfloatlt
($nextminmax->{max
},$thismerged->{epoch
})){
792 =head2 $file = $obj->runstatusfile ($set)
794 Getter/setter for C<_runstatusfile> attribute. Defaults to a temporary
795 file created by C<File::Temp>. A status file is required for
796 C<rmirror> working. Since it may be interesting for debugging
797 purposes, you may want to specify a permanent file for this.
803 $self->_runstatusfile ($set);
805 my $x = $self->_runstatusfile;
806 unless (defined $x) {
808 my $tfile = File
::Temp
->new
810 TEMPLATE
=> "Recent-XXXX",
816 $self->_runstatusfile($tfile->filename);
818 return $self->_runstatusfile;
821 # unused code.... it was an oops, discovered the thaw() method too
822 # late, and starting writing this here....
823 sub _rmirror_runstatusfile_read
{
824 my($self, $file) = @_;
828 # XXX is locking useful here?
829 while (not mkdir "$file.lock") {
830 Time
::HiRes
::sleep 0.2;
831 warn "*** waiting for lock ***" if time - $start >= 3;
833 my $yml = YAML
::Syck
::LoadFile
$file;
834 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
835 my $rself = $yml->{reduced_self
};
836 my $rfs = $yml->{reduced_rfs
};
837 # XXX bring them into self
840 sub _rmirror_runstatusfile_write
{
841 my($self, $file, $options) = @_;
843 while (my($k,$v) = each %$self) {
844 next if $k =~ /^-(_principal_recentfile|_recentfiles)$/;
847 my $rfs = $self->recentfiles;
849 for my $i (0..$#$rfs) {
851 while (my($k,$v) = each %$rf) {
852 next if $k =~ /^-(_current_tempfile_fh|_pathdb|_rsync)$/;
853 $rrfs->[$i]{$k} = $rfs->[$i]{$k};
858 while (not mkdir "$file.lock") {
859 Time
::HiRes
::sleep 0.15;
860 warn "*** waiting for lock ***" if time - $start >= 3;
868 reduced_rfs
=> $rrfs,
869 reduced_self
=> $rself,
871 rename "$file.new", $file or die "Could not rename: $!";
872 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
875 sub _rmirror_endofloop_sleep
{
876 my($self, $sleep) = @_;
877 if ($self->verbose) {
879 if (my $vl = $self->verboselog) {
880 open $fh, ">>", $vl or die "Could not open >> '$vl': $!";
886 "Dorm %d (%s secs)\n",
894 # it returns two things: abslfile and rfilename. But the abslfile is
895 # undef when the rfilename ends in .recent. A weird interface, my
897 sub _principal_recentfile_fromremote_resosymlink
{
898 my($self, $rfilename) = @_;
899 $rfilename = "RECENT.recent" unless length $rfilename;
900 my $abslfile = undef;
902 if ($rfilename =~ /\.recent$/) {
903 # may be a file *or* a symlink,
904 ($abslfile,$fh) = $self->_fetch_as_tempfile ($rfilename);
905 while (-l
$abslfile) {
906 my $symlink = readlink $abslfile;
907 if ($symlink =~ m
|/|) {
908 die "FIXME: filenames containing '/' not supported, got '$symlink'";
910 my $localrfile = File
::Spec
->catfile($self->localroot, $rfilename);
911 if (-e
$localrfile) {
912 my $old_symlink = readlink $localrfile;
913 if ($old_symlink eq $symlink) {
914 unlink $abslfile or die "Cannot unlink '$abslfile': $!";
916 unlink $localrfile; # may fail
917 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
920 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
922 ($abslfile,$fh) = $self->_fetch_as_tempfile ($symlink);
925 return ($abslfile, $rfilename, $fh);
928 # takes a basename, returns an absolute name, does not delete the
929 # file, throws the $fh away. Caller must rename or unlink
931 # XXX needs to activate the fh in the rf0 so that it is able to unlink
932 # the file. I would like that the file is used immediately by $rf0
933 sub _fetch_as_tempfile
{
934 my($self, $rfile) = @_;
935 my($suffix) = $rfile =~ /(\.[^\.]+)$/;
936 $suffix = "" unless defined $suffix;
937 my $fh = File
::Temp
->new
938 (TEMPLATE
=> sprintf(".FRMRecent-%s-XXXX",
941 DIR
=> $self->tempdir || $self->localroot,
946 unless ($rsync = File
::Rsync
->new($self->rsync_options)) {
948 Carp
::confess
(YAML
::Syck
::Dump
($self->rsync_options));
950 my $dst = $fh->filename;
951 local($ENV{LANG
}) = "C";
954 src
=> join("/",$self->remoteroot,$rfile),
956 ) or die "Could not mirror '$rfile' to $fh\: ".join(" ",$rsync->err);
959 chmod $mode, $dst or die "Could not chmod $mode '$dst': $!";
964 =head2 $verbose = $obj->verbose ( $set )
966 Getter/setter method to set verbosity for this F:R:M:Recent object and
967 all associated Recentfile objects.
973 for ( @
{$self->recentfiles} ) { $_->verbose($set) }
974 $self->_verbose ($set);
976 my $x = $self->_verbose;
977 unless (defined $x) {
979 $self->_verbose ($x);
985 =head2 my $vl = $obj->verboselog ( $set )
987 Getter/setter method for the path to the logfile to write verbose
988 progress information to.
990 Note: This is a primitive stop gap solution to get simple verbose
991 logging working. The program still sends error messages to STDERR.
992 Switching to Log4perl or similar is probably the way to go. TBD.
998 for ( @
{$self->recentfiles} ) { $_->verboselog($set) }
999 $self->_verboselog ($set);
1001 my $x = $self->_verboselog;
1002 unless (defined $x) {
1004 $self->_verboselog ($x);
1009 =head1 THE ARCHITECTURE OF A COLLECTION OF RECENTFILES
1011 The idea is that we want to have a short file that records really
1012 recent changes. So that a fresh mirror can be kept fresh as long as
1013 the connectivity is given. Then we want longer files that record the
1014 history before. So when the mirror falls behind the update period
1015 reflected in the shortest file, it can complement the list of recent
1016 file events with the next one. And if this is not long enough we want
1017 another one, again a bit longer. And we want one that completes the
1018 history back to the oldest file. The index files do contain the
1019 complete list of current files. The longer a period covered by an
1020 index file is gone the less often the index file is updated. For
1021 practical reasons adjacent files will often overlap a bit but this is
1022 neither necessary nor enforced. That's the basic idea. The following
1023 example represents a tree that has a few updates every day:
1025 RECENT.recent -> RECENT-1h.yaml
1035 The first file is the principal file, in so far it is the one that is
1036 written first after a filesystem change. Usually a symlink links to it
1037 with a filename that has the same filenameroot and the suffix
1038 C<.recent>. On systems that do not support symlinks there is a plain
1039 copy maintained instead.
1041 The last file, the Z file, contains the complementary files that are
1042 in none of the other files. It may contain C<delete> events but often
1043 C<delete> events are discarded at the transition to the Z file.
1045 =head2 THE INDIVIDUAL RECENTFILE
1047 A I<recentfile> consists of a hash that has two keys: C<meta> and
1048 C<recent>. The C<meta> part has metadata and the C<recent> part has a
1049 list of fileobjects.
1051 =head2 THE META PART
1053 Here we find things that are pretty much self explaining: all
1054 lowercase attributes are accessors and as such explained in the
1055 manpages. The uppercase attribute C<Producers> contains version
1056 information about involved software components. Nothing to worry about
1059 =head2 THE RECENT PART
1061 This is the interesting part. Every entry refers to some filesystem
1062 change (with path, epoch, type).
1064 The I<epoch> value is the point in time when some change was
1065 I<registered> but can be set to arbitrary values. Do not be tempted to
1066 believe that the entry has a direct relation to something like
1067 modification time or change time on the filesystem level. They are not
1068 reflecting release dates. (If you want exact release dates: Barbie is
1069 providing a database of them. See
1070 http://use.perl.org/~barbie/journal/37907).
1072 All these entries can be devided into two types (denoted by the
1073 I<type> attribute): C<new>s and C<delete>s. Changes and creations are
1074 C<new>s. Deletes are C<delete>s.
1076 Besides an I<epoch> and a I<type> attribute we find a third one:
1077 I<path>. This path is relative to the directory we find the
1080 The order of the entries in the I<recentfile> is by decreasing epoch
1081 attribute. These are unique floating point numbers. When the server
1082 has ntp running correctly, then the timestamps are usually reflecting
1083 a real epoch. If time is running backwards, we trump the system epoch
1084 with strictly monotonically increasing floating point timestamps and
1085 guarantee they are unique.
1087 =head1 CORRUPTION AND RECOVERY
1089 If the origin host breaks the promise to deliver consistent and
1090 complete I<recentfiles> then it must update its C<dirtymark> and all
1091 slaves must discard what they cosider the truth. In the worst case
1092 that something goes wrong despite the dirtymark mechanism the way back
1093 to sanity can always be achieved through traditional rsyncing between
1098 This is about speeding up rsync operation on large trees. Uses a small
1099 metadata cocktail and pull technology.
1101 rersyncrecent solves this problem with a couple of (usually 2-10)
1102 lightweight index files which cover different overlapping time
1103 intervals. The master writes these files and the clients/slaves can
1104 construct the full tree from the information contained in them. The
1105 most recent index file usually covers the last seconds or minutes or
1106 hours of the tree and depending on the needs, slaves can rsync every
1107 few seconds or minutes and then bring their trees in full sync.
1109 The rersyncrecent model was developed for CPAN but as it is both
1110 convenient and economic it is also a general purpose solution. I'm
1111 looking forward to see a CPAN backbone that is only a few seconds
1114 =head2 NON-COMPETITORS
1116 File::Mirror JWU/File-Mirror/File-Mirror-0.10.tar.gz only local trees
1117 Mirror::YAML ADAMK/Mirror-YAML-0.03.tar.gz some sort of inner circle
1118 Net::DownloadMirror KNORR/Net-DownloadMirror-0.04.tar.gz FTP sites and stuff
1119 Net::MirrorDir KNORR/Net-MirrorDir-0.05.tar.gz dito
1120 Net::UploadMirror KNORR/Net-UploadMirror-0.06.tar.gz dito
1121 Pushmi::Mirror CLKAO/Pushmi-v1.0.0.tar.gz something SVK
1123 rsnapshot www.rsnapshot.org focus on backup
1124 csync www.csync.org more like unison
1125 multi-rsync sourceforge 167893 lan push to many
1126 chasm chasmd.org per-directory manifests
1130 The problem to solve which clusters and ftp mirrors and otherwise
1131 replicated datasets like CPAN share: how to transfer only a minimum
1132 amount of data to determine the diff between two hosts.
1134 Normally it takes a long time to determine the diff itself before it
1135 can be transferred. Known solutions at the time of this writing are
1136 csync2, and rsync 3 batch mode.
1138 For many years the best solution was B<csync2> which solves the
1139 problem by maintaining a sqlite database on both ends and talking a
1140 highly sophisticated protocol to quickly determine which files to send
1141 and which to delete at any given point in time. Csync2 is often
1142 inconvenient because it is push technology and the act of syncing
1143 demands quite an intimate relationship between the sender and the
1144 receiver. This is hard to achieve in an environment of loosely coupled
1145 sites where the number of sites is large or connections are unreliable
1146 or network topology is changing.
1148 B<Rsync 3 batch mode> works around these problems by providing
1149 rsync-able batch files which allow receiving nodes to replay the
1150 history of the other nodes. This reduces the need to have an
1151 incestuous relation but it has the disadvantage that these batch files
1152 replicate the contents of the involved files. This seems inappropriate
1153 when the nodes already have a means of communicating over rsync.
1155 =head2 HONORABLE MENTION
1157 B<instantmirror> at https://fedorahosted.org/InstantMirror/ is an
1158 ambitious project that tries to combine various technologies (squid,
1159 bittorrent) to overcome the current slowness with the main focus on
1160 fedora. It's been founded in 2009-03 and at the time of this writing
1161 it is still a bit early to comment on.
1165 If the tree of the master server is changing faster than the bandwidth
1166 permits to mirror then additional protocols may need to be deployed.
1167 Certainly p2p/bittorrent can help in such situations because
1168 downloading sites help each other and bittorrent chunks large files
1171 =head1 FUTURE DIRECTIONS
1173 Currently the origin server must keep track of injected and removed
1174 files. Should be supported by an inotify-based assistant.
1176 Convince other users outside the CPAN like
1177 http://fedoraproject.org/wiki/Infrastructure/Mirroring
1181 L<File::Rsync::Mirror::Recentfile>,
1182 L<File::Rsync::Mirror::Recentfile::Done>,
1183 L<File::Rsync::Mirror::Recentfile::FakeBigFloat>
1187 Please report any bugs or feature requests through the web interface
1189 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Rsync-Mirror-Recent>.
1190 I will be notified, and then you'll automatically be notified of
1191 progress on your bug as I make changes.
1195 You can find documentation for this module with the perldoc command.
1197 perldoc File::Rsync::Mirror::Recent
1199 You can also look for information at:
1203 =item * RT: CPAN's request tracker
1205 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Rsync-Mirror-Recent>
1207 =item * AnnoCPAN: Annotated CPAN documentation
1209 L<http://annocpan.org/dist/File-Rsync-Mirror-Recent>
1211 =item * CPAN Ratings
1213 L<http://cpanratings.perl.org/d/File-Rsync-Mirror-Recent>
1217 L<http://search.cpan.org/dist/File-Rsync-Mirror-Recent>
1222 =head1 ACKNOWLEDGEMENTS
1224 Thanks to RJBS for module-starter.
1230 =head1 COPYRIGHT & LICENSE
1232 Copyright 2008, 2009 Andreas König.
1234 This program is free software; you can redistribute it and/or modify it
1235 under the same terms as Perl itself.
1240 1; # End of File::Rsync::Mirror::Recent