1 package File
::Rsync
::Mirror
::Recent
;
5 use File
::Rsync
::Mirror
::Recentfile
;
11 File::Rsync::Mirror::Recent - mirroring via rsync made efficient
15 package File
::Rsync
::Mirror
::Recent
;
17 use File
::Basename
qw(basename dirname fileparse);
18 use File
::Copy
qw(cp);
19 use File
::Path
qw(mkpath);
21 use File
::Rsync
::Mirror
::Recentfile
::Done
(); # at least needed by thaw()
22 use File
::Rsync
::Mirror
::Recentfile
::FakeBigFloat
qw(:all);
24 use List
::Pairwise
qw(mapp grepp);
25 use List
::Util
qw(first max);
26 use Scalar
::Util
qw(blessed reftype);
31 use version
; our $VERSION = qv
('0.0.8');
35 The documentation in here is normally not needed because the code is
36 considered to be run from several standalone programs. For a quick
37 overview, see the file README.mirrorcpan and the bin/ directory of the
38 distribution. For the architectural ideas see the section THE
39 ARCHITECTURE OF A COLLECTION OF RECENTFILES below.
41 File::Rsync::Mirror::Recent establishes a view on a collection of
42 File::Rsync::Mirror::Recentfile objects and provides abstractions
43 spanning multiple time intervals associated with those.
51 =head2 my $obj = CLASS->new(%hash)
53 Constructor. On every argument pair the key is a method name and the
54 value is an argument to that method name.
59 my($class, @args) = @_;
60 my $self = bless {}, $class;
62 my($method,$arg) = splice @args, 0, 2;
68 =head2 my $obj = CLASS->thaw($statusfile)
70 Constructor from a statusfile left over from a previous
71 rmirror run. See also C<runstatusfile>.
75 sub _thaw_without_pathdb
{
77 open my $fh, $file or die "Can't open '$file': $!";
80 my $tfile = File
::Temp
->new
82 TEMPLATE
=> "Recent-thaw-XXXX",
91 if (/$template_for_eop/) {
94 } elsif (/(\s+)-\s*__pathdb\s*:/) {
96 my $next_attr = sprintf "^%s\\S", " ?" x
length($1);
97 $template_for_eop = qr{$next_attr};
99 print $tfile $_ unless $in_pathdb;
101 close $tfile or die "Could not close: $!";
102 my $return = $self->thaw($tfile->filename);
103 $return->_havelostpathdb(1);
104 unlink $tfile->filename;
108 my($self, $file) = @_;
109 die "thaw called without statusfile argument" unless defined $file;
112 Carp
::confess
("Alert: statusfile '$file' not found");
116 my $sleeptime = 0.02;
117 while (not mkdir "$file.lock") {
119 Time
::HiRes
::sleep $sleeptime;
120 my $waiting = time - $start;
122 warn "*** waiting ($waiting) for lock ($err) ***";
127 my $serialized = YAML
::Syck
::LoadFile
($file);
128 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
129 my $charged_self = $serialized->{reduced_self
};
130 my $class = blessed
$self;
131 bless $charged_self, $class;
132 my $rfs = $serialized->{reduced_rfs
};
133 my $rfclass = $class . "file"; # "Recent" . "file"
134 my $pathdb = $charged_self->_pathdb;
137 $rf->_pathdb($pathdb);
139 $charged_self->_recentfiles($rfs);
140 $charged_self->_principal_recentfile($rfs->[0]);
141 # die "FIXME: thaw all recentfiles from reduced_rfs into _recentfiles as well, watch out for pathdb and rsync";
142 return $charged_self;
155 "_dirtymark", # keeps track of the dirtymark of the recentfiles
156 "_havelostpathdb", # boolean
157 "_have_written_statusfile", # boolean
158 "_logfilefordone", # turns on _logfile on all DONE
159 # systems (disk intensive)
160 "_max_one_state", # when we have no time left but want
161 # at least get one file per
162 # iteration to avoid procrastination
163 "_principal_recentfile",
166 "_runstatusfile", # occasionally dumps all rfs
167 "_verbose", # internal variable for verbose setter/getter
168 "_verboselog", # internal variable for verboselog setter/getter
172 split /\n/, <<'=cut'; push @accessors, grep {s/^=item\s+//} @pod_lines; }
176 =item ignore_link_stat_errors
178 as in F:R:M:Recentfile
182 Option to specify the local principal file for operations with a local
183 collection of recentfiles.
187 as in F:R:M:Recentfile
189 =item max_files_per_connection
191 as in F:R:M:Recentfile
195 The remote principal recentfile in rsync notation. E.g.
197 pause.perl.org::authors/RECENT.recent
201 as in F:R:M:Recentfile
203 =item remote_recentfile
205 Rsync address of the remote C<RECENT.recent> symlink or whichever name
206 the principal remote recentfile has.
210 Things like compress, links, times or checksums. Passed in to the
211 File::Rsync object used to run the mirror.
215 as in F:R:M:Recentfile
219 Minimum time before fetching the principal recentfile again.
225 use accessors
@accessors;
229 =head2 $arrayref = $obj->news ( %options )
233 perl -Ilib bin/rrr-news \
236 -local /home/ftp/pub/PAUSE/authors/RECENT.recent
238 perl -Ilib bin/rrr-news \
242 -localroot /home/ftp/pub/PAUSE/authors/ \
243 -remote pause.perl.org::authors/RECENT.recent
246 Note: all parameters that can be passed to
247 File:Rsync:Mirror:Recentfile::recent_events() can also be specified
250 Note: all data are kept in memory
255 my($self, %opt) = @_;
256 my $local = $self->local;
258 if (my $remote = $self->remote) {
260 if ($localroot = $self->localroot) {
261 # nice, they know what they are doing
263 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
266 die "Alert: neither local nor remote specified, cannot continue";
269 my $rfs = $self->recentfiles;
274 $locopt{before
} = $before;
276 $locopt{max
} -= scalar @
$ret;
277 last if $locopt{max
} <= 0;
280 my $res = $rf->recent_events(%locopt);
284 if ($opt{max
} && scalar @
$ret > $opt{max
}) {
288 if ( $locopt{info
}{last} && _bigfloatlt
($locopt{info
}{last}{epoch
},$opt{after
}) ) {
291 if ( _bigfloatgt
($opt{after
},$locopt{info
}{first
}{epoch
}) ) {
298 $before = $res->[-1]{epoch
};
299 $before = $opt{before
} if $opt{before
} && _bigfloatlt
($opt{before
},$before);
304 =head2 overview ( %options )
306 returns a small table that summarizes the state of all recentfiles
307 collected in this Recent object.
309 $options{verbose}=1 increases the number of columns displayed.
311 Here is an example output:
313 Ival Cnt Max Min Span Util Cloud
314 1h 47 1225053014.38 1225049650.91 3363.47 93.4% ^ ^
315 6h 324 1225052939.66 1225033394.84 19544.82 90.5% ^ ^
316 1d 437 1225049651.53 1224966402.53 83248.99 96.4% ^ ^
317 1W 1585 1225039015.75 1224435339.46 603676.29 99.8% ^ ^
318 1M 5855 1225017376.65 1222428503.57 2588873.08 99.9% ^ ^
319 1Q 17066 1224578930.40 1216803512.90 7775417.50 100.0% ^ ^
320 1Y 15901 1223966162.56 1216766820.67 7199341.89 22.8% ^ ^
321 Z 9909 1223966162.56 1216766820.67 7199341.89 - ^ ^
323 I<Max> is the name of the interval.
325 I<Cnt> is the number of entries in this recentfile.
327 I<Max> is the highest(first) epoch in this recentfile, rounded.
329 I<Min> is the lowest(last) epoch in this recentfile, rounded.
331 I<Span> is the timespan currently covered, rounded.
333 I<Util> is I<Span> devided by the designated timespan of this
336 I<Cloud> is ascii art illustrating the sequence of the Max and Min
341 my($self,%options) = @_;
342 my $rfs = $self->recentfiles;
344 RECENTFILE
: for my $rf (@
$rfs) {
345 my $re=$rf->recent_events;
348 my $span = $re->[0]{epoch
}-$re->[-1]{epoch
};
349 my $merged = $rf->merged;
357 $rf->dirtymark ?
sprintf("%.2f",$rf->dirtymark) : "-",
359 sprintf ("%.2f", $rf->{ORIG
}{Producers
}{time}||0),
361 ($rf->interval eq "Z"
365 sprintf ("%.2f", $merged->{epoch
} || 0)),
367 sprintf ("%.2f", $re->[0]{epoch
}),
369 sprintf ("%.2f", $re->[-1]{epoch
}),
371 sprintf ("%.2f", $span),
373 ($rf->interval eq "Z"
377 sprintf ("%5.1f%%", 100 * $span / $rf->interval_secs)
380 @rank{mapp
{$b} grepp
{$a =~ /^(Max|Min)$/} @
$rfsummary} = ();
386 @rank{sort {$b <=> $a} keys %rank} = 1..keys %rank;
387 my $maxrank = max
values %rank;
388 for my $rfsummary (@s) {
389 my $string = " " x
$maxrank;
391 for my $ele (qw(Max Min)) {
392 my($r) = mapp
{$b} grepp
{$a eq $ele} @
$rfsummary;
393 push @borders, $rank{$r}-1;
395 for ($borders[0],$borders[1]) {
396 substr($string,$_,1) = "^";
398 push @
$rfsummary, "Cloud", $string;
400 unless ($options{verbose
}) {
401 my %filter = map {($_=>1)} qw(Ival Cnt Max Min Span Util Cloud);
403 $_ = [mapp
{($a,$b)} grepp
{!!$filter{$a}} @
$_];
407 for (my $i = 0; $i <= $#{$s[0]}; $i+=2) {
408 my $maxlength = max
((map { length $_->[$i+1] } @s), length $s[0][$i]);
409 push @sprintf, "%" . $maxlength . "s";
411 my $sprintf = join " ", @sprintf;
413 my $headline = sprintf $sprintf, mapp
{$a} @
{$s[0]};
414 join "", $headline, map { sprintf $sprintf, mapp
{$b} @
$_ } @s;
419 Keeping track of already handled files. Currently it is a hash, will
420 probably become a database with its own accessors.
425 my($self, $set) = @_;
427 $self->__pathdb ($set);
429 my $pathdb = $self->__pathdb;
430 unless (defined $pathdb) {
431 $self->__pathdb(+{});
433 return $self->__pathdb;
436 =head2 $recentfile = $obj->principal_recentfile ()
438 returns the principal recentfile object of this tree.
441 # mirrors the recentfile and instantiates the recentfile object
442 sub _principal_recentfile_fromremote
{
444 # get the remote recentfile
445 my $rrfile = $self->remote or die "Alert: cannot construct a recentfile object without the 'remote' attribute";
446 my $splitter = qr{(.+)/([^/]*)};
447 my($remoteroot,$rfilename) = $rrfile =~ $splitter;
448 $self->remoteroot($remoteroot);
450 if (!defined $rfilename) {
451 die "Alert: Cannot resolve '$rrfile', does not match $splitter";
452 } elsif (not length $rfilename or $rfilename eq "RECENT.recent") {
453 ($abslfile,$rfilename,$fh) = $self->_principal_recentfile_fromremote_resosymlink($rfilename);
457 "ignore_link_stat_errors",
459 "max_files_per_connection",
469 $rf0 = File
::Rsync
::Mirror
::Recentfile
->new (map {($_ => $self->$_)} @need_args);
470 $rf0->split_rfilename($rfilename);
471 $abslfile = $rf0->get_remote_recentfile_as_tempfile ();
473 $rf0 = File
::Rsync
::Mirror
::Recentfile
->new_from_file ( $abslfile );
474 $rf0->_current_tempfile ( $abslfile );
475 $rf0->_current_tempfile_fh ( $fh );
476 $rf0->_use_tempfile (1);
477 for my $override (@need_args) {
478 $rf0->$override ( $self->$override );
483 sub principal_recentfile
{
485 my $rf0 = $self->_principal_recentfile;
486 return $rf0 if defined $rf0;
487 my $local = $self->local;
489 $rf0 = File
::Rsync
::Mirror
::Recentfile
->new_from_file ($local);
491 if (my $remote = $self->remote) {
493 if ($localroot = $self->localroot) {
494 # nice, they know what they are doing
496 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
498 $rf0 = $self->_principal_recentfile_fromremote;
500 die "Alert: neither local nor remote specified, cannot continue";
503 $self->_principal_recentfile($rf0);
507 =head2 $recentfiles_arrayref = $obj->recentfiles ()
509 returns a reference to the complete list of recentfile objects that
510 describe this tree. No guarantee is given that the represented
511 recentfiles exist or have been read. They are just bare objects.
517 my $rfs = $self->_recentfiles;
518 return $rfs if defined $rfs;
519 my $rf0 = $self->principal_recentfile;
520 my $pathdb = $self->_pathdb;
521 $rf0->_pathdb ($pathdb);
522 my $aggregator = $rf0->aggregator;
524 for my $agg (@
$aggregator) {
525 my $nrf = $rf0->_sparse_clone;
526 $nrf->interval ( $agg );
527 $nrf->have_mirrored ( 0 );
528 $nrf->_pathdb ( $pathdb );
531 $self->_recentfiles(\
@rf);
535 =head2 $success = $obj->rmirror ( %options )
537 Mirrors all recentfiles of the I<remote> address working through all
538 of them, mirroring their contents.
542 use File::Rsync::Mirror::Recent;
543 my $rrr = File::Rsync::Mirror::Recent->new(
544 ignore_link_stat_errors => 1,
545 localroot => "/home/ftp/pub/PAUSE/authors",
546 remote => "pause.perl.org::authors/RECENT.recent",
547 max_files_per_connection => 5000,
555 _runstatusfile => "recent-rmirror-state.yml",
556 _logfilefordone => "recent-rmirror-donelog.log",
558 $rrr->rmirror ( "skip-deletes" => 1, loop => 1 );
560 Or try without the loop parameter and write the loop yourself:
562 use File::Rsync::Mirror::Recent;
564 for my $t ("authors","modules"){
565 my $rrr = File::Rsync::Mirror::Recent->new(
566 ignore_link_stat_errors => 1,
567 localroot => "/home/ftp/pub/PAUSE/$t",
568 remote => "pause.perl.org::$t/RECENT.recent",
569 max_files_per_connection => 512,
577 _runstatusfile => "recent-rmirror-state-$t.yml",
578 _logfilefordone => "recent-rmirror-donelog-$t.log",
585 $rrr->rmirror ( "skip-deletes" => 1 );
587 warn "sleeping 23\n"; sleep 23;
592 # _alluptodate is unused but at least it worked last time I needed it,
593 # so let us keep it around
596 my $sdm = $self->_dirtymark;
597 return unless defined $sdm;
598 for my $rf (@
{$self->recentfiles}) {
599 return if $rf->seeded;
600 my $rfdm = $rf->dirtymark;
601 return unless defined $rfdm;
602 return unless $rfdm eq $sdm;
603 my $done = $rf->done;
604 return unless defined $done;
605 my $done_intervals = $done->_intervals;
606 return if !defined $done_intervals;
607 # nonono, may be more than one, only covered it must be:
608 # return if @$done_intervals > 1;
609 my $minmax = $rf->minmax;
610 return unless defined $minmax;
611 return unless $done->covered(@
$minmax{qw(max min)});
618 for ( @
{$self->recentfiles} ) { $_->seed(1) }
621 my($self, %options) = @_;
623 my $rfs = $self->recentfiles;
625 $self->principal_recentfile->seed;
627 # XXX exit gracefully (reminder)
630 # XXX needs accessor: warning, if set too low, we do nothing but
631 # mirror the principal!
632 my $minimum_time_per_loop = 20;
634 if (my $logfile = $self->_logfilefordone) {
635 for my $i (0..$#$rfs) {
636 $rfs->[$i]->done->_logfile($logfile);
639 if (my $dirtymark = $self->principal_recentfile->dirtymark) {
640 my $mydm = $self->_dirtymark;
642 $self->_dirtymark($dirtymark);
643 } elsif ($dirtymark ne $mydm) {
644 if ($self->verbose) {
646 if (my $vl = $self->verboselog) {
647 open $fh, ">>", $vl or die "Could not open >> '$vl': $!";
651 print $fh "NewDirtymark: old[$mydm] new[$dirtymark]\n";
653 $self->_dirtymark($dirtymark);
656 my $rstfile = $self->runstatusfile;
657 unless ($self->_have_written_statusfile) {
658 $self->_rmirror_runstatusfile_write ($rstfile, \
%options);
659 $self->_have_written_statusfile(1);
661 $self->_rmirror_loop($minimum_time_per_loop,\
%options);
665 my($self,$minimum_time_per_loop,$options) = @_;
667 my $ttleave = time + $minimum_time_per_loop;
668 my $rstfile = $self->runstatusfile;
669 my $otherproc = $self->_thaw_without_pathdb ($rstfile);
671 if (! defined $pid) {
672 warn "Contention: $!";
678 $self = $self->thaw ($rstfile);
679 my $rfs = $self->recentfiles;
680 $self->principal_recentfile->seed;
681 RECENTFILE
: for my $i (0..$#$rfs) {
683 if (time > $ttleave) {
684 # Must make sure that one file can get fetched in any case
685 $self->_max_one_state(1);
688 $self->_rmirror_mirror ($i, $options);
689 } elsif ($rf->uptodate) {
691 $rfs->[$i+1]->done->merge($rf->done);
693 # no further seed necessary because "periodic" does it
696 WORKUNIT
: while (time < $ttleave) {
698 $self->_rmirror_sleep_per_connection ($i);
701 $self->_rmirror_mirror ($i, $options);
704 if ($self->_max_one_state) {
708 $self->_max_one_state(0);
710 if ($rfs->[-1]->uptodate) {
711 $self->_rmirror_cleanup;
713 unless ($options->{loop}) {
716 $self->_rmirror_runstatusfile_write ($rstfile, $options);
721 $otherproc = $self->_thaw_without_pathdb ($rstfile);
722 if (!$options->{loop} && $otherproc && $otherproc->recentfiles->[-1]->uptodate) {
725 my $sleep = $ttleave - time;
727 $self->_rmirror_endofloop_sleep ($sleep);
729 # negative time not invented yet:)
734 sub _rmirror_mirror
{
735 my($self, $i, $options) = @_;
736 my $rfs = $self->recentfiles;
738 my %locopt = %$options;
739 if ($self->_max_one_state) {
742 $locopt{piecemeal
} = 1;
743 $rf->mirror (%locopt);
745 # we limit to 0 for the case that upstream is broken and has
746 # more than one timestamp (happened on PAUSE 200903)
747 if (my $dirtymark = $rf->dirtymark) {
748 my $mydm = $self->_dirtymark;
749 if (!defined $mydm or $dirtymark ne $mydm) {
750 $self->_dirtymark($dirtymark);
757 sub _rmirror_sleep_per_connection
{
759 my $rfs = $self->recentfiles;
761 my $sleep = $rf->sleep_per_connection;
762 $sleep = 0.42 unless defined $sleep;
763 Time
::HiRes
::sleep $sleep;
764 $rfs->[$i+1]->done->merge($rf->done) if $i < $#$rfs;
767 sub _rmirror_cleanup
{
769 my $pathdb = $self->_pathdb();
770 for my $k (keys %$pathdb) {
771 delete $pathdb->{$k};
773 my $rfs = $self->recentfiles;
774 for my $i (0..$#$rfs-1) {
775 my $thismerged = $rfs->[$i]->merged;
776 my $next = $rfs->[$i+1];
777 my $nextminmax = $next->minmax;
778 if (not defined $thismerged->{epoch
} or _bigfloatlt
($nextminmax->{max
},$thismerged->{epoch
})){
784 =head2 $file = $obj->runstatusfile ($set)
786 Getter/setter for C<_runstatusfile> attribute. Defaults to a temporary
787 file created by C<File::Temp>. A status file is required for
788 C<rmirror> working. Since it may be interesting for debugging
789 purposes, you may want to specify a permanent file for this.
795 $self->_runstatusfile ($set);
797 my $x = $self->_runstatusfile;
798 unless (defined $x) {
800 my $tfile = File
::Temp
->new
802 TEMPLATE
=> "Recent-XXXX",
808 $self->_runstatusfile($tfile->filename);
810 return $self->_runstatusfile;
813 # unused code.... it was an oops, discovered the thaw() method too
814 # late, and starting writing this here....
815 sub _rmirror_runstatusfile_read
{
816 my($self, $file) = @_;
820 # XXX is locking useful here?
821 while (not mkdir "$file.lock") {
822 Time
::HiRes
::sleep 0.2;
823 warn "*** waiting for lock ***" if time - $start >= 3;
825 my $yml = YAML
::Syck
::LoadFile
$file;
826 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
827 my $rself = $yml->{reduced_self
};
828 my $rfs = $yml->{reduced_rfs
};
829 # XXX bring them into self
832 sub _rmirror_runstatusfile_write
{
833 my($self, $file, $options) = @_;
835 while (my($k,$v) = each %$self) {
836 next if $k =~ /^-(_principal_recentfile|_recentfiles)$/;
839 my $rfs = $self->recentfiles;
841 for my $i (0..$#$rfs) {
843 while (my($k,$v) = each %$rf) {
844 next if $k =~ /^-(_current_tempfile_fh|_pathdb|_rsync)$/;
845 $rrfs->[$i]{$k} = $rfs->[$i]{$k};
850 while (not mkdir "$file.lock") {
851 Time
::HiRes
::sleep 0.15;
852 warn "*** waiting for lock ***" if time - $start >= 3;
860 reduced_rfs
=> $rrfs,
861 reduced_self
=> $rself,
863 rename "$file.new", $file or die "Could not rename: $!";
864 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
867 sub _rmirror_endofloop_sleep
{
868 my($self, $sleep) = @_;
869 if ($self->verbose) {
871 if (my $vl = $self->verboselog) {
872 open $fh, ">>", $vl or die "Could not open >> '$vl': $!";
878 "Dorm %d (%s secs)\n",
886 # it returns two things: abslfile and rfilename. But the abslfile is
887 # undef when the rfilename ends in .recent. A weird interface, my
889 sub _principal_recentfile_fromremote_resosymlink
{
890 my($self, $rfilename) = @_;
891 $rfilename = "RECENT.recent" unless length $rfilename;
892 my $abslfile = undef;
894 if ($rfilename =~ /\.recent$/) {
895 # may be a file *or* a symlink,
896 ($abslfile,$fh) = $self->_fetch_as_tempfile ($rfilename);
897 while (-l
$abslfile) {
898 my $symlink = readlink $abslfile;
899 if ($symlink =~ m
|/|) {
900 die "FIXME: filenames containing '/' not supported, got '$symlink'";
902 my $localrfile = File
::Spec
->catfile($self->localroot, $rfilename);
903 if (-e
$localrfile) {
904 my $old_symlink = readlink $localrfile;
905 if ($old_symlink eq $symlink) {
906 unlink $abslfile or die "Cannot unlink '$abslfile': $!";
908 unlink $localrfile; # may fail
909 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
912 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
914 ($abslfile,$fh) = $self->_fetch_as_tempfile ($symlink);
917 return ($abslfile, $rfilename, $fh);
920 # takes a basename, returns an absolute name, does not delete the
921 # file, throws the $fh away. Caller must rename or unlink
923 # XXX needs to activate the fh in the rf0 so that it is able to unlink
924 # the file. I would like that the file is used immediately by $rf0
925 sub _fetch_as_tempfile
{
926 my($self, $rfile) = @_;
927 my($suffix) = $rfile =~ /(\.[^\.]+)$/;
928 $suffix = "" unless defined $suffix;
929 my $fh = File
::Temp
->new
930 (TEMPLATE
=> sprintf(".FRMRecent-%s-XXXX",
933 DIR
=> $self->tempdir || $self->localroot,
938 unless ($rsync = File
::Rsync
->new($self->rsync_options)) {
940 Carp
::confess
(YAML
::Syck
::Dump
($self->rsync_options));
942 my $dst = $fh->filename;
943 local($ENV{LANG
}) = "C";
946 src
=> join("/",$self->remoteroot,$rfile),
948 ) or die "Could not mirror '$rfile' to $fh\: ".join(" ",$rsync->err);
951 chmod $mode, $dst or die "Could not chmod $mode '$dst': $!";
956 =head2 $verbose = $obj->verbose ( $set )
958 Getter/setter method to set verbosity for this F:R:M:Recent object and
959 all associated Recentfile objects.
965 for ( @
{$self->recentfiles} ) { $_->verbose($set) }
966 $self->_verbose ($set);
968 my $x = $self->_verbose;
969 unless (defined $x) {
971 $self->_verbose ($x);
977 =head2 my $vl = $obj->verboselog ( $set )
979 Getter/setter method for the path to the logfile to write verbose
980 progress information to.
982 Note: This is a primitive stop gap solution to get simple verbose
983 logging working. The program still sends error messages to STDERR.
984 Switching to Log4perl or similar is probably the way to go. TBD.
990 for ( @
{$self->recentfiles} ) { $_->verboselog($set) }
991 $self->_verboselog ($set);
993 my $x = $self->_verboselog;
994 unless (defined $x) {
996 $self->_verboselog ($x);
1001 =head1 THE ARCHITECTURE OF A COLLECTION OF RECENTFILES
1003 The idea is that we want to have a short file that records really
1004 recent changes. So that a fresh mirror can be kept fresh as long as
1005 the connectivity is given. Then we want longer files that record the
1006 history before. So when the mirror falls behind the update period
1007 reflected in the shortest file, it can complement the list of recent
1008 file events with the next one. And if this is not long enough we want
1009 another one, again a bit longer. And we want one that completes the
1010 history back to the oldest file. The index files do contain the
1011 complete list of current files. The longer a period covered by an
1012 index file is gone the less often the index file is updated. For
1013 practical reasons adjacent files will often overlap a bit but this is
1014 neither necessary nor enforced. That's the basic idea. The following
1015 example represents a tree that has a few updates every day:
1017 RECENT.recent -> RECENT-1h.yaml
1027 The first file is the principal file, in so far it is the one that is
1028 written first after a filesystem change. Usually a symlink links to it
1029 with a filename that has the same filenameroot and the suffix
1030 C<.recent>. On systems that do not support symlinks there is a plain
1031 copy maintained instead.
1033 The last file, the Z file, contains the complementary files that are
1034 in none of the other files. It may contain C<delete> events but often
1035 C<delete> events are discarded at the transition to the Z file.
1037 =head2 THE INDIVIDUAL RECENTFILE
1039 A I<recentfile> consists of a hash that has two keys: C<meta> and
1040 C<recent>. The C<meta> part has metadata and the C<recent> part has a
1041 list of fileobjects.
1043 =head2 THE META PART
1045 Here we find things that are pretty much self explaining: all
1046 lowercase attributes are accessors and as such explained in the
1047 manpages. The uppercase attribute C<Producers> contains version
1048 information about involved software components. Nothing to worry about
1051 =head2 THE RECENT PART
1053 This is the interesting part. Every entry refers to some filesystem
1054 change (with path, epoch, type).
1056 The I<epoch> value is the point in time when some change was
1057 I<registered> but can be set to arbitrary values. Do not be tempted to
1058 believe that the entry has a direct relation to something like
1059 modification time or change time on the filesystem level. They are not
1060 reflecting release dates. (If you want exact release dates: Barbie is
1061 providing a database of them. See
1062 http://use.perl.org/~barbie/journal/37907).
1064 All these entries can be devided into two types (denoted by the
1065 I<type> attribute): C<new>s and C<delete>s. Changes and creations are
1066 C<new>s. Deletes are C<delete>s.
1068 Besides an I<epoch> and a I<type> attribute we find a third one:
1069 I<path>. This path is relative to the directory we find the
1072 The order of the entries in the I<recentfile> is by decreasing epoch
1073 attribute. These are unique floating point numbers. When the server
1074 has ntp running correctly, then the timestamps are usually reflecting
1075 a real epoch. If time is running backwards, we trump the system epoch
1076 with strictly monotonically increasing floating point timestamps and
1077 guarantee they are unique.
1079 =head1 CORRUPTION AND RECOVERY
1081 If the origin host breaks the promise to deliver consistent and
1082 complete I<recentfiles> then it must update its C<dirtymark> and all
1083 slaves must discard what they cosider the truth. In the worst case
1084 that something goes wrong despite the dirtymark mechanism the way back
1085 to sanity can always be achieved through traditional rsyncing between
1090 This is about speeding up rsync operation on large trees. Uses a small
1091 metadata cocktail and pull technology.
1093 rersyncrecent solves this problem with a couple of (usually 2-10)
1094 lightweight index files which cover different overlapping time
1095 intervals. The master writes these files and the clients/slaves can
1096 construct the full tree from the information contained in them. The
1097 most recent index file usually covers the last seconds or minutes or
1098 hours of the tree and depending on the needs, slaves can rsync every
1099 few seconds or minutes and then bring their trees in full sync.
1101 The rersyncrecent model was developed for CPAN but as it is both
1102 convenient and economic it is also a general purpose solution. I'm
1103 looking forward to see a CPAN backbone that is only a few seconds
1106 =head2 NON-COMPETITORS
1108 File::Mirror JWU/File-Mirror/File-Mirror-0.10.tar.gz only local trees
1109 Mirror::YAML ADAMK/Mirror-YAML-0.03.tar.gz some sort of inner circle
1110 Net::DownloadMirror KNORR/Net-DownloadMirror-0.04.tar.gz FTP sites and stuff
1111 Net::MirrorDir KNORR/Net-MirrorDir-0.05.tar.gz dito
1112 Net::UploadMirror KNORR/Net-UploadMirror-0.06.tar.gz dito
1113 Pushmi::Mirror CLKAO/Pushmi-v1.0.0.tar.gz something SVK
1115 rsnapshot www.rsnapshot.org focus on backup
1116 csync www.csync.org more like unison
1117 multi-rsync sourceforge 167893 lan push to many
1118 chasm chasmd.org per-directory manifests
1122 The problem to solve which clusters and ftp mirrors and otherwise
1123 replicated datasets like CPAN share: how to transfer only a minimum
1124 amount of data to determine the diff between two hosts.
1126 Normally it takes a long time to determine the diff itself before it
1127 can be transferred. Known solutions at the time of this writing are
1128 csync2, and rsync 3 batch mode.
1130 For many years the best solution was B<csync2> which solves the
1131 problem by maintaining a sqlite database on both ends and talking a
1132 highly sophisticated protocol to quickly determine which files to send
1133 and which to delete at any given point in time. Csync2 is often
1134 inconvenient because it is push technology and the act of syncing
1135 demands quite an intimate relationship between the sender and the
1136 receiver. This is hard to achieve in an environment of loosely coupled
1137 sites where the number of sites is large or connections are unreliable
1138 or network topology is changing.
1140 B<Rsync 3 batch mode> works around these problems by providing
1141 rsync-able batch files which allow receiving nodes to replay the
1142 history of the other nodes. This reduces the need to have an
1143 incestuous relation but it has the disadvantage that these batch files
1144 replicate the contents of the involved files. This seems inappropriate
1145 when the nodes already have a means of communicating over rsync.
1147 =head2 HONORABLE MENTION
1149 B<instantmirror> at https://fedorahosted.org/InstantMirror/ is an
1150 ambitious project that tries to combine various technologies (squid,
1151 bittorrent) to overcome the current slowness with the main focus on
1152 fedora. It's been founded in 2009-03 and at the time of this writing
1153 it is still a bit early to comment on.
1157 If the tree of the master server is changing faster than the bandwidth
1158 permits to mirror then additional protocols may need to be deployed.
1159 Certainly p2p/bittorrent can help in such situations because
1160 downloading sites help each other and bittorrent chunks large files
1163 =head1 FUTURE DIRECTIONS
1165 Currently the origin server must keep track of injected and removed
1166 files. Should be supported by an inotify-based assistant.
1168 Convince other users outside the CPAN like
1169 http://fedoraproject.org/wiki/Infrastructure/Mirroring
1173 L<File::Rsync::Mirror::Recentfile>,
1174 L<File::Rsync::Mirror::Recentfile::Done>,
1175 L<File::Rsync::Mirror::Recentfile::FakeBigFloat>
1179 Please report any bugs or feature requests through the web interface
1181 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Rsync-Mirror-Recent>.
1182 I will be notified, and then you'll automatically be notified of
1183 progress on your bug as I make changes.
1187 You can find documentation for this module with the perldoc command.
1189 perldoc File::Rsync::Mirror::Recent
1191 You can also look for information at:
1195 =item * RT: CPAN's request tracker
1197 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Rsync-Mirror-Recent>
1199 =item * AnnoCPAN: Annotated CPAN documentation
1201 L<http://annocpan.org/dist/File-Rsync-Mirror-Recent>
1203 =item * CPAN Ratings
1205 L<http://cpanratings.perl.org/d/File-Rsync-Mirror-Recent>
1209 L<http://search.cpan.org/dist/File-Rsync-Mirror-Recent>
1214 =head1 ACKNOWLEDGEMENTS
1216 Thanks to RJBS for module-starter.
1222 =head1 COPYRIGHT & LICENSE
1224 Copyright 2008, 2009 Andreas König.
1226 This program is free software; you can redistribute it and/or modify it
1227 under the same terms as Perl itself.
1232 1; # End of File::Rsync::Mirror::Recent