releasing the trial release as stable
[rersyncrecent.git] / lib / File / Rsync / Mirror / Recentfile.pm
blob9af2e23b5b355e33b87dcab4c2b454badb0c6560
1 package File::Rsync::Mirror::Recentfile;
3 # use warnings;
4 use strict;
6 =encoding utf-8
8 =head1 NAME
10 File::Rsync::Mirror::Recentfile - mirroring via rsync made efficient
12 =cut
14 my $HAVE = {};
15 for my $package (
16 "Data::Serializer",
17 "File::Rsync"
18 ) {
19 $HAVE->{$package} = eval qq{ require $package; };
21 use Config;
22 use File::Basename qw(basename dirname fileparse);
23 use File::Copy qw(cp);
24 use File::Path qw(mkpath);
25 use File::Rsync::Mirror::Recentfile::FakeBigFloat qw(:all);
26 use File::Temp;
27 use List::Util qw(first max min);
28 use Scalar::Util qw(blessed reftype);
29 use Storable;
30 use Time::HiRes qw();
31 use YAML::Syck;
33 use version; our $VERSION = qv('0.0.9');
35 use constant MAX_INT => ~0>>1; # anything better?
36 use constant DEFAULT_PROTOCOL => 1;
38 # cf. interval_secs
39 my %seconds;
41 # maybe subclass if this mapping is bad?
42 my %serializers;
44 =head1 SYNOPSIS
46 Writer (of a single file):
48 use File::Rsync::Mirror::Recentfile;
49 my $fr = File::Rsync::Mirror::Recentfile->new
51 interval => q(6h),
52 filenameroot => "RECENT",
53 comment => "These 'RECENT' files are part of a test of a new CPAN mirroring concept. Please ignore them for now.",
54 localroot => "/home/ftp/pub/PAUSE/authors/",
55 aggregator => [qw(1d 1W 1M 1Q 1Y Z)],
57 $rf->update("/home/ftp/pub/PAUSE/authors/id/A/AN/ANDK/CPAN-1.92_63.tar.gz","new");
59 Reader/mirrorer:
61 my $rf = File::Rsync::Mirror::Recentfile->new
63 filenameroot => "RECENT",
64 interval => q(6h),
65 localroot => "/home/ftp/pub/PAUSE/authors",
66 remote_dir => "",
67 remote_host => "pause.perl.org",
68 remote_module => "authors",
69 rsync_options => {
70 compress => 1,
71 'rsync-path' => '/usr/bin/rsync',
72 links => 1,
73 times => 1,
74 'omit-dir-times' => 1,
75 checksum => 1,
77 verbose => 1,
79 $rf->mirror;
81 Aggregator (usually the writer):
83 my $rf = File::Rsync::Mirror::Recentfile->new_from_file ( $file );
84 $rf->aggregate;
86 =head1 DESCRIPTION
88 Lower level than F:R:M:Recent, handles one recentfile. Whereas a tree
89 is always composed of several recentfiles, controlled by the
90 F:R:M:Recent object. The Recentfile object has to do the bookkeeping
91 for a single timeslice.
93 =head1 EXPORT
95 No exports.
97 =head1 CONSTRUCTORS / DESTRUCTOR
99 =head2 my $obj = CLASS->new(%hash)
101 Constructor. On every argument pair the key is a method name and the
102 value is an argument to that method name.
104 If a recentfile for this resource already exists, metadata that are
105 not defined by the constructor will be fetched from there as soon as
106 it is being read by recent_events().
108 =cut
110 sub new {
111 my($class, @args) = @_;
112 my $self = bless {}, $class;
113 while (@args) {
114 my($method,$arg) = splice @args, 0, 2;
115 $self->$method($arg);
117 unless (defined $self->protocol) {
118 $self->protocol(DEFAULT_PROTOCOL);
120 unless (defined $self->filenameroot) {
121 $self->filenameroot("RECENT");
123 unless (defined $self->serializer_suffix) {
124 $self->serializer_suffix(".yaml");
126 return $self;
129 =head2 my $obj = CLASS->new_from_file($file)
131 Constructor. $file is a I<recentfile>.
133 =cut
135 sub new_from_file {
136 my($class, $file) = @_;
137 my $self = bless {}, $class;
138 $self->_rfile($file);
139 #?# $self->lock;
140 my $serialized = do { open my $fh, $file or die "Could not open '$file': $!";
141 local $/;
142 <$fh>;
144 # XXX: we can skip this step when the metadata are sufficient, but
145 # we cannot parse the file without some magic stuff about
146 # serialized formats
147 while (-l $file) {
148 my($name,$path) = fileparse $file;
149 my $symlink = readlink $file;
150 if ($symlink =~ m|/|) {
151 die "FIXME: filenames containing '/' not supported, got $symlink";
153 $file = File::Spec->catfile ( $path, $symlink );
155 my($name,$path,$suffix) = fileparse $file, keys %serializers;
156 $self->serializer_suffix($suffix);
157 $self->localroot($path);
158 die "Could not determine file format from suffix" unless $suffix;
159 my $deserialized;
160 if ($suffix eq ".yaml") {
161 require YAML::Syck;
162 $deserialized = YAML::Syck::LoadFile($file);
163 } elsif ($HAVE->{"Data::Serializer"}) {
164 my $serializer = Data::Serializer->new
165 ( serializer => $serializers{$suffix} );
166 $deserialized = $serializer->raw_deserialize($serialized);
167 } else {
168 die "Data::Serializer not installed, cannot proceed with suffix '$suffix'";
170 while (my($k,$v) = each %{$deserialized->{meta}}) {
171 next if $k ne lc $k; # "Producers"
172 $self->$k($v);
174 unless (defined $self->protocol) {
175 $self->protocol(DEFAULT_PROTOCOL);
177 return $self;
180 =head2 DESTROY
182 A simple unlock.
184 =cut
185 sub DESTROY {
186 my $self = shift;
187 $self->unlock;
188 unless ($self->_current_tempfile_fh) {
189 if (my $tempfile = $self->_current_tempfile) {
190 if (-e $tempfile) {
191 # unlink $tempfile; # may fail in global destruction
197 =head1 ACCESSORS
199 =cut
201 my @accessors;
203 BEGIN {
204 @accessors = (
205 "_current_tempfile",
206 "_current_tempfile_fh",
207 "_delayed_operations",
208 "_done",
209 "_interval",
210 "_is_locked",
211 "_localroot",
212 "_merged",
213 "_pathdb",
214 "_remember_last_uptodate_call",
215 "_remote_dir",
216 "_remoteroot",
217 "_requires_fsck",
218 "_rfile",
219 "_rsync",
220 "__verified_tempdir",
221 "_seeded",
222 "_uptodateness_ever_reached",
223 "_use_tempfile",
226 my @pod_lines =
227 split /\n/, <<'=cut'; push @accessors, grep {s/^=item\s+//} @pod_lines; }
229 =over 4
231 =item aggregator
233 A list of interval specs that tell the aggregator which I<recentfile>s
234 are to be produced.
236 =item canonize
238 The name of a method to canonize the path before rsyncing. Only
239 supported value is C<naive_path_normalize>. Defaults to that.
241 =item comment
243 A comment about this tree and setup.
245 =item dirtymark
247 A timestamp. The dirtymark is updated whenever an out of band change
248 on the origin server is performed that violates the protocol. Say,
249 they add or remove files in the middle somewhere. Slaves must react
250 with a devaluation of their C<done> structure which then leads to a
251 full re-sync of all files. Implementation note: dirtymark may increase
252 or decrease.
254 =item filenameroot
256 The (prefix of the) filename we use for this I<recentfile>. Defaults to
257 C<RECENT>. The string must not contain a directory separator.
259 =item have_mirrored
261 Timestamp remembering when we mirrored this recentfile the last time.
262 Only relevant for slaves.
264 =item ignore_link_stat_errors
266 If set to true, rsync errors are ignored that complain about link stat
267 errors. These seem to happen only when there are files missing at the
268 origin. In race conditions this can always happen, so it defaults to
269 true.
271 =item is_slave
273 If set to true, this object will fetch a new recentfile from remote
274 when the timespan between the last mirror (see have_mirrored) and now
275 is too large (see C<ttl>).
277 =item keep_delete_objects_forever
279 The default for delete events is that they are passed through the
280 collection of recentfile objects until they reach the Z file. There
281 they get dropped so that the associated file object ceases to exist at
282 all. By setting C<keep_delete_objects_forever> the delete objects are
283 kept forever. This makes the Z file larger but has the advantage that
284 slaves that have interrupted mirroring for a long time still can clean
285 up their copy.
287 =item locktimeout
289 After how many seconds shall we die if we cannot lock a I<recentfile>?
290 Defaults to 600 seconds.
292 =item loopinterval
294 When mirror_loop is called, this accessor can specify how much time
295 every loop shall at least take. If the work of a loop is done before
296 that time has gone, sleeps for the rest of the time. Defaults to
297 arbitrary 42 seconds.
299 =item max_files_per_connection
301 Maximum number of files that are transferred on a single rsync call.
302 Setting it higher means higher performance at the price of holding
303 connections longer and potentially disturbing other users in the pool.
304 Defaults to the arbitrary value 42.
306 =item max_rsync_errors
308 When rsync operations encounter that many errors without any resetting
309 success in between, then we die. Defaults to unlimited. A value of
310 -1 means we run forever ignoring all rsync errors.
312 =item minmax
314 Hashref remembering when we read the recent_events from this file the
315 last time and what the timespan was.
317 =item protocol
319 When the RECENT file format changes, we increment the protocol. We try
320 to support older protocols in later releases.
322 =item remote_host
324 The host we are mirroring from. Leave empty for the local filesystem.
326 =item remote_module
328 Rsync servers have so called modules to separate directory trees from
329 each other. Put here the name of the module under which we are
330 mirroring. Leave empty for local filesystem.
332 =item rsync_options
334 Things like compress, links, times or checksums. Passed in to the
335 File::Rsync object used to run the mirror.
337 =item serializer_suffix
339 Mostly untested accessor. The only well tested format for
340 I<recentfile>s at the moment is YAML. It is used with YAML::Syck via
341 Data::Serializer. But in principle other formats are supported as
342 well. See section SERIALIZERS below.
344 =item sleep_per_connection
346 Sleep that many seconds (floating point OK) after every chunk of rsyncing
347 has finished. Defaults to arbitrary 0.42.
349 =item tempdir
351 Directory to write temporary files to. Must allow rename operations
352 into the tree which usually means it must live on the same partition
353 as the target directory. Defaults to C<< $self->localroot >>.
355 =item ttl
357 Time to live. Number of seconds after which this recentfile must be
358 fetched again from the origin server. Only relevant for slaves.
359 Defaults to arbitrary 24.2 seconds.
361 =item verbose
363 Boolean to turn on a bit verbosity.
365 =item verboselog
367 Path to the logfile to write verbose progress information to. This is
368 a primitive stop gap solution to get simple verbose logging working.
369 Switching to Log4perl or similar is probably the way to go.
371 =back
373 =cut
375 use accessors @accessors;
377 =head1 METHODS
379 =head2 (void) $obj->aggregate( %options )
381 Takes all intervals that are collected in the accessor called
382 aggregator. Sorts them by actual length of the interval.
383 Removes those that are shorter than our own interval. Then merges this
384 object into the next larger object. The merging continues upwards
385 as long as the next I<recentfile> is old enough to warrant a merge.
387 If a merge is warranted is decided according to the interval of the
388 previous interval so that larger files are not so often updated as
389 smaller ones. If $options{force} is true, all files get updated.
391 Here is an example to illustrate the behaviour. Given aggregators
393 1h 1d 1W 1M 1Q 1Y Z
395 then
397 1h updates 1d on every call to aggregate()
398 1d updates 1W earliest after 1h
399 1W updates 1M earliest after 1d
400 1M updates 1Q earliest after 1W
401 1Q updates 1Y earliest after 1M
402 1Y updates Z earliest after 1Q
404 Note that all but the smallest recentfile get updated at an arbitrary
405 rate and as such are quite useless on their own.
407 =cut
409 sub aggregate {
410 my($self, %option) = @_;
411 my %seen_interval;
412 my @aggs = sort { $a->{secs} <=> $b->{secs} }
413 grep { !$seen_interval{$_->{interval}}++ && $_->{secs} >= $self->interval_secs }
414 map { { interval => $_, secs => $self->interval_secs($_)} }
415 $self->interval, @{$self->aggregator || []};
416 $self->update;
417 $aggs[0]{object} = $self;
418 AGGREGATOR: for my $i (0..$#aggs-1) {
419 my $this = $aggs[$i]{object};
420 my $next = $this->_sparse_clone;
421 $next->interval($aggs[$i+1]{interval});
422 my $want_merge = 0;
423 if ($option{force} || $i == 0) {
424 $want_merge = 1;
425 } else {
426 my $next_rfile = $next->rfile;
427 if (-e $next_rfile) {
428 my $prev = $aggs[$i-1]{object};
429 local $^T = time;
430 my $next_age = 86400 * -M $next_rfile;
431 if ($next_age > $prev->interval_secs) {
432 $want_merge = 1;
434 } else {
435 $want_merge = 1;
438 if ($want_merge) {
439 $next->merge($this);
440 $aggs[$i+1]{object} = $next;
441 } else {
442 last AGGREGATOR;
447 # collect file size and mtime for all files of this aggregate
448 sub _debug_aggregate {
449 my($self) = @_;
450 my @aggs = sort { $a->{secs} <=> $b->{secs} }
451 map { { interval => $_, secs => $self->interval_secs($_)} }
452 $self->interval, @{$self->aggregator || []};
453 my $report = [];
454 for my $i (0..$#aggs) {
455 my $this = Storable::dclone $self;
456 $this->interval($aggs[$i]{interval});
457 my $rfile = $this->rfile;
458 my @stat = stat $rfile;
459 push @$report, {rfile => $rfile, size => $stat[7], mtime => $stat[9]};
461 $report;
464 # (void) $self->_assert_symlink()
465 sub _assert_symlink {
466 my($self) = @_;
467 my $recentrecentfile = File::Spec->catfile
469 $self->localroot,
470 sprintf
472 "%s.recent",
473 $self->filenameroot
476 if ($Config{d_symlink} eq "define") {
477 my $howto_create_symlink; # 0=no need; 1=straight symlink; 2=rename symlink
478 if (-l $recentrecentfile) {
479 my $found_symlink = readlink $recentrecentfile;
480 if ($found_symlink eq $self->rfilename) {
481 return;
482 } else {
483 $howto_create_symlink = 2;
485 } else {
486 $howto_create_symlink = 1;
488 if (1 == $howto_create_symlink) {
489 symlink $self->rfilename, $recentrecentfile or die "Could not create symlink '$recentrecentfile': $!"
490 } else {
491 unlink "$recentrecentfile.$$"; # may fail
492 symlink $self->rfilename, "$recentrecentfile.$$" or die "Could not create symlink '$recentrecentfile.$$': $!";
493 rename "$recentrecentfile.$$", $recentrecentfile or die "Could not rename '$recentrecentfile.$$' to $recentrecentfile: $!";
495 } else {
496 warn "Warning: symlinks not supported on this system, doing a copy instead\n";
497 unlink "$recentrecentfile.$$"; # may fail
498 cp $self->rfilename, "$recentrecentfile.$$" or die "Could not copy to '$recentrecentfile.$$': $!";
499 rename "$recentrecentfile.$$", $recentrecentfile or die "Could not rename '$recentrecentfile.$$' to $recentrecentfile: $!";
503 =head2 $hashref = $obj->delayed_operations
505 A hash of hashes containing unlink and rmdir operations which had to
506 wait until the recentfile got unhidden in order to not confuse
507 downstream mirrors (in case we have some).
509 =cut
511 sub delayed_operations {
512 my($self) = @_;
513 my $x = $self->_delayed_operations;
514 unless (defined $x) {
515 $x = {
516 unlink => {},
517 rmdir => {},
519 $self->_delayed_operations ($x);
521 return $x;
524 =head2 $done = $obj->done
526 C<$done> is a reference to a L<File::Rsync::Mirror::Recentfile::Done>
527 object that keeps track of rsync activities. Only needed and used when
528 we are a mirroring slave.
530 =cut
532 sub done {
533 my($self) = @_;
534 require File::Rsync::Mirror::Recentfile::Done;
535 my $done = $self->_done;
536 if (!$done) {
537 $done = File::Rsync::Mirror::Recentfile::Done->new();
538 $done->_rfinterval ($self->interval);
539 $self->_done ( $done );
540 } elsif (!blessed $done) {
541 # when the serializer does not support blessed objects
542 bless $done, 'File::Rsync::Mirror::Recentfile::Done';
543 $self->_done ( $done );
545 return $done;
548 =head2 $tempfilename = $obj->get_remote_recentfile_as_tempfile ()
550 Stores the remote I<recentfile> locally as a tempfile. The caller is
551 responsible to remove the file after use.
553 Note: if you're intending to act as an rsync server for other slaves,
554 then you must prefer this method to fetch that file with
555 get_remotefile(). Otherwise downstream mirrors would expect you to
556 already have mirrored all the files that are in the I<recentfile>
557 before you have them mirrored.
559 =cut
561 sub get_remote_recentfile_as_tempfile {
562 my($self) = @_;
563 mkpath $self->localroot;
564 my $fh;
565 my $trfilename;
566 if ( $self->_use_tempfile() ) {
567 if ($self->ttl_reached) {
568 $fh = $self->_current_tempfile_fh;
569 $trfilename = $self->rfilename;
570 } else {
571 return $self->_current_tempfile;
573 } else {
574 $trfilename = $self->rfilename;
577 my $dst;
578 if ($fh) {
579 $dst = $self->_current_tempfile;
580 } else {
581 $fh = $self->_get_remote_rat_provide_tempfile_object ($trfilename);
582 $dst = $fh->filename;
583 $self->_current_tempfile ($dst);
584 my $rfile = eval { $self->rfile; }; # may fail (RECENT.recent has no rfile)
585 if (defined $rfile && -e $rfile) {
586 # saving on bandwidth. Might need to be configurable
587 # $self->bandwidth_is_cheap?
588 cp $rfile, $dst or die "Could not copy '$rfile' to '$dst': $!"
591 my $src = join ("/",
592 $self->remoteroot,
593 $trfilename,
595 if ($self->verbose) {
596 my $doing = -e $dst ? "Sync" : "Get";
597 my $display_dst = join "/", "...", basename(dirname($dst)), basename($dst);
598 my $LFH = $self->_logfilehandle;
599 printf $LFH
601 "%-4s %d (1/1/%s) temp %s ... ",
602 $doing,
603 time,
604 $self->interval,
605 $display_dst,
608 my $gaveup = 0;
609 my $retried = 0;
610 local($ENV{LANG}) = "C";
611 while (!$self->rsync->exec(
612 src => $src,
613 dst => $dst,
614 )) {
615 $self->register_rsync_error ($self->rsync->err);
616 if (++$retried >= 3) {
617 warn "XXX giving up";
618 $gaveup = 1;
619 last;
622 if ($gaveup) {
623 my $LFH = $self->_logfilehandle;
624 printf $LFH "Warning: gave up mirroring %s, will try again later", $self->interval;
625 } else {
626 $self->_refresh_internals ($dst);
627 $self->have_mirrored (Time::HiRes::time);
628 $self->un_register_rsync_error ();
630 $self->unseed;
631 if ($self->verbose) {
632 my $LFH = $self->_logfilehandle;
633 print $LFH "DONE\n";
635 my $mode = 0644;
636 chmod $mode, $dst or die "Could not chmod $mode '$dst': $!";
637 return $dst;
640 sub _verified_tempdir {
641 my($self) = @_;
642 my $tempdir = $self->__verified_tempdir();
643 return $tempdir if defined $tempdir;
644 unless ($tempdir = $self->tempdir) {
645 $tempdir = $self->localroot;
647 unless (-d $tempdir) {
648 mkpath $tempdir;
650 $self->__verified_tempdir($tempdir);
651 return $tempdir;
654 sub _get_remote_rat_provide_tempfile_object {
655 my($self, $trfilename) = @_;
656 my $_verified_tempdir = $self->_verified_tempdir;
657 my $fh = File::Temp->new
658 (TEMPLATE => sprintf(".FRMRecent-%s-XXXX",
659 $trfilename,
661 DIR => $_verified_tempdir,
662 SUFFIX => $self->serializer_suffix,
663 UNLINK => $self->_use_tempfile,
665 my $mode = 0644;
666 my $dst = $fh->filename;
667 chmod $mode, $dst or die "Could not chmod $mode '$dst': $!";
668 if ($self->_use_tempfile) {
669 $self->_current_tempfile_fh ($fh); # delay self destruction
671 return $fh;
674 sub _logfilehandle {
675 my($self) = @_;
676 my $fh;
677 if (my $vl = $self->verboselog) {
678 open $fh, ">>", $vl or die "Could not open >> '$vl': $!";
679 } else {
680 $fh = \*STDERR;
682 return $fh;
685 =head2 $localpath = $obj->get_remotefile ( $relative_path )
687 Rsyncs one single remote file to local filesystem.
689 Note: no locking is done on this file. Any number of processes may
690 mirror this object.
692 Note II: do not use for recentfiles. If you are a cascading
693 slave/server combination, it would confuse other slaves. They would
694 expect the contents of these recentfiles to be available. Use
695 get_remote_recentfile_as_tempfile() instead.
697 =cut
699 sub get_remotefile {
700 my($self, $path) = @_;
701 my $dst = File::Spec->catfile($self->localroot, $path);
702 mkpath dirname $dst;
703 if ($self->verbose) {
704 my $doing = -e $dst ? "Sync" : "Get";
705 my $LFH = $self->_logfilehandle;
706 printf $LFH
708 "%-4s %d (1/1/%s) %s ... ",
709 $doing,
710 time,
711 $self->interval,
712 $path,
715 local($ENV{LANG}) = "C";
716 my $remoteroot = $self->remoteroot or die "Alert: missing remoteroot. Cannot continue";
717 while (!$self->rsync->exec(
718 src => join("/",
719 $remoteroot,
720 $path),
721 dst => $dst,
722 )) {
723 $self->register_rsync_error ($self->rsync->err);
725 $self->un_register_rsync_error ();
726 if ($self->verbose) {
727 my $LFH = $self->_logfilehandle;
728 print $LFH "DONE\n";
730 return $dst;
733 =head2 $obj->interval ( $interval_spec )
735 Get/set accessor. $interval_spec is a string and described below in
736 the section INTERVAL SPEC.
738 =cut
740 sub interval {
741 my ($self, $interval) = @_;
742 if (@_ >= 2) {
743 $self->_interval($interval);
744 $self->_rfile(undef);
746 $interval = $self->_interval;
747 unless (defined $interval) {
748 # do not ask the $self too much, it recurses!
749 require Carp;
750 Carp::confess("Alert: interval undefined for '".$self."'. Cannot continue.");
752 return $interval;
755 =head2 $secs = $obj->interval_secs ( $interval_spec )
757 $interval_spec is described below in the section INTERVAL SPEC. If
758 empty defaults to the inherent interval for this object.
760 =cut
762 sub interval_secs {
763 my ($self, $interval) = @_;
764 $interval ||= $self->interval;
765 unless (defined $interval) {
766 die "interval_secs() called without argument on an object without a declared one";
768 my ($n,$t) = $interval =~ /^(\d*)([smhdWMQYZ]$)/ or
769 die "Could not determine seconds from interval[$interval]";
770 if ($interval eq "Z") {
771 return MAX_INT;
772 } elsif (exists $seconds{$t} and $n =~ /^\d+$/) {
773 return $seconds{$t}*$n;
774 } else {
775 die "Invalid interval specification: n[$n]t[$t]";
779 =head2 $obj->localroot ( $localroot )
781 Get/set accessor. The local root of the tree. Guaranteed without
782 trailing slash.
784 =cut
786 sub localroot {
787 my ($self, $localroot) = @_;
788 if (@_ >= 2) {
789 $localroot =~ s|/$||;
790 $self->_localroot($localroot);
791 $self->_rfile(undef);
793 $localroot = $self->_localroot;
796 =head2 $ret = $obj->local_path($path_found_in_recentfile)
798 Combines the path to our local mirror and the path of an object found
799 in this I<recentfile>. In other words: the target of a mirror operation.
801 Implementation note: We split on slashes and then use
802 File::Spec::catfile to adjust to the local operating system.
804 =cut
806 sub local_path {
807 my($self,$path) = @_;
808 unless (defined $path) {
809 # seems like a degenerated case
810 return $self->localroot;
812 my @p = split m|/|, $path;
813 File::Spec->catfile($self->localroot,@p);
816 =head2 (void) $obj->lock
818 Locking is implemented with an C<mkdir> on a locking directory
819 (C<.lock> appended to $rfile).
821 =cut
823 sub lock {
824 my ($self) = @_;
825 # not using flock because it locks on filehandles instead of
826 # old school ressources.
827 my $locked = $self->_is_locked and return;
828 my $rfile = $self->rfile;
829 # XXX need a way to allow breaking the lock
830 my $start = time;
831 my $locktimeout = $self->locktimeout || 600;
832 my %have_warned;
833 my $lockdir = "$rfile.lock";
834 my $procfile = "$lockdir/process";
835 GETLOCK: while (not mkdir $lockdir) {
836 if (open my $fh, "<", $procfile) {
837 chomp(my $process = <$fh>);
838 if (0) {
839 } elsif ($process !~ /^\d+$/) {
840 warn "Warning: unknown process holds a lock in '$lockdir', waiting..." unless $have_warned{unknown}++;
841 } elsif ($$ == $process) {
842 last GETLOCK;
843 } elsif (kill 0, $process) {
844 warn "Warning: process $process holds a lock in '$lockdir', waiting..." unless $have_warned{$process}++;
845 } else {
846 warn "Warning: breaking lock held by process $process";
847 sleep 1;
848 last GETLOCK;
850 } else {
851 warn "Warning: unknown process holds a lock in '$lockdir', waiting..." unless $have_warned{unknown}++;
853 Time::HiRes::sleep 0.01;
854 if (time - $start > $locktimeout) {
855 die "Could not acquire lockdirectory '$rfile.lock': $!";
857 } # GETLOCK
858 open my $fh, ">", $procfile or die "Could not open >$procfile\: $!";
859 print $fh $$, "\n";
860 close $fh or die "Could not close: $!";
861 $self->_is_locked (1);
864 =head2 (void) $obj->merge ($other)
866 Bulk update of this object with another one. It's used to merge a
867 smaller and younger $other object into the current one. If this file
868 is a C<Z> file, then we normally do not merge in objects of type
869 C<delete>; this can be overridden by setting
870 keep_delete_objects_forever. But if we encounter an object of type
871 delete we delete the corresponding C<new> object if we have it.
873 If there is nothing to be merged, nothing is done.
875 =cut
877 sub merge {
878 my($self, $other) = @_;
879 $self->_merge_sanitycheck ( $other );
880 $other->lock;
881 my $other_recent = $other->recent_events || [];
882 $self->lock;
883 $self->_merge_locked ( $other, $other_recent );
884 $self->unlock;
885 $other->unlock;
888 sub _merge_locked {
889 my($self, $other, $other_recent) = @_;
890 my $my_recent = $self->recent_events || [];
892 # calculate the target time span
893 my $myepoch = $my_recent->[0] ? $my_recent->[0]{epoch} : undef;
894 my $epoch = $other_recent->[0] ? $other_recent->[0]{epoch} : $myepoch;
895 my $oldest_allowed = 0;
896 my $something_done;
897 unless ($my_recent->[0]) {
898 # obstetrics
899 $something_done = 1;
901 if ($epoch) {
902 if (($other->dirtymark||0) ne ($self->dirtymark||0)) {
903 $oldest_allowed = 0;
904 $something_done = 1;
905 } elsif (my $merged = $self->merged) {
906 my $secs = $self->interval_secs();
907 $oldest_allowed = min($epoch - $secs, $merged->{epoch}||0);
908 if (@$other_recent and
909 _bigfloatlt($other_recent->[-1]{epoch}, $oldest_allowed)
911 $oldest_allowed = $other_recent->[-1]{epoch};
914 while (@$my_recent && _bigfloatlt($my_recent->[-1]{epoch}, $oldest_allowed)) {
915 pop @$my_recent;
916 $something_done = 1;
920 my %have_path;
921 my $other_recent_filtered = [];
922 for my $oev (@$other_recent) {
923 my $oevepoch = $oev->{epoch} || 0;
924 next if _bigfloatlt($oevepoch, $oldest_allowed);
925 my $path = $oev->{path};
926 next if $have_path{$path}++;
927 if ( $self->interval eq "Z"
928 and $oev->{type} eq "delete"
929 and ! $self->keep_delete_objects_forever
931 # do nothing
932 } else {
933 if (!$myepoch || _bigfloatgt($oevepoch, $myepoch)) {
934 $something_done = 1;
936 push @$other_recent_filtered, { epoch => $oev->{epoch}, path => $path, type => $oev->{type} };
939 if ($something_done) {
940 $self->_merge_something_done ($other_recent_filtered, $my_recent, $other_recent, $other, \%have_path, $epoch);
944 sub _merge_something_done {
945 my($self, $other_recent_filtered, $my_recent, $other_recent, $other, $have_path, $epoch) = @_;
946 my $recent = [];
947 my $epoch_conflict = 0;
948 my $last_epoch;
949 ZIP: while (@$other_recent_filtered || @$my_recent) {
950 my $event;
951 if (!@$my_recent ||
952 @$other_recent_filtered && _bigfloatge($other_recent_filtered->[0]{epoch},$my_recent->[0]{epoch})) {
953 $event = shift @$other_recent_filtered;
954 } else {
955 $event = shift @$my_recent;
956 next ZIP if $have_path->{$event->{path}}++;
958 $epoch_conflict=1 if defined $last_epoch && $event->{epoch} eq $last_epoch;
959 $last_epoch = $event->{epoch};
960 push @$recent, $event;
962 if ($epoch_conflict) {
963 my %have_epoch;
964 for (my $i = $#$recent;$i>=0;$i--) {
965 my $epoch = $recent->[$i]{epoch};
966 if ($have_epoch{$epoch}++) {
967 while ($have_epoch{$epoch}) {
968 $epoch = _increase_a_bit($epoch);
970 $recent->[$i]{epoch} = $epoch;
971 $have_epoch{$epoch}++;
975 if (!$self->dirtymark || $other->dirtymark ne $self->dirtymark) {
976 $self->dirtymark ( $other->dirtymark );
978 $self->write_recent($recent);
979 $other->merged({
980 time => Time::HiRes::time, # not used anywhere
981 epoch => $recent->[0]{epoch},
982 into_interval => $self->interval, # not used anywhere
984 $other->write_recent($other_recent);
987 sub _merge_sanitycheck {
988 my($self, $other) = @_;
989 if ($self->interval_secs <= $other->interval_secs) {
990 require Carp;
991 Carp::confess
992 (sprintf
994 "Alert: illegal merge operation of a bigger interval[%d] into a smaller[%d]",
995 $self->interval_secs,
996 $other->interval_secs,
1001 =head2 merged
1003 Hashref denoting when this recentfile has been merged into some other
1004 at which epoch.
1006 =cut
1008 sub merged {
1009 my($self, $set) = @_;
1010 if (defined $set) {
1011 $self->_merged ($set);
1013 my $merged = $self->_merged;
1014 my $into;
1015 if ($merged and $into = $merged->{into_interval} and defined $self->_interval) {
1016 # sanity checks
1017 if ($into eq $self->interval) {
1018 require Carp;
1019 Carp::cluck(sprintf
1021 "Warning: into_interval[%s] same as own interval[%s]. Danger ahead.",
1022 $into,
1023 $self->interval,
1025 } elsif ($self->interval_secs($into) < $self->interval_secs) {
1026 require Carp;
1027 Carp::cluck(sprintf
1029 "Warning: into_interval_secs[%s] smaller than own interval_secs[%s] on interval[%s]. Danger ahead.",
1030 $self->interval_secs($into),
1031 $self->interval_secs,
1032 $self->interval,
1036 $merged;
1039 =head2 $hashref = $obj->meta_data
1041 Returns the hashref of metadata that the server has to add to the
1042 I<recentfile>.
1044 =cut
1046 sub meta_data {
1047 my($self) = @_;
1048 my $ret = $self->{meta};
1049 for my $m (
1050 "aggregator",
1051 "canonize",
1052 "comment",
1053 "dirtymark",
1054 "filenameroot",
1055 "interval",
1056 "merged",
1057 "minmax",
1058 "protocol",
1059 "serializer_suffix",
1061 my $v = $self->$m;
1062 if (defined $v) {
1063 $ret->{$m} = $v;
1066 # XXX need to reset the Producer if I am a writer, keep it when I
1067 # am a reader
1068 $ret->{Producers} ||= {
1069 __PACKAGE__, "$VERSION", # stringified it looks better
1070 '$0', $0,
1071 'time', Time::HiRes::time,
1073 $ret->{dirtymark} ||= Time::HiRes::time;
1074 return $ret;
1077 =head2 $success = $obj->mirror ( %options )
1079 Mirrors the files in this I<recentfile> as reported by
1080 C<recent_events>. Options named C<after>, C<before>, C<max> are passed
1081 through to the C<recent_events> call. The boolean option C<piecemeal>,
1082 if true, causes C<mirror> to only rsync C<max_files_per_connection>
1083 and keep track of the rsynced files so that future calls will rsync
1084 different files until all files are brought to sync.
1086 =cut
1088 sub mirror {
1089 my($self, %options) = @_;
1090 my $trecentfile = $self->get_remote_recentfile_as_tempfile();
1091 $self->_use_tempfile (1);
1092 # skip-deletes is inadequat for passthrough within mirror. We
1093 # would never reach uptodateness when a delete were on a
1094 # borderline
1095 my %passthrough = map { ($_ => $options{$_}) } qw(before after max);
1096 my ($recent_events) = $self->recent_events(%passthrough);
1097 my(@error, @dlcollector); # download-collector: array containing paths we need
1098 my $first_item = 0;
1099 my $last_item = $#$recent_events;
1100 my $done = $self->done;
1101 my $pathdb = $self->_pathdb;
1102 ITEM: for my $i ($first_item..$last_item) {
1103 my $status = +{};
1104 $self->_mirror_item
1107 $recent_events,
1108 $last_item,
1109 $done,
1110 $pathdb,
1111 \@dlcollector,
1112 \%options,
1113 $status,
1114 \@error,
1116 last if $i == $last_item;
1117 if ($status->{mustreturn}){
1118 if ($self->_current_tempfile && ! $self->_current_tempfile_fh) {
1119 # looks like a bug somewhere else
1120 my $t = $self->_current_tempfile;
1121 unlink $t or die "Could not unlink '$t': $!";
1122 $self->_current_tempfile(undef);
1123 $self->_use_tempfile(0);
1125 return;
1128 if (@dlcollector) {
1129 my $success = eval { $self->_mirror_dlcollector (\@dlcollector,$pathdb,$recent_events);};
1130 if (!$success || $@) {
1131 warn "Warning: Unknown error while mirroring: $@";
1132 push @error, $@;
1133 sleep 1;
1136 if ($self->verbose) {
1137 my $LFH = $self->_logfilehandle;
1138 print $LFH "DONE\n";
1140 # once we've gone to the end we consider ourselves free of obligations
1141 $self->unseed;
1142 $self->_mirror_unhide_tempfile ($trecentfile);
1143 $self->_mirror_perform_delayed_ops(\%options);
1144 return !@error;
1147 sub _mirror_item {
1148 my($self,
1150 $recent_events,
1151 $last_item,
1152 $done,
1153 $pathdb,
1154 $dlcollector,
1155 $options,
1156 $status,
1157 $error,
1158 ) = @_;
1159 my $recent_event = $recent_events->[$i];
1160 return if $done->covered ( $recent_event->{epoch} );
1161 if ($pathdb) {
1162 my $rec = $pathdb->{$recent_event->{path}};
1163 if ($rec && $rec->{recentepoch}) {
1164 if (_bigfloatgt
1165 ( $rec->{recentepoch}, $recent_event->{epoch} )){
1166 $done->register ($recent_events, [$i]);
1167 return;
1171 my $dst = $self->local_path($recent_event->{path});
1172 if ($recent_event->{type} eq "new"){
1173 $self->_mirror_item_new
1175 $dst,
1177 $last_item,
1178 $recent_events,
1179 $recent_event,
1180 $dlcollector,
1181 $pathdb,
1182 $status,
1183 $error,
1184 $options,
1186 } elsif ($recent_event->{type} eq "delete") {
1187 my $activity;
1188 if ($options->{'skip-deletes'}) {
1189 $activity = "skipped";
1190 } else {
1191 my @lstat = lstat $dst;
1192 if (! -e _) {
1193 $activity = "not_found";
1194 } elsif (-l _ or not -d _) {
1195 $self->delayed_operations->{unlink}{$dst}++;
1196 $activity = "deleted";
1197 } else {
1198 $self->delayed_operations->{rmdir}{$dst}++;
1199 $activity = "deleted";
1202 $done->register ($recent_events, [$i]);
1203 if ($pathdb) {
1204 $self->_mirror_register_path($pathdb,[$recent_event],$activity);
1206 } else {
1207 warn "Warning: invalid upload type '$recent_event->{type}'";
1211 sub _mirror_item_new {
1212 my($self,
1213 $dst,
1215 $last_item,
1216 $recent_events,
1217 $recent_event,
1218 $dlcollector,
1219 $pathdb,
1220 $status,
1221 $error,
1222 $options,
1223 ) = @_;
1224 if ($self->verbose) {
1225 my $doing = -e $dst ? "Sync" : "Get";
1226 my $LFH = $self->_logfilehandle;
1227 printf $LFH
1229 "%-4s %d (%d/%d/%s) %s ... ",
1230 $doing,
1231 time,
1232 1+$i,
1233 1+$last_item,
1234 $self->interval,
1235 $recent_event->{path},
1238 my $max_files_per_connection = $self->max_files_per_connection || 42;
1239 my $success;
1240 if ($self->verbose) {
1241 my $LFH = $self->_logfilehandle;
1242 print $LFH "\n";
1244 push @$dlcollector, { rev => $recent_event, i => $i };
1245 if (@$dlcollector >= $max_files_per_connection) {
1246 $success = eval {$self->_mirror_dlcollector ($dlcollector,$pathdb,$recent_events);};
1247 my $sleep = $self->sleep_per_connection;
1248 $sleep = 0.42 unless defined $sleep;
1249 Time::HiRes::sleep $sleep;
1250 if ($options->{piecemeal}) {
1251 $status->{mustreturn} = 1;
1252 return;
1254 } else {
1255 return;
1257 if (!$success || $@) {
1258 warn "Warning: Error while mirroring: $@";
1259 push @$error, $@;
1260 sleep 1;
1262 if ($self->verbose) {
1263 my $LFH = $self->_logfilehandle;
1264 print $LFH "DONE\n";
1268 sub _mirror_dlcollector {
1269 my($self,$xcoll,$pathdb,$recent_events) = @_;
1270 my $success = $self->mirror_path([map {$_->{rev}{path}} @$xcoll]);
1271 if ($pathdb) {
1272 $self->_mirror_register_path($pathdb,[map {$_->{rev}} @$xcoll],"rsync");
1274 $self->done->register($recent_events, [map {$_->{i}} @$xcoll]);
1275 @$xcoll = ();
1276 return $success;
1279 sub _mirror_register_path {
1280 my($self,$pathdb,$coll,$activity) = @_;
1281 my $time = time;
1282 for my $item (@$coll) {
1283 $pathdb->{$item->{path}} =
1285 recentepoch => $item->{epoch},
1286 ($activity."_on") => $time,
1291 sub _mirror_unhide_tempfile {
1292 my($self, $trecentfile) = @_;
1293 my $rfile = $self->rfile;
1294 if (rename $trecentfile, $rfile) {
1295 # warn "DEBUG: renamed '$trecentfile' to '$rfile'";
1296 } else {
1297 require Carp;
1298 Carp::confess("Could not rename '$trecentfile' to '$rfile': $!");
1300 $self->_use_tempfile (0);
1301 if (my $ctfh = $self->_current_tempfile_fh) {
1302 $ctfh->unlink_on_destroy (0);
1303 $self->_current_tempfile_fh (undef);
1307 sub _mirror_perform_delayed_ops {
1308 my($self,$options) = @_;
1309 my $delayed = $self->delayed_operations;
1310 for my $dst (keys %{$delayed->{unlink}}) {
1311 unless (unlink $dst) {
1312 require Carp;
1313 Carp::cluck ( "Warning: Error while unlinking '$dst': $!" ) if $options->{verbose};
1315 if ($self->verbose) {
1316 my $doing = "Del";
1317 my $LFH = $self->_logfilehandle;
1318 printf $LFH
1320 "%-4s %d (%s) %s DONE\n",
1321 $doing,
1322 time,
1323 $self->interval,
1324 $dst,
1326 delete $delayed->{unlink}{$dst};
1329 for my $dst (sort {length($b) <=> length($a)} keys %{$delayed->{rmdir}}) {
1330 unless (rmdir $dst) {
1331 require Carp;
1332 Carp::cluck ( "Warning: Error on rmdir '$dst': $!" ) if $options->{verbose};
1334 if ($self->verbose) {
1335 my $doing = "Del";
1336 my $LFH = $self->_logfilehandle;
1337 printf $LFH
1339 "%-4s %d (%s) %s DONE\n",
1340 $doing,
1341 time,
1342 $self->interval,
1343 $dst,
1345 delete $delayed->{rmdir}{$dst};
1350 =head2 $success = $obj->mirror_path ( $arrref | $path )
1352 If the argument is a scalar it is treated as a path. The remote path
1353 is mirrored into the local copy. $path is the path found in the
1354 I<recentfile>, i.e. it is relative to the root directory of the
1355 mirror.
1357 If the argument is an array reference then all elements are treated as
1358 a path below the current tree and all are rsynced with a single
1359 command (and a single connection).
1361 =cut
1363 sub mirror_path {
1364 my($self,$path) = @_;
1365 # XXX simplify the two branches such that $path is treated as
1366 # [$path] maybe even demand the argument as an arrayref to
1367 # simplify docs and code. (rsync-over-recentfile-2.pl uses the
1368 # interface)
1369 if (ref $path and ref $path eq "ARRAY") {
1370 my $dst = $self->localroot;
1371 mkpath dirname $dst;
1372 my($fh) = File::Temp->new(TEMPLATE => sprintf(".%s-XXXX",
1373 lc $self->filenameroot,
1375 TMPDIR => 1,
1376 UNLINK => 0,
1378 for my $p (@$path) {
1379 print $fh $p, "\n";
1381 $fh->flush;
1382 $fh->unlink_on_destroy(1);
1383 my $gaveup = 0;
1384 my $retried = 0;
1385 local($ENV{LANG}) = "C";
1386 while (!$self->rsync->exec
1388 src => join("/",
1389 $self->remoteroot,
1391 dst => $dst,
1392 'files-from' => $fh->filename,
1393 )) {
1394 my(@err) = $self->rsync->err;
1395 if ($self->_my_ignore_link_stat_errors && "@err" =~ m{^ rsync: \s link_stat }x ) {
1396 if ($self->verbose) {
1397 my $LFH = $self->_logfilehandle;
1398 print $LFH "Info: ignoring link_stat error '@err'";
1400 return 1;
1402 $self->register_rsync_error (@err);
1403 if (++$retried >= 3) {
1404 my $batchsize = @$path;
1405 warn "The number of rsync retries now reached 3 within a batch of size $batchsize. Error was '@err'. Giving up now, will retry later, ";
1406 $gaveup = 1;
1407 last;
1409 sleep 1;
1411 unless ($gaveup) {
1412 $self->un_register_rsync_error ();
1414 } else {
1415 my $dst = $self->local_path($path);
1416 mkpath dirname $dst;
1417 local($ENV{LANG}) = "C";
1418 while (!$self->rsync->exec
1420 src => join("/",
1421 $self->remoteroot,
1422 $path
1424 dst => $dst,
1425 )) {
1426 my(@err) = $self->rsync->err;
1427 if ($self->_my_ignore_link_stat_errors && "@err" =~ m{^ rsync: \s link_stat }x ) {
1428 if ($self->verbose) {
1429 my $LFH = $self->_logfilehandle;
1430 print $LFH "Info: ignoring link_stat error '@err'";
1432 return 1;
1434 $self->register_rsync_error (@err);
1436 $self->un_register_rsync_error ();
1438 return 1;
1441 sub _my_ignore_link_stat_errors {
1442 my($self) = @_;
1443 my $x = $self->ignore_link_stat_errors;
1444 $x = 1 unless defined $x;
1445 return $x;
1448 sub _my_current_rfile {
1449 my($self) = @_;
1450 my $rfile;
1451 if ($self->_use_tempfile) {
1452 $rfile = $self->_current_tempfile;
1454 unless ($rfile && -s $rfile) {
1455 $rfile = $self->rfile;
1457 return $rfile;
1460 =head2 $path = $obj->naive_path_normalize ($path)
1462 Takes an absolute unix style path as argument and canonicalizes it to
1463 a shorter path if possible, removing things like double slashes or
1464 C</./> and removes references to C<../> directories to get a shorter
1465 unambiguos path. This is used to make the code easier that determines
1466 if a file passed to C<upgrade()> is indeed below our C<localroot>.
1468 =cut
1470 sub naive_path_normalize {
1471 my($self,$path) = @_;
1472 $path =~ s|/+|/|g;
1473 1 while $path =~ s|/[^/]+/\.\./|/|;
1474 $path =~ s|/$||;
1475 $path;
1478 =head2 $ret = $obj->read_recent_1 ( $data )
1480 Delegate of C<recent_events()> on protocol 1
1482 =cut
1484 sub read_recent_1 {
1485 my($self, $data) = @_;
1486 return $data->{recent};
1489 =head2 $array_ref = $obj->recent_events ( %options )
1491 Note: the code relies on the resource being written atomically. We
1492 cannot lock because we may have no write access. If the caller has
1493 write access (eg. aggregate() or update()), it has to care for any
1494 necessary locking and it MUST write atomically.
1496 If C<$options{after}> is specified, only file events after this
1497 timestamp are returned.
1499 If C<$options{before}> is specified, only file events before this
1500 timestamp are returned.
1502 If C<$options{max}> is specified only a maximum of this many most
1503 recent events is returned.
1505 If C<$options{'skip-deletes'}> is specified, no files-to-be-deleted
1506 will be returned.
1508 If C<$options{contains}> is specified the value must be a hash
1509 reference containing a query. The query may contain the keys C<epoch>,
1510 C<path>, and C<type>. Each represents a condition that must be met. If
1511 there is more than one such key, the conditions are ANDed.
1513 If C<$options{info}> is specified, it must be a hashref. This hashref
1514 will be filled with metadata about the unfiltered recent_events of
1515 this object, in key C<first> there is the first item, in key C<last>
1516 is the last.
1518 =cut
1520 sub recent_events {
1521 my ($self, %options) = @_;
1522 my $info = $options{info};
1523 if ($self->is_slave) {
1524 # XXX seems dubious, might produce tempfiles without removing them?
1525 $self->get_remote_recentfile_as_tempfile;
1527 my $rfile_or_tempfile = $self->_my_current_rfile or return [];
1528 -e $rfile_or_tempfile or return [];
1529 my $suffix = $self->serializer_suffix;
1530 my ($data) = eval {
1531 $self->_try_deserialize
1533 $suffix,
1534 $rfile_or_tempfile,
1537 my $err = $@;
1538 if ($err or !$data) {
1539 return [];
1541 my $re;
1542 if (reftype $data eq 'ARRAY') { # protocol 0
1543 $re = $data;
1544 } else {
1545 $re = $self->_recent_events_protocol_x
1547 $data,
1548 $rfile_or_tempfile,
1551 return $re unless grep {defined $options{$_}} qw(after before contains max skip-deletes);
1552 $self->_recent_events_handle_options ($re, \%options);
1555 # File::Rsync::Mirror::Recentfile::_recent_events_handle_options
1556 sub _recent_events_handle_options {
1557 my($self, $re, $options) = @_;
1558 my $last_item = $#$re;
1559 my $info = $options->{info};
1560 if ($info) {
1561 $info->{first} = $re->[0];
1562 $info->{last} = $re->[-1];
1564 if (defined $options->{after}) {
1565 if ($re->[0]{epoch} > $options->{after}) {
1566 if (
1567 my $f = first
1568 {$re->[$_]{epoch} <= $options->{after}}
1569 0..$#$re
1571 $last_item = $f-1;
1573 } else {
1574 $last_item = -1;
1577 my $first_item = 0;
1578 if (defined $options->{before}) {
1579 if ($re->[0]{epoch} > $options->{before}) {
1580 if (
1581 my $f = first
1582 {$re->[$_]{epoch} < $options->{before}}
1583 0..$last_item
1585 $first_item = $f;
1587 } else {
1588 $first_item = 0;
1591 if (0 != $first_item || -1 != $last_item) {
1592 @$re = splice @$re, $first_item, 1+$last_item-$first_item;
1594 if ($options->{'skip-deletes'}) {
1595 @$re = grep { $_->{type} ne "delete" } @$re;
1597 if (my $contopt = $options->{contains}) {
1598 my $seen_allowed = 0;
1599 for my $allow (qw(epoch path type)) {
1600 if (exists $contopt->{$allow}) {
1601 $seen_allowed++;
1602 my $v = $contopt->{$allow};
1603 @$re = grep { $_->{$allow} eq $v } @$re;
1606 if (keys %$contopt > $seen_allowed) {
1607 require Carp;
1608 Carp::confess
1609 (sprintf "unknown query: %s", join ", ", %$contopt);
1612 if ($options->{max} && @$re > $options->{max}) {
1613 @$re = splice @$re, 0, $options->{max};
1615 $re;
1618 sub _recent_events_protocol_x {
1619 my($self,
1620 $data,
1621 $rfile_or_tempfile,
1622 ) = @_;
1623 my $meth = sprintf "read_recent_%d", $data->{meta}{protocol};
1624 # we may be reading meta for the first time
1625 while (my($k,$v) = each %{$data->{meta}}) {
1626 if ($k ne lc $k){ # "Producers"
1627 $self->{ORIG}{$k} = $v;
1628 next;
1630 next if defined $self->$k;
1631 $self->$k($v);
1633 my $re = $self->$meth ($data);
1634 my $minmax;
1635 if (my @stat = stat $rfile_or_tempfile) {
1636 $minmax = { mtime => $stat[9] };
1637 } else {
1638 # defensive because ABH encountered:
1640 #### Sync 1239828608 (1/1/Z) temp .../authors/.FRMRecent-RECENT-Z.yaml-
1641 #### Ydr_.yaml ... DONE
1642 #### Cannot stat '/mirrors/CPAN/authors/.FRMRecent-RECENT-Z.yaml-
1643 #### Ydr_.yaml': No such file or directory at /usr/lib/perl5/site_perl/
1644 #### 5.8.8/File/Rsync/Mirror/Recentfile.pm line 1558.
1645 #### unlink0: /mirrors/CPAN/authors/.FRMRecent-RECENT-Z.yaml-Ydr_.yaml is
1646 #### gone already at cpan-pause.pl line 0
1648 my $LFH = $self->_logfilehandle;
1649 print $LFH "Warning (maybe harmless): Cannot stat '$rfile_or_tempfile': $!"
1651 if (@$re) {
1652 $minmax->{min} = $re->[-1]{epoch};
1653 $minmax->{max} = $re->[0]{epoch};
1655 $self->minmax ( $minmax );
1656 return $re;
1659 sub _try_deserialize {
1660 my($self,
1661 $suffix,
1662 $rfile_or_tempfile,
1663 ) = @_;
1664 if ($suffix eq ".yaml") {
1665 require YAML::Syck;
1666 YAML::Syck::LoadFile($rfile_or_tempfile);
1667 } elsif ($HAVE->{"Data::Serializer"}) {
1668 my $serializer = Data::Serializer->new
1669 ( serializer => $serializers{$suffix} );
1670 my $serialized = do
1672 open my $fh, $rfile_or_tempfile or die "Could not open: $!";
1673 local $/;
1674 <$fh>;
1676 $serializer->raw_deserialize($serialized);
1677 } else {
1678 die "Data::Serializer not installed, cannot proceed with suffix '$suffix'";
1682 sub _refresh_internals {
1683 my($self, $dst) = @_;
1684 my $class = ref $self;
1685 my $rfpeek = $class->new_from_file ($dst);
1686 for my $acc (qw(
1687 _merged
1688 minmax
1689 )) {
1690 $self->$acc ( $rfpeek->$acc );
1692 my $old_dirtymark = $self->dirtymark;
1693 my $new_dirtymark = $rfpeek->dirtymark;
1694 if ($old_dirtymark && $new_dirtymark && $new_dirtymark ne $old_dirtymark) {
1695 $self->done->reset;
1696 $self->dirtymark ( $new_dirtymark );
1697 $self->_uptodateness_ever_reached(0);
1698 $self->seed;
1702 =head2 $ret = $obj->rfilename
1704 Just the basename of our I<recentfile>, composed from C<filenameroot>,
1705 a dash, C<interval>, and C<serializer_suffix>. E.g. C<RECENT-6h.yaml>
1707 =cut
1709 sub rfilename {
1710 my($self) = @_;
1711 my $file = sprintf("%s-%s%s",
1712 $self->filenameroot,
1713 $self->interval,
1714 $self->serializer_suffix,
1716 return $file;
1719 =head2 $str = $self->remote_dir
1721 The directory we are mirroring from.
1723 =cut
1725 sub remote_dir {
1726 my($self, $set) = @_;
1727 if (defined $set) {
1728 $self->_remote_dir ($set);
1730 my $x = $self->_remote_dir;
1731 $self->is_slave (1);
1732 return $x;
1735 =head2 $str = $obj->remoteroot
1737 =head2 (void) $obj->remoteroot ( $set )
1739 Get/Set the composed prefix needed when rsyncing from a remote module.
1740 If remote_host, remote_module, and remote_dir are set, it is composed
1741 from these.
1743 =cut
1745 sub remoteroot {
1746 my($self, $set) = @_;
1747 if (defined $set) {
1748 $self->_remoteroot($set);
1750 my $remoteroot = $self->_remoteroot;
1751 unless (defined $remoteroot) {
1752 $remoteroot = sprintf
1754 "%s%s%s",
1755 defined $self->remote_host ? ($self->remote_host."::") : "",
1756 defined $self->remote_module ? ($self->remote_module."/") : "",
1757 defined $self->remote_dir ? $self->remote_dir : "",
1759 $self->_remoteroot($remoteroot);
1761 return $remoteroot;
1764 =head2 (void) $obj->split_rfilename ( $recentfilename )
1766 Inverse method to C<rfilename>. C<$recentfilename> is a plain filename
1767 of the pattern
1769 $filenameroot-$interval$serializer_suffix
1771 e.g.
1773 RECENT-1M.yaml
1775 This filename is split into its parts and the parts are fed to the
1776 object itself.
1778 =cut
1780 sub split_rfilename {
1781 my($self, $rfname) = @_;
1782 my($splitter) = qr(^(.+)-([^-\.]+)(\.[^\.]+));
1783 if (my($f,$i,$s) = $rfname =~ $splitter) {
1784 $self->filenameroot ($f);
1785 $self->interval ($i);
1786 $self->serializer_suffix ($s);
1787 } else {
1788 die "Alert: cannot split '$rfname', doesn't match '$splitter'";
1790 return;
1793 =head2 my $rfile = $obj->rfile
1795 Returns the full path of the I<recentfile>
1797 =cut
1799 sub rfile {
1800 my($self) = @_;
1801 my $rfile = $self->_rfile;
1802 return $rfile if defined $rfile;
1803 $rfile = File::Spec->catfile
1804 ($self->localroot,
1805 $self->rfilename,
1807 $self->_rfile ($rfile);
1808 return $rfile;
1811 =head2 $rsync_obj = $obj->rsync
1813 The File::Rsync object that this object uses for communicating with an
1814 upstream server.
1816 =cut
1818 sub rsync {
1819 my($self) = @_;
1820 my $rsync = $self->_rsync;
1821 unless (defined $rsync) {
1822 my $rsync_options = $self->rsync_options || {};
1823 if ($HAVE->{"File::Rsync"}) {
1824 $rsync = File::Rsync->new($rsync_options);
1825 $self->_rsync($rsync);
1826 } else {
1827 die "File::Rsync required for rsync operations. Cannot continue";
1830 return $rsync;
1833 =head2 (void) $obj->register_rsync_error(@err)
1835 =head2 (void) $obj->un_register_rsync_error()
1837 Register_rsync_error is called whenever the File::Rsync object fails
1838 on an exec (say, connection doesn't succeed). It issues a warning and
1839 sleeps for an increasing amount of time. Un_register_rsync_error
1840 resets the error count. See also accessor C<max_rsync_errors>.
1842 =cut
1845 my $no_success_count = 0;
1846 my $no_success_time = 0;
1847 sub register_rsync_error {
1848 my($self, @err) = @_;
1849 chomp @err;
1850 $no_success_time = time;
1851 $no_success_count++;
1852 my $max_rsync_errors = $self->max_rsync_errors;
1853 $max_rsync_errors = MAX_INT unless defined $max_rsync_errors;
1854 if ($max_rsync_errors>=0 && $no_success_count >= $max_rsync_errors) {
1855 require Carp;
1856 Carp::confess
1858 sprintf
1860 "Alert: Error while rsyncing (%s): '%s', error count: %d, exiting now,",
1861 $self->interval,
1862 join(" ",@err),
1863 $no_success_count,
1866 my $sleep = 12 * $no_success_count;
1867 $sleep = 300 if $sleep > 300;
1868 require Carp;
1869 Carp::cluck
1870 (sprintf
1872 "Warning: %s, Error while rsyncing (%s): '%s', sleeping %d",
1873 scalar(localtime($no_success_time)),
1874 $self->interval,
1875 join(" ",@err),
1876 $sleep,
1878 sleep $sleep
1880 sub un_register_rsync_error {
1881 my($self) = @_;
1882 $no_success_time = 0;
1883 $no_success_count = 0;
1887 =head2 $clone = $obj->_sparse_clone
1889 Clones just as much from itself that it does not hurt. Experimental
1890 method.
1892 Note: what fits better: sparse or shallow? Other suggestions?
1894 =cut
1896 sub _sparse_clone {
1897 my($self) = @_;
1898 my $new = bless {}, ref $self;
1899 for my $m (qw(
1900 _interval
1901 _localroot
1902 _remoteroot
1903 _rfile
1904 _use_tempfile
1905 aggregator
1906 filenameroot
1907 ignore_link_stat_errors
1908 is_slave
1909 max_files_per_connection
1910 protocol
1911 rsync_options
1912 serializer_suffix
1913 sleep_per_connection
1914 tempdir
1915 verbose
1916 )) {
1917 my $o = $self->$m;
1918 $o = Storable::dclone $o if ref $o;
1919 $new->$m($o);
1921 $new;
1924 =head2 $boolean = OBJ->ttl_reached ()
1926 =cut
1928 sub ttl_reached {
1929 my($self) = @_;
1930 my $have_mirrored = $self->have_mirrored || 0;
1931 my $now = Time::HiRes::time;
1932 my $ttl = $self->ttl;
1933 $ttl = 24.2 unless defined $ttl;
1934 if ($now > $have_mirrored + $ttl) {
1935 return 1;
1937 return 0;
1940 =head2 (void) $obj->unlock()
1942 Unlocking is implemented with an C<rmdir> on a locking directory
1943 (C<.lock> appended to $rfile).
1945 =cut
1947 sub unlock {
1948 my($self) = @_;
1949 return unless $self->_is_locked;
1950 my $rfile = $self->rfile;
1951 unlink "$rfile.lock/process" or warn "Could not unlink lockfile '$rfile.lock/process': $!";
1952 rmdir "$rfile.lock" or warn "Could not rmdir lockdir '$rfile.lock': $!";;
1953 $self->_is_locked (0);
1956 =head2 unseed
1958 Sets this recentfile in the state of not 'seeded'.
1960 =cut
1961 sub unseed {
1962 my($self) = @_;
1963 $self->seeded(0);
1966 =head2 $ret = $obj->update ($path, $type)
1968 =head2 $ret = $obj->update ($path, "new", $dirty_epoch)
1970 =head2 $ret = $obj->update ()
1972 Enter one file into the local I<recentfile>. $path is the (usually
1973 absolute) path. If the path is outside I<our> tree, then it is
1974 ignored.
1976 C<$type> is one of C<new> or C<delete>.
1978 Events of type C<new> may set $dirty_epoch. $dirty_epoch is normally
1979 not used and the epoch is calculated by the update() routine itself
1980 based on current time. But if there is the demand to insert a
1981 not-so-current file into the dataset, then the caller sets
1982 $dirty_epoch. This causes the epoch of the registered event to become
1983 $dirty_epoch or -- if the exact value given is already taken -- a tiny
1984 bit more. As compensation the dirtymark of the whole dataset is set to
1985 now or the current epoch, whichever is higher. Note: setting the
1986 dirty_epoch to the future is prohibited as it's very unlikely to be
1987 intended: it definitely might wreak havoc with the index files.
1989 The new file event is unshifted (or, if dirty_epoch is set, inserted
1990 at the place it belongs to, according to the rule to have a sequence
1991 of strictly decreasing timestamps) to the array of recent_events and
1992 the array is shortened to the length of the timespan allowed. This is
1993 usually the timespan specified by the interval of this recentfile but
1994 as long as this recentfile has not been merged to another one, the
1995 timespan may grow without bounds.
1997 The third form runs an update without inserting a new file. This may
1998 be desired to truncate a recentfile.
2000 =cut
2001 sub _epoch_monotonically_increasing {
2002 my($self,$epoch,$recent) = @_;
2003 return $epoch unless @$recent; # the first one goes unoffended
2004 if (_bigfloatgt("".$epoch,$recent->[0]{epoch})) {
2005 return $epoch;
2006 } else {
2007 return _increase_a_bit($recent->[0]{epoch});
2010 sub update {
2011 my($self,$path,$type,$dirty_epoch) = @_;
2012 if (defined $path or defined $type or defined $dirty_epoch) {
2013 die "update called without path argument" unless defined $path;
2014 die "update called without type argument" unless defined $type;
2015 die "update called with illegal type argument: $type" unless $type =~ /(new|delete)/;
2017 $self->lock;
2018 my $ctx = $self->_locked_batch_update([{path=>$path,type=>$type,epoch=>$dirty_epoch}]);
2019 $self->write_recent($ctx->{recent}) if $ctx->{something_done};
2020 $self->_assert_symlink;
2021 $self->unlock;
2024 =head2 $obj->batch_update($batch)
2026 Like update but for many files. $batch is an arrayref containing
2027 hashrefs with the structure
2030 path => $path,
2031 type => $type,
2032 epoch => $epoch,
2037 =cut
2038 sub batch_update {
2039 my($self,$batch) = @_;
2040 $self->lock;
2041 my $ctx = $self->_locked_batch_update($batch);
2042 $self->write_recent($ctx->{recent}) if $ctx->{something_done};
2043 $self->_assert_symlink;
2044 $self->unlock;
2046 sub _locked_batch_update {
2047 my($self,$batch) = @_;
2048 my $something_done = 0;
2049 my $recent = $self->recent_events;
2050 unless ($recent->[0]) {
2051 # obstetrics
2052 $something_done = 1;
2054 my %paths_in_recent = map { $_->{path} => undef } @$recent;
2055 my $interval = $self->interval;
2056 my $canonmeth = $self->canonize;
2057 unless ($canonmeth) {
2058 $canonmeth = "naive_path_normalize";
2060 my $oldest_allowed = 0;
2061 my $setting_new_dirty_mark = 0;
2062 my $console;
2063 if ($self->verbose && @$batch > 1) {
2064 eval {require Time::Progress};
2065 warn "dollarat[$@]" if $@;
2066 $| = 1;
2067 $console = new Time::Progress;
2068 $console->attr( min => 1, max => scalar @$batch );
2069 print "\n";
2071 my $i = 0;
2072 my $memo_splicepos;
2073 ITEM: for my $item (sort {($b->{epoch}||0) <=> ($a->{epoch}||0)} @$batch) {
2074 $i++;
2075 print $console->report( "\rdone %p elapsed: %L (%l sec), ETA %E (%e sec)", $i ) if $console and not $i % 50;
2076 my $ctx = $self->_update_batch_item($item,$canonmeth,$recent,$setting_new_dirty_mark,$oldest_allowed,$something_done,\%paths_in_recent,$memo_splicepos);
2077 $something_done = $ctx->{something_done};
2078 $oldest_allowed = $ctx->{oldest_allowed};
2079 $setting_new_dirty_mark = $ctx->{setting_new_dirty_mark};
2080 $recent = $ctx->{recent};
2081 $memo_splicepos = $ctx->{memo_splicepos};
2083 print "\n" if $console;
2084 if ($setting_new_dirty_mark) {
2085 $oldest_allowed = 0;
2087 TRUNCATE: while (@$recent) {
2088 if (_bigfloatlt($recent->[-1]{epoch}, $oldest_allowed)) {
2089 pop @$recent;
2090 $something_done = 1;
2091 } else {
2092 last TRUNCATE;
2095 return {something_done=>$something_done,recent=>$recent};
2097 sub _update_batch_item {
2098 my($self,$item,$canonmeth,$recent,$setting_new_dirty_mark,$oldest_allowed,$something_done,$paths_in_recent,$memo_splicepos) = @_;
2099 my($path,$type,$dirty_epoch) = @{$item}{qw(path type epoch)};
2100 if (defined $path or defined $type or defined $dirty_epoch) {
2101 $path = $self->$canonmeth($path);
2103 # you must calculate the time after having locked, of course
2104 my $now = Time::HiRes::time;
2106 my $epoch;
2107 if (defined $dirty_epoch && _bigfloatgt($now,$dirty_epoch)) {
2108 $epoch = $dirty_epoch;
2109 } else {
2110 $epoch = $self->_epoch_monotonically_increasing($now,$recent);
2112 $recent ||= [];
2113 my $merged = $self->merged;
2114 if ($merged->{epoch} && !$setting_new_dirty_mark) {
2115 my $virtualnow = _bigfloatmax($now,$epoch);
2116 # for the lower bound I think we need no big math, we calc already
2117 my $secs = $self->interval_secs();
2118 $oldest_allowed = min($virtualnow - $secs, $merged->{epoch}, $epoch);
2119 } else {
2120 # as long as we are not merged at all, no limits!
2122 my $lrd = $self->localroot;
2123 if (defined $path && $path =~ s|^\Q$lrd\E||) {
2124 $path =~ s|^/||;
2125 my $splicepos;
2126 # remove the older duplicates of this $path, irrespective of $type:
2127 if (defined $dirty_epoch) {
2128 my $ctx = $self->_update_with_dirty_epoch($path,$recent,$epoch,$paths_in_recent,$memo_splicepos);
2129 $recent = $ctx->{recent};
2130 $splicepos = $ctx->{splicepos};
2131 $epoch = $ctx->{epoch};
2132 my $dirtymark = $self->dirtymark;
2133 my $new_dm = $now;
2134 if (_bigfloatgt($epoch, $now)) { # just in case we had to increase it
2135 $new_dm = $epoch;
2137 $self->dirtymark($new_dm);
2138 $setting_new_dirty_mark = 1;
2139 if (not defined $merged->{epoch} or _bigfloatlt($epoch,$merged->{epoch})) {
2140 $self->merged(+{});
2142 } else {
2143 $recent = [ grep { $_->{path} ne $path } @$recent ];
2144 $splicepos = 0;
2146 if (defined $splicepos) {
2147 splice @$recent, $splicepos, 0, { epoch => $epoch, path => $path, type => $type };
2148 $paths_in_recent->{$path} = undef;
2150 $memo_splicepos = $splicepos;
2151 $something_done = 1;
2153 return
2155 something_done => $something_done,
2156 oldest_allowed => $oldest_allowed,
2157 setting_new_dirty_mark => $setting_new_dirty_mark,
2158 recent => $recent,
2159 memo_splicepos => $memo_splicepos,
2162 sub _update_with_dirty_epoch {
2163 my($self,$path,$recent,$epoch,$paths_in_recent,$memo_splicepos) = @_;
2164 my $splicepos;
2165 my $new_recent = [];
2166 if (exists $paths_in_recent->{$path}) {
2167 my $cancel = 0;
2168 KNOWN_EVENT: for my $i (0..$#$recent) {
2169 if ($recent->[$i]{path} eq $path) {
2170 if ($recent->[$i]{epoch} eq $epoch) {
2171 # nothing to do
2172 $cancel = 1;
2173 last KNOWN_EVENT;
2175 } else {
2176 push @$new_recent, $recent->[$i];
2179 @$recent = @$new_recent unless $cancel;
2181 if (!exists $recent->[0] or _bigfloatgt($epoch,$recent->[0]{epoch})) {
2182 $splicepos = 0;
2183 } elsif (_bigfloatlt($epoch,$recent->[-1]{epoch})) {
2184 $splicepos = @$recent;
2185 } else {
2186 my $startingpoint;
2187 if (_bigfloatgt($memo_splicepos<=$#$recent && $epoch, $recent->[$memo_splicepos]{epoch})) {
2188 $startingpoint = 0;
2189 } else {
2190 $startingpoint = $memo_splicepos;
2192 RECENT: for my $i ($startingpoint..$#$recent) {
2193 my $ev = $recent->[$i];
2194 if ($epoch eq $recent->[$i]{epoch}) {
2195 $epoch = _increase_a_bit($epoch, $i ? $recent->[$i-1]{epoch} : undef);
2197 if (_bigfloatgt($epoch,$recent->[$i]{epoch})) {
2198 $splicepos = $i;
2199 last RECENT;
2203 return {
2204 recent => $recent,
2205 splicepos => $splicepos,
2206 epoch => $epoch,
2210 =head2 seed
2212 Sets this recentfile in the state of 'seeded' which means it has to
2213 re-evaluate its uptodateness.
2215 =cut
2216 sub seed {
2217 my($self) = @_;
2218 $self->seeded(1);
2221 =head2 seeded
2223 Tells if the recentfile is in the state 'seeded'.
2225 =cut
2226 sub seeded {
2227 my($self, $set) = @_;
2228 if (defined $set) {
2229 $self->_seeded ($set);
2231 my $x = $self->_seeded;
2232 unless (defined $x) {
2233 $x = 0;
2234 $self->_seeded ($x);
2236 return $x;
2239 =head2 uptodate
2241 True if this object has mirrored the complete interval covered by the
2242 current recentfile.
2244 =cut
2245 sub uptodate {
2246 my($self) = @_;
2247 my $uptodate;
2248 my $why;
2249 if ($self->_uptodateness_ever_reached and not $self->seeded) {
2250 $why = "saturated";
2251 $uptodate = 1;
2253 # it's too easy to misconfigure ttl and related timings and then
2254 # never reach uptodateness, so disabled 2009-03-22
2255 if (0 and not defined $uptodate) {
2256 if ($self->ttl_reached){
2257 $why = "ttl_reached returned true, so we are not uptodate";
2258 $uptodate = 0 ;
2261 unless (defined $uptodate) {
2262 # look if recentfile has unchanged timestamp
2263 my $minmax = $self->minmax;
2264 if (exists $minmax->{mtime}) {
2265 my $rfile = $self->_my_current_rfile;
2266 my @stat = stat $rfile;
2267 if (@stat) {
2268 my $mtime = $stat[9];
2269 if (defined $mtime && defined $minmax->{mtime} && $mtime > $minmax->{mtime}) {
2270 $why = "mtime[$mtime] of rfile[$rfile] > minmax/mtime[$minmax->{mtime}], so we are not uptodate";
2271 $uptodate = 0;
2272 } else {
2273 my $covered = $self->done->covered(@$minmax{qw(max min)});
2274 $why = sprintf "minmax covered[%s], so we return that", defined $covered ? $covered : "UNDEF";
2275 $uptodate = $covered;
2277 } else {
2278 require Carp;
2279 $why = "Could not stat '$rfile': $!";
2280 Carp::cluck($why);
2281 $uptodate = 0;
2285 unless (defined $uptodate) {
2286 $why = "fallthrough, so not uptodate";
2287 $uptodate = 0;
2289 if ($uptodate) {
2290 $self->_uptodateness_ever_reached(1);
2292 my $remember =
2294 uptodate => $uptodate,
2295 why => $why,
2297 $self->_remember_last_uptodate_call($remember);
2298 return $uptodate;
2301 =head2 $obj->write_recent ($recent_files_arrayref)
2303 Writes a I<recentfile> based on the current reflection of the current
2304 state of the tree limited by the current interval.
2306 =cut
2307 sub _resort {
2308 my($self) = @_;
2309 @{$_[1]} = sort { _bigfloatcmp($b->{epoch},$a->{epoch}) } @{$_[1]};
2310 return;
2312 sub write_recent {
2313 my ($self,$recent) = @_;
2314 die "write_recent called without argument" unless defined $recent;
2315 my $Last_epoch;
2316 SANITYCHECK: for my $i (0..$#$recent) {
2317 if (defined($Last_epoch) and _bigfloatge($recent->[$i]{epoch},$Last_epoch)) {
2318 require Carp;
2319 Carp::confess(sprintf "Warning: disorder '%s'>='%s', re-sorting %s\n",
2320 $recent->[$i]{epoch}, $Last_epoch, $self->interval);
2321 # you may want to:
2322 # $self->_resort($recent);
2323 # last SANITYCHECK;
2325 $Last_epoch = $recent->[$i]{epoch};
2327 my $minmax = $self->minmax;
2328 if (!defined $minmax->{max} || _bigfloatlt($minmax->{max},$recent->[0]{epoch})) {
2329 $minmax->{max} = @$recent && exists $recent->[0]{epoch} ? $recent->[0]{epoch} : undef;
2331 if (!defined $minmax->{min} || _bigfloatlt($minmax->{min},$recent->[-1]{epoch})) {
2332 $minmax->{min} = @$recent && exists $recent->[-1]{epoch} ? $recent->[-1]{epoch} : undef;
2334 $self->minmax($minmax);
2335 my $meth = sprintf "write_%d", $self->protocol;
2336 $self->$meth($recent);
2339 =head2 $obj->write_0 ($recent_files_arrayref)
2341 Delegate of C<write_recent()> on protocol 0
2343 =cut
2345 sub write_0 {
2346 my ($self,$recent) = @_;
2347 my $rfile = $self->rfile;
2348 YAML::Syck::DumpFile("$rfile.new",$recent);
2349 rename "$rfile.new", $rfile or die "Could not rename to '$rfile': $!";
2352 =head2 $obj->write_1 ($recent_files_arrayref)
2354 Delegate of C<write_recent()> on protocol 1
2356 =cut
2358 sub write_1 {
2359 my ($self,$recent) = @_;
2360 my $rfile = $self->rfile;
2361 my $suffix = $self->serializer_suffix;
2362 my $data = {
2363 meta => $self->meta_data,
2364 recent => $recent,
2366 my $serialized;
2367 if ($suffix eq ".yaml") {
2368 $serialized = YAML::Syck::Dump($data);
2369 } elsif ($HAVE->{"Data::Serializer"}) {
2370 my $serializer = Data::Serializer->new
2371 ( serializer => $serializers{$suffix} );
2372 $serialized = $serializer->raw_serialize($data);
2373 } else {
2374 die "Data::Serializer not installed, cannot proceed with suffix '$suffix'";
2376 open my $fh, ">", "$rfile.new" or die "Could not open >'$rfile.new': $!";
2377 print $fh $serialized;
2378 close $fh or die "Could not close '$rfile.new': $!";
2379 rename "$rfile.new", $rfile or die "Could not rename to '$rfile': $!";
2382 BEGIN {
2383 my $nq = qr/[^"]+/; # non-quotes
2384 my @pod_lines =
2385 split /\n/, <<'=cut'; %serializers = map { my @x = /"($nq)"\s+=>\s+"($nq)"/; @x } grep {s/^=item\s+C<<\s+(.+)\s+>>$/$1/} @pod_lines; }
2387 =head1 SERIALIZERS
2389 The following suffixes are supported and trigger the use of these
2390 serializers:
2392 =over 4
2394 =item C<< ".yaml" => "YAML::Syck" >>
2396 =item C<< ".json" => "JSON" >>
2398 =item C<< ".sto" => "Storable" >>
2400 =item C<< ".dd" => "Data::Dumper" >>
2402 =back
2404 =cut
2406 BEGIN {
2407 my @pod_lines =
2408 split /\n/, <<'=cut'; %seconds = map { eval } grep {s/^=item\s+C<<(.+)>>$/$1/} @pod_lines; }
2410 =head1 INTERVAL SPEC
2412 An interval spec is a primitive way to express time spans. Normally it
2413 is composed from an integer and a letter.
2415 As a special case, a string that consists only of the single letter
2416 C<Z>, stands for MAX_INT seconds.
2418 The following letters express the specified number of seconds:
2420 =over 4
2422 =item C<< s => 1 >>
2424 =item C<< m => 60 >>
2426 =item C<< h => 60*60 >>
2428 =item C<< d => 60*60*24 >>
2430 =item C<< W => 60*60*24*7 >>
2432 =item C<< M => 60*60*24*30 >>
2434 =item C<< Q => 60*60*24*90 >>
2436 =item C<< Y => 60*60*24*365.25 >>
2438 =back
2440 =cut
2442 =head1 SEE ALSO
2444 L<File::Rsync::Mirror::Recent>,
2445 L<File::Rsync::Mirror::Recentfile::Done>,
2446 L<File::Rsync::Mirror::Recentfile::FakeBigFloat>
2448 =head1 BUGS
2450 Please report any bugs or feature requests through the web interface
2452 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Rsync-Mirror-Recentfile>.
2453 I will be notified, and then you'll automatically be notified of
2454 progress on your bug as I make changes.
2456 =head1 KNOWN BUGS
2458 Memory hungry: it seems all memory is allocated during the initial
2459 rsync where a list of all files is maintained in memory.
2461 =head1 SUPPORT
2463 You can find documentation for this module with the perldoc command.
2465 perldoc File::Rsync::Mirror::Recentfile
2467 You can also look for information at:
2469 =over 4
2471 =item * RT: CPAN's request tracker
2473 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Rsync-Mirror-Recentfile>
2475 =item * AnnoCPAN: Annotated CPAN documentation
2477 L<http://annocpan.org/dist/File-Rsync-Mirror-Recentfile>
2479 =item * CPAN Ratings
2481 L<http://cpanratings.perl.org/d/File-Rsync-Mirror-Recentfile>
2483 =item * Search CPAN
2485 L<http://search.cpan.org/dist/File-Rsync-Mirror-Recentfile>
2487 =back
2490 =head1 ACKNOWLEDGEMENTS
2492 Thanks to RJBS for module-starter.
2494 =head1 AUTHOR
2496 Andreas König
2498 =head1 COPYRIGHT & LICENSE
2500 Copyright 2008,2009 Andreas König.
2502 This program is free software; you can redistribute it and/or modify it
2503 under the same terms as Perl itself.
2506 =cut
2508 1; # End of File::Rsync::Mirror::Recentfile
2510 # Local Variables:
2511 # mode: cperl
2512 # cperl-indent-level: 4
2513 # End: