8e4044b278aae07d8b5eb021112ca2bce0be7c10
[rersyncrecent.git] / lib / File / Rsync / Mirror / Recentfile.pm
blob8e4044b278aae07d8b5eb021112ca2bce0be7c10
1 package File::Rsync::Mirror::Recentfile;
3 # use warnings;
4 use strict;
6 =encoding utf-8
8 =head1 NAME
10 File::Rsync::Mirror::Recentfile - mirroring via rsync made efficient
12 =cut
14 my $HAVE = {};
15 for my $package (
16 "Data::Serializer",
17 "File::Rsync"
18 ) {
19 $HAVE->{$package} = eval qq{ require $package; };
21 use Config;
22 use File::Basename qw(basename dirname fileparse);
23 use File::Copy qw(cp);
24 use File::Path qw(mkpath);
25 use File::Rsync::Mirror::Recentfile::FakeBigFloat qw(:all);
26 use File::Temp;
27 use List::Util qw(first max min);
28 use Scalar::Util qw(reftype);
29 use Storable;
30 use Time::HiRes qw();
31 use YAML::Syck;
33 use version; our $VERSION = qv('0.0.8');
35 use constant MAX_INT => ~0>>1; # anything better?
36 use constant DEFAULT_PROTOCOL => 1;
38 # cf. interval_secs
39 my %seconds;
41 # maybe subclass if this mapping is bad?
42 my %serializers;
44 =head1 SYNOPSIS
46 Writer (of a single file):
48 use File::Rsync::Mirror::Recentfile;
49 my $fr = File::Rsync::Mirror::Recentfile->new
51 interval => q(6h),
52 filenameroot => "RECENT",
53 comment => "These 'RECENT' files are part of a test of a new CPAN mirroring concept. Please ignore them for now.",
54 localroot => "/home/ftp/pub/PAUSE/authors/",
55 aggregator => [qw(1d 1W 1M 1Q 1Y Z)],
57 $rf->update("/home/ftp/pub/PAUSE/authors/id/A/AN/ANDK/CPAN-1.92_63.tar.gz","new");
59 Reader/mirrorer:
61 my $rf = File::Rsync::Mirror::Recentfile->new
63 filenameroot => "RECENT",
64 interval => q(6h),
65 localroot => "/home/ftp/pub/PAUSE/authors",
66 remote_dir => "",
67 remote_host => "pause.perl.org",
68 remote_module => "authors",
69 rsync_options => {
70 compress => 1,
71 'rsync-path' => '/usr/bin/rsync',
72 links => 1,
73 times => 1,
74 'omit-dir-times' => 1,
75 checksum => 1,
77 verbose => 1,
79 $rf->mirror;
81 Aggregator (usually the writer):
83 my $rf = File::Rsync::Mirror::Recentfile->new_from_file ( $file );
84 $rf->aggregate;
86 =head1 DESCRIPTION
88 Lower level than F:R:M:Recent, handles one recentfile. Whereas a tree
89 is always composed of several recentfiles, controlled by the
90 F:R:M:Recent object. The Recentfile object has to do the bookkeeping
91 for a single timeslice.
93 =head1 EXPORT
95 No exports.
97 =head1 CONSTRUCTORS / DESTRUCTOR
99 =head2 my $obj = CLASS->new(%hash)
101 Constructor. On every argument pair the key is a method name and the
102 value is an argument to that method name.
104 If a recentfile for this resource already exists, metadata that are
105 not defined by the constructor will be fetched from there as soon as
106 it is being read by recent_events().
108 =cut
110 sub new {
111 my($class, @args) = @_;
112 my $self = bless {}, $class;
113 while (@args) {
114 my($method,$arg) = splice @args, 0, 2;
115 $self->$method($arg);
117 unless (defined $self->protocol) {
118 $self->protocol(DEFAULT_PROTOCOL);
120 unless (defined $self->filenameroot) {
121 $self->filenameroot("RECENT");
123 unless (defined $self->serializer_suffix) {
124 $self->serializer_suffix(".yaml");
126 return $self;
129 =head2 my $obj = CLASS->new_from_file($file)
131 Constructor. $file is a I<recentfile>.
133 =cut
135 sub new_from_file {
136 my($class, $file) = @_;
137 my $self = bless {}, $class;
138 $self->_rfile($file);
139 #?# $self->lock;
140 my $serialized = do { open my $fh, $file or die "Could not open '$file': $!";
141 local $/;
142 <$fh>;
144 # XXX: we can skip this step when the metadata are sufficient, but
145 # we cannot parse the file without some magic stuff about
146 # serialized formats
147 while (-l $file) {
148 my($name,$path) = fileparse $file;
149 my $symlink = readlink $file;
150 if ($symlink =~ m|/|) {
151 die "FIXME: filenames containing '/' not supported, got $symlink";
153 $file = File::Spec->catfile ( $path, $symlink );
155 my($name,$path,$suffix) = fileparse $file, keys %serializers;
156 $self->serializer_suffix($suffix);
157 $self->localroot($path);
158 die "Could not determine file format from suffix" unless $suffix;
159 my $deserialized;
160 if ($suffix eq ".yaml") {
161 require YAML::Syck;
162 $deserialized = YAML::Syck::LoadFile($file);
163 } elsif ($HAVE->{"Data::Serializer"}) {
164 my $serializer = Data::Serializer->new
165 ( serializer => $serializers{$suffix} );
166 $deserialized = $serializer->raw_deserialize($serialized);
167 } else {
168 die "Data::Serializer not installed, cannot proceed with suffix '$suffix'";
170 while (my($k,$v) = each %{$deserialized->{meta}}) {
171 next if $k ne lc $k; # "Producers"
172 $self->$k($v);
174 unless (defined $self->protocol) {
175 $self->protocol(DEFAULT_PROTOCOL);
177 return $self;
180 =head2 DESTROY
182 A simple unlock.
184 =cut
185 sub DESTROY {
186 my $self = shift;
187 $self->unlock;
188 unless ($self->_current_tempfile_fh) {
189 if (my $tempfile = $self->_current_tempfile) {
190 if (-e $tempfile) {
191 # unlink $tempfile; # may fail in global destruction
197 =head1 ACCESSORS
199 =cut
201 my @accessors;
203 BEGIN {
204 @accessors = (
205 "_current_tempfile",
206 "_current_tempfile_fh",
207 "_delayed_operations",
208 "_done",
209 "_interval",
210 "_is_locked",
211 "_localroot",
212 "_merged",
213 "_pathdb",
214 "_remember_last_uptodate_call",
215 "_remote_dir",
216 "_remoteroot",
217 "_requires_fsck",
218 "_rfile",
219 "_rsync",
220 "__verified_tempdir",
221 "_seeded",
222 "_uptodateness_ever_reached",
223 "_use_tempfile",
226 my @pod_lines =
227 split /\n/, <<'=cut'; push @accessors, grep {s/^=item\s+//} @pod_lines; }
229 =over 4
231 =item aggregator
233 A list of interval specs that tell the aggregator which I<recentfile>s
234 are to be produced.
236 =item canonize
238 The name of a method to canonize the path before rsyncing. Only
239 supported value is C<naive_path_normalize>. Defaults to that.
241 =item comment
243 A comment about this tree and setup.
245 =item dirtymark
247 A timestamp. The dirtymark is updated whenever an out of band change
248 on the origin server is performed that violates the protocol. Say,
249 they add or remove files in the middle somewhere. Slaves must react
250 with a devaluation of their C<done> structure which then leads to a
251 full re-sync of all files. Implementation note: dirtymark may increase
252 or decrease.
254 =item filenameroot
256 The (prefix of the) filename we use for this I<recentfile>. Defaults to
257 C<RECENT>. The string must not contain a directory separator.
259 =item have_mirrored
261 Timestamp remembering when we mirrored this recentfile the last time.
262 Only relevant for slaves.
264 =item ignore_link_stat_errors
266 If set to true, rsync errors are ignored that complain about link stat
267 errors. These seem to happen only when there are files missing at the
268 origin. In race conditions this can always happen, so it defaults to
269 true.
271 =item is_slave
273 If set to true, this object will fetch a new recentfile from remote
274 when the timespan between the last mirror (see have_mirrored) and now
275 is too large (see C<ttl>).
277 =item keep_delete_objects_forever
279 The default for delete events is that they are passed through the
280 collection of recentfile objects until they reach the Z file. There
281 they get dropped so that the associated file object ceases to exist at
282 all. By setting C<keep_delete_objects_forever> the delete objects are
283 kept forever. This makes the Z file larger but has the advantage that
284 slaves that have interrupted mirroring for a long time still can clean
285 up their copy.
287 =item locktimeout
289 After how many seconds shall we die if we cannot lock a I<recentfile>?
290 Defaults to 600 seconds.
292 =item loopinterval
294 When mirror_loop is called, this accessor can specify how much time
295 every loop shall at least take. If the work of a loop is done before
296 that time has gone, sleeps for the rest of the time. Defaults to
297 arbitrary 42 seconds.
299 =item max_files_per_connection
301 Maximum number of files that are transferred on a single rsync call.
302 Setting it higher means higher performance at the price of holding
303 connections longer and potentially disturbing other users in the pool.
304 Defaults to the arbitrary value 42.
306 =item max_rsync_errors
308 When rsync operations encounter that many errors without any resetting
309 success in between, then we die. Defaults to unlimited. A value of
310 -1 means we run forever ignoring all rsync errors.
312 =item minmax
314 Hashref remembering when we read the recent_events from this file the
315 last time and what the timespan was.
317 =item protocol
319 When the RECENT file format changes, we increment the protocol. We try
320 to support older protocols in later releases.
322 =item remote_host
324 The host we are mirroring from. Leave empty for the local filesystem.
326 =item remote_module
328 Rsync servers have so called modules to separate directory trees from
329 each other. Put here the name of the module under which we are
330 mirroring. Leave empty for local filesystem.
332 =item rsync_options
334 Things like compress, links, times or checksums. Passed in to the
335 File::Rsync object used to run the mirror.
337 =item serializer_suffix
339 Mostly untested accessor. The only well tested format for
340 I<recentfile>s at the moment is YAML. It is used with YAML::Syck via
341 Data::Serializer. But in principle other formats are supported as
342 well. See section SERIALIZERS below.
344 =item sleep_per_connection
346 Sleep that many seconds (floating point OK) after every chunk of rsyncing
347 has finished. Defaults to arbitrary 0.42.
349 =item tempdir
351 Directory to write temporary files to. Must allow rename operations
352 into the tree which usually means it must live on the same partition
353 as the target directory. Defaults to C<< $self->localroot >>.
355 =item ttl
357 Time to live. Number of seconds after which this recentfile must be
358 fetched again from the origin server. Only relevant for slaves.
359 Defaults to arbitrary 24.2 seconds.
361 =item verbose
363 Boolean to turn on a bit verbosity.
365 =item verboselog
367 Path to the logfile to write verbose progress information to. This is
368 a primitive stop gap solution to get simple verbose logging working.
369 Switching to Log4perl or similar is probably the way to go.
371 =back
373 =cut
375 use accessors @accessors;
377 =head1 METHODS
379 =head2 (void) $obj->aggregate( %options )
381 Takes all intervals that are collected in the accessor called
382 aggregator. Sorts them by actual length of the interval.
383 Removes those that are shorter than our own interval. Then merges this
384 object into the next larger object. The merging continues upwards
385 as long as the next I<recentfile> is old enough to warrant a merge.
387 If a merge is warranted is decided according to the interval of the
388 previous interval so that larger files are not so often updated as
389 smaller ones. If $options{force} is true, all files get updated.
391 Here is an example to illustrate the behaviour. Given aggregators
393 1h 1d 1W 1M 1Q 1Y Z
395 then
397 1h updates 1d on every call to aggregate()
398 1d updates 1W earliest after 1h
399 1W updates 1M earliest after 1d
400 1M updates 1Q earliest after 1W
401 1Q updates 1Y earliest after 1M
402 1Y updates Z earliest after 1Q
404 Note that all but the smallest recentfile get updated at an arbitrary
405 rate and as such are quite useless on their own.
407 =cut
409 sub aggregate {
410 my($self, %option) = @_;
411 my %seen_interval;
412 my @aggs = sort { $a->{secs} <=> $b->{secs} }
413 grep { !$seen_interval{$_->{interval}}++ && $_->{secs} >= $self->interval_secs }
414 map { { interval => $_, secs => $self->interval_secs($_)} }
415 $self->interval, @{$self->aggregator || []};
416 $self->update;
417 $aggs[0]{object} = $self;
418 AGGREGATOR: for my $i (0..$#aggs-1) {
419 my $this = $aggs[$i]{object};
420 my $next = $this->_sparse_clone;
421 $next->interval($aggs[$i+1]{interval});
422 my $want_merge = 0;
423 if ($option{force} || $i == 0) {
424 $want_merge = 1;
425 } else {
426 my $next_rfile = $next->rfile;
427 if (-e $next_rfile) {
428 my $prev = $aggs[$i-1]{object};
429 local $^T = time;
430 my $next_age = 86400 * -M $next_rfile;
431 if ($next_age > $prev->interval_secs) {
432 $want_merge = 1;
434 } else {
435 $want_merge = 1;
438 if ($want_merge) {
439 $next->merge($this);
440 $aggs[$i+1]{object} = $next;
441 } else {
442 last AGGREGATOR;
447 # collect file size and mtime for all files of this aggregate
448 sub _debug_aggregate {
449 my($self) = @_;
450 my @aggs = sort { $a->{secs} <=> $b->{secs} }
451 map { { interval => $_, secs => $self->interval_secs($_)} }
452 $self->interval, @{$self->aggregator || []};
453 my $report = [];
454 for my $i (0..$#aggs) {
455 my $this = Storable::dclone $self;
456 $this->interval($aggs[$i]{interval});
457 my $rfile = $this->rfile;
458 my @stat = stat $rfile;
459 push @$report, {rfile => $rfile, size => $stat[7], mtime => $stat[9]};
461 $report;
464 # (void) $self->_assert_symlink()
465 sub _assert_symlink {
466 my($self) = @_;
467 my $recentrecentfile = File::Spec->catfile
469 $self->localroot,
470 sprintf
472 "%s.recent",
473 $self->filenameroot
476 if ($Config{d_symlink} eq "define") {
477 my $howto_create_symlink; # 0=no need; 1=straight symlink; 2=rename symlink
478 if (-l $recentrecentfile) {
479 my $found_symlink = readlink $recentrecentfile;
480 if ($found_symlink eq $self->rfilename) {
481 return;
482 } else {
483 $howto_create_symlink = 2;
485 } else {
486 $howto_create_symlink = 1;
488 if (1 == $howto_create_symlink) {
489 symlink $self->rfilename, $recentrecentfile or die "Could not create symlink '$recentrecentfile': $!"
490 } else {
491 unlink "$recentrecentfile.$$"; # may fail
492 symlink $self->rfilename, "$recentrecentfile.$$" or die "Could not create symlink '$recentrecentfile.$$': $!";
493 rename "$recentrecentfile.$$", $recentrecentfile or die "Could not rename '$recentrecentfile.$$' to $recentrecentfile: $!";
495 } else {
496 warn "Warning: symlinks not supported on this system, doing a copy instead\n";
497 unlink "$recentrecentfile.$$"; # may fail
498 cp $self->rfilename, "$recentrecentfile.$$" or die "Could not copy to '$recentrecentfile.$$': $!";
499 rename "$recentrecentfile.$$", $recentrecentfile or die "Could not rename '$recentrecentfile.$$' to $recentrecentfile: $!";
503 =head2 $hashref = $obj->delayed_operations
505 A hash of hashes containing unlink and rmdir operations which had to
506 wait until the recentfile got unhidden in order to not confuse
507 downstream mirrors (in case we have some).
509 =cut
511 sub delayed_operations {
512 my($self) = @_;
513 my $x = $self->_delayed_operations;
514 unless (defined $x) {
515 $x = {
516 unlink => {},
517 rmdir => {},
519 $self->_delayed_operations ($x);
521 return $x;
524 =head2 $done = $obj->done
526 C<$done> is a reference to a L<File::Rsync::Mirror::Recentfile::Done>
527 object that keeps track of rsync activities. Only needed and used when
528 we are a mirroring slave.
530 =cut
532 sub done {
533 my($self) = @_;
534 my $done = $self->_done;
535 if (!$done) {
536 require File::Rsync::Mirror::Recentfile::Done;
537 $done = File::Rsync::Mirror::Recentfile::Done->new();
538 $done->_rfinterval ($self->interval);
539 $self->_done ( $done );
541 return $done;
544 =head2 $tempfilename = $obj->get_remote_recentfile_as_tempfile ()
546 Stores the remote I<recentfile> locally as a tempfile. The caller is
547 responsible to remove the file after use.
549 Note: if you're intending to act as an rsync server for other slaves,
550 then you must prefer this method to fetch that file with
551 get_remotefile(). Otherwise downstream mirrors would expect you to
552 already have mirrored all the files that are in the I<recentfile>
553 before you have them mirrored.
555 =cut
557 sub get_remote_recentfile_as_tempfile {
558 my($self) = @_;
559 mkpath $self->localroot;
560 my $fh;
561 my $trfilename;
562 if ( $self->_use_tempfile() ) {
563 if ($self->ttl_reached) {
564 $fh = $self->_current_tempfile_fh;
565 $trfilename = $self->rfilename;
566 } else {
567 return $self->_current_tempfile;
569 } else {
570 $trfilename = $self->rfilename;
573 my $dst;
574 if ($fh) {
575 $dst = $self->_current_tempfile;
576 } else {
577 $fh = $self->_get_remote_rat_provide_tempfile_object ($trfilename);
578 $dst = $fh->filename;
579 $self->_current_tempfile ($dst);
580 my $rfile = eval { $self->rfile; }; # may fail (RECENT.recent has no rfile)
581 if (defined $rfile && -e $rfile) {
582 # saving on bandwidth. Might need to be configurable
583 # $self->bandwidth_is_cheap?
584 cp $rfile, $dst or die "Could not copy '$rfile' to '$dst': $!"
587 my $src = join ("/",
588 $self->remoteroot,
589 $trfilename,
591 if ($self->verbose) {
592 my $doing = -e $dst ? "Sync" : "Get";
593 my $display_dst = join "/", "...", basename(dirname($dst)), basename($dst);
594 my $LFH = $self->_logfilehandle;
595 printf $LFH
597 "%-4s %d (1/1/%s) temp %s ... ",
598 $doing,
599 time,
600 $self->interval,
601 $display_dst,
604 my $gaveup = 0;
605 my $retried = 0;
606 local($ENV{LANG}) = "C";
607 while (!$self->rsync->exec(
608 src => $src,
609 dst => $dst,
610 )) {
611 $self->register_rsync_error ($self->rsync->err);
612 if (++$retried >= 3) {
613 warn "XXX giving up";
614 $gaveup = 1;
615 last;
618 if ($gaveup) {
619 my $LFH = $self->_logfilehandle;
620 printf $LFH "Warning: gave up mirroring %s, will try again later", $self->interval;
621 } else {
622 $self->_refresh_internals ($dst);
623 $self->have_mirrored (Time::HiRes::time);
624 $self->un_register_rsync_error ();
626 $self->unseed;
627 if ($self->verbose) {
628 my $LFH = $self->_logfilehandle;
629 print $LFH "DONE\n";
631 my $mode = 0644;
632 chmod $mode, $dst or die "Could not chmod $mode '$dst': $!";
633 return $dst;
636 sub _verified_tempdir {
637 my($self) = @_;
638 my $tempdir = $self->__verified_tempdir();
639 return $tempdir if defined $tempdir;
640 unless ($tempdir = $self->tempdir) {
641 $tempdir = $self->localroot;
643 unless (-d $tempdir) {
644 mkpath $tempdir;
646 $self->__verified_tempdir($tempdir);
647 return $tempdir;
650 sub _get_remote_rat_provide_tempfile_object {
651 my($self, $trfilename) = @_;
652 my $_verified_tempdir = $self->_verified_tempdir;
653 my $fh = File::Temp->new
654 (TEMPLATE => sprintf(".FRMRecent-%s-XXXX",
655 $trfilename,
657 DIR => $_verified_tempdir,
658 SUFFIX => $self->serializer_suffix,
659 UNLINK => $self->_use_tempfile,
661 my $mode = 0644;
662 my $dst = $fh->filename;
663 chmod $mode, $dst or die "Could not chmod $mode '$dst': $!";
664 if ($self->_use_tempfile) {
665 $self->_current_tempfile_fh ($fh); # delay self destruction
667 return $fh;
670 sub _logfilehandle {
671 my($self) = @_;
672 my $fh;
673 if (my $vl = $self->verboselog) {
674 open $fh, ">>", $vl or die "Could not open >> '$vl': $!";
675 } else {
676 $fh = \*STDERR;
678 return $fh;
681 =head2 $localpath = $obj->get_remotefile ( $relative_path )
683 Rsyncs one single remote file to local filesystem.
685 Note: no locking is done on this file. Any number of processes may
686 mirror this object.
688 Note II: do not use for recentfiles. If you are a cascading
689 slave/server combination, it would confuse other slaves. They would
690 expect the contents of these recentfiles to be available. Use
691 get_remote_recentfile_as_tempfile() instead.
693 =cut
695 sub get_remotefile {
696 my($self, $path) = @_;
697 my $dst = File::Spec->catfile($self->localroot, $path);
698 mkpath dirname $dst;
699 if ($self->verbose) {
700 my $doing = -e $dst ? "Sync" : "Get";
701 my $LFH = $self->_logfilehandle;
702 printf $LFH
704 "%-4s %d (1/1/%s) %s ... ",
705 $doing,
706 time,
707 $self->interval,
708 $path,
711 local($ENV{LANG}) = "C";
712 my $remoteroot = $self->remoteroot or die "Alert: missing remoteroot. Cannot continue";
713 while (!$self->rsync->exec(
714 src => join("/",
715 $remoteroot,
716 $path),
717 dst => $dst,
718 )) {
719 $self->register_rsync_error ($self->rsync->err);
721 $self->un_register_rsync_error ();
722 if ($self->verbose) {
723 my $LFH = $self->_logfilehandle;
724 print $LFH "DONE\n";
726 return $dst;
729 =head2 $obj->interval ( $interval_spec )
731 Get/set accessor. $interval_spec is a string and described below in
732 the section INTERVAL SPEC.
734 =cut
736 sub interval {
737 my ($self, $interval) = @_;
738 if (@_ >= 2) {
739 $self->_interval($interval);
740 $self->_rfile(undef);
742 $interval = $self->_interval;
743 unless (defined $interval) {
744 # do not ask the $self too much, it recurses!
745 require Carp;
746 Carp::confess("Alert: interval undefined for '".$self."'. Cannot continue.");
748 return $interval;
751 =head2 $secs = $obj->interval_secs ( $interval_spec )
753 $interval_spec is described below in the section INTERVAL SPEC. If
754 empty defaults to the inherent interval for this object.
756 =cut
758 sub interval_secs {
759 my ($self, $interval) = @_;
760 $interval ||= $self->interval;
761 unless (defined $interval) {
762 die "interval_secs() called without argument on an object without a declared one";
764 my ($n,$t) = $interval =~ /^(\d*)([smhdWMQYZ]$)/ or
765 die "Could not determine seconds from interval[$interval]";
766 if ($interval eq "Z") {
767 return MAX_INT;
768 } elsif (exists $seconds{$t} and $n =~ /^\d+$/) {
769 return $seconds{$t}*$n;
770 } else {
771 die "Invalid interval specification: n[$n]t[$t]";
775 =head2 $obj->localroot ( $localroot )
777 Get/set accessor. The local root of the tree. Guaranteed without
778 trailing slash.
780 =cut
782 sub localroot {
783 my ($self, $localroot) = @_;
784 if (@_ >= 2) {
785 $localroot =~ s|/$||;
786 $self->_localroot($localroot);
787 $self->_rfile(undef);
789 $localroot = $self->_localroot;
792 =head2 $ret = $obj->local_path($path_found_in_recentfile)
794 Combines the path to our local mirror and the path of an object found
795 in this I<recentfile>. In other words: the target of a mirror operation.
797 Implementation note: We split on slashes and then use
798 File::Spec::catfile to adjust to the local operating system.
800 =cut
802 sub local_path {
803 my($self,$path) = @_;
804 unless (defined $path) {
805 # seems like a degenerated case
806 return $self->localroot;
808 my @p = split m|/|, $path;
809 File::Spec->catfile($self->localroot,@p);
812 =head2 (void) $obj->lock
814 Locking is implemented with an C<mkdir> on a locking directory
815 (C<.lock> appended to $rfile).
817 =cut
819 sub lock {
820 my ($self) = @_;
821 # not using flock because it locks on filehandles instead of
822 # old school ressources.
823 my $locked = $self->_is_locked and return;
824 my $rfile = $self->rfile;
825 # XXX need a way to allow breaking the lock
826 my $start = time;
827 my $locktimeout = $self->locktimeout || 600;
828 my %have_warned;
829 my $lockdir = "$rfile.lock";
830 my $procfile = "$lockdir/process";
831 GETLOCK: while (not mkdir $lockdir) {
832 if (open my $fh, "<", $procfile) {
833 chomp(my $process = <$fh>);
834 if (0) {
835 } elsif ($process !~ /^\d+$/) {
836 warn "Warning: unknown process holds a lock in '$lockdir', waiting..." unless $have_warned{unknown}++;
837 } elsif ($$ == $process) {
838 last GETLOCK;
839 } elsif (kill 0, $process) {
840 warn "Warning: process $process holds a lock in '$lockdir', waiting..." unless $have_warned{$process}++;
841 } else {
842 warn "Warning: breaking lock held by process $process";
843 sleep 1;
844 last GETLOCK;
846 } else {
847 warn "Warning: unknown process holds a lock in '$lockdir', waiting..." unless $have_warned{unknown}++;
849 Time::HiRes::sleep 0.01;
850 if (time - $start > $locktimeout) {
851 die "Could not acquire lockdirectory '$rfile.lock': $!";
853 } # GETLOCK
854 open my $fh, ">", $procfile or die "Could not open >$procfile\: $!";
855 print $fh $$, "\n";
856 close $fh or die "Could not close: $!";
857 $self->_is_locked (1);
860 =head2 (void) $obj->merge ($other)
862 Bulk update of this object with another one. It's used to merge a
863 smaller and younger $other object into the current one. If this file
864 is a C<Z> file, then we normally do not merge in objects of type
865 C<delete>; this can be overridden by setting
866 keep_delete_objects_forever. But if we encounter an object of type
867 delete we delete the corresponding C<new> object if we have it.
869 If there is nothing to be merged, nothing is done.
871 =cut
873 sub merge {
874 my($self, $other) = @_;
875 $self->_merge_sanitycheck ( $other );
876 $other->lock;
877 my $other_recent = $other->recent_events || [];
878 $self->lock;
879 $self->_merge_locked ( $other, $other_recent );
880 $self->unlock;
881 $other->unlock;
884 sub _merge_locked {
885 my($self, $other, $other_recent) = @_;
886 my $my_recent = $self->recent_events || [];
888 # calculate the target time span
889 my $myepoch = $my_recent->[0] ? $my_recent->[0]{epoch} : undef;
890 my $epoch = $other_recent->[0] ? $other_recent->[0]{epoch} : $myepoch;
891 my $oldest_allowed = 0;
892 my $something_done;
893 unless ($my_recent->[0]) {
894 # obstetrics
895 $something_done = 1;
897 if ($epoch) {
898 if (($other->dirtymark||0) ne ($self->dirtymark||0)) {
899 $oldest_allowed = 0;
900 $something_done = 1;
901 } elsif (my $merged = $self->merged) {
902 my $secs = $self->interval_secs();
903 $oldest_allowed = min($epoch - $secs, $merged->{epoch}||0);
904 if (@$other_recent and
905 _bigfloatlt($other_recent->[-1]{epoch}, $oldest_allowed)
907 $oldest_allowed = $other_recent->[-1]{epoch};
910 while (@$my_recent && _bigfloatlt($my_recent->[-1]{epoch}, $oldest_allowed)) {
911 pop @$my_recent;
912 $something_done = 1;
916 my %have_path;
917 my $other_recent_filtered = [];
918 for my $oev (@$other_recent) {
919 my $oevepoch = $oev->{epoch} || 0;
920 next if _bigfloatlt($oevepoch, $oldest_allowed);
921 my $path = $oev->{path};
922 next if $have_path{$path}++;
923 if ( $self->interval eq "Z"
924 and $oev->{type} eq "delete"
925 and ! $self->keep_delete_objects_forever
927 # do nothing
928 } else {
929 if (!$myepoch || _bigfloatgt($oevepoch, $myepoch)) {
930 $something_done = 1;
932 push @$other_recent_filtered, { epoch => $oev->{epoch}, path => $path, type => $oev->{type} };
935 if ($something_done) {
936 $self->_merge_something_done ($other_recent_filtered, $my_recent, $other_recent, $other, \%have_path, $epoch);
940 sub _merge_something_done {
941 my($self, $other_recent_filtered, $my_recent, $other_recent, $other, $have_path, $epoch) = @_;
942 my $recent = [];
943 my $epoch_conflict = 0;
944 my $last_epoch;
945 ZIP: while (@$other_recent_filtered || @$my_recent) {
946 my $event;
947 if (!@$my_recent ||
948 @$other_recent_filtered && _bigfloatge($other_recent_filtered->[0]{epoch},$my_recent->[0]{epoch})) {
949 $event = shift @$other_recent_filtered;
950 } else {
951 $event = shift @$my_recent;
952 next ZIP if $have_path->{$event->{path}}++;
954 $epoch_conflict=1 if defined $last_epoch && $event->{epoch} eq $last_epoch;
955 $last_epoch = $event->{epoch};
956 push @$recent, $event;
958 if ($epoch_conflict) {
959 my %have_epoch;
960 for (my $i = $#$recent;$i>=0;$i--) {
961 my $epoch = $recent->[$i]{epoch};
962 if ($have_epoch{$epoch}++) {
963 while ($have_epoch{$epoch}) {
964 $epoch = _increase_a_bit($epoch);
966 $recent->[$i]{epoch} = $epoch;
967 $have_epoch{$epoch}++;
971 if (!$self->dirtymark || $other->dirtymark ne $self->dirtymark) {
972 $self->dirtymark ( $other->dirtymark );
974 $self->write_recent($recent);
975 $other->merged({
976 time => Time::HiRes::time, # not used anywhere
977 epoch => $recent->[0]{epoch},
978 into_interval => $self->interval, # not used anywhere
980 $other->write_recent($other_recent);
983 sub _merge_sanitycheck {
984 my($self, $other) = @_;
985 if ($self->interval_secs <= $other->interval_secs) {
986 require Carp;
987 Carp::confess
988 (sprintf
990 "Alert: illegal merge operation of a bigger interval[%d] into a smaller[%d]",
991 $self->interval_secs,
992 $other->interval_secs,
997 =head2 merged
999 Hashref denoting when this recentfile has been merged into some other
1000 at which epoch.
1002 =cut
1004 sub merged {
1005 my($self, $set) = @_;
1006 if (defined $set) {
1007 $self->_merged ($set);
1009 my $merged = $self->_merged;
1010 my $into;
1011 if ($merged and $into = $merged->{into_interval} and defined $self->_interval) {
1012 # sanity checks
1013 if ($into eq $self->interval) {
1014 require Carp;
1015 Carp::cluck(sprintf
1017 "Warning: into_interval[%s] same as own interval[%s]. Danger ahead.",
1018 $into,
1019 $self->interval,
1021 } elsif ($self->interval_secs($into) < $self->interval_secs) {
1022 require Carp;
1023 Carp::cluck(sprintf
1025 "Warning: into_interval_secs[%s] smaller than own interval_secs[%s] on interval[%s]. Danger ahead.",
1026 $self->interval_secs($into),
1027 $self->interval_secs,
1028 $self->interval,
1032 $merged;
1035 =head2 $hashref = $obj->meta_data
1037 Returns the hashref of metadata that the server has to add to the
1038 I<recentfile>.
1040 =cut
1042 sub meta_data {
1043 my($self) = @_;
1044 my $ret = $self->{meta};
1045 for my $m (
1046 "aggregator",
1047 "canonize",
1048 "comment",
1049 "dirtymark",
1050 "filenameroot",
1051 "interval",
1052 "merged",
1053 "minmax",
1054 "protocol",
1055 "serializer_suffix",
1057 my $v = $self->$m;
1058 if (defined $v) {
1059 $ret->{$m} = $v;
1062 # XXX need to reset the Producer if I am a writer, keep it when I
1063 # am a reader
1064 $ret->{Producers} ||= {
1065 __PACKAGE__, "$VERSION", # stringified it looks better
1066 '$0', $0,
1067 'time', Time::HiRes::time,
1069 $ret->{dirtymark} ||= Time::HiRes::time;
1070 return $ret;
1073 =head2 $success = $obj->mirror ( %options )
1075 Mirrors the files in this I<recentfile> as reported by
1076 C<recent_events>. Options named C<after>, C<before>, C<max> are passed
1077 through to the C<recent_events> call. The boolean option C<piecemeal>,
1078 if true, causes C<mirror> to only rsync C<max_files_per_connection>
1079 and keep track of the rsynced files so that future calls will rsync
1080 different files until all files are brought to sync.
1082 =cut
1084 sub mirror {
1085 my($self, %options) = @_;
1086 my $trecentfile = $self->get_remote_recentfile_as_tempfile();
1087 $self->_use_tempfile (1);
1088 # skip-deletes is inadequat for passthrough within mirror. We
1089 # would never reach uptodateness when a delete were on a
1090 # borderline
1091 my %passthrough = map { ($_ => $options{$_}) } qw(before after max);
1092 my ($recent_events) = $self->recent_events(%passthrough);
1093 my(@error, @dlcollector); # download-collector: array containing paths we need
1094 my $first_item = 0;
1095 my $last_item = $#$recent_events;
1096 my $done = $self->done;
1097 my $pathdb = $self->_pathdb;
1098 ITEM: for my $i ($first_item..$last_item) {
1099 my $status = +{};
1100 $self->_mirror_item
1103 $recent_events,
1104 $last_item,
1105 $done,
1106 $pathdb,
1107 \@dlcollector,
1108 \%options,
1109 $status,
1110 \@error,
1112 last if $i == $last_item;
1113 if ($status->{mustreturn}){
1114 if ($self->_current_tempfile && ! $self->_current_tempfile_fh) {
1115 # looks like a bug somewhere else
1116 my $t = $self->_current_tempfile;
1117 unlink $t or die "Could not unlink '$t': $!";
1118 $self->_current_tempfile(undef);
1119 $self->_use_tempfile(0);
1121 return;
1124 if (@dlcollector) {
1125 my $success = eval { $self->_mirror_dlcollector (\@dlcollector,$pathdb,$recent_events);};
1126 if (!$success || $@) {
1127 warn "Warning: Unknown error while mirroring: $@";
1128 push @error, $@;
1129 sleep 1;
1132 if ($self->verbose) {
1133 my $LFH = $self->_logfilehandle;
1134 print $LFH "DONE\n";
1136 # once we've gone to the end we consider ourselves free of obligations
1137 $self->unseed;
1138 $self->_mirror_unhide_tempfile ($trecentfile);
1139 $self->_mirror_perform_delayed_ops(\%options);
1140 return !@error;
1143 sub _mirror_item {
1144 my($self,
1146 $recent_events,
1147 $last_item,
1148 $done,
1149 $pathdb,
1150 $dlcollector,
1151 $options,
1152 $status,
1153 $error,
1154 ) = @_;
1155 my $recent_event = $recent_events->[$i];
1156 return if $done->covered ( $recent_event->{epoch} );
1157 if ($pathdb) {
1158 my $rec = $pathdb->{$recent_event->{path}};
1159 if ($rec && $rec->{recentepoch}) {
1160 if (_bigfloatgt
1161 ( $rec->{recentepoch}, $recent_event->{epoch} )){
1162 $done->register ($recent_events, [$i]);
1163 return;
1167 my $dst = $self->local_path($recent_event->{path});
1168 if ($recent_event->{type} eq "new"){
1169 $self->_mirror_item_new
1171 $dst,
1173 $last_item,
1174 $recent_events,
1175 $recent_event,
1176 $dlcollector,
1177 $pathdb,
1178 $status,
1179 $error,
1180 $options,
1182 } elsif ($recent_event->{type} eq "delete") {
1183 my $activity;
1184 if ($options->{'skip-deletes'}) {
1185 $activity = "skipped";
1186 } else {
1187 if (! -e $dst) {
1188 $activity = "not_found";
1189 } elsif (-l $dst or not -d _) {
1190 $self->delayed_operations->{unlink}{$dst}++;
1191 $activity = "deleted";
1192 } else {
1193 $self->delayed_operations->{rmdir}{$dst}++;
1194 $activity = "deleted";
1197 $done->register ($recent_events, [$i]);
1198 if ($pathdb) {
1199 $self->_mirror_register_path($pathdb,[$recent_event],$activity);
1201 } else {
1202 warn "Warning: invalid upload type '$recent_event->{type}'";
1206 sub _mirror_item_new {
1207 my($self,
1208 $dst,
1210 $last_item,
1211 $recent_events,
1212 $recent_event,
1213 $dlcollector,
1214 $pathdb,
1215 $status,
1216 $error,
1217 $options,
1218 ) = @_;
1219 if ($self->verbose) {
1220 my $doing = -e $dst ? "Sync" : "Get";
1221 my $LFH = $self->_logfilehandle;
1222 printf $LFH
1224 "%-4s %d (%d/%d/%s) %s ... ",
1225 $doing,
1226 time,
1227 1+$i,
1228 1+$last_item,
1229 $self->interval,
1230 $recent_event->{path},
1233 my $max_files_per_connection = $self->max_files_per_connection || 42;
1234 my $success;
1235 if ($self->verbose) {
1236 my $LFH = $self->_logfilehandle;
1237 print $LFH "\n";
1239 push @$dlcollector, { rev => $recent_event, i => $i };
1240 if (@$dlcollector >= $max_files_per_connection) {
1241 $success = eval {$self->_mirror_dlcollector ($dlcollector,$pathdb,$recent_events);};
1242 my $sleep = $self->sleep_per_connection;
1243 $sleep = 0.42 unless defined $sleep;
1244 Time::HiRes::sleep $sleep;
1245 if ($options->{piecemeal}) {
1246 $status->{mustreturn} = 1;
1247 return;
1249 } else {
1250 return;
1252 if (!$success || $@) {
1253 warn "Warning: Error while mirroring: $@";
1254 push @$error, $@;
1255 sleep 1;
1257 if ($self->verbose) {
1258 my $LFH = $self->_logfilehandle;
1259 print $LFH "DONE\n";
1263 sub _mirror_dlcollector {
1264 my($self,$xcoll,$pathdb,$recent_events) = @_;
1265 my $success = $self->mirror_path([map {$_->{rev}{path}} @$xcoll]);
1266 if ($pathdb) {
1267 $self->_mirror_register_path($pathdb,[map {$_->{rev}} @$xcoll],"rsync");
1269 $self->done->register($recent_events, [map {$_->{i}} @$xcoll]);
1270 @$xcoll = ();
1271 return $success;
1274 sub _mirror_register_path {
1275 my($self,$pathdb,$coll,$activity) = @_;
1276 my $time = time;
1277 for my $item (@$coll) {
1278 $pathdb->{$item->{path}} =
1280 recentepoch => $item->{epoch},
1281 ($activity."_on") => $time,
1286 sub _mirror_unhide_tempfile {
1287 my($self, $trecentfile) = @_;
1288 my $rfile = $self->rfile;
1289 if (rename $trecentfile, $rfile) {
1290 # warn "DEBUG: renamed '$trecentfile' to '$rfile'";
1291 } else {
1292 require Carp;
1293 Carp::confess("Could not rename '$trecentfile' to '$rfile': $!");
1295 $self->_use_tempfile (0);
1296 if (my $ctfh = $self->_current_tempfile_fh) {
1297 $ctfh->unlink_on_destroy (0);
1298 $self->_current_tempfile_fh (undef);
1302 sub _mirror_perform_delayed_ops {
1303 my($self,$options) = @_;
1304 my $delayed = $self->delayed_operations;
1305 for my $dst (keys %{$delayed->{unlink}}) {
1306 unless (unlink $dst) {
1307 require Carp;
1308 Carp::cluck ( "Warning: Error while unlinking '$dst': $!" ) if $options->{verbose};
1310 if ($self->verbose) {
1311 my $doing = "Del";
1312 my $LFH = $self->_logfilehandle;
1313 printf $LFH
1315 "%-4s %d (%s) %s DONE\n",
1316 $doing,
1317 time,
1318 $self->interval,
1319 $dst,
1321 delete $delayed->{unlink}{$dst};
1324 for my $dst (sort {length($b) <=> length($a)} keys %{$delayed->{rmdir}}) {
1325 unless (rmdir $dst) {
1326 require Carp;
1327 Carp::cluck ( "Warning: Error on rmdir '$dst': $!" ) if $options->{verbose};
1329 if ($self->verbose) {
1330 my $doing = "Del";
1331 my $LFH = $self->_logfilehandle;
1332 printf $LFH
1334 "%-4s %d (%s) %s DONE\n",
1335 $doing,
1336 time,
1337 $self->interval,
1338 $dst,
1340 delete $delayed->{rmdir}{$dst};
1345 =head2 $success = $obj->mirror_path ( $arrref | $path )
1347 If the argument is a scalar it is treated as a path. The remote path
1348 is mirrored into the local copy. $path is the path found in the
1349 I<recentfile>, i.e. it is relative to the root directory of the
1350 mirror.
1352 If the argument is an array reference then all elements are treated as
1353 a path below the current tree and all are rsynced with a single
1354 command (and a single connection).
1356 =cut
1358 sub mirror_path {
1359 my($self,$path) = @_;
1360 # XXX simplify the two branches such that $path is treated as
1361 # [$path] maybe even demand the argument as an arrayref to
1362 # simplify docs and code. (rsync-over-recentfile-2.pl uses the
1363 # interface)
1364 if (ref $path and ref $path eq "ARRAY") {
1365 my $dst = $self->localroot;
1366 mkpath dirname $dst;
1367 my($fh) = File::Temp->new(TEMPLATE => sprintf(".%s-XXXX",
1368 lc $self->filenameroot,
1370 TMPDIR => 1,
1371 UNLINK => 0,
1373 for my $p (@$path) {
1374 print $fh $p, "\n";
1376 $fh->flush;
1377 $fh->unlink_on_destroy(1);
1378 my $gaveup = 0;
1379 my $retried = 0;
1380 local($ENV{LANG}) = "C";
1381 while (!$self->rsync->exec
1383 src => join("/",
1384 $self->remoteroot,
1386 dst => $dst,
1387 'files-from' => $fh->filename,
1388 )) {
1389 my(@err) = $self->rsync->err;
1390 if ($self->_my_ignore_link_stat_errors && "@err" =~ m{^ rsync: \s link_stat }x ) {
1391 if ($self->verbose) {
1392 my $LFH = $self->_logfilehandle;
1393 print $LFH "Info: ignoring link_stat error '@err'";
1395 return 1;
1397 $self->register_rsync_error (@err);
1398 if (++$retried >= 3) {
1399 my $batchsize = @$path;
1400 warn "The number of rsync retries now reached 3 within a batch of size $batchsize. Error was '@err'. Giving up now, will retry later, ";
1401 $gaveup = 1;
1402 last;
1404 sleep 1;
1406 unless ($gaveup) {
1407 $self->un_register_rsync_error ();
1409 } else {
1410 my $dst = $self->local_path($path);
1411 mkpath dirname $dst;
1412 local($ENV{LANG}) = "C";
1413 while (!$self->rsync->exec
1415 src => join("/",
1416 $self->remoteroot,
1417 $path
1419 dst => $dst,
1420 )) {
1421 my(@err) = $self->rsync->err;
1422 if ($self->_my_ignore_link_stat_errors && "@err" =~ m{^ rsync: \s link_stat }x ) {
1423 if ($self->verbose) {
1424 my $LFH = $self->_logfilehandle;
1425 print $LFH "Info: ignoring link_stat error '@err'";
1427 return 1;
1429 $self->register_rsync_error (@err);
1431 $self->un_register_rsync_error ();
1433 return 1;
1436 sub _my_ignore_link_stat_errors {
1437 my($self) = @_;
1438 my $x = $self->ignore_link_stat_errors;
1439 $x = 1 unless defined $x;
1440 return $x;
1443 sub _my_current_rfile {
1444 my($self) = @_;
1445 my $rfile;
1446 if ($self->_use_tempfile) {
1447 $rfile = $self->_current_tempfile;
1449 unless ($rfile && -s $rfile) {
1450 $rfile = $self->rfile;
1452 return $rfile;
1455 =head2 $path = $obj->naive_path_normalize ($path)
1457 Takes an absolute unix style path as argument and canonicalizes it to
1458 a shorter path if possible, removing things like double slashes or
1459 C</./> and removes references to C<../> directories to get a shorter
1460 unambiguos path. This is used to make the code easier that determines
1461 if a file passed to C<upgrade()> is indeed below our C<localroot>.
1463 =cut
1465 sub naive_path_normalize {
1466 my($self,$path) = @_;
1467 $path =~ s|/+|/|g;
1468 1 while $path =~ s|/[^/]+/\.\./|/|;
1469 $path =~ s|/$||;
1470 $path;
1473 =head2 $ret = $obj->read_recent_1 ( $data )
1475 Delegate of C<recent_events()> on protocol 1
1477 =cut
1479 sub read_recent_1 {
1480 my($self, $data) = @_;
1481 return $data->{recent};
1484 =head2 $array_ref = $obj->recent_events ( %options )
1486 Note: the code relies on the resource being written atomically. We
1487 cannot lock because we may have no write access. If the caller has
1488 write access (eg. aggregate() or update()), it has to care for any
1489 necessary locking and it MUST write atomically.
1491 If C<$options{after}> is specified, only file events after this
1492 timestamp are returned.
1494 If C<$options{before}> is specified, only file events before this
1495 timestamp are returned.
1497 If C<$options{max}> is specified only a maximum of this many most
1498 recent events is returned.
1500 If C<$options{'skip-deletes'}> is specified, no files-to-be-deleted
1501 will be returned.
1503 If C<$options{contains}> is specified the value must be a hash
1504 reference containing a query. The query may contain the keys C<epoch>,
1505 C<path>, and C<type>. Each represents a condition that must be met. If
1506 there is more than one such key, the conditions are ANDed.
1508 If C<$options{info}> is specified, it must be a hashref. This hashref
1509 will be filled with metadata about the unfiltered recent_events of
1510 this object, in key C<first> there is the first item, in key C<last>
1511 is the last.
1513 =cut
1515 sub recent_events {
1516 my ($self, %options) = @_;
1517 my $info = $options{info};
1518 if ($self->is_slave) {
1519 # XXX seems dubious, might produce tempfiles without removing them?
1520 $self->get_remote_recentfile_as_tempfile;
1522 my $rfile_or_tempfile = $self->_my_current_rfile or return [];
1523 -e $rfile_or_tempfile or return [];
1524 my $suffix = $self->serializer_suffix;
1525 my ($data) = eval {
1526 $self->_try_deserialize
1528 $suffix,
1529 $rfile_or_tempfile,
1532 my $err = $@;
1533 if ($err or !$data) {
1534 return [];
1536 my $re;
1537 if (reftype $data eq 'ARRAY') { # protocol 0
1538 $re = $data;
1539 } else {
1540 $re = $self->_recent_events_protocol_x
1542 $data,
1543 $rfile_or_tempfile,
1546 return $re unless grep {defined $options{$_}} qw(after before contains max skip-deletes);
1547 $self->_recent_events_handle_options ($re, \%options);
1550 # File::Rsync::Mirror::Recentfile::_recent_events_handle_options
1551 sub _recent_events_handle_options {
1552 my($self, $re, $options) = @_;
1553 my $last_item = $#$re;
1554 my $info = $options->{info};
1555 if ($info) {
1556 $info->{first} = $re->[0];
1557 $info->{last} = $re->[-1];
1559 if (defined $options->{after}) {
1560 if ($re->[0]{epoch} > $options->{after}) {
1561 if (
1562 my $f = first
1563 {$re->[$_]{epoch} <= $options->{after}}
1564 0..$#$re
1566 $last_item = $f-1;
1568 } else {
1569 $last_item = -1;
1572 my $first_item = 0;
1573 if (defined $options->{before}) {
1574 if ($re->[0]{epoch} > $options->{before}) {
1575 if (
1576 my $f = first
1577 {$re->[$_]{epoch} < $options->{before}}
1578 0..$last_item
1580 $first_item = $f;
1582 } else {
1583 $first_item = 0;
1586 if (0 != $first_item || -1 != $last_item) {
1587 @$re = splice @$re, $first_item, 1+$last_item-$first_item;
1589 if ($options->{'skip-deletes'}) {
1590 @$re = grep { $_->{type} ne "delete" } @$re;
1592 if (my $contopt = $options->{contains}) {
1593 my $seen_allowed = 0;
1594 for my $allow (qw(epoch path type)) {
1595 if (exists $contopt->{$allow}) {
1596 $seen_allowed++;
1597 my $v = $contopt->{$allow};
1598 @$re = grep { $_->{$allow} eq $v } @$re;
1601 if (keys %$contopt > $seen_allowed) {
1602 require Carp;
1603 Carp::confess
1604 (sprintf "unknown query: %s", join ", ", %$contopt);
1607 if ($options->{max} && @$re > $options->{max}) {
1608 @$re = splice @$re, 0, $options->{max};
1610 $re;
1613 sub _recent_events_protocol_x {
1614 my($self,
1615 $data,
1616 $rfile_or_tempfile,
1617 ) = @_;
1618 my $meth = sprintf "read_recent_%d", $data->{meta}{protocol};
1619 # we may be reading meta for the first time
1620 while (my($k,$v) = each %{$data->{meta}}) {
1621 if ($k ne lc $k){ # "Producers"
1622 $self->{ORIG}{$k} = $v;
1623 next;
1625 next if defined $self->$k;
1626 $self->$k($v);
1628 my $re = $self->$meth ($data);
1629 my $minmax;
1630 if (my @stat = stat $rfile_or_tempfile) {
1631 $minmax = { mtime => $stat[9] };
1632 } else {
1633 # defensive because ABH encountered:
1635 #### Sync 1239828608 (1/1/Z) temp .../authors/.FRMRecent-RECENT-Z.yaml-
1636 #### Ydr_.yaml ... DONE
1637 #### Cannot stat '/mirrors/CPAN/authors/.FRMRecent-RECENT-Z.yaml-
1638 #### Ydr_.yaml': No such file or directory at /usr/lib/perl5/site_perl/
1639 #### 5.8.8/File/Rsync/Mirror/Recentfile.pm line 1558.
1640 #### unlink0: /mirrors/CPAN/authors/.FRMRecent-RECENT-Z.yaml-Ydr_.yaml is
1641 #### gone already at cpan-pause.pl line 0
1643 my $LFH = $self->_logfilehandle;
1644 print $LFH "Warning (maybe harmless): Cannot stat '$rfile_or_tempfile': $!"
1646 if (@$re) {
1647 $minmax->{min} = $re->[-1]{epoch};
1648 $minmax->{max} = $re->[0]{epoch};
1650 $self->minmax ( $minmax );
1651 return $re;
1654 sub _try_deserialize {
1655 my($self,
1656 $suffix,
1657 $rfile_or_tempfile,
1658 ) = @_;
1659 if ($suffix eq ".yaml") {
1660 require YAML::Syck;
1661 YAML::Syck::LoadFile($rfile_or_tempfile);
1662 } elsif ($HAVE->{"Data::Serializer"}) {
1663 my $serializer = Data::Serializer->new
1664 ( serializer => $serializers{$suffix} );
1665 my $serialized = do
1667 open my $fh, $rfile_or_tempfile or die "Could not open: $!";
1668 local $/;
1669 <$fh>;
1671 $serializer->raw_deserialize($serialized);
1672 } else {
1673 die "Data::Serializer not installed, cannot proceed with suffix '$suffix'";
1677 sub _refresh_internals {
1678 my($self, $dst) = @_;
1679 my $class = ref $self;
1680 my $rfpeek = $class->new_from_file ($dst);
1681 for my $acc (qw(
1682 _merged
1683 minmax
1684 )) {
1685 $self->$acc ( $rfpeek->$acc );
1687 my $old_dirtymark = $self->dirtymark;
1688 my $new_dirtymark = $rfpeek->dirtymark;
1689 if ($old_dirtymark && $new_dirtymark && $new_dirtymark ne $old_dirtymark) {
1690 $self->done->reset;
1691 $self->dirtymark ( $new_dirtymark );
1692 $self->_uptodateness_ever_reached(0);
1693 $self->seed;
1697 =head2 $ret = $obj->rfilename
1699 Just the basename of our I<recentfile>, composed from C<filenameroot>,
1700 a dash, C<interval>, and C<serializer_suffix>. E.g. C<RECENT-6h.yaml>
1702 =cut
1704 sub rfilename {
1705 my($self) = @_;
1706 my $file = sprintf("%s-%s%s",
1707 $self->filenameroot,
1708 $self->interval,
1709 $self->serializer_suffix,
1711 return $file;
1714 =head2 $str = $self->remote_dir
1716 The directory we are mirroring from.
1718 =cut
1720 sub remote_dir {
1721 my($self, $set) = @_;
1722 if (defined $set) {
1723 $self->_remote_dir ($set);
1725 my $x = $self->_remote_dir;
1726 $self->is_slave (1);
1727 return $x;
1730 =head2 $str = $obj->remoteroot
1732 =head2 (void) $obj->remoteroot ( $set )
1734 Get/Set the composed prefix needed when rsyncing from a remote module.
1735 If remote_host, remote_module, and remote_dir are set, it is composed
1736 from these.
1738 =cut
1740 sub remoteroot {
1741 my($self, $set) = @_;
1742 if (defined $set) {
1743 $self->_remoteroot($set);
1745 my $remoteroot = $self->_remoteroot;
1746 unless (defined $remoteroot) {
1747 $remoteroot = sprintf
1749 "%s%s%s",
1750 defined $self->remote_host ? ($self->remote_host."::") : "",
1751 defined $self->remote_module ? ($self->remote_module."/") : "",
1752 defined $self->remote_dir ? $self->remote_dir : "",
1754 $self->_remoteroot($remoteroot);
1756 return $remoteroot;
1759 =head2 (void) $obj->split_rfilename ( $recentfilename )
1761 Inverse method to C<rfilename>. C<$recentfilename> is a plain filename
1762 of the pattern
1764 $filenameroot-$interval$serializer_suffix
1766 e.g.
1768 RECENT-1M.yaml
1770 This filename is split into its parts and the parts are fed to the
1771 object itself.
1773 =cut
1775 sub split_rfilename {
1776 my($self, $rfname) = @_;
1777 my($splitter) = qr(^(.+)-([^-\.]+)(\.[^\.]+));
1778 if (my($f,$i,$s) = $rfname =~ $splitter) {
1779 $self->filenameroot ($f);
1780 $self->interval ($i);
1781 $self->serializer_suffix ($s);
1782 } else {
1783 die "Alert: cannot split '$rfname', doesn't match '$splitter'";
1785 return;
1788 =head2 my $rfile = $obj->rfile
1790 Returns the full path of the I<recentfile>
1792 =cut
1794 sub rfile {
1795 my($self) = @_;
1796 my $rfile = $self->_rfile;
1797 return $rfile if defined $rfile;
1798 $rfile = File::Spec->catfile
1799 ($self->localroot,
1800 $self->rfilename,
1802 $self->_rfile ($rfile);
1803 return $rfile;
1806 =head2 $rsync_obj = $obj->rsync
1808 The File::Rsync object that this object uses for communicating with an
1809 upstream server.
1811 =cut
1813 sub rsync {
1814 my($self) = @_;
1815 my $rsync = $self->_rsync;
1816 unless (defined $rsync) {
1817 my $rsync_options = $self->rsync_options || {};
1818 if ($HAVE->{"File::Rsync"}) {
1819 $rsync = File::Rsync->new($rsync_options);
1820 $self->_rsync($rsync);
1821 } else {
1822 die "File::Rsync required for rsync operations. Cannot continue";
1825 return $rsync;
1828 =head2 (void) $obj->register_rsync_error(@err)
1830 =head2 (void) $obj->un_register_rsync_error()
1832 Register_rsync_error is called whenever the File::Rsync object fails
1833 on an exec (say, connection doesn't succeed). It issues a warning and
1834 sleeps for an increasing amount of time. Un_register_rsync_error
1835 resets the error count. See also accessor C<max_rsync_errors>.
1837 =cut
1840 my $no_success_count = 0;
1841 my $no_success_time = 0;
1842 sub register_rsync_error {
1843 my($self, @err) = @_;
1844 chomp @err;
1845 $no_success_time = time;
1846 $no_success_count++;
1847 my $max_rsync_errors = $self->max_rsync_errors;
1848 $max_rsync_errors = MAX_INT unless defined $max_rsync_errors;
1849 if ($max_rsync_errors>=0 && $no_success_count >= $max_rsync_errors) {
1850 require Carp;
1851 Carp::confess
1853 sprintf
1855 "Alert: Error while rsyncing (%s): '%s', error count: %d, exiting now,",
1856 $self->interval,
1857 join(" ",@err),
1858 $no_success_count,
1861 my $sleep = 12 * $no_success_count;
1862 $sleep = 300 if $sleep > 300;
1863 require Carp;
1864 Carp::cluck
1865 (sprintf
1867 "Warning: %s, Error while rsyncing (%s): '%s', sleeping %d",
1868 scalar(localtime($no_success_time)),
1869 $self->interval,
1870 join(" ",@err),
1871 $sleep,
1873 sleep $sleep
1875 sub un_register_rsync_error {
1876 my($self) = @_;
1877 $no_success_time = 0;
1878 $no_success_count = 0;
1882 =head2 $clone = $obj->_sparse_clone
1884 Clones just as much from itself that it does not hurt. Experimental
1885 method.
1887 Note: what fits better: sparse or shallow? Other suggestions?
1889 =cut
1891 sub _sparse_clone {
1892 my($self) = @_;
1893 my $new = bless {}, ref $self;
1894 for my $m (qw(
1895 _interval
1896 _localroot
1897 _remoteroot
1898 _rfile
1899 _use_tempfile
1900 aggregator
1901 filenameroot
1902 ignore_link_stat_errors
1903 is_slave
1904 max_files_per_connection
1905 protocol
1906 rsync_options
1907 serializer_suffix
1908 sleep_per_connection
1909 tempdir
1910 verbose
1911 )) {
1912 my $o = $self->$m;
1913 $o = Storable::dclone $o if ref $o;
1914 $new->$m($o);
1916 $new;
1919 =head2 $boolean = OBJ->ttl_reached ()
1921 =cut
1923 sub ttl_reached {
1924 my($self) = @_;
1925 my $have_mirrored = $self->have_mirrored || 0;
1926 my $now = Time::HiRes::time;
1927 my $ttl = $self->ttl;
1928 $ttl = 24.2 unless defined $ttl;
1929 if ($now > $have_mirrored + $ttl) {
1930 return 1;
1932 return 0;
1935 =head2 (void) $obj->unlock()
1937 Unlocking is implemented with an C<rmdir> on a locking directory
1938 (C<.lock> appended to $rfile).
1940 =cut
1942 sub unlock {
1943 my($self) = @_;
1944 return unless $self->_is_locked;
1945 my $rfile = $self->rfile;
1946 unlink "$rfile.lock/process" or warn "Could not unlink lockfile '$rfile.lock/process': $!";
1947 rmdir "$rfile.lock" or warn "Could not rmdir lockdir '$rfile.lock': $!";;
1948 $self->_is_locked (0);
1951 =head2 unseed
1953 Sets this recentfile in the state of not 'seeded'.
1955 =cut
1956 sub unseed {
1957 my($self) = @_;
1958 $self->seeded(0);
1961 =head2 $ret = $obj->update ($path, $type)
1963 =head2 $ret = $obj->update ($path, "new", $dirty_epoch)
1965 =head2 $ret = $obj->update ()
1967 Enter one file into the local I<recentfile>. $path is the (usually
1968 absolute) path. If the path is outside I<our> tree, then it is
1969 ignored.
1971 C<$type> is one of C<new> or C<delete>.
1973 Events of type C<new> may set $dirty_epoch. $dirty_epoch is normally
1974 not used and the epoch is calculated by the update() routine itself
1975 based on current time. But if there is the demand to insert a
1976 not-so-current file into the dataset, then the caller sets
1977 $dirty_epoch. This causes the epoch of the registered event to become
1978 $dirty_epoch or -- if the exact value given is already taken -- a tiny
1979 bit more. As compensation the dirtymark of the whole dataset is set to
1980 now or the current epoch, whichever is higher. Note: setting the
1981 dirty_epoch to the future is prohibited as it's very unlikely to be
1982 intended: it definitely might wreak havoc with the index files.
1984 The new file event is unshifted (or, if dirty_epoch is set, inserted
1985 at the place it belongs to, according to the rule to have a sequence
1986 of strictly decreasing timestamps) to the array of recent_events and
1987 the array is shortened to the length of the timespan allowed. This is
1988 usually the timespan specified by the interval of this recentfile but
1989 as long as this recentfile has not been merged to another one, the
1990 timespan may grow without bounds.
1992 The third form runs an update without inserting a new file. This may
1993 be desired to truncate a recentfile.
1995 =cut
1996 sub _epoch_monotonically_increasing {
1997 my($self,$epoch,$recent) = @_;
1998 return $epoch unless @$recent; # the first one goes unoffended
1999 if (_bigfloatgt("".$epoch,$recent->[0]{epoch})) {
2000 return $epoch;
2001 } else {
2002 return _increase_a_bit($recent->[0]{epoch});
2005 sub update {
2006 my($self,$path,$type,$dirty_epoch) = @_;
2007 if (defined $path or defined $type or defined $dirty_epoch) {
2008 die "update called without path argument" unless defined $path;
2009 die "update called without type argument" unless defined $type;
2010 die "update called with illegal type argument: $type" unless $type =~ /(new|delete)/;
2012 $self->lock;
2013 my $ctx = $self->_locked_batch_update([{path=>$path,type=>$type,epoch=>$dirty_epoch}]);
2014 $self->write_recent($ctx->{recent}) if $ctx->{something_done};
2015 $self->_assert_symlink;
2016 $self->unlock;
2019 =head2 $obj->batch_update($batch)
2021 Like update but for many files. $batch is an arrayref containing
2022 hashrefs with the structure
2025 path => $path,
2026 type => $type,
2027 epoch => $epoch,
2032 =cut
2033 sub batch_update {
2034 my($self,$batch) = @_;
2035 $self->lock;
2036 my $ctx = $self->_locked_batch_update($batch);
2037 $self->write_recent($ctx->{recent}) if $ctx->{something_done};
2038 $self->_assert_symlink;
2039 $self->unlock;
2041 sub _locked_batch_update {
2042 my($self,$batch) = @_;
2043 my $something_done = 0;
2044 my $recent = $self->recent_events;
2045 unless ($recent->[0]) {
2046 # obstetrics
2047 $something_done = 1;
2049 my %paths_in_recent = map { $_->{path} => undef } @$recent;
2050 my $interval = $self->interval;
2051 my $canonmeth = $self->canonize;
2052 unless ($canonmeth) {
2053 $canonmeth = "naive_path_normalize";
2055 my $oldest_allowed = 0;
2056 my $setting_new_dirty_mark = 0;
2057 my $console;
2058 if ($self->verbose && @$batch > 1) {
2059 eval {require Time::Progress};
2060 warn "dollarat[$@]" if $@;
2061 $| = 1;
2062 $console = new Time::Progress;
2063 $console->attr( min => 1, max => scalar @$batch );
2064 print "\n";
2066 my $i = 0;
2067 my $memo_splicepos;
2068 ITEM: for my $item (sort {($b->{epoch}||0) <=> ($a->{epoch}||0)} @$batch) {
2069 $i++;
2070 print $console->report( "\rdone %p elapsed: %L (%l sec), ETA %E (%e sec)", $i ) if $console and not $i % 50;
2071 my $ctx = $self->_update_batch_item($item,$canonmeth,$recent,$setting_new_dirty_mark,$oldest_allowed,$something_done,\%paths_in_recent,$memo_splicepos);
2072 $something_done = $ctx->{something_done};
2073 $oldest_allowed = $ctx->{oldest_allowed};
2074 $setting_new_dirty_mark = $ctx->{setting_new_dirty_mark};
2075 $recent = $ctx->{recent};
2076 $memo_splicepos = $ctx->{memo_splicepos};
2078 print "\n" if $console;
2079 if ($setting_new_dirty_mark) {
2080 $oldest_allowed = 0;
2082 TRUNCATE: while (@$recent) {
2083 if (_bigfloatlt($recent->[-1]{epoch}, $oldest_allowed)) {
2084 pop @$recent;
2085 $something_done = 1;
2086 } else {
2087 last TRUNCATE;
2090 return {something_done=>$something_done,recent=>$recent};
2092 sub _update_batch_item {
2093 my($self,$item,$canonmeth,$recent,$setting_new_dirty_mark,$oldest_allowed,$something_done,$paths_in_recent,$memo_splicepos) = @_;
2094 my($path,$type,$dirty_epoch) = @{$item}{qw(path type epoch)};
2095 if (defined $path or defined $type or defined $dirty_epoch) {
2096 $path = $self->$canonmeth($path);
2098 # you must calculate the time after having locked, of course
2099 my $now = Time::HiRes::time;
2101 my $epoch;
2102 if (defined $dirty_epoch && _bigfloatgt($now,$dirty_epoch)) {
2103 $epoch = $dirty_epoch;
2104 } else {
2105 $epoch = $self->_epoch_monotonically_increasing($now,$recent);
2107 $recent ||= [];
2108 my $merged = $self->merged;
2109 if ($merged->{epoch} && !$setting_new_dirty_mark) {
2110 my $virtualnow = _bigfloatmax($now,$epoch);
2111 # for the lower bound I think we need no big math, we calc already
2112 my $secs = $self->interval_secs();
2113 $oldest_allowed = min($virtualnow - $secs, $merged->{epoch}, $epoch);
2114 } else {
2115 # as long as we are not merged at all, no limits!
2117 my $lrd = $self->localroot;
2118 if (defined $path && $path =~ s|^\Q$lrd\E||) {
2119 $path =~ s|^/||;
2120 my $splicepos;
2121 # remove the older duplicates of this $path, irrespective of $type:
2122 if (defined $dirty_epoch) {
2123 my $ctx = $self->_update_with_dirty_epoch($path,$recent,$epoch,$paths_in_recent,$memo_splicepos);
2124 $recent = $ctx->{recent};
2125 $splicepos = $ctx->{splicepos};
2126 $epoch = $ctx->{epoch};
2127 my $dirtymark = $self->dirtymark;
2128 my $new_dm = $now;
2129 if (_bigfloatgt($epoch, $now)) { # just in case we had to increase it
2130 $new_dm = $epoch;
2132 $self->dirtymark($new_dm);
2133 $setting_new_dirty_mark = 1;
2134 if (not defined $merged->{epoch} or _bigfloatlt($epoch,$merged->{epoch})) {
2135 $self->merged(+{});
2137 } else {
2138 $recent = [ grep { $_->{path} ne $path } @$recent ];
2139 $splicepos = 0;
2141 if (defined $splicepos) {
2142 splice @$recent, $splicepos, 0, { epoch => $epoch, path => $path, type => $type };
2143 $paths_in_recent->{$path} = undef;
2145 $memo_splicepos = $splicepos;
2146 $something_done = 1;
2148 return
2150 something_done => $something_done,
2151 oldest_allowed => $oldest_allowed,
2152 setting_new_dirty_mark => $setting_new_dirty_mark,
2153 recent => $recent,
2154 memo_splicepos => $memo_splicepos,
2157 sub _update_with_dirty_epoch {
2158 my($self,$path,$recent,$epoch,$paths_in_recent,$memo_splicepos) = @_;
2159 my $splicepos;
2160 my $new_recent = [];
2161 if (exists $paths_in_recent->{$path}) {
2162 my $cancel = 0;
2163 KNOWN_EVENT: for my $i (0..$#$recent) {
2164 if ($recent->[$i]{path} eq $path) {
2165 if ($recent->[$i]{epoch} eq $epoch) {
2166 # nothing to do
2167 $cancel = 1;
2168 last KNOWN_EVENT;
2170 } else {
2171 push @$new_recent, $recent->[$i];
2174 @$recent = @$new_recent unless $cancel;
2176 if (!exists $recent->[0] or _bigfloatgt($epoch,$recent->[0]{epoch})) {
2177 $splicepos = 0;
2178 } elsif (_bigfloatlt($epoch,$recent->[-1]{epoch})) {
2179 $splicepos = @$recent;
2180 } else {
2181 my $startingpoint;
2182 if (_bigfloatgt($memo_splicepos<=$#$recent && $epoch, $recent->[$memo_splicepos]{epoch})) {
2183 $startingpoint = 0;
2184 } else {
2185 $startingpoint = $memo_splicepos;
2187 RECENT: for my $i ($startingpoint..$#$recent) {
2188 my $ev = $recent->[$i];
2189 if ($epoch eq $recent->[$i]{epoch}) {
2190 $epoch = _increase_a_bit($epoch, $i ? $recent->[$i-1]{epoch} : undef);
2192 if (_bigfloatgt($epoch,$recent->[$i]{epoch})) {
2193 $splicepos = $i;
2194 last RECENT;
2198 return {
2199 recent => $recent,
2200 splicepos => $splicepos,
2201 epoch => $epoch,
2205 =head2 seed
2207 Sets this recentfile in the state of 'seeded' which means it has to
2208 re-evaluate its uptodateness.
2210 =cut
2211 sub seed {
2212 my($self) = @_;
2213 $self->seeded(1);
2216 =head2 seeded
2218 Tells if the recentfile is in the state 'seeded'.
2220 =cut
2221 sub seeded {
2222 my($self, $set) = @_;
2223 if (defined $set) {
2224 $self->_seeded ($set);
2226 my $x = $self->_seeded;
2227 unless (defined $x) {
2228 $x = 0;
2229 $self->_seeded ($x);
2231 return $x;
2234 =head2 uptodate
2236 True if this object has mirrored the complete interval covered by the
2237 current recentfile.
2239 =cut
2240 sub uptodate {
2241 my($self) = @_;
2242 my $uptodate;
2243 my $why;
2244 if ($self->_uptodateness_ever_reached and not $self->seeded) {
2245 $why = "saturated";
2246 $uptodate = 1;
2248 # it's too easy to misconfigure ttl and related timings and then
2249 # never reach uptodateness, so disabled 2009-03-22
2250 if (0 and not defined $uptodate) {
2251 if ($self->ttl_reached){
2252 $why = "ttl_reached returned true, so we are not uptodate";
2253 $uptodate = 0 ;
2256 unless (defined $uptodate) {
2257 # look if recentfile has unchanged timestamp
2258 my $minmax = $self->minmax;
2259 if (exists $minmax->{mtime}) {
2260 my $rfile = $self->_my_current_rfile;
2261 my @stat = stat $rfile;
2262 if (@stat) {
2263 my $mtime = $stat[9];
2264 if (defined $mtime && defined $minmax->{mtime} && $mtime > $minmax->{mtime}) {
2265 $why = "mtime[$mtime] of rfile[$rfile] > minmax/mtime[$minmax->{mtime}], so we are not uptodate";
2266 $uptodate = 0;
2267 } else {
2268 my $covered = $self->done->covered(@$minmax{qw(max min)});
2269 $why = sprintf "minmax covered[%s], so we return that", defined $covered ? $covered : "UNDEF";
2270 $uptodate = $covered;
2272 } else {
2273 require Carp;
2274 $why = "Could not stat '$rfile': $!";
2275 Carp::cluck($why);
2276 $uptodate = 0;
2280 unless (defined $uptodate) {
2281 $why = "fallthrough, so not uptodate";
2282 $uptodate = 0;
2284 if ($uptodate) {
2285 $self->_uptodateness_ever_reached(1);
2287 my $remember =
2289 uptodate => $uptodate,
2290 why => $why,
2292 $self->_remember_last_uptodate_call($remember);
2293 return $uptodate;
2296 =head2 $obj->write_recent ($recent_files_arrayref)
2298 Writes a I<recentfile> based on the current reflection of the current
2299 state of the tree limited by the current interval.
2301 =cut
2302 sub _resort {
2303 my($self) = @_;
2304 @{$_[1]} = sort { _bigfloatcmp($b->{epoch},$a->{epoch}) } @{$_[1]};
2305 return;
2307 sub write_recent {
2308 my ($self,$recent) = @_;
2309 die "write_recent called without argument" unless defined $recent;
2310 my $Last_epoch;
2311 SANITYCHECK: for my $i (0..$#$recent) {
2312 if (defined($Last_epoch) and _bigfloatge($recent->[$i]{epoch},$Last_epoch)) {
2313 require Carp;
2314 Carp::confess(sprintf "Warning: disorder '%s'>='%s', re-sorting %s\n",
2315 $recent->[$i]{epoch}, $Last_epoch, $self->interval);
2316 # you may want to:
2317 # $self->_resort($recent);
2318 # last SANITYCHECK;
2320 $Last_epoch = $recent->[$i]{epoch};
2322 my $minmax = $self->minmax;
2323 if (!defined $minmax->{max} || _bigfloatlt($minmax->{max},$recent->[0]{epoch})) {
2324 $minmax->{max} = @$recent && exists $recent->[0]{epoch} ? $recent->[0]{epoch} : undef;
2326 if (!defined $minmax->{min} || _bigfloatlt($minmax->{min},$recent->[-1]{epoch})) {
2327 $minmax->{min} = @$recent && exists $recent->[-1]{epoch} ? $recent->[-1]{epoch} : undef;
2329 $self->minmax($minmax);
2330 my $meth = sprintf "write_%d", $self->protocol;
2331 $self->$meth($recent);
2334 =head2 $obj->write_0 ($recent_files_arrayref)
2336 Delegate of C<write_recent()> on protocol 0
2338 =cut
2340 sub write_0 {
2341 my ($self,$recent) = @_;
2342 my $rfile = $self->rfile;
2343 YAML::Syck::DumpFile("$rfile.new",$recent);
2344 rename "$rfile.new", $rfile or die "Could not rename to '$rfile': $!";
2347 =head2 $obj->write_1 ($recent_files_arrayref)
2349 Delegate of C<write_recent()> on protocol 1
2351 =cut
2353 sub write_1 {
2354 my ($self,$recent) = @_;
2355 my $rfile = $self->rfile;
2356 my $suffix = $self->serializer_suffix;
2357 my $data = {
2358 meta => $self->meta_data,
2359 recent => $recent,
2361 my $serialized;
2362 if ($suffix eq ".yaml") {
2363 $serialized = YAML::Syck::Dump($data);
2364 } elsif ($HAVE->{"Data::Serializer"}) {
2365 my $serializer = Data::Serializer->new
2366 ( serializer => $serializers{$suffix} );
2367 $serialized = $serializer->raw_serialize($data);
2368 } else {
2369 die "Data::Serializer not installed, cannot proceed with suffix '$suffix'";
2371 open my $fh, ">", "$rfile.new" or die "Could not open >'$rfile.new': $!";
2372 print $fh $serialized;
2373 close $fh or die "Could not close '$rfile.new': $!";
2374 rename "$rfile.new", $rfile or die "Could not rename to '$rfile': $!";
2377 BEGIN {
2378 my $nq = qr/[^"]+/; # non-quotes
2379 my @pod_lines =
2380 split /\n/, <<'=cut'; %serializers = map { my @x = /"($nq)"\s+=>\s+"($nq)"/; @x } grep {s/^=item\s+C<<\s+(.+)\s+>>$/$1/} @pod_lines; }
2382 =head1 SERIALIZERS
2384 The following suffixes are supported and trigger the use of these
2385 serializers:
2387 =over 4
2389 =item C<< ".yaml" => "YAML::Syck" >>
2391 =item C<< ".json" => "JSON" >>
2393 =item C<< ".sto" => "Storable" >>
2395 =item C<< ".dd" => "Data::Dumper" >>
2397 =back
2399 =cut
2401 BEGIN {
2402 my @pod_lines =
2403 split /\n/, <<'=cut'; %seconds = map { eval } grep {s/^=item\s+C<<(.+)>>$/$1/} @pod_lines; }
2405 =head1 INTERVAL SPEC
2407 An interval spec is a primitive way to express time spans. Normally it
2408 is composed from an integer and a letter.
2410 As a special case, a string that consists only of the single letter
2411 C<Z>, stands for MAX_INT seconds.
2413 The following letters express the specified number of seconds:
2415 =over 4
2417 =item C<< s => 1 >>
2419 =item C<< m => 60 >>
2421 =item C<< h => 60*60 >>
2423 =item C<< d => 60*60*24 >>
2425 =item C<< W => 60*60*24*7 >>
2427 =item C<< M => 60*60*24*30 >>
2429 =item C<< Q => 60*60*24*90 >>
2431 =item C<< Y => 60*60*24*365.25 >>
2433 =back
2435 =cut
2437 =head1 SEE ALSO
2439 L<File::Rsync::Mirror::Recent>,
2440 L<File::Rsync::Mirror::Recentfile::Done>,
2441 L<File::Rsync::Mirror::Recentfile::FakeBigFloat>
2443 =head1 BUGS
2445 Please report any bugs or feature requests through the web interface
2447 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Rsync-Mirror-Recentfile>.
2448 I will be notified, and then you'll automatically be notified of
2449 progress on your bug as I make changes.
2451 =head1 KNOWN BUGS
2453 Memory hungry: it seems all memory is allocated during the initial
2454 rsync where a list of all files is maintained in memory.
2456 =head1 SUPPORT
2458 You can find documentation for this module with the perldoc command.
2460 perldoc File::Rsync::Mirror::Recentfile
2462 You can also look for information at:
2464 =over 4
2466 =item * RT: CPAN's request tracker
2468 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Rsync-Mirror-Recentfile>
2470 =item * AnnoCPAN: Annotated CPAN documentation
2472 L<http://annocpan.org/dist/File-Rsync-Mirror-Recentfile>
2474 =item * CPAN Ratings
2476 L<http://cpanratings.perl.org/d/File-Rsync-Mirror-Recentfile>
2478 =item * Search CPAN
2480 L<http://search.cpan.org/dist/File-Rsync-Mirror-Recentfile>
2482 =back
2485 =head1 ACKNOWLEDGEMENTS
2487 Thanks to RJBS for module-starter.
2489 =head1 AUTHOR
2491 Andreas König
2493 =head1 COPYRIGHT & LICENSE
2495 Copyright 2008,2009 Andreas König.
2497 This program is free software; you can redistribute it and/or modify it
2498 under the same terms as Perl itself.
2501 =cut
2503 1; # End of File::Rsync::Mirror::Recentfile
2505 # Local Variables:
2506 # mode: cperl
2507 # cperl-indent-level: 4
2508 # End: