yet another symlink bug spotted by Henk P. Penning
[rersyncrecent.git] / lib / File / Rsync / Mirror / Recentfile.pm
blob9a29c7e7ba5a2946166cd6ae92875d82c8dc5091
1 package File::Rsync::Mirror::Recentfile;
3 # use warnings;
4 use strict;
6 =encoding utf-8
8 =head1 NAME
10 File::Rsync::Mirror::Recentfile - mirroring via rsync made efficient
12 =cut
14 my $HAVE = {};
15 for my $package (
16 "Data::Serializer",
17 "File::Rsync"
18 ) {
19 $HAVE->{$package} = eval qq{ require $package; };
21 use Config;
22 use File::Basename qw(basename dirname fileparse);
23 use File::Copy qw(cp);
24 use File::Path qw(mkpath);
25 use File::Rsync::Mirror::Recentfile::FakeBigFloat qw(:all);
26 use File::Temp;
27 use List::Util qw(first max min);
28 use Scalar::Util qw(reftype);
29 use Storable;
30 use Time::HiRes qw();
31 use YAML::Syck;
33 use version; our $VERSION = qv('0.0.8');
35 use constant MAX_INT => ~0>>1; # anything better?
36 use constant DEFAULT_PROTOCOL => 1;
38 # cf. interval_secs
39 my %seconds;
41 # maybe subclass if this mapping is bad?
42 my %serializers;
44 =head1 SYNOPSIS
46 Writer (of a single file):
48 use File::Rsync::Mirror::Recentfile;
49 my $fr = File::Rsync::Mirror::Recentfile->new
51 interval => q(6h),
52 filenameroot => "RECENT",
53 comment => "These 'RECENT' files are part of a test of a new CPAN mirroring concept. Please ignore them for now.",
54 localroot => "/home/ftp/pub/PAUSE/authors/",
55 aggregator => [qw(1d 1W 1M 1Q 1Y Z)],
57 $rf->update("/home/ftp/pub/PAUSE/authors/id/A/AN/ANDK/CPAN-1.92_63.tar.gz","new");
59 Reader/mirrorer:
61 my $rf = File::Rsync::Mirror::Recentfile->new
63 filenameroot => "RECENT",
64 interval => q(6h),
65 localroot => "/home/ftp/pub/PAUSE/authors",
66 remote_dir => "",
67 remote_host => "pause.perl.org",
68 remote_module => "authors",
69 rsync_options => {
70 compress => 1,
71 'rsync-path' => '/usr/bin/rsync',
72 links => 1,
73 times => 1,
74 'omit-dir-times' => 1,
75 checksum => 1,
77 verbose => 1,
79 $rf->mirror;
81 Aggregator (usually the writer):
83 my $rf = File::Rsync::Mirror::Recentfile->new_from_file ( $file );
84 $rf->aggregate;
86 =head1 DESCRIPTION
88 Lower level than F:R:M:Recent, handles one recentfile. Whereas a tree
89 is always composed of several recentfiles, controlled by the
90 F:R:M:Recent object. The Recentfile object has to do the bookkeeping
91 for a single timeslice.
93 =head1 EXPORT
95 No exports.
97 =head1 CONSTRUCTORS / DESTRUCTOR
99 =head2 my $obj = CLASS->new(%hash)
101 Constructor. On every argument pair the key is a method name and the
102 value is an argument to that method name.
104 If a recentfile for this resource already exists, metadata that are
105 not defined by the constructor will be fetched from there as soon as
106 it is being read by recent_events().
108 =cut
110 sub new {
111 my($class, @args) = @_;
112 my $self = bless {}, $class;
113 while (@args) {
114 my($method,$arg) = splice @args, 0, 2;
115 $self->$method($arg);
117 unless (defined $self->protocol) {
118 $self->protocol(DEFAULT_PROTOCOL);
120 unless (defined $self->filenameroot) {
121 $self->filenameroot("RECENT");
123 unless (defined $self->serializer_suffix) {
124 $self->serializer_suffix(".yaml");
126 return $self;
129 =head2 my $obj = CLASS->new_from_file($file)
131 Constructor. $file is a I<recentfile>.
133 =cut
135 sub new_from_file {
136 my($class, $file) = @_;
137 my $self = bless {}, $class;
138 $self->_rfile($file);
139 #?# $self->lock;
140 my $serialized = do { open my $fh, $file or die "Could not open '$file': $!";
141 local $/;
142 <$fh>;
144 # XXX: we can skip this step when the metadata are sufficient, but
145 # we cannot parse the file without some magic stuff about
146 # serialized formats
147 while (-l $file) {
148 my($name,$path) = fileparse $file;
149 my $symlink = readlink $file;
150 if ($symlink =~ m|/|) {
151 die "FIXME: filenames containing '/' not supported, got $symlink";
153 $file = File::Spec->catfile ( $path, $symlink );
155 my($name,$path,$suffix) = fileparse $file, keys %serializers;
156 $self->serializer_suffix($suffix);
157 $self->localroot($path);
158 die "Could not determine file format from suffix" unless $suffix;
159 my $deserialized;
160 if ($suffix eq ".yaml") {
161 require YAML::Syck;
162 $deserialized = YAML::Syck::LoadFile($file);
163 } elsif ($HAVE->{"Data::Serializer"}) {
164 my $serializer = Data::Serializer->new
165 ( serializer => $serializers{$suffix} );
166 $deserialized = $serializer->raw_deserialize($serialized);
167 } else {
168 die "Data::Serializer not installed, cannot proceed with suffix '$suffix'";
170 while (my($k,$v) = each %{$deserialized->{meta}}) {
171 next if $k ne lc $k; # "Producers"
172 $self->$k($v);
174 unless (defined $self->protocol) {
175 $self->protocol(DEFAULT_PROTOCOL);
177 return $self;
180 =head2 DESTROY
182 A simple unlock.
184 =cut
185 sub DESTROY {
186 my $self = shift;
187 $self->unlock;
188 unless ($self->_current_tempfile_fh) {
189 if (my $tempfile = $self->_current_tempfile) {
190 if (-e $tempfile) {
191 # unlink $tempfile; # may fail in global destruction
197 =head1 ACCESSORS
199 =cut
201 my @accessors;
203 BEGIN {
204 @accessors = (
205 "_current_tempfile",
206 "_current_tempfile_fh",
207 "_delayed_operations",
208 "_done",
209 "_interval",
210 "_is_locked",
211 "_localroot",
212 "_merged",
213 "_pathdb",
214 "_remember_last_uptodate_call",
215 "_remote_dir",
216 "_remoteroot",
217 "_requires_fsck",
218 "_rfile",
219 "_rsync",
220 "__verified_tempdir",
221 "_seeded",
222 "_uptodateness_ever_reached",
223 "_use_tempfile",
226 my @pod_lines =
227 split /\n/, <<'=cut'; push @accessors, grep {s/^=item\s+//} @pod_lines; }
229 =over 4
231 =item aggregator
233 A list of interval specs that tell the aggregator which I<recentfile>s
234 are to be produced.
236 =item canonize
238 The name of a method to canonize the path before rsyncing. Only
239 supported value is C<naive_path_normalize>. Defaults to that.
241 =item comment
243 A comment about this tree and setup.
245 =item dirtymark
247 A timestamp. The dirtymark is updated whenever an out of band change
248 on the origin server is performed that violates the protocol. Say,
249 they add or remove files in the middle somewhere. Slaves must react
250 with a devaluation of their C<done> structure which then leads to a
251 full re-sync of all files. Implementation note: dirtymark may increase
252 or decrease.
254 =item filenameroot
256 The (prefix of the) filename we use for this I<recentfile>. Defaults to
257 C<RECENT>. The string must not contain a directory separator.
259 =item have_mirrored
261 Timestamp remembering when we mirrored this recentfile the last time.
262 Only relevant for slaves.
264 =item ignore_link_stat_errors
266 If set to true, rsync errors are ignored that complain about link stat
267 errors. These seem to happen only when there are files missing at the
268 origin. In race conditions this can always happen, so it defaults to
269 true.
271 =item is_slave
273 If set to true, this object will fetch a new recentfile from remote
274 when the timespan between the last mirror (see have_mirrored) and now
275 is too large (see C<ttl>).
277 =item keep_delete_objects_forever
279 The default for delete events is that they are passed through the
280 collection of recentfile objects until they reach the Z file. There
281 they get dropped so that the associated file object ceases to exist at
282 all. By setting C<keep_delete_objects_forever> the delete objects are
283 kept forever. This makes the Z file larger but has the advantage that
284 slaves that have interrupted mirroring for a long time still can clean
285 up their copy.
287 =item locktimeout
289 After how many seconds shall we die if we cannot lock a I<recentfile>?
290 Defaults to 600 seconds.
292 =item loopinterval
294 When mirror_loop is called, this accessor can specify how much time
295 every loop shall at least take. If the work of a loop is done before
296 that time has gone, sleeps for the rest of the time. Defaults to
297 arbitrary 42 seconds.
299 =item max_files_per_connection
301 Maximum number of files that are transferred on a single rsync call.
302 Setting it higher means higher performance at the price of holding
303 connections longer and potentially disturbing other users in the pool.
304 Defaults to the arbitrary value 42.
306 =item max_rsync_errors
308 When rsync operations encounter that many errors without any resetting
309 success in between, then we die. Defaults to unlimited. A value of
310 -1 means we run forever ignoring all rsync errors.
312 =item minmax
314 Hashref remembering when we read the recent_events from this file the
315 last time and what the timespan was.
317 =item protocol
319 When the RECENT file format changes, we increment the protocol. We try
320 to support older protocols in later releases.
322 =item remote_host
324 The host we are mirroring from. Leave empty for the local filesystem.
326 =item remote_module
328 Rsync servers have so called modules to separate directory trees from
329 each other. Put here the name of the module under which we are
330 mirroring. Leave empty for local filesystem.
332 =item rsync_options
334 Things like compress, links, times or checksums. Passed in to the
335 File::Rsync object used to run the mirror.
337 =item serializer_suffix
339 Mostly untested accessor. The only well tested format for
340 I<recentfile>s at the moment is YAML. It is used with YAML::Syck via
341 Data::Serializer. But in principle other formats are supported as
342 well. See section SERIALIZERS below.
344 =item sleep_per_connection
346 Sleep that many seconds (floating point OK) after every chunk of rsyncing
347 has finished. Defaults to arbitrary 0.42.
349 =item tempdir
351 Directory to write temporary files to. Must allow rename operations
352 into the tree which usually means it must live on the same partition
353 as the target directory. Defaults to C<< $self->localroot >>.
355 =item ttl
357 Time to live. Number of seconds after which this recentfile must be
358 fetched again from the origin server. Only relevant for slaves.
359 Defaults to arbitrary 24.2 seconds.
361 =item verbose
363 Boolean to turn on a bit verbosity.
365 =item verboselog
367 Path to the logfile to write verbose progress information to. This is
368 a primitive stop gap solution to get simple verbose logging working.
369 Switching to Log4perl or similar is probably the way to go.
371 =back
373 =cut
375 use accessors @accessors;
377 =head1 METHODS
379 =head2 (void) $obj->aggregate( %options )
381 Takes all intervals that are collected in the accessor called
382 aggregator. Sorts them by actual length of the interval.
383 Removes those that are shorter than our own interval. Then merges this
384 object into the next larger object. The merging continues upwards
385 as long as the next I<recentfile> is old enough to warrant a merge.
387 If a merge is warranted is decided according to the interval of the
388 previous interval so that larger files are not so often updated as
389 smaller ones. If $options{force} is true, all files get updated.
391 Here is an example to illustrate the behaviour. Given aggregators
393 1h 1d 1W 1M 1Q 1Y Z
395 then
397 1h updates 1d on every call to aggregate()
398 1d updates 1W earliest after 1h
399 1W updates 1M earliest after 1d
400 1M updates 1Q earliest after 1W
401 1Q updates 1Y earliest after 1M
402 1Y updates Z earliest after 1Q
404 Note that all but the smallest recentfile get updated at an arbitrary
405 rate and as such are quite useless on their own.
407 =cut
409 sub aggregate {
410 my($self, %option) = @_;
411 my %seen_interval;
412 my @aggs = sort { $a->{secs} <=> $b->{secs} }
413 grep { !$seen_interval{$_->{interval}}++ && $_->{secs} >= $self->interval_secs }
414 map { { interval => $_, secs => $self->interval_secs($_)} }
415 $self->interval, @{$self->aggregator || []};
416 $self->update;
417 $aggs[0]{object} = $self;
418 AGGREGATOR: for my $i (0..$#aggs-1) {
419 my $this = $aggs[$i]{object};
420 my $next = $this->_sparse_clone;
421 $next->interval($aggs[$i+1]{interval});
422 my $want_merge = 0;
423 if ($option{force} || $i == 0) {
424 $want_merge = 1;
425 } else {
426 my $next_rfile = $next->rfile;
427 if (-e $next_rfile) {
428 my $prev = $aggs[$i-1]{object};
429 local $^T = time;
430 my $next_age = 86400 * -M $next_rfile;
431 if ($next_age > $prev->interval_secs) {
432 $want_merge = 1;
434 } else {
435 $want_merge = 1;
438 if ($want_merge) {
439 $next->merge($this);
440 $aggs[$i+1]{object} = $next;
441 } else {
442 last AGGREGATOR;
447 # collect file size and mtime for all files of this aggregate
448 sub _debug_aggregate {
449 my($self) = @_;
450 my @aggs = sort { $a->{secs} <=> $b->{secs} }
451 map { { interval => $_, secs => $self->interval_secs($_)} }
452 $self->interval, @{$self->aggregator || []};
453 my $report = [];
454 for my $i (0..$#aggs) {
455 my $this = Storable::dclone $self;
456 $this->interval($aggs[$i]{interval});
457 my $rfile = $this->rfile;
458 my @stat = stat $rfile;
459 push @$report, {rfile => $rfile, size => $stat[7], mtime => $stat[9]};
461 $report;
464 # (void) $self->_assert_symlink()
465 sub _assert_symlink {
466 my($self) = @_;
467 my $recentrecentfile = File::Spec->catfile
469 $self->localroot,
470 sprintf
472 "%s.recent",
473 $self->filenameroot
476 if ($Config{d_symlink} eq "define") {
477 my $howto_create_symlink; # 0=no need; 1=straight symlink; 2=rename symlink
478 if (-l $recentrecentfile) {
479 my $found_symlink = readlink $recentrecentfile;
480 if ($found_symlink eq $self->rfilename) {
481 return;
482 } else {
483 $howto_create_symlink = 2;
485 } else {
486 $howto_create_symlink = 1;
488 if (1 == $howto_create_symlink) {
489 symlink $self->rfilename, $recentrecentfile or die "Could not create symlink '$recentrecentfile': $!"
490 } else {
491 unlink "$recentrecentfile.$$"; # may fail
492 symlink $self->rfilename, "$recentrecentfile.$$" or die "Could not create symlink '$recentrecentfile.$$': $!";
493 rename "$recentrecentfile.$$", $recentrecentfile or die "Could not rename '$recentrecentfile.$$' to $recentrecentfile: $!";
495 } else {
496 warn "Warning: symlinks not supported on this system, doing a copy instead\n";
497 unlink "$recentrecentfile.$$"; # may fail
498 cp $self->rfilename, "$recentrecentfile.$$" or die "Could not copy to '$recentrecentfile.$$': $!";
499 rename "$recentrecentfile.$$", $recentrecentfile or die "Could not rename '$recentrecentfile.$$' to $recentrecentfile: $!";
503 =head2 $hashref = $obj->delayed_operations
505 A hash of hashes containing unlink and rmdir operations which had to
506 wait until the recentfile got unhidden in order to not confuse
507 downstream mirrors (in case we have some).
509 =cut
511 sub delayed_operations {
512 my($self) = @_;
513 my $x = $self->_delayed_operations;
514 unless (defined $x) {
515 $x = {
516 unlink => {},
517 rmdir => {},
519 $self->_delayed_operations ($x);
521 return $x;
524 =head2 $done = $obj->done
526 C<$done> is a reference to a L<File::Rsync::Mirror::Recentfile::Done>
527 object that keeps track of rsync activities. Only needed and used when
528 we are a mirroring slave.
530 =cut
532 sub done {
533 my($self) = @_;
534 my $done = $self->_done;
535 if (!$done) {
536 require File::Rsync::Mirror::Recentfile::Done;
537 $done = File::Rsync::Mirror::Recentfile::Done->new();
538 $done->_rfinterval ($self->interval);
539 $self->_done ( $done );
541 return $done;
544 =head2 $tempfilename = $obj->get_remote_recentfile_as_tempfile ()
546 Stores the remote I<recentfile> locally as a tempfile. The caller is
547 responsible to remove the file after use.
549 Note: if you're intending to act as an rsync server for other slaves,
550 then you must prefer this method to fetch that file with
551 get_remotefile(). Otherwise downstream mirrors would expect you to
552 already have mirrored all the files that are in the I<recentfile>
553 before you have them mirrored.
555 =cut
557 sub get_remote_recentfile_as_tempfile {
558 my($self) = @_;
559 mkpath $self->localroot;
560 my $fh;
561 my $trfilename;
562 if ( $self->_use_tempfile() ) {
563 if ($self->ttl_reached) {
564 $fh = $self->_current_tempfile_fh;
565 $trfilename = $self->rfilename;
566 } else {
567 return $self->_current_tempfile;
569 } else {
570 $trfilename = $self->rfilename;
573 my $dst;
574 if ($fh) {
575 $dst = $self->_current_tempfile;
576 } else {
577 $fh = $self->_get_remote_rat_provide_tempfile_object ($trfilename);
578 $dst = $fh->filename;
579 $self->_current_tempfile ($dst);
580 my $rfile = eval { $self->rfile; }; # may fail (RECENT.recent has no rfile)
581 if (defined $rfile && -e $rfile) {
582 # saving on bandwidth. Might need to be configurable
583 # $self->bandwidth_is_cheap?
584 cp $rfile, $dst or die "Could not copy '$rfile' to '$dst': $!"
587 my $src = join ("/",
588 $self->remoteroot,
589 $trfilename,
591 if ($self->verbose) {
592 my $doing = -e $dst ? "Sync" : "Get";
593 my $display_dst = join "/", "...", basename(dirname($dst)), basename($dst);
594 my $LFH = $self->_logfilehandle;
595 printf $LFH
597 "%-4s %d (1/1/%s) temp %s ... ",
598 $doing,
599 time,
600 $self->interval,
601 $display_dst,
604 my $gaveup = 0;
605 my $retried = 0;
606 local($ENV{LANG}) = "C";
607 while (!$self->rsync->exec(
608 src => $src,
609 dst => $dst,
610 )) {
611 $self->register_rsync_error ($self->rsync->err);
612 if (++$retried >= 3) {
613 warn "XXX giving up";
614 $gaveup = 1;
615 last;
618 if ($gaveup) {
619 my $LFH = $self->_logfilehandle;
620 printf $LFH "Warning: gave up mirroring %s, will try again later", $self->interval;
621 } else {
622 $self->_refresh_internals ($dst);
623 $self->have_mirrored (Time::HiRes::time);
624 $self->un_register_rsync_error ();
626 $self->unseed;
627 if ($self->verbose) {
628 my $LFH = $self->_logfilehandle;
629 print $LFH "DONE\n";
631 my $mode = 0644;
632 chmod $mode, $dst or die "Could not chmod $mode '$dst': $!";
633 return $dst;
636 sub _verified_tempdir {
637 my($self) = @_;
638 my $tempdir = $self->__verified_tempdir();
639 return $tempdir if defined $tempdir;
640 unless ($tempdir = $self->tempdir) {
641 $tempdir = $self->localroot;
643 unless (-d $tempdir) {
644 mkpath $tempdir;
646 $self->__verified_tempdir($tempdir);
647 return $tempdir;
650 sub _get_remote_rat_provide_tempfile_object {
651 my($self, $trfilename) = @_;
652 my $_verified_tempdir = $self->_verified_tempdir;
653 my $fh = File::Temp->new
654 (TEMPLATE => sprintf(".FRMRecent-%s-XXXX",
655 $trfilename,
657 DIR => $_verified_tempdir,
658 SUFFIX => $self->serializer_suffix,
659 UNLINK => $self->_use_tempfile,
661 my $mode = 0644;
662 my $dst = $fh->filename;
663 chmod $mode, $dst or die "Could not chmod $mode '$dst': $!";
664 if ($self->_use_tempfile) {
665 $self->_current_tempfile_fh ($fh); # delay self destruction
667 return $fh;
670 sub _logfilehandle {
671 my($self) = @_;
672 my $fh;
673 if (my $vl = $self->verboselog) {
674 open $fh, ">>", $vl or die "Could not open >> '$vl': $!";
675 } else {
676 $fh = \*STDERR;
678 return $fh;
681 =head2 $localpath = $obj->get_remotefile ( $relative_path )
683 Rsyncs one single remote file to local filesystem.
685 Note: no locking is done on this file. Any number of processes may
686 mirror this object.
688 Note II: do not use for recentfiles. If you are a cascading
689 slave/server combination, it would confuse other slaves. They would
690 expect the contents of these recentfiles to be available. Use
691 get_remote_recentfile_as_tempfile() instead.
693 =cut
695 sub get_remotefile {
696 my($self, $path) = @_;
697 my $dst = File::Spec->catfile($self->localroot, $path);
698 mkpath dirname $dst;
699 if ($self->verbose) {
700 my $doing = -e $dst ? "Sync" : "Get";
701 my $LFH = $self->_logfilehandle;
702 printf $LFH
704 "%-4s %d (1/1/%s) %s ... ",
705 $doing,
706 time,
707 $self->interval,
708 $path,
711 local($ENV{LANG}) = "C";
712 my $remoteroot = $self->remoteroot or die "Alert: missing remoteroot. Cannot continue";
713 while (!$self->rsync->exec(
714 src => join("/",
715 $remoteroot,
716 $path),
717 dst => $dst,
718 )) {
719 $self->register_rsync_error ($self->rsync->err);
721 $self->un_register_rsync_error ();
722 if ($self->verbose) {
723 my $LFH = $self->_logfilehandle;
724 print $LFH "DONE\n";
726 return $dst;
729 =head2 $obj->interval ( $interval_spec )
731 Get/set accessor. $interval_spec is a string and described below in
732 the section INTERVAL SPEC.
734 =cut
736 sub interval {
737 my ($self, $interval) = @_;
738 if (@_ >= 2) {
739 $self->_interval($interval);
740 $self->_rfile(undef);
742 $interval = $self->_interval;
743 unless (defined $interval) {
744 # do not ask the $self too much, it recurses!
745 require Carp;
746 Carp::confess("Alert: interval undefined for '".$self."'. Cannot continue.");
748 return $interval;
751 =head2 $secs = $obj->interval_secs ( $interval_spec )
753 $interval_spec is described below in the section INTERVAL SPEC. If
754 empty defaults to the inherent interval for this object.
756 =cut
758 sub interval_secs {
759 my ($self, $interval) = @_;
760 $interval ||= $self->interval;
761 unless (defined $interval) {
762 die "interval_secs() called without argument on an object without a declared one";
764 my ($n,$t) = $interval =~ /^(\d*)([smhdWMQYZ]$)/ or
765 die "Could not determine seconds from interval[$interval]";
766 if ($interval eq "Z") {
767 return MAX_INT;
768 } elsif (exists $seconds{$t} and $n =~ /^\d+$/) {
769 return $seconds{$t}*$n;
770 } else {
771 die "Invalid interval specification: n[$n]t[$t]";
775 =head2 $obj->localroot ( $localroot )
777 Get/set accessor. The local root of the tree. Guaranteed without
778 trailing slash.
780 =cut
782 sub localroot {
783 my ($self, $localroot) = @_;
784 if (@_ >= 2) {
785 $localroot =~ s|/$||;
786 $self->_localroot($localroot);
787 $self->_rfile(undef);
789 $localroot = $self->_localroot;
792 =head2 $ret = $obj->local_path($path_found_in_recentfile)
794 Combines the path to our local mirror and the path of an object found
795 in this I<recentfile>. In other words: the target of a mirror operation.
797 Implementation note: We split on slashes and then use
798 File::Spec::catfile to adjust to the local operating system.
800 =cut
802 sub local_path {
803 my($self,$path) = @_;
804 unless (defined $path) {
805 # seems like a degenerated case
806 return $self->localroot;
808 my @p = split m|/|, $path;
809 File::Spec->catfile($self->localroot,@p);
812 =head2 (void) $obj->lock
814 Locking is implemented with an C<mkdir> on a locking directory
815 (C<.lock> appended to $rfile).
817 =cut
819 sub lock {
820 my ($self) = @_;
821 # not using flock because it locks on filehandles instead of
822 # old school ressources.
823 my $locked = $self->_is_locked and return;
824 my $rfile = $self->rfile;
825 # XXX need a way to allow breaking the lock
826 my $start = time;
827 my $locktimeout = $self->locktimeout || 600;
828 my %have_warned;
829 my $lockdir = "$rfile.lock";
830 my $procfile = "$lockdir/process";
831 GETLOCK: while (not mkdir $lockdir) {
832 if (open my $fh, "<", $procfile) {
833 chomp(my $process = <$fh>);
834 if (0) {
835 } elsif ($process !~ /^\d+$/) {
836 warn "Warning: unknown process holds a lock in '$lockdir', waiting..." unless $have_warned{unknown}++;
837 } elsif ($$ == $process) {
838 last GETLOCK;
839 } elsif (kill 0, $process) {
840 warn "Warning: process $process holds a lock in '$lockdir', waiting..." unless $have_warned{$process}++;
841 } else {
842 warn "Warning: breaking lock held by process $process";
843 sleep 1;
844 last GETLOCK;
846 } else {
847 warn "Warning: unknown process holds a lock in '$lockdir', waiting..." unless $have_warned{unknown}++;
849 Time::HiRes::sleep 0.01;
850 if (time - $start > $locktimeout) {
851 die "Could not acquire lockdirectory '$rfile.lock': $!";
853 } # GETLOCK
854 open my $fh, ">", $procfile or die "Could not open >$procfile\: $!";
855 print $fh $$, "\n";
856 close $fh or die "Could not close: $!";
857 $self->_is_locked (1);
860 =head2 (void) $obj->merge ($other)
862 Bulk update of this object with another one. It's used to merge a
863 smaller and younger $other object into the current one. If this file
864 is a C<Z> file, then we normally do not merge in objects of type
865 C<delete>; this can be overridden by setting
866 keep_delete_objects_forever. But if we encounter an object of type
867 delete we delete the corresponding C<new> object if we have it.
869 If there is nothing to be merged, nothing is done.
871 =cut
873 sub merge {
874 my($self, $other) = @_;
875 $self->_merge_sanitycheck ( $other );
876 $other->lock;
877 my $other_recent = $other->recent_events || [];
878 $self->lock;
879 $self->_merge_locked ( $other, $other_recent );
880 $self->unlock;
881 $other->unlock;
884 sub _merge_locked {
885 my($self, $other, $other_recent) = @_;
886 my $my_recent = $self->recent_events || [];
888 # calculate the target time span
889 my $myepoch = $my_recent->[0] ? $my_recent->[0]{epoch} : undef;
890 my $epoch = $other_recent->[0] ? $other_recent->[0]{epoch} : $myepoch;
891 my $oldest_allowed = 0;
892 my $something_done;
893 unless ($my_recent->[0]) {
894 # obstetrics
895 $something_done = 1;
897 if ($epoch) {
898 if (($other->dirtymark||0) ne ($self->dirtymark||0)) {
899 $oldest_allowed = 0;
900 $something_done = 1;
901 } elsif (my $merged = $self->merged) {
902 my $secs = $self->interval_secs();
903 $oldest_allowed = min($epoch - $secs, $merged->{epoch}||0);
904 if (@$other_recent and
905 _bigfloatlt($other_recent->[-1]{epoch}, $oldest_allowed)
907 $oldest_allowed = $other_recent->[-1]{epoch};
910 while (@$my_recent && _bigfloatlt($my_recent->[-1]{epoch}, $oldest_allowed)) {
911 pop @$my_recent;
912 $something_done = 1;
916 my %have_path;
917 my $other_recent_filtered = [];
918 for my $oev (@$other_recent) {
919 my $oevepoch = $oev->{epoch} || 0;
920 next if _bigfloatlt($oevepoch, $oldest_allowed);
921 my $path = $oev->{path};
922 next if $have_path{$path}++;
923 if ( $self->interval eq "Z"
924 and $oev->{type} eq "delete"
925 and ! $self->keep_delete_objects_forever
927 # do nothing
928 } else {
929 if (!$myepoch || _bigfloatgt($oevepoch, $myepoch)) {
930 $something_done = 1;
932 push @$other_recent_filtered, { epoch => $oev->{epoch}, path => $path, type => $oev->{type} };
935 if ($something_done) {
936 $self->_merge_something_done ($other_recent_filtered, $my_recent, $other_recent, $other, \%have_path, $epoch);
940 sub _merge_something_done {
941 my($self, $other_recent_filtered, $my_recent, $other_recent, $other, $have_path, $epoch) = @_;
942 my $recent = [];
943 my $epoch_conflict = 0;
944 my $last_epoch;
945 ZIP: while (@$other_recent_filtered || @$my_recent) {
946 my $event;
947 if (!@$my_recent ||
948 @$other_recent_filtered && _bigfloatge($other_recent_filtered->[0]{epoch},$my_recent->[0]{epoch})) {
949 $event = shift @$other_recent_filtered;
950 } else {
951 $event = shift @$my_recent;
952 next ZIP if $have_path->{$event->{path}}++;
954 $epoch_conflict=1 if defined $last_epoch && $event->{epoch} eq $last_epoch;
955 $last_epoch = $event->{epoch};
956 push @$recent, $event;
958 if ($epoch_conflict) {
959 my %have_epoch;
960 for (my $i = $#$recent;$i>=0;$i--) {
961 my $epoch = $recent->[$i]{epoch};
962 if ($have_epoch{$epoch}++) {
963 while ($have_epoch{$epoch}) {
964 $epoch = _increase_a_bit($epoch);
966 $recent->[$i]{epoch} = $epoch;
967 $have_epoch{$epoch}++;
971 if (!$self->dirtymark || $other->dirtymark ne $self->dirtymark) {
972 $self->dirtymark ( $other->dirtymark );
974 $self->write_recent($recent);
975 $other->merged({
976 time => Time::HiRes::time, # not used anywhere
977 epoch => $recent->[0]{epoch},
978 into_interval => $self->interval, # not used anywhere
980 $other->write_recent($other_recent);
983 sub _merge_sanitycheck {
984 my($self, $other) = @_;
985 if ($self->interval_secs <= $other->interval_secs) {
986 require Carp;
987 Carp::confess
988 (sprintf
990 "Alert: illegal merge operation of a bigger interval[%d] into a smaller[%d]",
991 $self->interval_secs,
992 $other->interval_secs,
997 =head2 merged
999 Hashref denoting when this recentfile has been merged into some other
1000 at which epoch.
1002 =cut
1004 sub merged {
1005 my($self, $set) = @_;
1006 if (defined $set) {
1007 $self->_merged ($set);
1009 my $merged = $self->_merged;
1010 my $into;
1011 if ($merged and $into = $merged->{into_interval} and defined $self->_interval) {
1012 # sanity checks
1013 if ($into eq $self->interval) {
1014 require Carp;
1015 Carp::cluck(sprintf
1017 "Warning: into_interval[%s] same as own interval[%s]. Danger ahead.",
1018 $into,
1019 $self->interval,
1021 } elsif ($self->interval_secs($into) < $self->interval_secs) {
1022 require Carp;
1023 Carp::cluck(sprintf
1025 "Warning: into_interval_secs[%s] smaller than own interval_secs[%s] on interval[%s]. Danger ahead.",
1026 $self->interval_secs($into),
1027 $self->interval_secs,
1028 $self->interval,
1032 $merged;
1035 =head2 $hashref = $obj->meta_data
1037 Returns the hashref of metadata that the server has to add to the
1038 I<recentfile>.
1040 =cut
1042 sub meta_data {
1043 my($self) = @_;
1044 my $ret = $self->{meta};
1045 for my $m (
1046 "aggregator",
1047 "canonize",
1048 "comment",
1049 "dirtymark",
1050 "filenameroot",
1051 "interval",
1052 "merged",
1053 "minmax",
1054 "protocol",
1055 "serializer_suffix",
1057 my $v = $self->$m;
1058 if (defined $v) {
1059 $ret->{$m} = $v;
1062 # XXX need to reset the Producer if I am a writer, keep it when I
1063 # am a reader
1064 $ret->{Producers} ||= {
1065 __PACKAGE__, "$VERSION", # stringified it looks better
1066 '$0', $0,
1067 'time', Time::HiRes::time,
1069 $ret->{dirtymark} ||= Time::HiRes::time;
1070 return $ret;
1073 =head2 $success = $obj->mirror ( %options )
1075 Mirrors the files in this I<recentfile> as reported by
1076 C<recent_events>. Options named C<after>, C<before>, C<max> are passed
1077 through to the C<recent_events> call. The boolean option C<piecemeal>,
1078 if true, causes C<mirror> to only rsync C<max_files_per_connection>
1079 and keep track of the rsynced files so that future calls will rsync
1080 different files until all files are brought to sync.
1082 =cut
1084 sub mirror {
1085 my($self, %options) = @_;
1086 my $trecentfile = $self->get_remote_recentfile_as_tempfile();
1087 $self->_use_tempfile (1);
1088 # skip-deletes is inadequat for passthrough within mirror. We
1089 # would never reach uptodateness when a delete were on a
1090 # borderline
1091 my %passthrough = map { ($_ => $options{$_}) } qw(before after max);
1092 my ($recent_events) = $self->recent_events(%passthrough);
1093 my(@error, @dlcollector); # download-collector: array containing paths we need
1094 my $first_item = 0;
1095 my $last_item = $#$recent_events;
1096 my $done = $self->done;
1097 my $pathdb = $self->_pathdb;
1098 ITEM: for my $i ($first_item..$last_item) {
1099 my $status = +{};
1100 $self->_mirror_item
1103 $recent_events,
1104 $last_item,
1105 $done,
1106 $pathdb,
1107 \@dlcollector,
1108 \%options,
1109 $status,
1110 \@error,
1112 last if $i == $last_item;
1113 if ($status->{mustreturn}){
1114 if ($self->_current_tempfile && ! $self->_current_tempfile_fh) {
1115 # looks like a bug somewhere else
1116 my $t = $self->_current_tempfile;
1117 unlink $t or die "Could not unlink '$t': $!";
1118 $self->_current_tempfile(undef);
1119 $self->_use_tempfile(0);
1121 return;
1124 if (@dlcollector) {
1125 my $success = eval { $self->_mirror_dlcollector (\@dlcollector,$pathdb,$recent_events);};
1126 if (!$success || $@) {
1127 warn "Warning: Unknown error while mirroring: $@";
1128 push @error, $@;
1129 sleep 1;
1132 if ($self->verbose) {
1133 my $LFH = $self->_logfilehandle;
1134 print $LFH "DONE\n";
1136 # once we've gone to the end we consider ourselves free of obligations
1137 $self->unseed;
1138 $self->_mirror_unhide_tempfile ($trecentfile);
1139 $self->_mirror_perform_delayed_ops(\%options);
1140 return !@error;
1143 sub _mirror_item {
1144 my($self,
1146 $recent_events,
1147 $last_item,
1148 $done,
1149 $pathdb,
1150 $dlcollector,
1151 $options,
1152 $status,
1153 $error,
1154 ) = @_;
1155 my $recent_event = $recent_events->[$i];
1156 return if $done->covered ( $recent_event->{epoch} );
1157 if ($pathdb) {
1158 my $rec = $pathdb->{$recent_event->{path}};
1159 if ($rec && $rec->{recentepoch}) {
1160 if (_bigfloatgt
1161 ( $rec->{recentepoch}, $recent_event->{epoch} )){
1162 $done->register ($recent_events, [$i]);
1163 return;
1167 my $dst = $self->local_path($recent_event->{path});
1168 if ($recent_event->{type} eq "new"){
1169 $self->_mirror_item_new
1171 $dst,
1173 $last_item,
1174 $recent_events,
1175 $recent_event,
1176 $dlcollector,
1177 $pathdb,
1178 $status,
1179 $error,
1180 $options,
1182 } elsif ($recent_event->{type} eq "delete") {
1183 my $activity;
1184 if ($options->{'skip-deletes'}) {
1185 $activity = "skipped";
1186 } else {
1187 my @lstat = lstat $dst;
1188 if (! -e _) {
1189 $activity = "not_found";
1190 } elsif (-l _ or not -d _) {
1191 $self->delayed_operations->{unlink}{$dst}++;
1192 $activity = "deleted";
1193 } else {
1194 $self->delayed_operations->{rmdir}{$dst}++;
1195 $activity = "deleted";
1198 $done->register ($recent_events, [$i]);
1199 if ($pathdb) {
1200 $self->_mirror_register_path($pathdb,[$recent_event],$activity);
1202 } else {
1203 warn "Warning: invalid upload type '$recent_event->{type}'";
1207 sub _mirror_item_new {
1208 my($self,
1209 $dst,
1211 $last_item,
1212 $recent_events,
1213 $recent_event,
1214 $dlcollector,
1215 $pathdb,
1216 $status,
1217 $error,
1218 $options,
1219 ) = @_;
1220 if ($self->verbose) {
1221 my $doing = -e $dst ? "Sync" : "Get";
1222 my $LFH = $self->_logfilehandle;
1223 printf $LFH
1225 "%-4s %d (%d/%d/%s) %s ... ",
1226 $doing,
1227 time,
1228 1+$i,
1229 1+$last_item,
1230 $self->interval,
1231 $recent_event->{path},
1234 my $max_files_per_connection = $self->max_files_per_connection || 42;
1235 my $success;
1236 if ($self->verbose) {
1237 my $LFH = $self->_logfilehandle;
1238 print $LFH "\n";
1240 push @$dlcollector, { rev => $recent_event, i => $i };
1241 if (@$dlcollector >= $max_files_per_connection) {
1242 $success = eval {$self->_mirror_dlcollector ($dlcollector,$pathdb,$recent_events);};
1243 my $sleep = $self->sleep_per_connection;
1244 $sleep = 0.42 unless defined $sleep;
1245 Time::HiRes::sleep $sleep;
1246 if ($options->{piecemeal}) {
1247 $status->{mustreturn} = 1;
1248 return;
1250 } else {
1251 return;
1253 if (!$success || $@) {
1254 warn "Warning: Error while mirroring: $@";
1255 push @$error, $@;
1256 sleep 1;
1258 if ($self->verbose) {
1259 my $LFH = $self->_logfilehandle;
1260 print $LFH "DONE\n";
1264 sub _mirror_dlcollector {
1265 my($self,$xcoll,$pathdb,$recent_events) = @_;
1266 my $success = $self->mirror_path([map {$_->{rev}{path}} @$xcoll]);
1267 if ($pathdb) {
1268 $self->_mirror_register_path($pathdb,[map {$_->{rev}} @$xcoll],"rsync");
1270 $self->done->register($recent_events, [map {$_->{i}} @$xcoll]);
1271 @$xcoll = ();
1272 return $success;
1275 sub _mirror_register_path {
1276 my($self,$pathdb,$coll,$activity) = @_;
1277 my $time = time;
1278 for my $item (@$coll) {
1279 $pathdb->{$item->{path}} =
1281 recentepoch => $item->{epoch},
1282 ($activity."_on") => $time,
1287 sub _mirror_unhide_tempfile {
1288 my($self, $trecentfile) = @_;
1289 my $rfile = $self->rfile;
1290 if (rename $trecentfile, $rfile) {
1291 # warn "DEBUG: renamed '$trecentfile' to '$rfile'";
1292 } else {
1293 require Carp;
1294 Carp::confess("Could not rename '$trecentfile' to '$rfile': $!");
1296 $self->_use_tempfile (0);
1297 if (my $ctfh = $self->_current_tempfile_fh) {
1298 $ctfh->unlink_on_destroy (0);
1299 $self->_current_tempfile_fh (undef);
1303 sub _mirror_perform_delayed_ops {
1304 my($self,$options) = @_;
1305 my $delayed = $self->delayed_operations;
1306 for my $dst (keys %{$delayed->{unlink}}) {
1307 unless (unlink $dst) {
1308 require Carp;
1309 Carp::cluck ( "Warning: Error while unlinking '$dst': $!" ) if $options->{verbose};
1311 if ($self->verbose) {
1312 my $doing = "Del";
1313 my $LFH = $self->_logfilehandle;
1314 printf $LFH
1316 "%-4s %d (%s) %s DONE\n",
1317 $doing,
1318 time,
1319 $self->interval,
1320 $dst,
1322 delete $delayed->{unlink}{$dst};
1325 for my $dst (sort {length($b) <=> length($a)} keys %{$delayed->{rmdir}}) {
1326 unless (rmdir $dst) {
1327 require Carp;
1328 Carp::cluck ( "Warning: Error on rmdir '$dst': $!" ) if $options->{verbose};
1330 if ($self->verbose) {
1331 my $doing = "Del";
1332 my $LFH = $self->_logfilehandle;
1333 printf $LFH
1335 "%-4s %d (%s) %s DONE\n",
1336 $doing,
1337 time,
1338 $self->interval,
1339 $dst,
1341 delete $delayed->{rmdir}{$dst};
1346 =head2 $success = $obj->mirror_path ( $arrref | $path )
1348 If the argument is a scalar it is treated as a path. The remote path
1349 is mirrored into the local copy. $path is the path found in the
1350 I<recentfile>, i.e. it is relative to the root directory of the
1351 mirror.
1353 If the argument is an array reference then all elements are treated as
1354 a path below the current tree and all are rsynced with a single
1355 command (and a single connection).
1357 =cut
1359 sub mirror_path {
1360 my($self,$path) = @_;
1361 # XXX simplify the two branches such that $path is treated as
1362 # [$path] maybe even demand the argument as an arrayref to
1363 # simplify docs and code. (rsync-over-recentfile-2.pl uses the
1364 # interface)
1365 if (ref $path and ref $path eq "ARRAY") {
1366 my $dst = $self->localroot;
1367 mkpath dirname $dst;
1368 my($fh) = File::Temp->new(TEMPLATE => sprintf(".%s-XXXX",
1369 lc $self->filenameroot,
1371 TMPDIR => 1,
1372 UNLINK => 0,
1374 for my $p (@$path) {
1375 print $fh $p, "\n";
1377 $fh->flush;
1378 $fh->unlink_on_destroy(1);
1379 my $gaveup = 0;
1380 my $retried = 0;
1381 local($ENV{LANG}) = "C";
1382 while (!$self->rsync->exec
1384 src => join("/",
1385 $self->remoteroot,
1387 dst => $dst,
1388 'files-from' => $fh->filename,
1389 )) {
1390 my(@err) = $self->rsync->err;
1391 if ($self->_my_ignore_link_stat_errors && "@err" =~ m{^ rsync: \s link_stat }x ) {
1392 if ($self->verbose) {
1393 my $LFH = $self->_logfilehandle;
1394 print $LFH "Info: ignoring link_stat error '@err'";
1396 return 1;
1398 $self->register_rsync_error (@err);
1399 if (++$retried >= 3) {
1400 my $batchsize = @$path;
1401 warn "The number of rsync retries now reached 3 within a batch of size $batchsize. Error was '@err'. Giving up now, will retry later, ";
1402 $gaveup = 1;
1403 last;
1405 sleep 1;
1407 unless ($gaveup) {
1408 $self->un_register_rsync_error ();
1410 } else {
1411 my $dst = $self->local_path($path);
1412 mkpath dirname $dst;
1413 local($ENV{LANG}) = "C";
1414 while (!$self->rsync->exec
1416 src => join("/",
1417 $self->remoteroot,
1418 $path
1420 dst => $dst,
1421 )) {
1422 my(@err) = $self->rsync->err;
1423 if ($self->_my_ignore_link_stat_errors && "@err" =~ m{^ rsync: \s link_stat }x ) {
1424 if ($self->verbose) {
1425 my $LFH = $self->_logfilehandle;
1426 print $LFH "Info: ignoring link_stat error '@err'";
1428 return 1;
1430 $self->register_rsync_error (@err);
1432 $self->un_register_rsync_error ();
1434 return 1;
1437 sub _my_ignore_link_stat_errors {
1438 my($self) = @_;
1439 my $x = $self->ignore_link_stat_errors;
1440 $x = 1 unless defined $x;
1441 return $x;
1444 sub _my_current_rfile {
1445 my($self) = @_;
1446 my $rfile;
1447 if ($self->_use_tempfile) {
1448 $rfile = $self->_current_tempfile;
1450 unless ($rfile && -s $rfile) {
1451 $rfile = $self->rfile;
1453 return $rfile;
1456 =head2 $path = $obj->naive_path_normalize ($path)
1458 Takes an absolute unix style path as argument and canonicalizes it to
1459 a shorter path if possible, removing things like double slashes or
1460 C</./> and removes references to C<../> directories to get a shorter
1461 unambiguos path. This is used to make the code easier that determines
1462 if a file passed to C<upgrade()> is indeed below our C<localroot>.
1464 =cut
1466 sub naive_path_normalize {
1467 my($self,$path) = @_;
1468 $path =~ s|/+|/|g;
1469 1 while $path =~ s|/[^/]+/\.\./|/|;
1470 $path =~ s|/$||;
1471 $path;
1474 =head2 $ret = $obj->read_recent_1 ( $data )
1476 Delegate of C<recent_events()> on protocol 1
1478 =cut
1480 sub read_recent_1 {
1481 my($self, $data) = @_;
1482 return $data->{recent};
1485 =head2 $array_ref = $obj->recent_events ( %options )
1487 Note: the code relies on the resource being written atomically. We
1488 cannot lock because we may have no write access. If the caller has
1489 write access (eg. aggregate() or update()), it has to care for any
1490 necessary locking and it MUST write atomically.
1492 If C<$options{after}> is specified, only file events after this
1493 timestamp are returned.
1495 If C<$options{before}> is specified, only file events before this
1496 timestamp are returned.
1498 If C<$options{max}> is specified only a maximum of this many most
1499 recent events is returned.
1501 If C<$options{'skip-deletes'}> is specified, no files-to-be-deleted
1502 will be returned.
1504 If C<$options{contains}> is specified the value must be a hash
1505 reference containing a query. The query may contain the keys C<epoch>,
1506 C<path>, and C<type>. Each represents a condition that must be met. If
1507 there is more than one such key, the conditions are ANDed.
1509 If C<$options{info}> is specified, it must be a hashref. This hashref
1510 will be filled with metadata about the unfiltered recent_events of
1511 this object, in key C<first> there is the first item, in key C<last>
1512 is the last.
1514 =cut
1516 sub recent_events {
1517 my ($self, %options) = @_;
1518 my $info = $options{info};
1519 if ($self->is_slave) {
1520 # XXX seems dubious, might produce tempfiles without removing them?
1521 $self->get_remote_recentfile_as_tempfile;
1523 my $rfile_or_tempfile = $self->_my_current_rfile or return [];
1524 -e $rfile_or_tempfile or return [];
1525 my $suffix = $self->serializer_suffix;
1526 my ($data) = eval {
1527 $self->_try_deserialize
1529 $suffix,
1530 $rfile_or_tempfile,
1533 my $err = $@;
1534 if ($err or !$data) {
1535 return [];
1537 my $re;
1538 if (reftype $data eq 'ARRAY') { # protocol 0
1539 $re = $data;
1540 } else {
1541 $re = $self->_recent_events_protocol_x
1543 $data,
1544 $rfile_or_tempfile,
1547 return $re unless grep {defined $options{$_}} qw(after before contains max skip-deletes);
1548 $self->_recent_events_handle_options ($re, \%options);
1551 # File::Rsync::Mirror::Recentfile::_recent_events_handle_options
1552 sub _recent_events_handle_options {
1553 my($self, $re, $options) = @_;
1554 my $last_item = $#$re;
1555 my $info = $options->{info};
1556 if ($info) {
1557 $info->{first} = $re->[0];
1558 $info->{last} = $re->[-1];
1560 if (defined $options->{after}) {
1561 if ($re->[0]{epoch} > $options->{after}) {
1562 if (
1563 my $f = first
1564 {$re->[$_]{epoch} <= $options->{after}}
1565 0..$#$re
1567 $last_item = $f-1;
1569 } else {
1570 $last_item = -1;
1573 my $first_item = 0;
1574 if (defined $options->{before}) {
1575 if ($re->[0]{epoch} > $options->{before}) {
1576 if (
1577 my $f = first
1578 {$re->[$_]{epoch} < $options->{before}}
1579 0..$last_item
1581 $first_item = $f;
1583 } else {
1584 $first_item = 0;
1587 if (0 != $first_item || -1 != $last_item) {
1588 @$re = splice @$re, $first_item, 1+$last_item-$first_item;
1590 if ($options->{'skip-deletes'}) {
1591 @$re = grep { $_->{type} ne "delete" } @$re;
1593 if (my $contopt = $options->{contains}) {
1594 my $seen_allowed = 0;
1595 for my $allow (qw(epoch path type)) {
1596 if (exists $contopt->{$allow}) {
1597 $seen_allowed++;
1598 my $v = $contopt->{$allow};
1599 @$re = grep { $_->{$allow} eq $v } @$re;
1602 if (keys %$contopt > $seen_allowed) {
1603 require Carp;
1604 Carp::confess
1605 (sprintf "unknown query: %s", join ", ", %$contopt);
1608 if ($options->{max} && @$re > $options->{max}) {
1609 @$re = splice @$re, 0, $options->{max};
1611 $re;
1614 sub _recent_events_protocol_x {
1615 my($self,
1616 $data,
1617 $rfile_or_tempfile,
1618 ) = @_;
1619 my $meth = sprintf "read_recent_%d", $data->{meta}{protocol};
1620 # we may be reading meta for the first time
1621 while (my($k,$v) = each %{$data->{meta}}) {
1622 if ($k ne lc $k){ # "Producers"
1623 $self->{ORIG}{$k} = $v;
1624 next;
1626 next if defined $self->$k;
1627 $self->$k($v);
1629 my $re = $self->$meth ($data);
1630 my $minmax;
1631 if (my @stat = stat $rfile_or_tempfile) {
1632 $minmax = { mtime => $stat[9] };
1633 } else {
1634 # defensive because ABH encountered:
1636 #### Sync 1239828608 (1/1/Z) temp .../authors/.FRMRecent-RECENT-Z.yaml-
1637 #### Ydr_.yaml ... DONE
1638 #### Cannot stat '/mirrors/CPAN/authors/.FRMRecent-RECENT-Z.yaml-
1639 #### Ydr_.yaml': No such file or directory at /usr/lib/perl5/site_perl/
1640 #### 5.8.8/File/Rsync/Mirror/Recentfile.pm line 1558.
1641 #### unlink0: /mirrors/CPAN/authors/.FRMRecent-RECENT-Z.yaml-Ydr_.yaml is
1642 #### gone already at cpan-pause.pl line 0
1644 my $LFH = $self->_logfilehandle;
1645 print $LFH "Warning (maybe harmless): Cannot stat '$rfile_or_tempfile': $!"
1647 if (@$re) {
1648 $minmax->{min} = $re->[-1]{epoch};
1649 $minmax->{max} = $re->[0]{epoch};
1651 $self->minmax ( $minmax );
1652 return $re;
1655 sub _try_deserialize {
1656 my($self,
1657 $suffix,
1658 $rfile_or_tempfile,
1659 ) = @_;
1660 if ($suffix eq ".yaml") {
1661 require YAML::Syck;
1662 YAML::Syck::LoadFile($rfile_or_tempfile);
1663 } elsif ($HAVE->{"Data::Serializer"}) {
1664 my $serializer = Data::Serializer->new
1665 ( serializer => $serializers{$suffix} );
1666 my $serialized = do
1668 open my $fh, $rfile_or_tempfile or die "Could not open: $!";
1669 local $/;
1670 <$fh>;
1672 $serializer->raw_deserialize($serialized);
1673 } else {
1674 die "Data::Serializer not installed, cannot proceed with suffix '$suffix'";
1678 sub _refresh_internals {
1679 my($self, $dst) = @_;
1680 my $class = ref $self;
1681 my $rfpeek = $class->new_from_file ($dst);
1682 for my $acc (qw(
1683 _merged
1684 minmax
1685 )) {
1686 $self->$acc ( $rfpeek->$acc );
1688 my $old_dirtymark = $self->dirtymark;
1689 my $new_dirtymark = $rfpeek->dirtymark;
1690 if ($old_dirtymark && $new_dirtymark && $new_dirtymark ne $old_dirtymark) {
1691 $self->done->reset;
1692 $self->dirtymark ( $new_dirtymark );
1693 $self->_uptodateness_ever_reached(0);
1694 $self->seed;
1698 =head2 $ret = $obj->rfilename
1700 Just the basename of our I<recentfile>, composed from C<filenameroot>,
1701 a dash, C<interval>, and C<serializer_suffix>. E.g. C<RECENT-6h.yaml>
1703 =cut
1705 sub rfilename {
1706 my($self) = @_;
1707 my $file = sprintf("%s-%s%s",
1708 $self->filenameroot,
1709 $self->interval,
1710 $self->serializer_suffix,
1712 return $file;
1715 =head2 $str = $self->remote_dir
1717 The directory we are mirroring from.
1719 =cut
1721 sub remote_dir {
1722 my($self, $set) = @_;
1723 if (defined $set) {
1724 $self->_remote_dir ($set);
1726 my $x = $self->_remote_dir;
1727 $self->is_slave (1);
1728 return $x;
1731 =head2 $str = $obj->remoteroot
1733 =head2 (void) $obj->remoteroot ( $set )
1735 Get/Set the composed prefix needed when rsyncing from a remote module.
1736 If remote_host, remote_module, and remote_dir are set, it is composed
1737 from these.
1739 =cut
1741 sub remoteroot {
1742 my($self, $set) = @_;
1743 if (defined $set) {
1744 $self->_remoteroot($set);
1746 my $remoteroot = $self->_remoteroot;
1747 unless (defined $remoteroot) {
1748 $remoteroot = sprintf
1750 "%s%s%s",
1751 defined $self->remote_host ? ($self->remote_host."::") : "",
1752 defined $self->remote_module ? ($self->remote_module."/") : "",
1753 defined $self->remote_dir ? $self->remote_dir : "",
1755 $self->_remoteroot($remoteroot);
1757 return $remoteroot;
1760 =head2 (void) $obj->split_rfilename ( $recentfilename )
1762 Inverse method to C<rfilename>. C<$recentfilename> is a plain filename
1763 of the pattern
1765 $filenameroot-$interval$serializer_suffix
1767 e.g.
1769 RECENT-1M.yaml
1771 This filename is split into its parts and the parts are fed to the
1772 object itself.
1774 =cut
1776 sub split_rfilename {
1777 my($self, $rfname) = @_;
1778 my($splitter) = qr(^(.+)-([^-\.]+)(\.[^\.]+));
1779 if (my($f,$i,$s) = $rfname =~ $splitter) {
1780 $self->filenameroot ($f);
1781 $self->interval ($i);
1782 $self->serializer_suffix ($s);
1783 } else {
1784 die "Alert: cannot split '$rfname', doesn't match '$splitter'";
1786 return;
1789 =head2 my $rfile = $obj->rfile
1791 Returns the full path of the I<recentfile>
1793 =cut
1795 sub rfile {
1796 my($self) = @_;
1797 my $rfile = $self->_rfile;
1798 return $rfile if defined $rfile;
1799 $rfile = File::Spec->catfile
1800 ($self->localroot,
1801 $self->rfilename,
1803 $self->_rfile ($rfile);
1804 return $rfile;
1807 =head2 $rsync_obj = $obj->rsync
1809 The File::Rsync object that this object uses for communicating with an
1810 upstream server.
1812 =cut
1814 sub rsync {
1815 my($self) = @_;
1816 my $rsync = $self->_rsync;
1817 unless (defined $rsync) {
1818 my $rsync_options = $self->rsync_options || {};
1819 if ($HAVE->{"File::Rsync"}) {
1820 $rsync = File::Rsync->new($rsync_options);
1821 $self->_rsync($rsync);
1822 } else {
1823 die "File::Rsync required for rsync operations. Cannot continue";
1826 return $rsync;
1829 =head2 (void) $obj->register_rsync_error(@err)
1831 =head2 (void) $obj->un_register_rsync_error()
1833 Register_rsync_error is called whenever the File::Rsync object fails
1834 on an exec (say, connection doesn't succeed). It issues a warning and
1835 sleeps for an increasing amount of time. Un_register_rsync_error
1836 resets the error count. See also accessor C<max_rsync_errors>.
1838 =cut
1841 my $no_success_count = 0;
1842 my $no_success_time = 0;
1843 sub register_rsync_error {
1844 my($self, @err) = @_;
1845 chomp @err;
1846 $no_success_time = time;
1847 $no_success_count++;
1848 my $max_rsync_errors = $self->max_rsync_errors;
1849 $max_rsync_errors = MAX_INT unless defined $max_rsync_errors;
1850 if ($max_rsync_errors>=0 && $no_success_count >= $max_rsync_errors) {
1851 require Carp;
1852 Carp::confess
1854 sprintf
1856 "Alert: Error while rsyncing (%s): '%s', error count: %d, exiting now,",
1857 $self->interval,
1858 join(" ",@err),
1859 $no_success_count,
1862 my $sleep = 12 * $no_success_count;
1863 $sleep = 300 if $sleep > 300;
1864 require Carp;
1865 Carp::cluck
1866 (sprintf
1868 "Warning: %s, Error while rsyncing (%s): '%s', sleeping %d",
1869 scalar(localtime($no_success_time)),
1870 $self->interval,
1871 join(" ",@err),
1872 $sleep,
1874 sleep $sleep
1876 sub un_register_rsync_error {
1877 my($self) = @_;
1878 $no_success_time = 0;
1879 $no_success_count = 0;
1883 =head2 $clone = $obj->_sparse_clone
1885 Clones just as much from itself that it does not hurt. Experimental
1886 method.
1888 Note: what fits better: sparse or shallow? Other suggestions?
1890 =cut
1892 sub _sparse_clone {
1893 my($self) = @_;
1894 my $new = bless {}, ref $self;
1895 for my $m (qw(
1896 _interval
1897 _localroot
1898 _remoteroot
1899 _rfile
1900 _use_tempfile
1901 aggregator
1902 filenameroot
1903 ignore_link_stat_errors
1904 is_slave
1905 max_files_per_connection
1906 protocol
1907 rsync_options
1908 serializer_suffix
1909 sleep_per_connection
1910 tempdir
1911 verbose
1912 )) {
1913 my $o = $self->$m;
1914 $o = Storable::dclone $o if ref $o;
1915 $new->$m($o);
1917 $new;
1920 =head2 $boolean = OBJ->ttl_reached ()
1922 =cut
1924 sub ttl_reached {
1925 my($self) = @_;
1926 my $have_mirrored = $self->have_mirrored || 0;
1927 my $now = Time::HiRes::time;
1928 my $ttl = $self->ttl;
1929 $ttl = 24.2 unless defined $ttl;
1930 if ($now > $have_mirrored + $ttl) {
1931 return 1;
1933 return 0;
1936 =head2 (void) $obj->unlock()
1938 Unlocking is implemented with an C<rmdir> on a locking directory
1939 (C<.lock> appended to $rfile).
1941 =cut
1943 sub unlock {
1944 my($self) = @_;
1945 return unless $self->_is_locked;
1946 my $rfile = $self->rfile;
1947 unlink "$rfile.lock/process" or warn "Could not unlink lockfile '$rfile.lock/process': $!";
1948 rmdir "$rfile.lock" or warn "Could not rmdir lockdir '$rfile.lock': $!";;
1949 $self->_is_locked (0);
1952 =head2 unseed
1954 Sets this recentfile in the state of not 'seeded'.
1956 =cut
1957 sub unseed {
1958 my($self) = @_;
1959 $self->seeded(0);
1962 =head2 $ret = $obj->update ($path, $type)
1964 =head2 $ret = $obj->update ($path, "new", $dirty_epoch)
1966 =head2 $ret = $obj->update ()
1968 Enter one file into the local I<recentfile>. $path is the (usually
1969 absolute) path. If the path is outside I<our> tree, then it is
1970 ignored.
1972 C<$type> is one of C<new> or C<delete>.
1974 Events of type C<new> may set $dirty_epoch. $dirty_epoch is normally
1975 not used and the epoch is calculated by the update() routine itself
1976 based on current time. But if there is the demand to insert a
1977 not-so-current file into the dataset, then the caller sets
1978 $dirty_epoch. This causes the epoch of the registered event to become
1979 $dirty_epoch or -- if the exact value given is already taken -- a tiny
1980 bit more. As compensation the dirtymark of the whole dataset is set to
1981 now or the current epoch, whichever is higher. Note: setting the
1982 dirty_epoch to the future is prohibited as it's very unlikely to be
1983 intended: it definitely might wreak havoc with the index files.
1985 The new file event is unshifted (or, if dirty_epoch is set, inserted
1986 at the place it belongs to, according to the rule to have a sequence
1987 of strictly decreasing timestamps) to the array of recent_events and
1988 the array is shortened to the length of the timespan allowed. This is
1989 usually the timespan specified by the interval of this recentfile but
1990 as long as this recentfile has not been merged to another one, the
1991 timespan may grow without bounds.
1993 The third form runs an update without inserting a new file. This may
1994 be desired to truncate a recentfile.
1996 =cut
1997 sub _epoch_monotonically_increasing {
1998 my($self,$epoch,$recent) = @_;
1999 return $epoch unless @$recent; # the first one goes unoffended
2000 if (_bigfloatgt("".$epoch,$recent->[0]{epoch})) {
2001 return $epoch;
2002 } else {
2003 return _increase_a_bit($recent->[0]{epoch});
2006 sub update {
2007 my($self,$path,$type,$dirty_epoch) = @_;
2008 if (defined $path or defined $type or defined $dirty_epoch) {
2009 die "update called without path argument" unless defined $path;
2010 die "update called without type argument" unless defined $type;
2011 die "update called with illegal type argument: $type" unless $type =~ /(new|delete)/;
2013 $self->lock;
2014 my $ctx = $self->_locked_batch_update([{path=>$path,type=>$type,epoch=>$dirty_epoch}]);
2015 $self->write_recent($ctx->{recent}) if $ctx->{something_done};
2016 $self->_assert_symlink;
2017 $self->unlock;
2020 =head2 $obj->batch_update($batch)
2022 Like update but for many files. $batch is an arrayref containing
2023 hashrefs with the structure
2026 path => $path,
2027 type => $type,
2028 epoch => $epoch,
2033 =cut
2034 sub batch_update {
2035 my($self,$batch) = @_;
2036 $self->lock;
2037 my $ctx = $self->_locked_batch_update($batch);
2038 $self->write_recent($ctx->{recent}) if $ctx->{something_done};
2039 $self->_assert_symlink;
2040 $self->unlock;
2042 sub _locked_batch_update {
2043 my($self,$batch) = @_;
2044 my $something_done = 0;
2045 my $recent = $self->recent_events;
2046 unless ($recent->[0]) {
2047 # obstetrics
2048 $something_done = 1;
2050 my %paths_in_recent = map { $_->{path} => undef } @$recent;
2051 my $interval = $self->interval;
2052 my $canonmeth = $self->canonize;
2053 unless ($canonmeth) {
2054 $canonmeth = "naive_path_normalize";
2056 my $oldest_allowed = 0;
2057 my $setting_new_dirty_mark = 0;
2058 my $console;
2059 if ($self->verbose && @$batch > 1) {
2060 eval {require Time::Progress};
2061 warn "dollarat[$@]" if $@;
2062 $| = 1;
2063 $console = new Time::Progress;
2064 $console->attr( min => 1, max => scalar @$batch );
2065 print "\n";
2067 my $i = 0;
2068 my $memo_splicepos;
2069 ITEM: for my $item (sort {($b->{epoch}||0) <=> ($a->{epoch}||0)} @$batch) {
2070 $i++;
2071 print $console->report( "\rdone %p elapsed: %L (%l sec), ETA %E (%e sec)", $i ) if $console and not $i % 50;
2072 my $ctx = $self->_update_batch_item($item,$canonmeth,$recent,$setting_new_dirty_mark,$oldest_allowed,$something_done,\%paths_in_recent,$memo_splicepos);
2073 $something_done = $ctx->{something_done};
2074 $oldest_allowed = $ctx->{oldest_allowed};
2075 $setting_new_dirty_mark = $ctx->{setting_new_dirty_mark};
2076 $recent = $ctx->{recent};
2077 $memo_splicepos = $ctx->{memo_splicepos};
2079 print "\n" if $console;
2080 if ($setting_new_dirty_mark) {
2081 $oldest_allowed = 0;
2083 TRUNCATE: while (@$recent) {
2084 if (_bigfloatlt($recent->[-1]{epoch}, $oldest_allowed)) {
2085 pop @$recent;
2086 $something_done = 1;
2087 } else {
2088 last TRUNCATE;
2091 return {something_done=>$something_done,recent=>$recent};
2093 sub _update_batch_item {
2094 my($self,$item,$canonmeth,$recent,$setting_new_dirty_mark,$oldest_allowed,$something_done,$paths_in_recent,$memo_splicepos) = @_;
2095 my($path,$type,$dirty_epoch) = @{$item}{qw(path type epoch)};
2096 if (defined $path or defined $type or defined $dirty_epoch) {
2097 $path = $self->$canonmeth($path);
2099 # you must calculate the time after having locked, of course
2100 my $now = Time::HiRes::time;
2102 my $epoch;
2103 if (defined $dirty_epoch && _bigfloatgt($now,$dirty_epoch)) {
2104 $epoch = $dirty_epoch;
2105 } else {
2106 $epoch = $self->_epoch_monotonically_increasing($now,$recent);
2108 $recent ||= [];
2109 my $merged = $self->merged;
2110 if ($merged->{epoch} && !$setting_new_dirty_mark) {
2111 my $virtualnow = _bigfloatmax($now,$epoch);
2112 # for the lower bound I think we need no big math, we calc already
2113 my $secs = $self->interval_secs();
2114 $oldest_allowed = min($virtualnow - $secs, $merged->{epoch}, $epoch);
2115 } else {
2116 # as long as we are not merged at all, no limits!
2118 my $lrd = $self->localroot;
2119 if (defined $path && $path =~ s|^\Q$lrd\E||) {
2120 $path =~ s|^/||;
2121 my $splicepos;
2122 # remove the older duplicates of this $path, irrespective of $type:
2123 if (defined $dirty_epoch) {
2124 my $ctx = $self->_update_with_dirty_epoch($path,$recent,$epoch,$paths_in_recent,$memo_splicepos);
2125 $recent = $ctx->{recent};
2126 $splicepos = $ctx->{splicepos};
2127 $epoch = $ctx->{epoch};
2128 my $dirtymark = $self->dirtymark;
2129 my $new_dm = $now;
2130 if (_bigfloatgt($epoch, $now)) { # just in case we had to increase it
2131 $new_dm = $epoch;
2133 $self->dirtymark($new_dm);
2134 $setting_new_dirty_mark = 1;
2135 if (not defined $merged->{epoch} or _bigfloatlt($epoch,$merged->{epoch})) {
2136 $self->merged(+{});
2138 } else {
2139 $recent = [ grep { $_->{path} ne $path } @$recent ];
2140 $splicepos = 0;
2142 if (defined $splicepos) {
2143 splice @$recent, $splicepos, 0, { epoch => $epoch, path => $path, type => $type };
2144 $paths_in_recent->{$path} = undef;
2146 $memo_splicepos = $splicepos;
2147 $something_done = 1;
2149 return
2151 something_done => $something_done,
2152 oldest_allowed => $oldest_allowed,
2153 setting_new_dirty_mark => $setting_new_dirty_mark,
2154 recent => $recent,
2155 memo_splicepos => $memo_splicepos,
2158 sub _update_with_dirty_epoch {
2159 my($self,$path,$recent,$epoch,$paths_in_recent,$memo_splicepos) = @_;
2160 my $splicepos;
2161 my $new_recent = [];
2162 if (exists $paths_in_recent->{$path}) {
2163 my $cancel = 0;
2164 KNOWN_EVENT: for my $i (0..$#$recent) {
2165 if ($recent->[$i]{path} eq $path) {
2166 if ($recent->[$i]{epoch} eq $epoch) {
2167 # nothing to do
2168 $cancel = 1;
2169 last KNOWN_EVENT;
2171 } else {
2172 push @$new_recent, $recent->[$i];
2175 @$recent = @$new_recent unless $cancel;
2177 if (!exists $recent->[0] or _bigfloatgt($epoch,$recent->[0]{epoch})) {
2178 $splicepos = 0;
2179 } elsif (_bigfloatlt($epoch,$recent->[-1]{epoch})) {
2180 $splicepos = @$recent;
2181 } else {
2182 my $startingpoint;
2183 if (_bigfloatgt($memo_splicepos<=$#$recent && $epoch, $recent->[$memo_splicepos]{epoch})) {
2184 $startingpoint = 0;
2185 } else {
2186 $startingpoint = $memo_splicepos;
2188 RECENT: for my $i ($startingpoint..$#$recent) {
2189 my $ev = $recent->[$i];
2190 if ($epoch eq $recent->[$i]{epoch}) {
2191 $epoch = _increase_a_bit($epoch, $i ? $recent->[$i-1]{epoch} : undef);
2193 if (_bigfloatgt($epoch,$recent->[$i]{epoch})) {
2194 $splicepos = $i;
2195 last RECENT;
2199 return {
2200 recent => $recent,
2201 splicepos => $splicepos,
2202 epoch => $epoch,
2206 =head2 seed
2208 Sets this recentfile in the state of 'seeded' which means it has to
2209 re-evaluate its uptodateness.
2211 =cut
2212 sub seed {
2213 my($self) = @_;
2214 $self->seeded(1);
2217 =head2 seeded
2219 Tells if the recentfile is in the state 'seeded'.
2221 =cut
2222 sub seeded {
2223 my($self, $set) = @_;
2224 if (defined $set) {
2225 $self->_seeded ($set);
2227 my $x = $self->_seeded;
2228 unless (defined $x) {
2229 $x = 0;
2230 $self->_seeded ($x);
2232 return $x;
2235 =head2 uptodate
2237 True if this object has mirrored the complete interval covered by the
2238 current recentfile.
2240 =cut
2241 sub uptodate {
2242 my($self) = @_;
2243 my $uptodate;
2244 my $why;
2245 if ($self->_uptodateness_ever_reached and not $self->seeded) {
2246 $why = "saturated";
2247 $uptodate = 1;
2249 # it's too easy to misconfigure ttl and related timings and then
2250 # never reach uptodateness, so disabled 2009-03-22
2251 if (0 and not defined $uptodate) {
2252 if ($self->ttl_reached){
2253 $why = "ttl_reached returned true, so we are not uptodate";
2254 $uptodate = 0 ;
2257 unless (defined $uptodate) {
2258 # look if recentfile has unchanged timestamp
2259 my $minmax = $self->minmax;
2260 if (exists $minmax->{mtime}) {
2261 my $rfile = $self->_my_current_rfile;
2262 my @stat = stat $rfile;
2263 if (@stat) {
2264 my $mtime = $stat[9];
2265 if (defined $mtime && defined $minmax->{mtime} && $mtime > $minmax->{mtime}) {
2266 $why = "mtime[$mtime] of rfile[$rfile] > minmax/mtime[$minmax->{mtime}], so we are not uptodate";
2267 $uptodate = 0;
2268 } else {
2269 my $covered = $self->done->covered(@$minmax{qw(max min)});
2270 $why = sprintf "minmax covered[%s], so we return that", defined $covered ? $covered : "UNDEF";
2271 $uptodate = $covered;
2273 } else {
2274 require Carp;
2275 $why = "Could not stat '$rfile': $!";
2276 Carp::cluck($why);
2277 $uptodate = 0;
2281 unless (defined $uptodate) {
2282 $why = "fallthrough, so not uptodate";
2283 $uptodate = 0;
2285 if ($uptodate) {
2286 $self->_uptodateness_ever_reached(1);
2288 my $remember =
2290 uptodate => $uptodate,
2291 why => $why,
2293 $self->_remember_last_uptodate_call($remember);
2294 return $uptodate;
2297 =head2 $obj->write_recent ($recent_files_arrayref)
2299 Writes a I<recentfile> based on the current reflection of the current
2300 state of the tree limited by the current interval.
2302 =cut
2303 sub _resort {
2304 my($self) = @_;
2305 @{$_[1]} = sort { _bigfloatcmp($b->{epoch},$a->{epoch}) } @{$_[1]};
2306 return;
2308 sub write_recent {
2309 my ($self,$recent) = @_;
2310 die "write_recent called without argument" unless defined $recent;
2311 my $Last_epoch;
2312 SANITYCHECK: for my $i (0..$#$recent) {
2313 if (defined($Last_epoch) and _bigfloatge($recent->[$i]{epoch},$Last_epoch)) {
2314 require Carp;
2315 Carp::confess(sprintf "Warning: disorder '%s'>='%s', re-sorting %s\n",
2316 $recent->[$i]{epoch}, $Last_epoch, $self->interval);
2317 # you may want to:
2318 # $self->_resort($recent);
2319 # last SANITYCHECK;
2321 $Last_epoch = $recent->[$i]{epoch};
2323 my $minmax = $self->minmax;
2324 if (!defined $minmax->{max} || _bigfloatlt($minmax->{max},$recent->[0]{epoch})) {
2325 $minmax->{max} = @$recent && exists $recent->[0]{epoch} ? $recent->[0]{epoch} : undef;
2327 if (!defined $minmax->{min} || _bigfloatlt($minmax->{min},$recent->[-1]{epoch})) {
2328 $minmax->{min} = @$recent && exists $recent->[-1]{epoch} ? $recent->[-1]{epoch} : undef;
2330 $self->minmax($minmax);
2331 my $meth = sprintf "write_%d", $self->protocol;
2332 $self->$meth($recent);
2335 =head2 $obj->write_0 ($recent_files_arrayref)
2337 Delegate of C<write_recent()> on protocol 0
2339 =cut
2341 sub write_0 {
2342 my ($self,$recent) = @_;
2343 my $rfile = $self->rfile;
2344 YAML::Syck::DumpFile("$rfile.new",$recent);
2345 rename "$rfile.new", $rfile or die "Could not rename to '$rfile': $!";
2348 =head2 $obj->write_1 ($recent_files_arrayref)
2350 Delegate of C<write_recent()> on protocol 1
2352 =cut
2354 sub write_1 {
2355 my ($self,$recent) = @_;
2356 my $rfile = $self->rfile;
2357 my $suffix = $self->serializer_suffix;
2358 my $data = {
2359 meta => $self->meta_data,
2360 recent => $recent,
2362 my $serialized;
2363 if ($suffix eq ".yaml") {
2364 $serialized = YAML::Syck::Dump($data);
2365 } elsif ($HAVE->{"Data::Serializer"}) {
2366 my $serializer = Data::Serializer->new
2367 ( serializer => $serializers{$suffix} );
2368 $serialized = $serializer->raw_serialize($data);
2369 } else {
2370 die "Data::Serializer not installed, cannot proceed with suffix '$suffix'";
2372 open my $fh, ">", "$rfile.new" or die "Could not open >'$rfile.new': $!";
2373 print $fh $serialized;
2374 close $fh or die "Could not close '$rfile.new': $!";
2375 rename "$rfile.new", $rfile or die "Could not rename to '$rfile': $!";
2378 BEGIN {
2379 my $nq = qr/[^"]+/; # non-quotes
2380 my @pod_lines =
2381 split /\n/, <<'=cut'; %serializers = map { my @x = /"($nq)"\s+=>\s+"($nq)"/; @x } grep {s/^=item\s+C<<\s+(.+)\s+>>$/$1/} @pod_lines; }
2383 =head1 SERIALIZERS
2385 The following suffixes are supported and trigger the use of these
2386 serializers:
2388 =over 4
2390 =item C<< ".yaml" => "YAML::Syck" >>
2392 =item C<< ".json" => "JSON" >>
2394 =item C<< ".sto" => "Storable" >>
2396 =item C<< ".dd" => "Data::Dumper" >>
2398 =back
2400 =cut
2402 BEGIN {
2403 my @pod_lines =
2404 split /\n/, <<'=cut'; %seconds = map { eval } grep {s/^=item\s+C<<(.+)>>$/$1/} @pod_lines; }
2406 =head1 INTERVAL SPEC
2408 An interval spec is a primitive way to express time spans. Normally it
2409 is composed from an integer and a letter.
2411 As a special case, a string that consists only of the single letter
2412 C<Z>, stands for MAX_INT seconds.
2414 The following letters express the specified number of seconds:
2416 =over 4
2418 =item C<< s => 1 >>
2420 =item C<< m => 60 >>
2422 =item C<< h => 60*60 >>
2424 =item C<< d => 60*60*24 >>
2426 =item C<< W => 60*60*24*7 >>
2428 =item C<< M => 60*60*24*30 >>
2430 =item C<< Q => 60*60*24*90 >>
2432 =item C<< Y => 60*60*24*365.25 >>
2434 =back
2436 =cut
2438 =head1 SEE ALSO
2440 L<File::Rsync::Mirror::Recent>,
2441 L<File::Rsync::Mirror::Recentfile::Done>,
2442 L<File::Rsync::Mirror::Recentfile::FakeBigFloat>
2444 =head1 BUGS
2446 Please report any bugs or feature requests through the web interface
2448 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Rsync-Mirror-Recentfile>.
2449 I will be notified, and then you'll automatically be notified of
2450 progress on your bug as I make changes.
2452 =head1 KNOWN BUGS
2454 Memory hungry: it seems all memory is allocated during the initial
2455 rsync where a list of all files is maintained in memory.
2457 =head1 SUPPORT
2459 You can find documentation for this module with the perldoc command.
2461 perldoc File::Rsync::Mirror::Recentfile
2463 You can also look for information at:
2465 =over 4
2467 =item * RT: CPAN's request tracker
2469 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Rsync-Mirror-Recentfile>
2471 =item * AnnoCPAN: Annotated CPAN documentation
2473 L<http://annocpan.org/dist/File-Rsync-Mirror-Recentfile>
2475 =item * CPAN Ratings
2477 L<http://cpanratings.perl.org/d/File-Rsync-Mirror-Recentfile>
2479 =item * Search CPAN
2481 L<http://search.cpan.org/dist/File-Rsync-Mirror-Recentfile>
2483 =back
2486 =head1 ACKNOWLEDGEMENTS
2488 Thanks to RJBS for module-starter.
2490 =head1 AUTHOR
2492 Andreas König
2494 =head1 COPYRIGHT & LICENSE
2496 Copyright 2008,2009 Andreas König.
2498 This program is free software; you can redistribute it and/or modify it
2499 under the same terms as Perl itself.
2502 =cut
2504 1; # End of File::Rsync::Mirror::Recentfile
2506 # Local Variables:
2507 # mode: cperl
2508 # cperl-indent-level: 4
2509 # End: