From 85131ba325b828da20cbb6371d01e1006cf7cfc0 Mon Sep 17 00:00:00 2001 From: "Andreas J. Koenig" Date: Tue, 7 Oct 2008 00:34:08 +0200 Subject: [PATCH] bugfix where intervals were not collapsed: solved with overlapping intervals instead of exactly fitting ones --- Todo | 34 ++++- lib/File/Rsync/Mirror/Recent.pm | 23 +++- lib/File/Rsync/Mirror/Recentfile.pm | 74 ++++++----- lib/File/Rsync/Mirror/Recentfile/Done.pm | 41 +++++- lib/File/Rsync/Mirror/Recentfile/FakeBigFloat.pm | 24 ++++ t/02-operation.t | 19 ++- t/03-done.t | 55 ++++++++ t/RECENT-1h.yaml | 157 +++++++++++++++++++++++ t/perlcriticrc | 2 +- 9 files changed, 384 insertions(+), 45 deletions(-) create mode 100644 t/RECENT-1h.yaml diff --git a/Todo b/Todo index 53e2ef1..1cbe864 100644 --- a/Todo +++ b/Todo @@ -1,3 +1,35 @@ +2008-10-06 Andreas J. Koenig + + * I think, Done::register_one is doing wrong in that it does not + conflate neighboring pieces. The covered() method cannot do this because + it has no recent_events array at hand. But register_one has it and could + do it and for some reason misses to do it (sometimes). + + This means that the three tests I just wrote can probably not survive + because they test with an already broken Done structure. + + The art now is to detect how it happens, then to reproduce, then write a + test, then fix it. + + So from the logfile this is what happens: we have a good interval with + newest file being F1 at T1. Now remotely F1 gets a change and F2 goes on + top of it. Locally we now mirror F2 and open a new done interval for it. + Then we mirror F1 but this time with the timestamp T1b. And when we then + try to close the gap, we do not find T1 but instead something older. We + should gladly accept this older piece and this would fix this bug. + + FIXED + + * bug to fix: when the 1h file changes while rmirror is running, we do + correctly sync the new files but never switch to the 6h file but rather + stay in a rather quick loop that fetches the 1h file again and again. + + Is it possible that we initialize a new object? Or does + get_remote_recentfile_as_tempfile overwrite something in myself? + + Want a new option: _runstatusfile => $file which frequently dumps the + state of all recentfiles to a file. + 2008-10-04 Andreas J. Koenig * Todo: now teach update to verify the timestamp is about to write @@ -168,7 +200,7 @@ - status file for not long running jobs that want to track upstream with a, say, cronjob. - - revisit all XXX _float areas and study Sub::Exporter + - revisit all XXX _float areas and study Sub::Exporter DONE - persistent DB even though we just said we do not need it. Just for extended capabilities and time savings when, for example, upstream diff --git a/lib/File/Rsync/Mirror/Recent.pm b/lib/File/Rsync/Mirror/Recent.pm index fe6fd42..d0cd111 100644 --- a/lib/File/Rsync/Mirror/Recent.pm +++ b/lib/File/Rsync/Mirror/Recent.pm @@ -100,6 +100,8 @@ BEGIN { "_principal_recentfile", "_recentfiles", "_rsync", + "_runstatusfile", # frequenty dumps all rfs + "_logfilefordone", # turns on _logfile on the DONE of the principal recentfile ); my @pod_lines = @@ -392,10 +394,11 @@ Testing this ATM with: compress => 1, links => 1, times => 1, - checksum => 1, + checksum => 0, }, verbose => 1, - + _runstatusfile => "recent-rmirror-state.yml", + _logfilefordone => "recent-rmirror-donelog.log", ); $rrr->rmirror ( "skip-deletes" => 1, loop => 1 ); @@ -410,8 +413,6 @@ sub rmirror { my $_once_per_20s = sub { my $p = $self->principal_recentfile; - require YAML::Syck; YAML::Syck::DumpFile("recent-rmirror-state-$$.yml",$self); # XXX - for my $i (1) { print STDERR ("TODO: refetch prince and let it reset what needs to be resetted\n"); sleep 1; @@ -423,10 +424,24 @@ sub rmirror { my $minimum_time_per_loop = 20; # XXX needs accessor: warning, if # set too low, we do nothing but # mirror the principal! + if (my $logfile = $self->_logfilefordone) { + $rfs->[0]->done->_logfile($logfile); + } LOOP: while () { my $ttleave = time + $minimum_time_per_loop; RECENTFILE: for my $i (0..$#$rfs) { my $rf = $rfs->[$i]; + if (my $file = $self->_runstatusfile) { + require YAML::Syck; + YAML::Syck::DumpFile + ( + $file, + {i => $i, + self => $self, + time => time, + uptodate => {map {($_=>$rfs->[$_]->uptodate)} 0..$#$rfs}, + }); + } last RECENTFILE if time > $ttleave; if ($rf->uptodate){ $rfs->[$i+1]->done->merge($rf->done) if $i < $#$rfs; diff --git a/lib/File/Rsync/Mirror/Recentfile.pm b/lib/File/Rsync/Mirror/Recentfile.pm index 778cbb4..f45f6e5 100644 --- a/lib/File/Rsync/Mirror/Recentfile.pm +++ b/lib/File/Rsync/Mirror/Recentfile.pm @@ -211,6 +211,7 @@ BEGIN { "_localroot", "_merged", "_pathdb", + "_remember_last_uptodate_call", "_remote_dir", "_remoteroot", "_rfile", @@ -465,8 +466,8 @@ sub _assert_symlink { =head2 $done = $obj->done $done is a reference to a File::Rsync::Mirror::Recentfile::Done object -that keeps track of rsync activities. Only used/needed when we are a -mirroring slave. +that keeps track of rsync activities. Only needed and used when we are +a mirroring slave. =cut @@ -836,20 +837,22 @@ sub merged { my $into; if ($merged and $into = $merged->{into_interval} and defined $self->_interval) { if ($into eq $self->interval) { - warn sprintf + require Carp; + Carp::cluck(sprintf ( "Warning: into_interval[%s] same as own interval[%s]. Danger ahead.", $into, $self->interval, - ); + )); } elsif ($self->interval_secs($into) < $self->interval_secs) { - warn sprintf + require Carp; + Carp::cluck(sprintf ( "Warning: into_interval[%s] smaller than own interval[%s] on interval[%s]. Danger ahead.", $self->interval_secs($into), $self->interval_secs, $self->interval, - ); + )); } } $merged; @@ -1731,37 +1734,42 @@ current recentfile. =cut sub uptodate { - my($self, $debug) = @_; + my($self) = @_; + my $uptodate; + my $why; if ($self->ttl_reached){ - if ($debug) { - warn "ttl_reached returned true, so we are not uptodate"; - } - return 0 ; - } - - # look if recentfile has unchanged timestamp - my $minmax = $self->minmax; - if (exists $minmax->{mtime}) { - my $rfile = $self->_my_current_rfile; - my @stat = stat $rfile; - my $mtime = $stat[9]; - if ($mtime > $minmax->{mtime}) { - if ($debug) { - warn "$mtime > $minmax->{mtime}, so we are not uptodate"; - } - return 0; - } else { - my $covered = $self->done->covered(@$minmax{qw(max min)}); - if ($debug) { - warn "minmax covered[$covered], so we return that"; + $why = "ttl_reached returned true, so we are not uptodate"; + $uptodate = 0 ; + } + + unless (defined $uptodate) { + # look if recentfile has unchanged timestamp + my $minmax = $self->minmax; + if (exists $minmax->{mtime}) { + my $rfile = $self->_my_current_rfile; + my @stat = stat $rfile; + my $mtime = $stat[9]; + if ($mtime > $minmax->{mtime}) { + $why = "mtime[$mtime] of rfile[$rfile] > minmax/mtime[$minmax->{mtime}], so we are not uptodate"; + $uptodate = 0; + } else { + my $covered = $self->done->covered(@$minmax{qw(max min)}); + $why = "minmax covered[$covered], so we return that"; + $uptodate = $covered; } - return $covered; } } - if ($debug) { - warn "fallthrough, so not uptodate"; - } - return 0; + unless (defined $uptodate) { + $why = "fallthrough, so not uptodate"; + $uptodate = 0; + } + my $remember = + { + uptodate => $uptodate, + why => $why, + }; + $self->_remember_last_uptodate_call($remember); + return $uptodate; } =head2 $obj->write_recent ($recent_files_arrayref) diff --git a/lib/File/Rsync/Mirror/Recentfile/Done.pm b/lib/File/Rsync/Mirror/Recentfile/Done.pm index 0c354e2..5ea688e 100644 --- a/lib/File/Rsync/Mirror/Recentfile/Done.pm +++ b/lib/File/Rsync/Mirror/Recentfile/Done.pm @@ -25,6 +25,10 @@ use version; our $VERSION = qv('0.0.1'); $done->register ( $recent_events, [3,4,5,9] ); # registers elements 3-5 and 9 my $boolean = $done->covered ( $epoch ); +=head1 DESCRIPTION + +Keeping track of already rsynced timespans. + =head1 EXPORT No exports. @@ -57,6 +61,7 @@ my @accessors; BEGIN { @accessors = ( "__intervals", + "_logfile", ); my @pod_lines = @@ -81,7 +86,8 @@ use accessors @accessors; =head2 $boolean = $obj->covered ( $epoch ) The first form returns true if both timestamps $epoch1 and $epoch2 in -floating point notation have been registered, otherwise false. +floating point notation have been registered within one interval, +otherwise false. The second form returns true if this timestamp has been registered. @@ -89,6 +95,7 @@ The second form returns true if this timestamp has been registered. sub covered { my($self, $epoch_high, $epoch_low) = @_; + die "Alert: covered() called without or with undefined first argument" unless defined $epoch_high; my $intervals = $self->_intervals; return unless @$intervals; if (defined $epoch_low) { @@ -173,12 +180,34 @@ sub register { $reg = [0..$#$re]; } REGISTRANT: for my $i (@$reg) { + my $logfile = $self->_logfile; + if ($logfile) { + require YAML::Syck; + open my $fh, ">>", $logfile or die "Could not open '$logfile': $!"; + print $fh YAML::Syck::Dump({ + t => "before", + i => $i, + ($i>0 ? ("re-1" => $re->[$i-1]) : ()), + "re-0" => $re->[$i], + ($i<$#$re ? ("re+1" => $re->[$i+1]) : ()), + intervals => $intervals, + }); + } $self->_register_one ({ i => $i, re => $re, intervals => $intervals, }); + if ($logfile) { + require YAML::Syck; + open my $fh, ">>", $logfile or die "Could not open '$logfile': $!"; + print $fh YAML::Syck::Dump({ + t => "after", + i => $i, + intervals => $intervals, + }); + } } } @@ -192,14 +221,18 @@ sub _register_one { if (@$intervals) { my $registered = 0; for my $iv (@$intervals) { - my($upper,$lower) = @$iv; # may be the same + my($ivupper,$ivlower) = @$iv; # may be the same if ($i > 0 - && $re->[$i-1]{epoch} eq $lower) { + && _bigfloatge($re->[$i-1]{epoch}, $ivlower) + && _bigfloatle($re->[$i-1]{epoch}, $ivupper) + ) { $iv->[1] = $epoch; $registered++; } if ($i < $#$re - && $re->[$i+1]{epoch} eq $upper) { + && _bigfloatle($re->[$i+1]{epoch}, $ivupper) + && _bigfloatge($re->[$i+1]{epoch}, $ivlower) + ) { $iv->[0] = $epoch; $registered++; } diff --git a/lib/File/Rsync/Mirror/Recentfile/FakeBigFloat.pm b/lib/File/Rsync/Mirror/Recentfile/FakeBigFloat.pm index 7f90137..02b84fc 100644 --- a/lib/File/Rsync/Mirror/Recentfile/FakeBigFloat.pm +++ b/lib/File/Rsync/Mirror/Recentfile/FakeBigFloat.pm @@ -6,7 +6,9 @@ use Data::Float qw(nextup nextdown); # _bigfloat sub _bigfloatcmp ($$); +sub _bigfloatge ($$); sub _bigfloatgt ($$); +sub _bigfloatle ($$); sub _bigfloatlt ($$); sub _bigfloatmax ($$); sub _bigfloatmin ($$); @@ -29,7 +31,9 @@ use base qw(Exporter); our %EXPORT_TAGS; our @EXPORT_OK = qw( _bigfloatcmp + _bigfloatge _bigfloatgt + _bigfloatle _bigfloatlt _bigfloatmax _bigfloatmin @@ -77,6 +81,16 @@ sub _bigfloatcmp ($$) { $l cmp $r; } +=head2 _bigfloatge ( $l, $r ) + +Same for ge + +=cut +sub _bigfloatge ($$) { + my($l,$r) = @_; + _bigfloatcmp($l,$r) >= 0; +} + =head2 _bigfloatgt ( $l, $r ) Same for gt @@ -87,6 +101,16 @@ sub _bigfloatgt ($$) { _bigfloatcmp($l,$r) > 0; } +=head2 _bigfloatle ( $l, $r ) + +Same for lt + +=cut +sub _bigfloatle ($$) { + my($l,$r) = @_; + _bigfloatcmp($l,$r) <= 0; +} + =head2 _bigfloatlt ( $l, $r ) Same for lt diff --git a/t/02-operation.t b/t/02-operation.t index e6747f3..8d85ef1 100644 --- a/t/02-operation.t +++ b/t/02-operation.t @@ -1,9 +1,15 @@ +use Getopt::Long; use Test::More; use strict; my $tests; BEGIN { $tests = 0 } use lib "lib"; +my %Opt; +GetOptions( + "verbose!", + ) or die; +$Opt{verbose} ||= $ENV{PERL_RERSYNCRECENT_TEST_VERBOSE}; my $HAVE; BEGIN { @@ -29,6 +35,7 @@ use YAML::Syck; my $root_from = "t/ta"; my $root_to = "t/tb"; +my $statusfile = "t/recent-rmirror-state.yml"; rmtree [$root_from, $root_to]; { @@ -360,19 +367,27 @@ rmtree [$root_from, $root_to]; # ignore_link_stat_errors => 1, localroot => $root_to, remote => "$root_from/RECENT.recent", - # verbose => 1, + verbose => $Opt{verbose}, rsync_options => { links => 1, times => 1, compress => 1, checksum => 1, }, + _runstatusfile => $statusfile, ); $recc->rmirror; + my $rf2 = File::Rsync::Mirror::Recentfile->new_from_file("$root_from/RECENT-5s.yaml"); + my $file = "$root_from/about-re-mirroring.txt"; + open my $fh, ">", $file or die "Could not open '$file': $!"; + print $fh time; + close $fh or die "Could not close '$file': $!"; + $rf2->update($file, "new"); + $recc->rmirror; } } -rmtree [$root_from, $root_to]; +rmtree [$root_from, $root_to, $statusfile] unless $Opt{verbose}; BEGIN { plan tests => $tests } diff --git a/t/03-done.t b/t/03-done.t index cdb9df7..68d16bc 100644 --- a/t/03-done.t +++ b/t/03-done.t @@ -1,4 +1,7 @@ use Data::Dumper; +use File::Copy qw(cp); +use File::Path qw(mkpath rmtree); +use File::Rsync::Mirror::Recentfile; use File::Rsync::Mirror::Recentfile::Done; use List::Util qw(sum); use Storable qw(dclone); @@ -100,6 +103,58 @@ my @snapshots; } } +{ + BEGIN { + $tests += 4; + } + mkpath "t/ta"; + cp "t/RECENT-1h.yaml", "t/ta/RECENT-Z.yaml"; + my $rf = bless( { + '-aggregator' => [ + '1d', + '1W', + '1M', + '1Q', + '1Y', + 'Z' + ], + '-_localroot' => "t/ta", + '-filenameroot' => 'RECENT', + '-serializer_suffix' => '.yaml', + '-minmax' => { + 'mtime' => '1223270942', + 'min' => '1223269222.00701', + 'max' => '1223270911.76639' + }, + '-verbose' => '1', + '-_done' => bless( { + '-__intervals' => [ + [ + '1223270911.76639', + '1223256470.41935' + ] + ] + }, 'File::Rsync::Mirror::Recentfile::Done' ), + '-have_mirrored' => '1223271134.78303', + '-_interval' => 'Z', + '-protocol' => '1' + }, 'File::Rsync::Mirror::Recentfile' ); + my $rfile = $rf->_my_current_rfile (); + ok $rfile, "Could determine the current rfile[$rfile]"; + my $re = $rf->recent_events; + my $cnt = scalar @$re; + ok $cnt, "re have more than one[$cnt] elements"; + my $done = $rf->done; + ok $done->covered ($re->[0]{epoch},$re->[-1]{epoch}), "covered I"; + $rf->update("t/ta/id/M/MS/MSIMERSON/Mail-Toaster-5.12_01.tar.gz","new"); + $rf->update("t/ta/id/M/MS/MSIMERSON/Mail-Toaster-5.12_01.readme","new"); + my $re2 = $rf->recent_events; + $done->register($re2, [0,1]); + ok $done->covered ($re2->[0]{epoch},$re2->[-1]{epoch}), "covered II"; +} + +rmtree ( "t/ta" ); + BEGIN { plan tests => $tests } # Local Variables: diff --git a/t/RECENT-1h.yaml b/t/RECENT-1h.yaml new file mode 100644 index 0000000..301318d --- /dev/null +++ b/t/RECENT-1h.yaml @@ -0,0 +1,157 @@ +--- +meta: + Producers: + $0: /usr/local/perl/bin/rrr-aggregate + File::Rsync::Mirror::Recentfile: 0.0.1 + time: 1223270942.18846 + aggregator: + - 6h + - 1d + - 1W + - 1M + - 1Q + - 1Y + - Z + canonize: naive_path_normalize + comment: These 'RECENT' files are part of a test of a new CPAN mirroring concept. Please ignore them for now. + filenameroot: RECENT + interval: Z + protocol: 1 + serializer_suffix: .yaml +recent: + - + epoch: 1223270911.76639 + path: id/M/MS/MSIMERSON/Mail-Toaster-5.12_01.tar.gz + type: new + - + epoch: 1223270911.76193 + path: id/M/MS/MSIMERSON/CHECKSUMS + type: new + - + epoch: 1223270811.41559 + path: id/Z/ZA/ZAG/CHECKSUMS + type: new + - + epoch: 1223270810.95688 + path: id/V/VV/VVELOX/CHECKSUMS + type: new + - + epoch: 1223270810.89322 + path: id/V/VP/VPIT/CHECKSUMS + type: new + - + epoch: 1223270810.75097 + path: id/V/VA/VAYDE/Lazy/CHECKSUMS + type: new + - + epoch: 1223270810.73467 + path: id/V/VA/VASILUS/CHECKSUMS + type: new + - + epoch: 1223270808.09977 + path: id/S/SC/SCHROEER/Wortschatz/CHECKSUMS + type: new + - + epoch: 1223270807.69462 + path: id/R/RS/RSRCHBOY/CHECKSUMS + type: new + - + epoch: 1223270807.53375 + path: id/R/RR/RRA/CHECKSUMS + type: new + - + epoch: 1223270807.01402 + path: id/R/RJ/RJBS/CHECKSUMS + type: new + - + epoch: 1223270800.42829 + path: id/M/MT/MTHURN/CHECKSUMS + type: new + - + epoch: 1223270797.76178 + path: id/K/KN/KNORR/CHECKSUMS + type: new + - + epoch: 1223270797.35933 + path: id/K/KA/KANE/CHECKSUMS + type: new + - + epoch: 1223270796.38567 + path: id/J/JR/JRM/CHECKSUMS + type: new + - + epoch: 1223270796.36758 + path: id/J/JR/JRED/CHECKSUMS + type: new + - + epoch: 1223270795.8004 + path: id/J/JM/JMATES/CHECKSUMS + type: new + - + epoch: 1223270795.45088 + path: id/J/JE/JETTERO/CHECKSUMS + type: new + - + epoch: 1223270795.11028 + path: id/J/JB/JBURNETT/CHECKSUMS + type: new + - + epoch: 1223270793.54239 + path: id/G/GP/GPHAT/CHECKSUMS + type: new + - + epoch: 1223270792.74174 + path: id/F/FL/FLOWERYSO/SmugMug/CHECKSUMS + type: new + - + epoch: 1223270792.53034 + path: id/F/FA/FAYLAND/CHECKSUMS + type: new + - + epoch: 1223270791.856 + path: id/D/DW/DWHEELER/CHECKSUMS + type: new + - + epoch: 1223270791.13817 + path: id/D/DL/DLAND/CHECKSUMS + type: new + - + epoch: 1223270790.41091 + path: id/D/DA/DANNY/CHECKSUMS + type: new + - + epoch: 1223270789.69511 + path: id/C/CL/CLEONTY/CHECKSUMS + type: new + - + epoch: 1223270789.64418 + path: id/C/CL/CLAESJAC/CHECKSUMS + type: new + - + epoch: 1223270789.28434 + path: id/C/CD/CDOLAN/CHECKSUMS + type: new + - + epoch: 1223270788.87453 + path: id/C/CA/CASIANO/CHECKSUMS + type: new + - + epoch: 1223270788.06877 + path: id/B/BE/BEHANW/CHECKSUMS + type: new + - + epoch: 1223270787.55463 + path: id/A/AU/AUDREYT/CHECKSUMS + type: new + - + epoch: 1223270786.78341 + path: id/A/AS/ASHLEY/CHECKSUMS + type: new + - + epoch: 1223269613.67094 + path: id/V/VV/VVELOX/zcls-0.0.1.pl + type: new + - + epoch: 1223269222.00701 + path: id/V/VV/VVELOX/zcls-0.0.0.pl + type: new diff --git a/t/perlcriticrc b/t/perlcriticrc index e4b9e40..ede5a82 100644 --- a/t/perlcriticrc +++ b/t/perlcriticrc @@ -18,4 +18,4 @@ include = ClassHierarchies #arg2 = value2 [Subroutines::ProhibitExcessComplexity] -max_mccabe = 21 +max_mccabe = 22 -- 2.11.4.GIT