From 111232530f7f0c236022fbef72a5e67ecdb76cf3 Mon Sep 17 00:00:00 2001 From: "Andreas J. Koenig" Date: Sat, 5 Feb 2011 08:06:46 +0100 Subject: [PATCH] WIP on fsck and server --- Todo | 13 ++++++++++++ bin/rrr-fsck | 67 +++++++++++++++++++++++++++++++++++++++++++++++++--------- bin/rrr-server | 30 +++++++++++++++++--------- 3 files changed, 90 insertions(+), 20 deletions(-) diff --git a/Todo b/Todo index 8dd854e..cd6cab5 100644 --- a/Todo +++ b/Todo @@ -1,3 +1,16 @@ +2011-02-05 Andreas J. Koenig + + * IN_Q_OVERFLOW needs to trigger an fsck, likewise the entry into the + server. + + * wishlist: callback in recent_events that fires when an event is added + to the list so we can display the progress during fsck. + + * speed: a large directory removal that is done by the kernel in 1 + second triggers minutes of bookkeeping (07:56:42 - 08:01:10 for ~3000 + files), all due to Schlemiehl again. Want to do some collecting of + pending ops before actually writing to the RECENT files. Lock! + 2011-01-30 Andreas J. 
Koenig * rrr-init and rrr-server need to be used such: diff --git a/bin/rrr-fsck b/bin/rrr-fsck index 9e19161..1043ede 100755 --- a/bin/rrr-fsck +++ b/bin/rrr-fsck @@ -76,6 +76,7 @@ use File::Spec; use Getopt::Long; use List::Util qw(max); use Pod::Usage qw(pod2usage); +use Time::HiRes qw(time sleep); our %Opt; GetOptions(\%Opt, @@ -106,18 +107,34 @@ my $root = $recc->localroot; die "Alert: Root not defined, giving up" unless defined $root; my %diskfiles; +my $i; +my $last_verbosity = 0; +$|=1; +if ($Opt{verbose}) { + print "\n"; +} find({ wanted => sub { - my @stat = lstat $_; - return if -l _; - return unless -f _; - $diskfiles{$File::Find::name} = $stat[9]; + my @lstat = lstat $_; + return unless -l _ or -f _; + $i++; + if ($Opt{verbose} && time - $last_verbosity > 0.166666) { + printf "\r%8d files and symlinks checked on disk ", $i; + $last_verbosity = time; + } + $diskfiles{$File::Find::name} = $lstat[9]; }, no_chdir => 1, }, $root ); +if ($Opt{verbose}) { + printf "\r%8d files checked on disk\n", $i; +} +if ($Opt{verbose}) { + print "\rChecking index"; +} my $indexfiles = $recc->news; my %seen; my %indexfiles = map {("$root/$_->{path}"=>$_->{epoch})} grep { !$seen{$_->{path}}++ && $_->{type} eq "new" } @$indexfiles; @@ -126,6 +143,9 @@ for my $rf (@{$recc->recentfiles}) { my @stat = stat $rfrfile or die "Could not stat '$rfrfile': $!"; $indexfiles{$rfrfile} = $stat[9]; } +if ($Opt{verbose}) { + printf "%8d entries read from index\n", scalar keys %indexfiles; +} my $sprintfd = length(max scalar @$indexfiles, scalar keys %diskfiles); warn sprintf( "diskfiles: %*d\n". 
@@ -142,20 +162,38 @@ warn sprintf( $sprintfd, scalar @indexmisses, ); $DB::single++; +my $rf = $recc->principal_recentfile; +my $last_aggregate_call = time; for my $dm (@diskmisses) { - if ($Opt{"dry-run"}) { - warn "Would fetch $dm\n"; + if (0) { + } elsif ($Opt{"dry-run"}) { + if ($Opt{remoteroot}) { + warn "Would fetch $dm\n"; + } else { + warn "Would remove from indexfile $dm\n"; + } } elsif ($Opt{remoteroot}) { my $relative = substr $dm, 1 + length $root; - $recc->principal_recentfile->get_remotefile($relative); + $rf->get_remotefile($relative); } else { - warn "Missing on disk: $dm\n"; + warn "Removing from indexfile: $dm\n"; + $rf->update($dm,"delete"); + if (time > $last_aggregate_call + $rf->interval_secs) { + warn "Aggregating\n"; + $rf->aggregate; + $last_aggregate_call = time; + } + sleep 0.33; } } for my $im (@indexmisses) { if ($Opt{"dry-run"}) { - warn "Would remove $im\n"; - } else { + if ($Opt{remoteroot}) { + warn "Would remove $im\n"; + } else { + warn "Would add to indexfile $im\n"; + } + } elsif ($Opt{remoteroot}) { my $ans; if ($Opt{yes}) { warn "Going to unlink '$im'\n"; @@ -166,6 +204,15 @@ for my $im (@indexmisses) { if ($ans =~ /^y/i) { unlink $im or die "Could not unlink '$im': $!"; } + } else { + warn "Adding to indexfile: $im\n"; + $rf->update($im,"new"); + if (time > $last_aggregate_call + $rf->interval_secs) { + warn "Aggregating\n"; + $rf->aggregate; + $last_aggregate_call = time; + } + sleep 0.33; } } __END__ diff --git a/bin/rrr-server b/bin/rrr-server index a78ab16..8b28690 100755 --- a/bin/rrr-server +++ b/bin/rrr-server @@ -127,7 +127,20 @@ my $in_callback = sub { } unless ($ignore) { my $fullname = $ev->fullname; - if (-d $fullname) { + my($reportname) = $fullname =~ m{^\Q$rootdir\E/(.*)}; + my $time = sprintf "%02d:%02d:%02d", (localtime)[2,1,0]; + if (0) { + } elsif ($ev->IN_DELETE || $ev->IN_MOVED_FROM) { + # we don't know whether it was a directory, we simply pass + # it to $rf. 
$rf must be robust enough to swallow bogus + # deletes. Alternatively we could look into $rf whether + # this object is known, couldn't we? + $rf->update($fullname,"delete"); + warn "[$time] Deleteobj $reportname (@stringifiedmask)\n"; + } elsif (-l $fullname) { + $rf->update($fullname,"new"); + warn "[$time] Updatelink $reportname (@stringifiedmask)\n"; + } elsif (-d _) { $inotify->watch ( $fullname, @@ -140,20 +153,17 @@ my $in_callback = sub { |IN_MOVE_SELF() ) or die "watch creation failed"; - warn "==> Newwatcher $fullname\n"; - } elsif (-f $ev->fullname) { + warn "[$time] Newwatcher $reportname (@stringifiedmask)\n"; + } elsif (-f _) { if ($ev->IN_CLOSE_WRITE || $ev->IN_MOVED_TO) { - $rf->update($ev->fullname,"new"); - warn "==> Updatefile $fullname\n"; + $rf->update($fullname,"new"); + warn "[$time] Updatefile $reportname (@stringifiedmask)\n"; } - } elsif ($ev->IN_DELETE || $ev->IN_MOVED_FROM) { - $rf->update($ev->fullname,"delete"); - warn "==> Deletefile $fullname\n"; } elsif ($ev->IN_DELETE_SELF || $ev->IN_MOVE_SELF) { $ev->w->cancel; - warn "==> Delwatcher $fullname\n"; + warn "[$time] Delwatcher $reportname (@stringifiedmask)\n"; } else { - warn "==> Ignore $fullname\n"; + warn "[$time] Ignore $reportname (@stringifiedmask)\n"; } } if (time > $last_aggregate_call + $rf->interval_secs) { -- 2.11.4.GIT