From da8c7fa0381f2e831df46e0208d091c820b2604e Mon Sep 17 00:00:00 2001 From: "Andreas J. Koenig" Date: Wed, 9 Feb 2011 21:54:52 +0100 Subject: [PATCH] use batch_update boosting performance --- Todo | 19 ++++++++++++++----- bin/rrr-server | 26 ++++++++++++++------------ 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/Todo b/Todo index 2cb00df..fa2f786 100644 --- a/Todo +++ b/Todo @@ -1,5 +1,19 @@ 2011-02-09 Andreas J. Koenig + * minor bug: fsck just added the lockfile to the index which should be + considered bookkeeping. + + * speed: a large directory remove that is done by the kernel in 1 + seconds triggers minutes of bookkeeping (07:56:42 - 08:01:10 for ~3000 + files, all due to Schlemiehl again. Want to do some collecting of + pending ops before actually writing to the RECENT files. Lock! + + Repeating the timing with 2600 files and it took 21:28:08 - 21:35:27. + + Now having rewritten the loop to use batch_update(): 4383 file removed + 21:43:37 - 21:44:12. From 7:30 to 0:35 while doing 60% more work that's + a joy. + * lockdirectory expiration? server died and blocked fsck for so long. * At the moment rrr-init is not needed anymore, rrr-server can be @@ -11,11 +25,6 @@ * IN_Q_OVERFLOW needs to trigger an fsck, likewise the entry into the server. - * speed: a large directory remove that is done by the kernel in 1 - seconds triggers minutes of bookkeeping (07:56:42 - 08:01:10 for ~3000 - files, all due to Schlemiehl again. Want to do some collecting of - pending ops before actually writing to the RECENT files. Lock! - 2011-01-30 Andreas J. Koenig * rrr-init and rrr-server need to be used such: diff --git a/bin/rrr-server b/bin/rrr-server index 179f362..cde88e8 100755 --- a/bin/rrr-server +++ b/bin/rrr-server @@ -98,13 +98,13 @@ foreach my $directory ( File::Find::Rule->new->directory->in($rootdir) ) { or die "watch creation failed"; } -sub newfile { - my($rf,$fullname) = @_; - $rf->update($fullname,"new"); +sub handle_file { + my($rf,$fullname,$type,$batch) = @_; + push @$batch, {path => $fullname, type => $type}; } sub newdir { - my($inotify,$rf,$fullname) = @_; + my($inotify,$rf,$fullname,$batch) = @_; $inotify->watch ( $fullname, @@ -125,10 +125,10 @@ sub newdir { my $abs = File::Spec->catfile($fullname,$dirent); if (-l $abs || -f _) { warn "[..:..:..] Readdir_F $abs\n"; - newfile($rf,$abs); + handle_file($rf,$abs,"new",$batch); } elsif (-d $abs) { warn "[..:..:..] Readdir_D $abs\n"; - newdir($inotify,$rf,$abs); + newdir($inotify,$rf,$abs,$batch); } } } @@ -138,7 +138,7 @@ my $requires_fsck = 1; my $have_warned_fsck = 0; sub handle_event { - my $ev = shift; + my($ev,$batch) = @_; my @stringifiedmask; for my $watch ( "IN_CREATE", "IN_CLOSE_WRITE", "IN_MOVED_TO", # new @@ -173,20 +173,20 @@ sub handle_event { # it to $rf. $rf must be robust enough to swallow bogus # deletes. Alternatively we could look into $rf whether # this object is known, couldn't we? - $rf->update($fullname,"delete"); + handle_file($rf,$fullname,"delete",$batch); warn "[$time] Deleteobj $reportname (@stringifiedmask)\n"; } elsif ($ev->IN_DELETE_SELF || $ev->IN_MOVE_SELF) { $ev->w->cancel; warn "[$time] Delwatcher $reportname (@stringifiedmask)\n"; } elsif ($ev->IS_DIR) { - newdir($inotify,$rf,$fullname); + newdir($inotify,$rf,$fullname,$batch); warn "[$time] Newwatcher $reportname (@stringifiedmask)\n"; } elsif (-l $fullname) { - newfile($rf,$fullname); + handle_file($rf,$fullname,"new",$batch); warn "[$time] Updatelink $reportname (@stringifiedmask)\n"; } elsif (-f _) { if ($ev->IN_CLOSE_WRITE || $ev->IN_MOVED_TO) { - newfile($rf,$fullname); + handle_file($rf,$fullname,"new",$batch); warn "[$time] Updatefile $reportname (@stringifiedmask)\n"; } } else { @@ -205,9 +205,11 @@ while () { print "Alert: inotify read error: $!"; last; } + my @batch; foreach my $event (@events) { - handle_event($event); + handle_event($event,\@batch); } + $rf->batch_update(\@batch); if ($requires_fsck) { if (time > $have_warned_fsck + 3600) { warn "REMINDER: TODO: HANDLE FSCK"; -- 2.11.4.GIT