backend: retry all trackers on send failure
author: Eric Wong <normalperson@yhbt.net>
Tue, 5 Feb 2013 21:57:59 +0000 (5 21:57 +0000)
committer: Eric Wong <normalperson@yhbt.net>
Tue, 5 Feb 2013 22:16:01 +0000 (5 22:16 +0000)
In single tracker configurations, a restarted tracker may cause
send()/write() failure on the TCP socket.  Retry immediately in
this case, since there's no danger even for non-idempotent
tracker requests.

lib/mogilefs/backend.rb
test/fresh.rb
test/test_fresh.rb

index afc2f1a..b9fc886 100644 (file)
@@ -138,12 +138,17 @@ class MogileFS::Backend
   end
 
   def dispatch_unlocked(request, timeout = @timeout) # :nodoc:
+    tries = nil
     begin
       io = socket
       io.timed_write(request, timeout)
       io
-    rescue SystemCallError, MogileFS::RequestTruncatedError  => err
-      @dead[@active_host] = [ Time.now, err ]
+    rescue SystemCallError, MogileFS::RequestTruncatedError => err
+      tries ||= Hash.new { |tries,host| tries[host] = 0 }
+      nr = tries[@active_host] += 1
+      if nr >= 2
+        @dead[@active_host] = [ Time.now, err ]
+      end
       shutdown_unlocked
       retry
     end
index 4db1e2c..e614b7d 100644 (file)
@@ -39,8 +39,7 @@ EOF
 
     @trackers = @hosts = [ "#@test_host:#@tracker_port" ]
     @tracker.close
-    x!("mogilefsd", "--daemon", "--config=#{@mogilefsd_conf.path}")
-    wait_for_port @tracker_port
+    start_tracker
     @admin = MogileFS::Admin.new(:hosts => @hosts)
     50.times do
       break if File.size(@mogstored_pid.path) > 0
@@ -48,6 +47,11 @@ EOF
     end
   end
 
+  def start_tracker
+    x!("mogilefsd", "--daemon", "--config=#{@mogilefsd_conf.path}")
+    wait_for_port @tracker_port
+  end
+
   def wait_for_port(port)
     tries = 50
     begin
index 706783e..5f420de 100644 (file)
@@ -165,4 +165,36 @@ class TestMogFresh < Test::Unit::TestCase
       end
     end
   end if IO.respond_to?(:copy_stream)
+
+  def test_single_tracker_restart
+    add_host_device_domain
+    client = MogileFS::MogileFS.new :hosts => @hosts, :domain => @domain
+
+    data = "data"
+    client.store_content("key", "default", data)
+    listing = client.list_keys
+    assert_instance_of Array, listing
+
+    # restart the tracker
+    s = TCPSocket.new(@test_host, @tracker_port)
+    s.write "!shutdown\r\n"
+    s.flush # just in case, MRI (at least) syncs by default
+    assert_nil s.gets
+
+    start_tracker
+
+    # transparent retry
+    listing2 = client.list_keys
+    assert_instance_of Array, listing2
+    assert_equal listing, listing2
+    assert_equal([['key'], 'key'], listing)
+
+    # kill the tracker
+    s = TCPSocket.new(@test_host, @tracker_port)
+    s.write "!shutdown\r\n"
+    s.flush # just in case, MRI (at least) syncs by default
+    assert_nil s.gets
+    @mogilefsd_pid = nil
+    assert_raises(MogileFS::UnreachableBackendError) { client.list_keys }
+  end
 end