From f27b83d2a8180e9124251355864264f27b0cd276 Mon Sep 17 00:00:00 2001 From: Tom Werner Date: Wed, 5 Sep 2007 17:18:39 -0700 Subject: [PATCH] fix potential problem with event registration failures --- lib/god/conditions/process_exits.rb | 4 +--- lib/god/conditions/tries.rb | 18 +++++++-------- lib/god/errors.rb | 3 +++ lib/god/hub.rb | 11 ++++++++- site/index.html | 23 ++++++++++++++++++- test/test_conditions_tries.rb | 46 +++++++++++++++++++++++++++++++++++++ 6 files changed, 91 insertions(+), 14 deletions(-) create mode 100644 test/test_conditions_tries.rb diff --git a/lib/god/conditions/process_exits.rb b/lib/god/conditions/process_exits.rb index 591e909..938fb42 100644 --- a/lib/god/conditions/process_exits.rb +++ b/lib/god/conditions/process_exits.rb @@ -16,9 +16,7 @@ module God Hub.trigger(self) end rescue StandardError - # Rapid consecutive kills of a process using ProcessExits can cause - # the registration to fail. Capturing the exception and having the - # start->up transition recover is the best current solution - Tom + raise EventRegistrationFailedError.new end end diff --git a/lib/god/conditions/tries.rb b/lib/god/conditions/tries.rb index cd35e45..176224a 100644 --- a/lib/god/conditions/tries.rb +++ b/lib/god/conditions/tries.rb @@ -2,14 +2,10 @@ module God module Conditions class Tries < PollCondition - attr_accessor :times + attr_accessor :times, :within def prepare - if self.times.kind_of?(Integer) - self.times = [self.times, self.times] - end - - @timeline = Timeline.new(self.times[1]) + @timeline = Timeline.new(self.times) end def valid? @@ -19,9 +15,13 @@ module God end def test - @timeline.push(true) - if @timeline.select { |x| x }.size >= self.times.first - @timeline.clear + @timeline << Time.now + + concensus = (@timeline.size == self.times) + duration = within.nil? || (@timeline.last - @timeline.first) < self.within + + if concensus && duration + @timeline.clear if within.nil? return true else return false diff --git a/lib/god/errors.rb b/lib/god/errors.rb index b4a11f0..85263c4 100644 --- a/lib/god/errors.rb +++ b/lib/god/errors.rb @@ -15,4 +15,7 @@ module God class InvalidCommandError < StandardError end + class EventRegistrationFailedError < StandardError + end + end \ No newline at end of file diff --git a/lib/god/hub.rb b/lib/god/hub.rb index 278d8dc..3a0a9d5 100644 --- a/lib/god/hub.rb +++ b/lib/god/hub.rb @@ -80,7 +80,16 @@ module God # transition or reschedule if dest # transition - watch.move(dest) + begin + watch.move(dest) + rescue EventRegistrationFailedError + msg = watch.name + ' Event registration failed, moving back to previous state' + Syslog.debug(msg) + LOG.log(watch, :info, msg) + + dest = watch.state + retry + end else # reschedule Timer.get.schedule(condition) diff --git a/site/index.html b/site/index.html index c63b4b2..a4f0bda 100644 --- a/site/index.html +++ b/site/index.html @@ -326,7 +326,7 @@ end end

Watches contain conditions grouped by the action to execute should they return true. I start with a start_if block that contains a single condition. Conditions are specified by calling condition with an identifier, in this case -:process_not_running. Each condition can specify a poll interval that will override the default watch interval. In this case, I want to check that the process is still running every 5 seconds instead of the 30 second interval that other conditions will inherit. The ability to set condition specific poll intervals makes it possible to run costly tests less often then cheap tests.

+:process_running. Each condition can specify a poll interval that will override the default watch interval. In this case, I want to check that the process is still running every 5 seconds instead of the 30 second interval that other conditions will inherit. The ability to set condition specific poll intervals makes it possible to run costly tests less often then cheap tests.

    w.restart_if do |restart|
       restart.condition(:memory_usage) do |c|
@@ -450,6 +450,12 @@ God.watch do |w|
     on.condition(:process_running) do |c|
       c.running = true
     end
+    
+    # failsafe
+    on.condition(:tries) do |c|
+      c.times = 3
+      c.transition = :start
+    end
   end
 
   # start if process is not running
@@ -496,10 +502,25 @@ end
     on.condition(:process_running) do |c|
       c.running = true
     end
+    
+    ...
   end

If god has determined that my process isn't running, the Watch will be put into the start state. Upon entering this state, the start command that I specified on the Watch will be called. In addition, the above transition specifies a condition that should be enabled when in either the start or restart states. The condition is another process_running, however this time I'm only interested in moving to another state once it returns true. A true return from this condition means that the process is running and it's ok to transition to the up state (second argument to transition).

+
  # determine when process has finished starting
+  w.transition([:start, :restart], :up) do |on|
+    ...
+    
+    # failsafe
+    on.condition(:tries) do |c|
+      c.times = 3
+      c.transition = :start
+    end
+  end
+ +

The other half of this transition uses the tries condition to ensure that god doesn't get stuck in this state. It's possible that the process could go down while the transition is being made, in which case god would end up polling forever to see if the process is up. Here I've specified that if this condition is called three times, god should override the normal transition destination and move to the start state instead. If you specify a transition attribute on any condition, that state will be transferred to instead of the normal transfer destination.

+
  # start if process is not running
   w.transition(:up, :start) do |on|
     on.condition(:process_exits)
diff --git a/test/test_conditions_tries.rb b/test/test_conditions_tries.rb
new file mode 100644
index 0000000..54f10d7
--- /dev/null
+++ b/test/test_conditions_tries.rb
@@ -0,0 +1,46 @@
+require File.dirname(__FILE__) + '/helper'
+
+class TestConditionsTries < Test::Unit::TestCase
+  def setup
+    @c = Conditions::Tries.new
+    @c.times = 3
+    @c.prepare
+  end
+  
+  def test_prepare_should_create_timeline
+    assert 3, @c.instance_variable_get(:@timeline).instance_variable_get(:@max_size)
+  end
+  
+  def test_test_should_return_true_if_called_three_times_within_one_second
+    assert !@c.test
+    assert !@c.test
+    assert @c.test
+  end
+  
+  def test_test_should_return_false_on_fourth_call_if_called_three_times_within_one_second
+    3.times { @c.test }
+    assert !@c.test
+  end
+end
+
+class TestConditionsTriesWithin < Test::Unit::TestCase
+  def setup
+    @c = Conditions::Tries.new
+    @c.times = 3
+    @c.within = 1.seconds
+    @c.prepare
+  end
+  
+  def test_test_should_return_true_if_called_three_times_within_one_second
+    assert !@c.test
+    assert !@c.test
+    assert @c.test
+  end
+  
+  def test_test_should_return_false_if_called_three_times_within_two_seconds
+    assert !@c.test
+    assert !@c.test
+    assert sleep(1)
+    assert !@c.test
+  end
+end
\ No newline at end of file
-- 
2.11.4.GIT