From e76d44054d00059b8b6e7151a95d60ed3b7576b2 Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Sat, 8 Sep 2007 16:52:20 -0700 Subject: [PATCH] finish lifecycle conditions handling and add flapper condition --- History.txt | 12 +++- Manifest.txt | 3 + examples/events.god | 27 +++++++-- examples/gravatar.god | 10 ++-- lib/god.rb | 2 + lib/god/condition.rb | 31 ++++++++-- lib/god/conditions/flapping.rb | 92 ++++++++++++++++++++++++++++++ lib/god/conditions/tries.rb | 2 +- lib/god/hub.rb | 92 +++++++++++++++++------------- lib/god/trigger.rb | 34 +++++++++++ lib/god/watch.rb | 11 ++-- test/configs/child_events/child_events.god | 13 +++++ test/configs/child_polls/child_polls.god | 18 ++++-- test/helper.rb | 5 +- 14 files changed, 284 insertions(+), 68 deletions(-) create mode 100644 lib/god/conditions/flapping.rb create mode 100644 lib/god/trigger.rb diff --git a/History.txt b/History.txt index aaadf39..5c09991 100644 --- a/History.txt +++ b/History.txt @@ -1,4 +1,14 @@ -== 0.4.0 +== 0.5.0 + +* Major Enhancements + * Implement lifecycle scoped metric to allow for cross-state conditions + * Add TriggerCondition for conditions that need info about state changes +* Minor Enhancements + * Allow EventConditions to do transition overloading +* New Conditions + * Flapping < TriggerCondition - trigger on state change + +== 0.4.0 / 2007-09-13 * Major Enhancements * Add the ability for conditions to override transition state (for exceptional cases) diff --git a/Manifest.txt b/Manifest.txt index 935afa8..484f585 100644 --- a/Manifest.txt +++ b/Manifest.txt @@ -16,6 +16,7 @@ lib/god/condition.rb lib/god/conditions/always.rb lib/god/conditions/cpu_usage.rb lib/god/conditions/degrading_lambda.rb +lib/god/conditions/flapping.rb lib/god/conditions/lambda.rb lib/god/conditions/memory_usage.rb lib/god/conditions/process_exits.rb @@ -38,6 +39,7 @@ lib/god/sugar.rb lib/god/system/process.rb lib/god/timeline.rb lib/god/timer.rb +lib/god/trigger.rb lib/god/watch.rb 
test/configs/child_events/child_events.god test/configs/child_events/simple_server.rb @@ -59,6 +61,7 @@ test/suite.rb test/test_behavior.rb test/test_condition.rb test/test_conditions_process_running.rb +test/test_conditions_tries.rb test/test_dependency_graph.rb test/test_event_handler.rb test/test_god.rb diff --git a/examples/events.god b/examples/events.god index eb6b712..3627756 100644 --- a/examples/events.god +++ b/examples/events.god @@ -4,11 +4,11 @@ # Run with: # god -c /path/to/events.god -RAILS_ROOT = "/Users/tom/dev/helloworld" +RAILS_ROOT = "/Users/tom/dev/git/helloworld" God.watch do |w| w.name = "local-3000" - w.interval = 5 # seconds + w.interval = 5.seconds w.start = "mongrel_rails start -P ./log/mongrel.pid -c #{RAILS_ROOT} -d" w.stop = "mongrel_rails stop -P ./log/mongrel.pid -c #{RAILS_ROOT}" w.pid_file = File.join(RAILS_ROOT, "log/mongrel.pid") @@ -28,6 +28,12 @@ God.watch do |w| on.condition(:process_running) do |c| c.running = true end + + # failsafe + on.condition(:tries) do |c| + c.times = 3 + c.transition = :start + end end # start if process is not running @@ -39,14 +45,27 @@ God.watch do |w| w.transition(:up, :restart) do |on| on.condition(:memory_usage) do |c| c.interval = 20 - c.above = (50 * 1024) # 50mb + c.above = 50.megabytes c.times = [3, 5] end on.condition(:cpu_usage) do |c| c.interval = 10 - c.above = 10 # percent + c.above = 10.percent c.times = [3, 5] end end + + # lifecycle + w.lifecycle do |on| + on.condition(:flapping) do |c| + c.to_state = [:start, :restart] + c.times = 5 + c.within = 1.minute + c.transition = :unmonitored + c.retry_in = 10.minutes + c.retry_times = 5 + c.retry_within = 2.hours + end + end end \ No newline at end of file diff --git a/examples/gravatar.god b/examples/gravatar.god index 331da6a..b52c147 100644 --- a/examples/gravatar.god +++ b/examples/gravatar.god @@ -8,12 +8,12 @@ RAILS_ROOT = "/var/www/gravatar2/current" %w{8200 8201 8202}.each do |port| God.watch do |w| w.name = 
"gravatar2-mongrel-#{port}" - w.interval = 30 # seconds + w.interval = 30.seconds w.start = "mongrel_rails cluster::start --only #{port} \ -C #{RAILS_ROOT}/config/mongrel_cluster.yml" w.stop = "mongrel_rails cluster::stop --only #{port} \ -C #{RAILS_ROOT}/config/mongrel_cluster.yml" - w.grace = 10 # seconds + w.grace = 10.seconds pid_file = File.join(RAILS_ROOT, "log/mongrel.#{port}.pid") @@ -21,19 +21,19 @@ RAILS_ROOT = "/var/www/gravatar2/current" w.start_if do |start| start.condition(:process_running) do |c| - c.interval = 5 # seconds + c.interval = 5.seconds c.running = false end end w.restart_if do |restart| restart.condition(:memory_usage) do |c| - c.above = (150 * 1024) # 150mb + c.above = 150.megabytes c.times = [3, 5] # 3 out of 5 intervals end restart.condition(:cpu_usage) do |c| - c.above = 50 # percent + c.above = 50.percent c.times = 5 end end diff --git a/lib/god.rb b/lib/god.rb index ca5de56..2df91bc 100644 --- a/lib/god.rb +++ b/lib/god.rb @@ -26,6 +26,7 @@ require 'god/conditions/cpu_usage' require 'god/conditions/always' require 'god/conditions/lambda' require 'god/conditions/degrading_lambda' +require 'god/conditions/flapping' require 'god/reporter' require 'god/server' @@ -35,6 +36,7 @@ require 'god/hub' require 'god/metric' require 'god/watch' +require 'god/trigger' require 'god/event_handler' require 'god/registry' require 'god/process' diff --git a/lib/god/condition.rb b/lib/god/condition.rb index 2217808..8af078d 100644 --- a/lib/god/condition.rb +++ b/lib/god/condition.rb @@ -1,6 +1,8 @@ module God class Condition < Behavior + attr_accessor :transition + # Generate a Condition of the given kind. The proper class if found by camel casing the # kind (which is given as an underscored symbol). # +kind+ is the underscored symbol representing the class (e.g. 
foo_bar for God::Conditions::FooBar) @@ -8,7 +10,7 @@ module God sym = kind.to_s.capitalize.gsub(/_(.)/){$1.upcase}.intern c = God::Conditions.const_get(sym).new - unless c.kind_of?(PollCondition) || c.kind_of?(EventCondition) + unless c.kind_of?(PollCondition) || c.kind_of?(EventCondition) || c.kind_of?(TriggerCondition) abort "Condition '#{c.class.name}' must subclass either God::PollCondition or God::EventCondition" end @@ -22,7 +24,6 @@ module God class PollCondition < Condition # all poll conditions can specify a poll interval attr_accessor :interval - attr_accessor :transition # Override this method in your Conditions (optional) def before @@ -33,7 +34,7 @@ module God # Return true if the test passes (everything is ok) # Return false otherwise def test - raise AbstractMethodNotOverriddenError.new("Condition#test must be overridden in subclasses") + raise AbstractMethodNotOverriddenError.new("PollCondition#test must be overridden in subclasses") end # Override this method in your Conditions (optional) @@ -43,7 +44,29 @@ module God class EventCondition < Condition def register - + raise AbstractMethodNotOverriddenError.new("EventCondition#register must be overridden in subclasses") + end + + def deregister + raise AbstractMethodNotOverriddenError.new("EventCondition#deregister must be overridden in subclasses") + end + end + + class TriggerCondition < Condition + def process(event, payload) + raise AbstractMethodNotOverriddenError.new("TriggerCondition#process must be overridden in subclasses") + end + + def trigger + Hub.trigger(self) + end + + def register + Trigger.register(self) + end + + def deregister + Trigger.deregister(self) end end diff --git a/lib/god/conditions/flapping.rb b/lib/god/conditions/flapping.rb new file mode 100644 index 0000000..b8ffa86 --- /dev/null +++ b/lib/god/conditions/flapping.rb @@ -0,0 +1,92 @@ +module God + module Conditions + + class Flapping < TriggerCondition + attr_accessor :times, :within, :from_state, :to_state, 
:retry_in, :retry_times, :retry_within + + def prepare + @timeline = Timeline.new(self.times) + @retry_timeline = Timeline.new(self.retry_times) + end + + def valid? + valid = true + valid &= complain("You must specify the 'times' attribute for :flapping") if self.times.nil? + valid &= complain("You must specify the 'within' attribute for :flapping") if self.within.nil? + valid &= complain("You must specify either the 'from_state', 'to_state', or both attributes for :flapping") if self.from_state.nil? && self.to_state.nil? + valid + end + + def process(event, payload) + begin + if event == :state_change + event_from_state, event_to_state = *payload + + from_state_match = !self.from_state || self.from_state && Array(self.from_state).include?(event_from_state) + to_state_match = !self.to_state || self.to_state && Array(self.to_state).include?(event_to_state) + + if from_state_match && to_state_match + @timeline << Time.now + + concensus = (@timeline.size == self.times) + duration = (@timeline.last - @timeline.first) < self.within + + if concensus && duration + trigger + retry_mechanism + end + end + end + rescue => e + puts e.message + puts e.backtrace.join("\n") + end + end + + private + + def retry_mechanism + if self.retry_in + @retry_timeline << Time.now + + concensus = (@retry_timeline.size == self.retry_times) + duration = (@retry_timeline.last - @retry_timeline.first) < self.retry_within + + if concensus && duration + # give up + Thread.new do + sleep 1 + + # log + msg = "#{self.watch.name} giving up" + Syslog.debug(msg) + LOG.log(self.watch, :info, msg) + end + else + # try again later + Thread.new do + sleep 1 + + # log + msg = "#{self.watch.name} auto-reenable monitoring in #{self.retry_in} seconds" + Syslog.debug(msg) + LOG.log(self.watch, :info, msg) + + sleep self.retry_in + + # log + msg = "#{self.watch.name} auto-reenabling monitoring" + Syslog.debug(msg) + LOG.log(self.watch, :info, msg) + + if self.watch.state == :unmonitored + self.watch.monitor + 
end + end + end + end + end + end + + end +end \ No newline at end of file diff --git a/lib/god/conditions/tries.rb b/lib/god/conditions/tries.rb index 176224a..191b1ed 100644 --- a/lib/god/conditions/tries.rb +++ b/lib/god/conditions/tries.rb @@ -18,7 +18,7 @@ module God @timeline << Time.now concensus = (@timeline.size == self.times) - duration = within.nil? || (@timeline.last - @timeline.first) < self.within + duration = self.within.nil? || (@timeline.last - @timeline.first) < self.within if concensus && duration @timeline.clear if within.nil? diff --git a/lib/god/hub.rb b/lib/god/hub.rb index e7da374..51b4e12 100644 --- a/lib/god/hub.rb +++ b/lib/god/hub.rb @@ -3,44 +3,40 @@ module God class Hub class << self # directory to hold conditions and their corresponding metric - # key: condition - # val: metric + # {condition => metric} attr_accessor :directory end self.directory = {} def self.attach(condition, metric) - # add the condition to the directory self.directory[condition] = metric - # schedule poll condition - # register event condition - if condition.kind_of?(PollCondition) - Timer.get.schedule(condition, 0) - else - condition.register + case condition + when PollCondition + Timer.get.schedule(condition, 0) + when EventCondition, TriggerCondition + condition.register end end def self.detach(condition) - # remove the condition from the directory self.directory.delete(condition) - # unschedule any pending polls - Timer.get.unschedule(condition) - - # deregister event condition - if condition.kind_of?(EventCondition) - condition.deregister + case condition + when PollCondition + Timer.get.unschedule(condition) + when EventCondition, TriggerCondition + condition.deregister end end def self.trigger(condition) - if condition.kind_of?(PollCondition) - self.handle_poll(condition) - elsif condition.kind_of?(EventCondition) - self.handle_event(condition) + case condition + when PollCondition + self.handle_poll(condition) + when EventCondition, TriggerCondition + 
self.handle_event(condition) end end @@ -58,20 +54,8 @@ module God # run the test result = condition.test - # construct destination description - dest_desc = - if metric.destination - metric.destination.inspect - else - if condition.transition - {true => condition.transition}.inspect - else - 'none' - end - end - # log - msg = watch.name + ' ' + condition.class.name + " [#{result}] " + dest_desc + msg = watch.name + ' ' + condition.class.name + " [#{result}] " + self.dest_desc(metric, condition) Syslog.debug(msg) LOG.log(watch, :info, msg) @@ -119,15 +103,43 @@ module God def self.handle_event(condition) Thread.new do metric = self.directory[condition] - watch = metric.watch - watch.mutex.synchronize do - msg = watch.name + ' ' + condition.class.name + " [true] " + metric.destination.inspect - Syslog.debug(msg) - LOG.log(watch, :info, msg) + unless metric.nil? + watch = metric.watch + + watch.mutex.synchronize do + msg = watch.name + ' ' + condition.class.name + " [true] " + self.dest_desc(metric, condition) + Syslog.debug(msg) + LOG.log(watch, :info, msg) - dest = metric.destination[true] - watch.move(dest) + # get the destination + dest = + if condition.transition + # condition override + condition.transition + else + # regular + metric.destination && metric.destination[true] + end + + if dest + watch.move(dest) + end + end + end + end + end + + # helpers + + def self.dest_desc(metric, condition) + if metric.destination + metric.destination.inspect + else + if condition.transition + {true => condition.transition}.inspect + else + 'none' end end end diff --git a/lib/god/trigger.rb b/lib/god/trigger.rb new file mode 100644 index 0000000..580613c --- /dev/null +++ b/lib/god/trigger.rb @@ -0,0 +1,34 @@ +module God + + class Trigger + + class << self + attr_accessor :triggers + end + + @triggers = [] + @mutex = Mutex.new + + def self.register(condition) + @mutex.synchronize do + self.triggers << condition + end + end + + def self.deregister(condition) + 
@mutex.synchronize do + self.triggers.delete(condition) + end + end + + def self.broadcast(message, payload) + @mutex.synchronize do + self.triggers.each do |t| + t.process(message, payload) + end + end + end + + end + +end \ No newline at end of file diff --git a/lib/god/watch.rb b/lib/god/watch.rb index ead9d91..f9f67e2 100644 --- a/lib/god/watch.rb +++ b/lib/god/watch.rb @@ -166,9 +166,7 @@ module God LOG.log(self, :info, msg) # cleanup from current state - if from_state != :unmonitored - self.metrics[from_state].each { |m| m.disable } - end + self.metrics[from_state].each { |m| m.disable } if to_state == :unmonitored self.metrics[nil].each { |m| m.disable } @@ -183,9 +181,7 @@ module God end # move to new state - if to_state != :unmonitored - self.metrics[to_state].each { |m| m.enable } - end + self.metrics[to_state].each { |m| m.enable } # if no from state, enable lifecycle metric if from_state == :unmonitored @@ -195,6 +191,9 @@ module God # set state self.state = to_state + # trigger + Trigger.broadcast(:state_change, [from_state, to_state]) + # return self self end diff --git a/test/configs/child_events/child_events.god b/test/configs/child_events/child_events.god index 40fe148..d5b953a 100644 --- a/test/configs/child_events/child_events.god +++ b/test/configs/child_events/child_events.god @@ -27,4 +27,17 @@ God.watch do |w| w.transition(:up, :start) do |on| on.condition(:process_exits) end + + # lifecycle + w.lifecycle do |on| + on.condition(:flapping) do |c| + c.to_state = [:start, :restart] + c.times = 5 + c.within = 20.seconds + c.transition = :unmonitored + c.retry_in = 10.seconds + c.retry_times = 2 + c.retry_within = 5.minutes + end + end end \ No newline at end of file diff --git a/test/configs/child_polls/child_polls.god b/test/configs/child_polls/child_polls.god index 760f845..fe3cf9c 100644 --- a/test/configs/child_polls/child_polls.god +++ b/test/configs/child_polls/child_polls.god @@ -1,17 +1,25 @@ God.watch do |w| w.name = 'child-polls' 
w.start = File.join(File.dirname(__FILE__), *%w[simple_server.rb]) - # w.stop = '' w.interval = 5 w.grace = 2 - w.uid = 'tom' - w.gid = 'tom' - w.group = 'test' - w.log = File.join(File.dirname(__FILE__), *%w[out.log]) w.start_if do |start| start.condition(:process_running) do |c| c.running = false end end + + # lifecycle + w.lifecycle do |on| + on.condition(:flapping) do |c| + c.to_state = [:start, :restart] + c.times = 3 + c.within = 20.seconds + c.transition = :unmonitored + c.retry_in = 10.seconds + c.retry_times = 2 + c.retry_within = 5.minutes + end + end end \ No newline at end of file diff --git a/test/helper.rb b/test/helper.rb index af7101e..f56f258 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -42,8 +42,9 @@ module God end class FakeEventCondition < EventCondition - def test - true + def register + end + def deregister end end end -- 2.11.4.GIT