finish lifecycle conditions handling and add flapper condition
[god.git] / lib / god / conditions / flapping.rb
blobb8ffa8632eac18cedd63b70373e14e92da9802ac
module God
  module Conditions

    # Trigger condition that fires when a watch goes through matching state
    # changes too often in a short period ("flapping").
    #
    # Attributes:
    #   times        - number of matching state changes that counts as flapping
    #   within       - window (compared against a Time difference, i.e. seconds)
    #                  in which those changes must fall
    #   from_state   - state, or list of states, the change must start from
    #   to_state     - state, or list of states, the change must end in
    #   retry_in     - optional; when set, monitoring is re-enabled this many
    #                  seconds after the condition triggers
    #   retry_times  - number of retries that counts as "too many"
    #   retry_within - window in which those retries must fall before giving up
    #
    # At least one of from_state / to_state must be given. retry_times and
    # retry_within are required whenever retry_in is set.
    class Flapping < TriggerCondition
      attr_accessor :times, :within, :from_state, :to_state, :retry_in, :retry_times, :retry_within

      # Build the two timelines used to track trigger events and retries.
      # NOTE(review): Timeline is defined elsewhere; it is presumably a list
      # bounded to the given number of most recent entries -- confirm.
      def prepare
        @timeline = Timeline.new(self.times)
        @retry_timeline = Timeline.new(self.retry_times)
      end

      # Check that the required attributes were configured.
      #
      # Returns true when the configuration is usable; otherwise reports every
      # problem through complain and returns its (falsy) result.
      def valid?
        valid = true
        valid &= complain("You must specify the 'times' attribute for :flapping") if self.times.nil?
        valid &= complain("You must specify the 'within' attribute for :flapping") if self.within.nil?
        valid &= complain("You must specify either the 'from_state', 'to_state', or both attributes for :flapping") if self.from_state.nil? && self.to_state.nil?

        # The retry logic compares an elapsed time against retry_within and the
        # retry count against retry_times. A missing retry_within makes that
        # comparison raise at runtime (where the error is only printed), so the
        # watch would never be re-enabled. Reject such configurations up front.
        if self.retry_in
          valid &= complain("You must specify the 'retry_times' attribute for :flapping when 'retry_in' is set") if self.retry_times.nil?
          valid &= complain("You must specify the 'retry_within' attribute for :flapping when 'retry_in' is set") if self.retry_within.nil?
        end

        valid
      end

      # Handle an event delivered to this condition.
      #
      # event   - event name; only :state_change is acted upon
      # payload - for :state_change, a [from_state, to_state] pair
      #
      # Records each matching state change and, once `times` of them have been
      # recorded with the oldest and newest less than `within` apart, calls
      # trigger and then starts the retry mechanism.
      #
      # Any StandardError raised while processing is printed to stdout and
      # swallowed so that it does not propagate to the caller.
      #
      # NOTE(review): the timeline is not reset after triggering, so further
      # matching changes inside the same window trigger again -- confirm this
      # is intended.
      def process(event, payload)
        return unless event == :state_change

        event_from_state, event_to_state = *payload
        return unless state_matches?(self.from_state, event_from_state) &&
                      state_matches?(self.to_state, event_to_state)

        @timeline << Time.now

        consensus = (@timeline.size == self.times)
        duration = (@timeline.last - @timeline.first) < self.within

        if consensus && duration
          trigger
          retry_mechanism
        end
      rescue => e
        puts e.message
        puts e.backtrace.join("\n")
      end

      private

      # True when no expectation is configured, or when the actual state is
      # one of the expected states (a single state or a list of states).
      def state_matches?(expected, actual)
        !expected || Array(expected).include?(actual)
      end

      # Send an informational message about this watch to syslog and to LOG.
      def log_info(msg)
        Syslog.debug(msg)
        LOG.log(self.watch, :info, msg)
      end

      # After a trigger, either schedule monitoring to be re-enabled in
      # `retry_in` seconds or, when `retry_times` retries have happened less
      # than `retry_within` apart, give up. Giving up only logs a message;
      # monitoring is not re-enabled. Does nothing unless `retry_in` is set.
      #
      # The work runs in a background thread so that process is not blocked.
      # NOTE(review): the initial one-second sleep presumably lets the
      # transition caused by trigger complete first -- confirm.
      def retry_mechanism
        return unless self.retry_in

        @retry_timeline << Time.now

        consensus = (@retry_timeline.size == self.retry_times)
        duration = (@retry_timeline.last - @retry_timeline.first) < self.retry_within

        if consensus && duration
          # give up
          Thread.new do
            sleep 1

            log_info("#{self.watch.name} giving up")
          end
        else
          # try again later
          Thread.new do
            sleep 1

            log_info("#{self.watch.name} auto-reenable monitoring in #{self.retry_in} seconds")

            sleep self.retry_in

            log_info("#{self.watch.name} auto-reenabling monitoring")

            # Only re-enable if nothing else changed the watch's state while
            # this thread was sleeping.
            if self.watch.state == :unmonitored
              self.watch.monitor
            end
          end
        end
      end
    end

  end
end