1 $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
13 require 'god/system/process'
14 require 'god/dependency_graph'
15 require 'god/timeline'
16 require 'god/configurable'
20 require 'god/behavior'
21 require 'god/behaviors/clean_pid_file'
22 require 'god/behaviors/notify_when_flapping'
24 require 'god/condition'
25 require 'god/conditions/process_running'
26 require 'god/conditions/process_exits'
27 require 'god/conditions/tries'
28 require 'god/conditions/memory_usage'
29 require 'god/conditions/cpu_usage'
30 require 'god/conditions/always'
31 require 'god/conditions/lambda'
32 require 'god/conditions/degrading_lambda'
33 require 'god/conditions/flapping'
36 require 'god/contacts/email'
38 require 'god/reporter'
47 require 'god/event_handler'
48 require 'god/registry'
53 $:.unshift File.join(File.dirname(__FILE__), *%w[.. ext god])
61 God::EventHandler.load
64 # Override abort to exit without executing the at_exit hook
76 LOG_BUFFER_SIZE_DEFAULT = 100
77 PID_FILE_DIRECTORY_DEFAULT = '/var/run/god'
78 DRB_PORT_DEFAULT = 17165
79 DRB_ALLOW_DEFAULT = ['127.0.0.1']
90 attr_accessor :inited,
102 abort "God.init must be called before any Watches"
107 # yield to the config file
108 yield self if block_given?
111 def self.internal_init
113 return if self.inited
118 self.pending_watches = []
120 self.contact_groups = {}
123 self.log_buffer_size ||= LOG_BUFFER_SIZE_DEFAULT
124 self.pid_file_directory ||= PID_FILE_DIRECTORY_DEFAULT
125 self.port ||= DRB_PORT_DEFAULT
126 self.allow ||= DRB_ALLOW_DEFAULT
128 # init has been executed
135 # Instantiate a new, empty Watch object and pass it to the mandatory
136 # block. The attributes of the watch will be set by the configuration file.
138 def self.watch(&block)
139 self.task(Watch, &block)
142 # Instantiate a new, empty Task object and pass it to the mandatory
143 # block. The attributes of the task will be set by the configuration file.
145 def self.task(klass = Task)
151 # do the post-configuration
154 # if running, completely remove the watch (if necessary) to
155 # prepare for the reload
156 existing_watch = self.watches[t.name]
157 if self.running && existing_watch
158 self.unwatch(existing_watch)
161 # ensure the new watch has a unique name
162 if self.watches[t.name] || self.groups[t.name]
163 abort "Task name '#{t.name}' already used for a Task or Group"
166 # ensure watch is internally valid
167 t.valid? || abort("Task '#{t.name}' is not valid (see above)")
169 # add to list of watches
170 self.watches[t.name] = t
172 # add to pending watches
173 self.pending_watches << t
175 # add to group if specified
177 # ensure group name hasn't been used for a watch already
178 if self.watches[t.group]
179 abort "Group name '#{t.group}' already used for a Task"
182 self.groups[t.group] ||= []
183 self.groups[t.group] << t
190 def self.unwatch(watch)
197 # remove from watches
198 self.watches.delete(watch.name)
202 self.groups[watch.group].delete(watch)
206 def self.contact(kind)
209 # create the contact
211 c = Contact.generate(kind)
212 rescue NoSuchContactError => e
216 # send to block so config can set attributes
217 yield(c) if block_given?
219 # call prepare on the contact
222 # ensure the new contact has a unique name
223 if self.contacts[c.name] || self.contact_groups[c.name]
224 abort "Contact name '#{c.name}' already used for a Contact or Contact Group"
227 # abort if the Contact is invalid, the Contact will have printed
228 # out its own error messages by now
229 unless Contact.valid?(c) && c.valid?
230 abort "Exiting on invalid contact"
233 # add to list of contacts
234 self.contacts[c.name] = c
236 # add to contact group if specified
238 # ensure group name hasn't been used for a contact already
239 if self.contacts[c.group]
240 abort "Contact Group name '#{c.group}' already used for a Contact"
243 self.contact_groups[c.group] ||= []
244 self.contact_groups[c.group] << c
248 def self.control(name, command)
249 # get the list of watches
250 watches = Array(self.watches[name] || self.groups[name])
256 when "start", "monitor"
257 watches.each { |w| jobs << Thread.new { w.monitor } }
259 watches.each { |w| jobs << Thread.new { w.move(:restart) } }
261 watches.each { |w| jobs << Thread.new { w.unmonitor.action(:stop) } }
263 watches.each { |w| jobs << Thread.new { w.unmonitor } }
265 raise InvalidCommandError.new
268 jobs.each { |j| j.join }
274 self.watches.sort.each do |name, w|
276 w.unmonitor if w.state
277 w.action(:stop) if w.alive?
282 return true unless self.watches.map { |name, w| w.alive? }.any?
295 self.watches.map do |name, w|
296 info[name] = {:state => w.state}
301 def self.running_log(watch_name, since)
302 unless self.watches[watch_name]
303 raise NoSuchWatchError.new
306 LOG.watch_log_since(watch_name, since)
309 def self.running_load(code)
311 self.pending_watches.each { |w| w.monitor if w.autostart? }
312 watches = self.pending_watches.dup
313 self.pending_watches.clear
318 Dir[glob].each do |f|
325 unless test(?d, self.pid_file_directory)
327 FileUtils.mkdir_p(self.pid_file_directory)
328 rescue Errno::EACCES => e
329 abort "Failed to create pid file directory: #{e.message}"
335 unless test(?w, self.pid_file_directory)
336 abort "The pid file directory (#{self.pid_file_directory}) is not writable by #{Etc.getlogin}"
346 self.server = Server.new(self.host, self.port, self.allow)
348 # start event handler system
349 EventHandler.start if EventHandler.loaded?
351 # start the timer system
354 # start monitoring any watches set to autostart
355 self.watches.values.each { |w| w.monitor if w.autostart? }
357 # clear pending watches
358 self.pending_watches.clear
363 # join the timer thread so we don't exit