1 $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
13 require 'god/system/process'
14 require 'god/dependency_graph'
15 require 'god/timeline'
16 require 'god/configurable'
20 require 'god/behavior'
21 require 'god/behaviors/clean_pid_file'
22 require 'god/behaviors/notify_when_flapping'
24 require 'god/condition'
25 require 'god/conditions/process_running'
26 require 'god/conditions/process_exits'
27 require 'god/conditions/tries'
28 require 'god/conditions/memory_usage'
29 require 'god/conditions/cpu_usage'
30 require 'god/conditions/always'
31 require 'god/conditions/lambda'
32 require 'god/conditions/degrading_lambda'
33 require 'god/conditions/flapping'
34 require 'god/conditions/http_response_code'
37 require 'god/contacts/email'
39 require 'god/reporter'
48 require 'god/event_handler'
49 require 'god/registry'
54 $:.unshift File.join(File.dirname(__FILE__), *%w[.. ext god])
57 LOG.datetime_format = "%Y-%m-%d %H:%M:%S "
59 GOD_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
72 def with_stdout_captured
84 God::EventHandler.load
87 alias_method :abort_orig, :abort
91 LOG.log(nil, :error, text) unless text.empty?
95 alias_method :exit_orig, :exit
104 def safe_attr_accessor(*args)
106 define_method((arg.to_s + "=").intern) do |other|
107 if !self.running && self.inited
108 abort "God.#{arg} must be set before any Tasks are defined"
111 if self.running && self.inited
112 LOG.log(nil, :warn, "God.#{arg} can't be set while god is running")
116 instance_variable_set(('@' + arg.to_s).intern, other)
119 define_method(arg) do
120 instance_variable_get(('@' + arg.to_s).intern)
129 LOG_BUFFER_SIZE_DEFAULT = 1000
130 PID_FILE_DIRECTORY_DEFAULT = '/var/run/god'
131 DRB_PORT_DEFAULT = 17165
132 DRB_ALLOW_DEFAULT = ['127.0.0.1']
136 safe_attr_accessor :host,
143 attr_accessor :inited,
155 yield self if block_given?
158 def self.internal_init
160 return if self.inited
165 self.pending_watches = []
167 self.contact_groups = {}
170 self.log_buffer_size ||= LOG_BUFFER_SIZE_DEFAULT
171 self.pid_file_directory ||= PID_FILE_DIRECTORY_DEFAULT
172 self.port ||= DRB_PORT_DEFAULT
173 self.allow ||= DRB_ALLOW_DEFAULT
174 LOG.level = Logger::INFO
176 # init has been executed
183 # Instantiate a new, empty Watch object and pass it to the mandatory
184 # block. The attributes of the watch will be set by the configuration
186 def self.watch(&block)
187 self.task(Watch, &block)
190 # Instantiate a new, empty Task object and pass it to the mandatory
191 # block. The attributes of the task will be set by the configuration
193 def self.task(klass = Task)
199 # do the post-configuration
202 # if running, completely remove the watch (if necessary) to
203 # prepare for the reload
204 existing_watch = self.watches[t.name]
205 if self.running && existing_watch
206 self.unwatch(existing_watch)
209 # ensure the new watch has a unique name
210 if self.watches[t.name] || self.groups[t.name]
211 abort "Task name '#{t.name}' already used for a Task or Group"
214 # ensure watch is internally valid
215 t.valid? || abort("Task '#{t.name}' is not valid (see above)")
217 # add to list of watches
218 self.watches[t.name] = t
220 # add to pending watches
221 self.pending_watches << t
223 # add to group if specified
225 # ensure group name hasn't been used for a watch already
226 if self.watches[t.group]
227 abort "Group name '#{t.group}' already used for a Task"
230 self.groups[t.group] ||= []
231 self.groups[t.group] << t
238 def self.unwatch(watch)
245 # remove from watches
246 self.watches.delete(watch.name)
250 self.groups[watch.group].delete(watch)
254 def self.contact(kind)
257 # create the contact
259 c = Contact.generate(kind)
260 rescue NoSuchContactError => e
264 # send to block so config can set attributes
265 yield(c) if block_given?
267 # call prepare on the contact
270 # ensure the new contact has a unique name
271 if self.contacts[c.name] || self.contact_groups[c.name]
272 abort "Contact name '#{c.name}' already used for a Contact or Contact Group"
275 # abort if the Contact is invalid, the Contact will have printed
276 # out its own error messages by now
277 unless Contact.valid?(c) && c.valid?
278 abort "Exiting on invalid contact"
281 # add to list of contacts
282 self.contacts[c.name] = c
284 # add to contact group if specified
286 # ensure group name hasn't been used for a contact already
287 if self.contacts[c.group]
288 abort "Contact Group name '#{c.group}' already used for a Contact"
291 self.contact_groups[c.group] ||= []
292 self.contact_groups[c.group] << c
296 def self.control(name, command)
297 # get the list of watches
298 watches = Array(self.watches[name] || self.groups[name])
304 when "start", "monitor"
305 watches.each { |w| jobs << Thread.new { w.monitor if w.state != :up } }
307 watches.each { |w| jobs << Thread.new { w.move(:restart) } }
309 watches.each { |w| jobs << Thread.new { w.unmonitor.action(:stop) if w.state != :unmonitored } }
311 watches.each { |w| jobs << Thread.new { w.unmonitor if w.state != :unmonitored } }
313 raise InvalidCommandError.new
316 jobs.each { |j| j.join }
322 self.watches.sort.each do |name, w|
324 w.unmonitor if w.state != :unmonitored
325 w.action(:stop) if w.alive?
330 return true unless self.watches.map { |name, w| w.alive? }.any?
343 self.watches.map do |name, w|
344 info[name] = {:state => w.state}
349 def self.running_log(watch_name, since)
350 unless self.watches[watch_name]
351 raise NoSuchWatchError.new
354 LOG.watch_log_since(watch_name, since)
357 def self.running_load(code, filename)
364 CONFIG_FILE.replace(filename)
365 eval(code, nil, filename)
366 self.pending_watches.each { |w| w.monitor if w.autostart? }
367 watches = self.pending_watches.dup
368 self.pending_watches.clear
369 rescue Exception => e
370 # don't ever let running_load take down god
371 errors << LOG.finish_capture
373 unless e.instance_of?(SystemExit)
374 errors << e.message << "\n"
375 errors << e.backtrace.join("\n")
383 Dir[glob].each do |f|
390 unless test(?d, self.pid_file_directory)
392 FileUtils.mkdir_p(self.pid_file_directory)
393 rescue Errno::EACCES => e
394 abort "Failed to create pid file directory: #{e.message}"
400 unless test(?w, self.pid_file_directory)
401 abort "The pid file directory (#{self.pid_file_directory}) is not writable by #{Etc.getlogin}"
411 self.server = Server.new(self.host, self.port, self.allow)
413 # start event handler system
414 EventHandler.start if EventHandler.loaded?
416 # start the timer system
419 # start monitoring any watches set to autostart
420 self.watches.values.each { |w| w.monitor if w.autostart? }
422 # clear pending watches
423 self.pending_watches.clear
428 # join the timer thread so we don't exit