1 $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
13 require 'god/system/process'
14 require 'god/dependency_graph'
15 require 'god/timeline'
16 require 'god/configurable'
20 require 'god/behavior'
21 require 'god/behaviors/clean_pid_file'
22 require 'god/behaviors/notify_when_flapping'
24 require 'god/condition'
25 require 'god/conditions/process_running'
26 require 'god/conditions/process_exits'
27 require 'god/conditions/tries'
28 require 'god/conditions/memory_usage'
29 require 'god/conditions/cpu_usage'
30 require 'god/conditions/always'
31 require 'god/conditions/lambda'
32 require 'god/conditions/degrading_lambda'
33 require 'god/conditions/flapping'
34 require 'god/conditions/http_response_code'
37 require 'god/contacts/email'
47 require 'god/event_handler'
48 require 'god/registry'
53 $:.unshift File.join(File.dirname(__FILE__), *%w[.. ext god])
56 LOG.datetime_format = "%Y-%m-%d %H:%M:%S "
58 # The $run global determines whether god should be started when the
59 # program would normally end. This should be set to true when god
60 # should be started (e.g. `god -c <config file>`) and false otherwise
64 GOD_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
72 # Return the binding of god's root level
77 God::EventHandler.load
80 alias_method :abort_orig, :abort
84 LOG.log(nil, :error, text) if text
85 text ? abort_orig(text) : exit(1)
88 alias_method :exit_orig, :exit
97 def safe_attr_accessor(*args)
99 define_method((arg.to_s + "=").intern) do |other|
100 if !self.running && self.inited
101 abort "God.#{arg} must be set before any Tasks are defined"
104 if self.running && self.inited
105 LOG.log(nil, :warn, "God.#{arg} can't be set while god is running")
109 instance_variable_set(('@' + arg.to_s).intern, other)
112 define_method(arg) do
113 instance_variable_get(('@' + arg.to_s).intern)
122 LOG_BUFFER_SIZE_DEFAULT = 1000
123 PID_FILE_DIRECTORY_DEFAULT = '/var/run/god'
124 DRB_PORT_DEFAULT = 17165
125 DRB_ALLOW_DEFAULT = ['127.0.0.1']
129 safe_attr_accessor :host,
136 attr_accessor :inited,
139 :pending_watch_states,
147 # initialize class instance variables
151 self.log_buffer_size = nil
152 self.pid_file_directory = nil
154 def self.internal_init
156 return if self.inited
161 self.pending_watches = []
162 self.pending_watch_states = {}
164 self.contact_groups = {}
167 self.log_buffer_size ||= LOG_BUFFER_SIZE_DEFAULT
168 self.pid_file_directory ||= PID_FILE_DIRECTORY_DEFAULT
169 self.port ||= DRB_PORT_DEFAULT
170 self.allow ||= DRB_ALLOW_DEFAULT
171 LOG.level = Logger::INFO
173 # init has been executed
180 # Instantiate a new, empty Watch object and pass it to the mandatory
181 # block. The attributes of the watch will be set by the configuration
183 def self.watch(&block)
184 self.task(Watch, &block)
187 # Instantiate a new, empty Task object and pass it to the mandatory
188 # block. The attributes of the task will be set by the configuration
190 def self.task(klass = Task)
196 # do the post-configuration
199 # if running, completely remove the watch (if necessary) to
200 # prepare for the reload
201 existing_watch = self.watches[t.name]
202 if self.running && existing_watch
203 self.pending_watch_states[existing_watch.name] = existing_watch.state
204 self.unwatch(existing_watch)
207 # ensure the new watch has a unique name
208 if self.watches[t.name] || self.groups[t.name]
209 abort "Task name '#{t.name}' already used for a Task or Group"
212 # ensure watch is internally valid
213 t.valid? || abort("Task '#{t.name}' is not valid (see above)")
215 # add to list of watches
216 self.watches[t.name] = t
218 # add to pending watches
219 self.pending_watches << t
221 # add to group if specified
223 # ensure group name hasn't been used for a watch already
224 if self.watches[t.group]
225 abort "Group name '#{t.group}' already used for a Task"
228 self.groups[t.group] ||= []
229 self.groups[t.group] << t
236 if self.running && existing_watch
237 LOG.log(t, :info, "#{t.name} Reloaded config")
239 LOG.log(t, :info, "#{t.name} Loaded config")
243 def self.unwatch(watch)
245 watch.unmonitor unless watch.state == :unmonitored
250 # remove from watches
251 self.watches.delete(watch.name)
255 self.groups[watch.group].delete(watch)
259 def self.contact(kind)
262 # create the contact
264 c = Contact.generate(kind)
265 rescue NoSuchContactError => e
269 # send to block so config can set attributes
270 yield(c) if block_given?
272 # call prepare on the contact
275 # remove existing contacts of same name
276 existing_contact = self.contacts[c.name]
277 if self.running && existing_contact
278 self.uncontact(existing_contact)
281 # ensure the new contact has a unique name
282 if self.contacts[c.name] || self.contact_groups[c.name]
283 abort "Contact name '#{c.name}' already used for a Contact or Contact Group"
286 # abort if the Contact is invalid; the Contact will have printed
287 # out its own error messages by now
288 unless Contact.valid?(c) && c.valid?
289 abort "Exiting on invalid contact"
292 # add to list of contacts
293 self.contacts[c.name] = c
295 # add to contact group if specified
297 # ensure group name hasn't been used for a contact already
298 if self.contacts[c.group]
299 abort "Contact Group name '#{c.group}' already used for a Contact"
302 self.contact_groups[c.group] ||= []
303 self.contact_groups[c.group] << c
307 def self.uncontact(contact)
308 self.contacts.delete(contact.name)
310 self.contact_groups[contact.group].delete(contact)
314 def self.control(name, command)
315 # get the list of watches
316 watches = Array(self.watches[name] || self.groups[name])
322 when "start", "monitor"
323 watches.each { |w| jobs << Thread.new { w.monitor if w.state != :up } }
325 watches.each { |w| jobs << Thread.new { w.move(:restart) } }
327 watches.each { |w| jobs << Thread.new { w.unmonitor.action(:stop) if w.state != :unmonitored } }
329 watches.each { |w| jobs << Thread.new { w.unmonitor if w.state != :unmonitored } }
331 raise InvalidCommandError.new
334 jobs.each { |j| j.join }
336 watches.map { |x| x.name }
340 self.watches.sort.each do |name, w|
342 w.unmonitor if w.state != :unmonitored
343 w.action(:stop) if w.alive?
348 return true unless self.watches.map { |name, w| w.alive? }.any?
361 self.watches.map do |name, w|
362 info[name] = {:state => w.state}
367 def self.running_log(watch_name, since)
368 unless self.watches[watch_name]
369 raise NoSuchWatchError.new
372 LOG.watch_log_since(watch_name, since)
375 def self.running_load(code, filename)
382 eval(code, root_binding, filename)
383 self.pending_watches.each do |w|
384 if previous_state = self.pending_watch_states[w.name]
385 w.monitor unless previous_state == :unmonitored
387 w.monitor if w.autostart?
390 watches = self.pending_watches.dup
391 self.pending_watches.clear
392 self.pending_watch_states.clear
393 rescue Exception => e
394 # don't ever let running_load take down god
395 errors << LOG.finish_capture
397 unless e.instance_of?(SystemExit)
398 errors << e.message << "\n"
399 errors << e.backtrace.join("\n")
403 names = watches.map { |x| x.name }
408 Dir[glob].each do |f|
415 unless test(?d, self.pid_file_directory)
417 FileUtils.mkdir_p(self.pid_file_directory)
418 rescue Errno::EACCES => e
419 abort "Failed to create pid file directory: #{e.message}"
425 unless test(?w, self.pid_file_directory)
426 abort "The pid file directory (#{self.pid_file_directory}) is not writable by #{Etc.getlogin}"
436 self.server = Socket.new(self.port)
438 # start event handler system
439 EventHandler.start if EventHandler.loaded?
441 # start the timer system
444 # start monitoring any watches set to autostart
445 self.watches.values.each { |w| w.monitor if w.autostart? }
447 # clear pending watches
448 self.pending_watches.clear
453 # join the timer thread so we don't exit