1 $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
15 require 'god/system/process'
16 require 'god/dependency_graph'
17 require 'god/timeline'
18 require 'god/configurable'
22 require 'god/behavior'
23 require 'god/behaviors/clean_pid_file'
24 require 'god/behaviors/notify_when_flapping'
26 require 'god/condition'
27 require 'god/conditions/process_running'
28 require 'god/conditions/process_exits'
29 require 'god/conditions/tries'
30 require 'god/conditions/memory_usage'
31 require 'god/conditions/cpu_usage'
32 require 'god/conditions/always'
33 require 'god/conditions/lambda'
34 require 'god/conditions/degrading_lambda'
35 require 'god/conditions/flapping'
36 require 'god/conditions/http_response_code'
37 require 'god/conditions/disk_usage'
38 require 'god/conditions/complex'
41 require 'god/contacts/email'
51 require 'god/event_handler'
52 require 'god/registry'
57 require 'god/cli/version'
58 require 'god/cli/command'
60 require 'god/diagnostics'
62 $:.unshift File.join(File.dirname(__FILE__), *%w[.. ext god])
64 # App wide logging system
66 LOG.datetime_format = "%Y-%m-%d %H:%M:%S "
68 def applog(watch, level, text)
69 LOG.log(watch, level, text)
72 # The $run global determines whether god should be started when the
73 # program would normally end. This should be set to true when god
74 # should be started (e.g. `god -c <config file>`) and false otherwise
78 GOD_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
80 # Return the binding of god's root level
85 # Load the event handler system
86 God::EventHandler.load
89 alias_method :abort_orig, :abort
93 applog(nil, :error, text) if text
97 alias_method :exit_orig, :exit
106 def safe_attr_accessor(*args)
108 define_method((arg.to_s + "=").intern) do |other|
109 if !self.running && self.inited
110 abort "God.#{arg} must be set before any Tasks are defined"
113 if self.running && self.inited
114 applog(nil, :warn, "God.#{arg} can't be set while god is running")
118 instance_variable_set(('@' + arg.to_s).intern, other)
121 define_method(arg) do
122 instance_variable_get(('@' + arg.to_s).intern)
131 LOG_BUFFER_SIZE_DEFAULT = 10
132 PID_FILE_DIRECTORY_DEFAULT = '/var/run/god'
133 DRB_PORT_DEFAULT = 17165
134 DRB_ALLOW_DEFAULT = ['127.0.0.1']
135 LOG_LEVEL_DEFAULT = :info
139 safe_attr_accessor :pid,
148 attr_accessor :inited,
151 :pending_watch_states,
159 # initialize class instance variables
164 self.log_buffer_size = nil
165 self.pid_file_directory = nil
168 # Initialize internal data.
171 def self.internal_init
173 return if self.inited
178 self.pending_watches = []
179 self.pending_watch_states = {}
181 self.contact_groups = {}
184 self.log_buffer_size ||= LOG_BUFFER_SIZE_DEFAULT
185 self.pid_file_directory ||= PID_FILE_DIRECTORY_DEFAULT
186 self.port ||= DRB_PORT_DEFAULT
187 self.allow ||= DRB_ALLOW_DEFAULT
188 self.log_level ||= LOG_LEVEL_DEFAULT
191 log_level_map = {:debug => Logger::DEBUG,
192 :info => Logger::INFO,
193 :warn => Logger::WARN,
194 :error => Logger::ERROR,
195 :fatal => Logger::FATAL}
196 LOG.level = log_level_map[self.log_level]
198 # init has been executed
205 # Instantiate a new, empty Watch object and pass it to the mandatory
206 # block. The attributes of the watch will be set by the configuration
209 # Aborts on duplicate watch name
211 # conflicting group name
214 def self.watch(&block)
215 self.task(Watch, &block)
218 # Instantiate a new, empty Task object and yield it to the mandatory
219 # block. The attributes of the task will be set by the configuration
222 # Aborts on duplicate task name
224 # conflicting group name
227 def self.task(klass = Task)
233 # do the post-configuration
236 # if running, completely remove the watch (if necessary) to
237 # prepare for the reload
238 existing_watch = self.watches[t.name]
239 if self.running && existing_watch
240 self.pending_watch_states[existing_watch.name] = existing_watch.state
241 self.unwatch(existing_watch)
244 # ensure the new watch has a unique name
245 if self.watches[t.name] || self.groups[t.name]
246 abort "Task name '#{t.name}' already used for a Task or Group"
249 # ensure watch is internally valid
250 t.valid? || abort("Task '#{t.name}' is not valid (see above)")
252 # add to list of watches
253 self.watches[t.name] = t
255 # add to pending watches
256 self.pending_watches << t
258 # add to group if specified
260 # ensure group name hasn't been used for a watch already
261 if self.watches[t.group]
262 abort "Group name '#{t.group}' already used for a Task"
265 self.groups[t.group] ||= []
266 self.groups[t.group] << t
273 if self.running && existing_watch
274 applog(t, :info, "#{t.name} Reloaded config")
276 applog(t, :info, "#{t.name} Loaded config")
280 # Unmonitor and remove the given watch from god.
281 # +watch+ is the Watch to remove
284 def self.unwatch(watch)
286 watch.unmonitor unless watch.state == :unmonitored
291 # remove from watches
292 self.watches.delete(watch.name)
296 self.groups[watch.group].delete(watch)
299 applog(watch, :info, "#{watch.name} unwatched")
302 # Instantiate a new Contact of the given kind and send it to the block.
303 # Then prepare, validate, and record the Contact.
304 # +kind+ is the contact class specifier
306 # Aborts on invalid kind
307 # duplicate contact name
309 # conflicting group name
312 def self.contact(kind)
317 c = Contact.generate(kind)
318 rescue NoSuchContactError => e
322 # send to block so config can set attributes
323 yield(c) if block_given?
325 # call prepare on the contact
328 # remove existing contacts of same name
329 existing_contact = self.contacts[c.name]
330 if self.running && existing_contact
331 self.uncontact(existing_contact)
334 # ensure the new contact has a unique name
335 if self.contacts[c.name] || self.contact_groups[c.name]
336 abort "Contact name '#{c.name}' already used for a Contact or Contact Group"
339 # abort if the Contact is invalid, the Contact will have printed
340 # out its own error messages by now
341 unless Contact.valid?(c) && c.valid?
342 abort "Exiting on invalid contact"
345 # add to list of contacts
346 self.contacts[c.name] = c
348 # add to contact group if specified
350 # ensure group name hasn't been used for a contact already
351 if self.contacts[c.group]
352 abort "Contact Group name '#{c.group}' already used for a Contact"
355 self.contact_groups[c.group] ||= []
356 self.contact_groups[c.group] << c
360 # Remove the given contact from god.
361 # +contact+ is the Contact to remove
364 def self.uncontact(contact)
365 self.contacts.delete(contact.name)
367 self.contact_groups[contact.group].delete(contact)
371 # Control the lifecycle of the given task(s).
372 # +name+ is the name of a task/group (String)
373 # +command+ is the command to run (String)
381 # Returns String[]:task_names
382 def self.control(name, command)
383 # get the list of items
384 items = Array(self.watches[name] || self.groups[name]).dup
390 when "start", "monitor"
391 items.each { |w| jobs << Thread.new { w.monitor if w.state != :up } }
393 items.each { |w| jobs << Thread.new { w.move(:restart) } }
395 items.each { |w| jobs << Thread.new { w.unmonitor.action(:stop) if w.state != :unmonitored } }
397 items.each { |w| jobs << Thread.new { w.unmonitor if w.state != :unmonitored } }
399 items.each { |w| self.unwatch(w) }
401 raise InvalidCommandError.new
404 jobs.each { |j| j.join }
406 items.map { |x| x.name }
409 # Unmonitor and stop all tasks.
411 # Returns true on success
412 # false if any task could not be stopped within 10 seconds
414 self.watches.sort.each do |name, w|
416 w.unmonitor if w.state != :unmonitored
417 w.action(:stop) if w.alive?
422 return true unless self.watches.map { |name, w| w.alive? }.any?
429 # Force the termination of god.
430 # * Clean up pid file if one exists
432 # * Hard exit using exit!
434 # Never returns because the process will no longer exist!
436 FileUtils.rm_f(self.pid) if self.pid
437 self.server.stop if self.server
441 # Gather the status of each task.
445 # # => { 'mongrel' => { :state => :up }, 'nginx' => { :state => :up } }
447 # Returns { String:task_name => { :state => Symbol:status }, ... }
450 self.watches.map do |name, w|
451 info[name] = {:state => w.state}
456 # Log lines for the given task since the specified time.
457 # +watch_name+ is the name of the task (may be abbreviated)
458 # +since+ is the Time since which to report log lines
460 # Raises God::NoSuchWatchError if no tasks matched
462 # Returns String:joined_log_lines
463 def self.running_log(watch_name, since)
464 matches = pattern_match(watch_name, self.watches.keys)
467 raise NoSuchWatchError.new
470 LOG.watch_log_since(matches.first, since)
473 # Load a config file into a running god instance. Rescues any exceptions
474 # that the config may raise and reports these back to the caller.
475 # +code+ is a String containing the config file
476 # +filename+ is the filename of the config file
478 # Returns [String[]:task_names, String:errors]
479 def self.running_load(code, filename)
487 eval(code, root_binding, filename)
488 self.pending_watches.each do |w|
489 if previous_state = self.pending_watch_states[w.name]
490 w.monitor unless previous_state == :unmonitored
492 w.monitor if w.autostart?
495 watches = self.pending_watches.dup
496 self.pending_watches.clear
497 self.pending_watch_states.clear
498 rescue Exception => e
499 # don't ever let running_load take down god
500 errors << LOG.finish_capture
502 unless e.instance_of?(SystemExit)
503 errors << e.message << "\n"
504 errors << e.backtrace.join("\n")
508 names = watches.map { |x| x.name }
512 # Load the given file(s) according to the given glob.
513 # +glob+ is the glob-enabled path to load
517 Dir[glob].each do |f|
524 unless test(?d, self.pid_file_directory)
526 FileUtils.mkdir_p(self.pid_file_directory)
527 rescue Errno::EACCES => e
528 abort "Failed to create pid file directory: #{e.message}"
534 unless test(?w, self.pid_file_directory)
535 abort "The pid file directory (#{self.pid_file_directory}) is not writable by #{Etc.getlogin}"
539 # Initialize and startup the machinery that makes god work.
544 # BleakHouseDiagnostic.install
545 # BleakHouseDiagnostic.spin
552 self.server = Socket.new(self.port)
554 # start event handler system
555 EventHandler.start if EventHandler.loaded?
557 # start the timer system
560 # start monitoring any watches set to autostart
561 self.watches.values.each { |w| w.monitor if w.autostart? }
563 # clear pending watches
564 self.pending_watches.clear
569 # join the timer thread so we don't exit
573 # To be called on program exit to start god
582 # Match a shortened pattern against a list of String candidates.
583 # The pattern is expanded into a regular expression by
584 # inserting .* between each character.
585 # +pattern+ is the String containing the abbreviation
586 # +list+ is the Array of Strings to match against
590 # list = %w{ foo bar bars }
592 # God.pattern_match(pattern, list)
593 # # => ['bar', 'bars']
595 # Returns String[]:matched_elements
596 def self.pattern_match(pattern, list)
597 regex = pattern.split('').join('.*')
599 list.select do |item|
600 item =~ Regexp.new(regex)
605 # Runs immediately before the program exits. If $run is true,
606 # start god; if $run is false, exit normally.