1 $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
13 require 'god/system/process'
14 require 'god/dependency_graph'
15 require 'god/timeline'
16 require 'god/configurable'
20 require 'god/behavior'
21 require 'god/behaviors/clean_pid_file'
22 require 'god/behaviors/notify_when_flapping'
24 require 'god/condition'
25 require 'god/conditions/process_running'
26 require 'god/conditions/process_exits'
27 require 'god/conditions/tries'
28 require 'god/conditions/memory_usage'
29 require 'god/conditions/cpu_usage'
30 require 'god/conditions/always'
31 require 'god/conditions/lambda'
32 require 'god/conditions/degrading_lambda'
33 require 'god/conditions/flapping'
34 require 'god/conditions/http_response_code'
35 require 'god/conditions/disk_usage'
36 require 'god/conditions/complex'
39 require 'god/contacts/email'
49 require 'god/event_handler'
50 require 'god/registry'
55 require 'god/cli/version'
56 require 'god/cli/command'
58 $:.unshift File.join(File.dirname(__FILE__), *%w[.. ext god])
60 # App wide logging system
62 LOG.datetime_format = "%Y-%m-%d %H:%M:%S "
64 def applog(watch, level, text)
65 LOG.log(watch, level, text)
68 # The $run global determines whether god should be started when the
69 # program would normally end. This should be set to true when god
70 # should be started (e.g. `god -c <config file>`) and false otherwise
74 GOD_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
76 # Ensure that Syslog is open
83 # Return the binding of god's root level
88 # Load the event handler system
89 God::EventHandler.load
92 alias_method :abort_orig, :abort
96 applog(nil, :error, text) if text
97 # text ? abort_orig(text) : exit(1)
101 alias_method :exit_orig, :exit
110 def safe_attr_accessor(*args)
112 define_method((arg.to_s + "=").intern) do |other|
113 if !self.running && self.inited
114 abort "God.#{arg} must be set before any Tasks are defined"
117 if self.running && self.inited
118 applog(nil, :warn, "God.#{arg} can't be set while god is running")
122 instance_variable_set(('@' + arg.to_s).intern, other)
125 define_method(arg) do
126 instance_variable_get(('@' + arg.to_s).intern)
135 LOG_BUFFER_SIZE_DEFAULT = 1000
136 PID_FILE_DIRECTORY_DEFAULT = '/var/run/god'
137 DRB_PORT_DEFAULT = 17165
138 DRB_ALLOW_DEFAULT = ['127.0.0.1']
142 safe_attr_accessor :pid,
150 attr_accessor :inited,
153 :pending_watch_states,
161 # initialize class instance variables
166 self.log_buffer_size = nil
167 self.pid_file_directory = nil
169 def self.internal_init
171 return if self.inited
176 self.pending_watches = []
177 self.pending_watch_states = {}
179 self.contact_groups = {}
182 self.log_buffer_size ||= LOG_BUFFER_SIZE_DEFAULT
183 self.pid_file_directory ||= PID_FILE_DIRECTORY_DEFAULT
184 self.port ||= DRB_PORT_DEFAULT
185 self.allow ||= DRB_ALLOW_DEFAULT
186 LOG.level = Logger::INFO
188 # init has been executed
195 # Instantiate a new, empty Watch object and pass it to the mandatory
196 # block. The attributes of the watch will be set by the configuration
198 def self.watch(&block)
199 self.task(Watch, &block)
202 # Instantiate a new, empty Task object and pass it to the mandatory
203 # block. The attributes of the task will be set by the configuration
205 def self.task(klass = Task)
211 # do the post-configuration
214 # if running, completely remove the watch (if necessary) to
215 # prepare for the reload
216 existing_watch = self.watches[t.name]
217 if self.running && existing_watch
218 self.pending_watch_states[existing_watch.name] = existing_watch.state
219 self.unwatch(existing_watch)
222 # ensure the new watch has a unique name
223 if self.watches[t.name] || self.groups[t.name]
224 abort "Task name '#{t.name}' already used for a Task or Group"
227 # ensure watch is internally valid
228 t.valid? || abort("Task '#{t.name}' is not valid (see above)")
230 # add to list of watches
231 self.watches[t.name] = t
233 # add to pending watches
234 self.pending_watches << t
236 # add to group if specified
238 # ensure group name hasn't been used for a watch already
239 if self.watches[t.group]
240 abort "Group name '#{t.group}' already used for a Task"
243 self.groups[t.group] ||= []
244 self.groups[t.group] << t
251 if self.running && existing_watch
252 applog(t, :info, "#{t.name} Reloaded config")
254 applog(t, :info, "#{t.name} Loaded config")
258 def self.unwatch(watch)
260 watch.unmonitor unless watch.state == :unmonitored
265 # remove from watches
266 self.watches.delete(watch.name)
270 self.groups[watch.group].delete(watch)
274 def self.contact(kind)
277 # create the contact
279 c = Contact.generate(kind)
280 rescue NoSuchContactError => e
284 # send to block so config can set attributes
285 yield(c) if block_given?
287 # call prepare on the contact
290 # remove existing contacts of same name
291 existing_contact = self.contacts[c.name]
292 if self.running && existing_contact
293 self.uncontact(existing_contact)
296 # ensure the new contact has a unique name
297 if self.contacts[c.name] || self.contact_groups[c.name]
298 abort "Contact name '#{c.name}' already used for a Contact or Contact Group"
301 # abort if the Contact is invalid, the Contact will have printed
302 # out its own error messages by now
303 unless Contact.valid?(c) && c.valid?
304 abort "Exiting on invalid contact"
307 # add to list of contacts
308 self.contacts[c.name] = c
310 # add to contact group if specified
312 # ensure group name hasn't been used for a contact already
313 if self.contacts[c.group]
314 abort "Contact Group name '#{c.group}' already used for a Contact"
317 self.contact_groups[c.group] ||= []
318 self.contact_groups[c.group] << c
322 def self.uncontact(contact)
323 self.contacts.delete(contact.name)
325 self.contact_groups[contact.group].delete(contact)
329 def self.control(name, command)
330 # get the list of watches
331 watches = Array(self.watches[name] || self.groups[name])
337 when "start", "monitor"
338 watches.each { |w| jobs << Thread.new { w.monitor if w.state != :up } }
340 watches.each { |w| jobs << Thread.new { w.move(:restart) } }
342 watches.each { |w| jobs << Thread.new { w.unmonitor.action(:stop) if w.state != :unmonitored } }
344 watches.each { |w| jobs << Thread.new { w.unmonitor if w.state != :unmonitored } }
346 watches.each { |w| jobs << Thread.new { self.unwatch(w) } }
348 raise InvalidCommandError.new
351 jobs.each { |j| j.join }
353 watches.map { |x| x.name }
357 self.watches.sort.each do |name, w|
359 w.unmonitor if w.state != :unmonitored
360 w.action(:stop) if w.alive?
365 return true unless self.watches.map { |name, w| w.alive? }.any?
372 # Force the termination of god.
373 # * Clean up pid file if one exists
375 # * Hard exit using exit!
377 # Never returns because the process will no longer exist!
379 FileUtils.rm_f(self.pid) if self.pid
380 self.server.stop if self.server
386 self.watches.map do |name, w|
387 info[name] = {:state => w.state}
392 def self.running_log(watch_name, since)
393 matches = pattern_match(self.watches.keys, watch_name)
396 raise NoSuchWatchError.new
399 LOG.watch_log_since(matches.first, since)
402 def self.running_load(code, filename)
409 eval(code, root_binding, filename)
410 self.pending_watches.each do |w|
411 if previous_state = self.pending_watch_states[w.name]
412 w.monitor unless previous_state == :unmonitored
414 w.monitor if w.autostart?
417 watches = self.pending_watches.dup
418 self.pending_watches.clear
419 self.pending_watch_states.clear
420 rescue Exception => e
421 # don't ever let running_load take down god
422 errors << LOG.finish_capture
424 unless e.instance_of?(SystemExit)
425 errors << e.message << "\n"
426 errors << e.backtrace.join("\n")
430 names = watches.map { |x| x.name }
435 Dir[glob].each do |f|
442 unless test(?d, self.pid_file_directory)
444 FileUtils.mkdir_p(self.pid_file_directory)
445 rescue Errno::EACCES => e
446 abort "Failed to create pid file directory: #{e.message}"
452 unless test(?w, self.pid_file_directory)
453 abort "The pid file directory (#{self.pid_file_directory}) is not writable by #{Etc.getlogin}"
463 self.server = Socket.new(self.port)
465 # start event handler system
466 EventHandler.start if EventHandler.loaded?
468 # start the timer system
471 # start monitoring any watches set to autostart
472 self.watches.values.each { |w| w.monitor if w.autostart? }
474 # clear pending watches
475 self.pending_watches.clear
480 # join the timer thread so we don't exit
490 # Match a shortened pattern against a list of String candidates.
491 # The pattern is expanded into a regular expression by
492 # inserting .* between each character.
496 # list = %w{ foo bar bars }
498 # God.pattern_match(list, pattern)
499 # # => ['bar', 'bars']
502 def self.pattern_match(list, pattern)
503 regex = pattern.split('').join('.*')
505 list.select do |item|
506 item =~ Regexp.new(regex)