lib/unicorn.rb

   1 require 'fcntl'
   2
   3 require 'unicorn/socket_helper'
   4 require 'unicorn/const'
   5 require 'unicorn/http_request'
   6 require 'unicorn/http_response'
   7 require 'unicorn/configurator'
   8 require 'unicorn/util'
   9
# Unicorn module containing all of the classes (including C extensions) for
# running a Unicorn web server.  It contains a minimalist HTTP server with just
# enough functionality to service web application requests as fast as possible.
  13 module Unicorn
  class << self
    # Convenience entry point: builds an HttpServer for +app+ with
    # +options+, starts it and returns whatever HttpServer#join returns.
    def run(app, options = {})
      server = HttpServer.new(app, options)
      server.start.join
    end
  end
  19
  20   # This is the process manager of Unicorn. This manages worker
  21   # processes which in turn handle the I/O and application process.
  22   # Listener sockets are started in the master process and shared with
  23   # forked worker children.
  24   class HttpServer
  25     attr_reader :logger
  26     include ::Unicorn::SocketHelper
  27
  28     # prevents IO objects in here from being GC-ed
  29     IO_PURGATORY = []
  30
  31     # all bound listener sockets
  32     LISTENERS = []
  33
  34     # This hash maps PIDs to Workers
  35     WORKERS = {}
  36
  37     # See: http://cr.yp.to/docs/selfpipe.html
  38     SELF_PIPE = []
  39
  40     # signal queue used for self-piping
  41     SIG_QUEUE = []
  42
  43     # We populate this at startup so we can figure out how to reexecute
  44     # and upgrade the currently running instance of Unicorn
  45     START_CTX = {
  46       :argv => ARGV.map { |arg| arg.dup },
  47       # don't rely on Dir.pwd here since it's not symlink-aware, and
  48       # symlink dirs are the default with Capistrano...
  49       :cwd => `/bin/sh -c pwd`.chomp("\n"),
  50       :zero => $0.dup,
  51     }
  52
  53     Worker = Struct.new(:nr, :tempfile) unless defined?(Worker)
  54     class Worker
  55       # worker objects may be compared to just plain numbers
  56       def ==(other_nr)
  57         self.nr == other_nr
  58       end
  59     end
  60
  61     # Creates a working server on host:port (strange things happen if
  62     # port isn't a Number).  Use HttpServer::run to start the server and
  63     # HttpServer.run.join to join the thread that's processing
  64     # incoming requests on the socket.
  65     def initialize(app, options = {})
  66       @app = app
  67       @reexec_pid = 0
  68       @init_listeners = options[:listeners] ? options[:listeners].dup : []
  69       @config = Configurator.new(options.merge(:use_defaults => true))
  70       @listener_opts = {}
  71       @config.commit!(self, :skip => [:listeners, :pid])
  72       @request = HttpRequest.new(@logger)
  73     end
  74
  75     # Runs the thing.  Returns self so you can run join on it
  76     def start
  77       BasicSocket.do_not_reverse_lookup = true
  78
  79       # inherit sockets from parents, they need to be plain Socket objects
  80       # before they become UNIXServer or TCPServer
  81       inherited = ENV['UNICORN_FD'].to_s.split(/,/).map do |fd|
  82         io = Socket.for_fd(fd.to_i)
  83         set_server_sockopt(io, @listener_opts[sock_name(io)])
  84         IO_PURGATORY << io
  85         logger.info "inherited addr=#{sock_name(io)} fd=#{fd}"
  86         server_cast(io)
  87       end
  88
  89       config_listeners = @config[:listeners].dup
  90       LISTENERS.replace(inherited)
  91
  92       # we start out with generic Socket objects that get cast to either
  93       # TCPServer or UNIXServer objects; but since the Socket objects
  94       # share the same OS-level file descriptor as the higher-level *Server
  95       # objects; we need to prevent Socket objects from being garbage-collected
  96       config_listeners -= listener_names
  97       if config_listeners.empty? && LISTENERS.empty?
  98         config_listeners << Unicorn::Const::DEFAULT_LISTEN
  99       end
 100       config_listeners.each { |addr| listen(addr) }
 101       raise ArgumentError, "no listeners" if LISTENERS.empty?
 102       self.pid = @config[:pid]
 103       build_app! if @preload_app
 104       maintain_worker_count
 105       self
 106     end
 107
 108     # replaces current listener set with +listeners+.  This will
 109     # close the socket if it will not exist in the new listener set
 110     def listeners=(listeners)
 111       cur_names, dead_names = [], []
 112       listener_names.each do |name|
 113         if "/" == name[0..0]
 114           # mark unlinked sockets as dead so we can rebind them
 115           (File.socket?(name) ? cur_names : dead_names) << name
 116         else
 117           cur_names << name
 118         end
 119       end
 120       set_names = listener_names(listeners)
 121       dead_names += cur_names - set_names
 122       dead_names.uniq!
 123
 124       LISTENERS.delete_if do |io|
 125         if dead_names.include?(sock_name(io))
 126           IO_PURGATORY.delete_if do |pio|
 127             pio.fileno == io.fileno && (pio.close rescue nil).nil? # true
 128           end
 129           (io.close rescue nil).nil? # true
 130         else
 131           set_server_sockopt(io, @listener_opts[sock_name(io)])
 132           false
 133         end
 134       end
 135
 136       (set_names - cur_names).each { |addr| listen(addr) }
 137     end
 138
 139     def stdout_path=(path); redirect_io($stdout, path); end
 140     def stderr_path=(path); redirect_io($stderr, path); end
 141
 142     # sets the path for the PID file of the master process
 143     def pid=(path)
 144       if path
 145         if x = valid_pid?(path)
 146           return path if @pid && path == @pid && x == $$
 147           raise ArgumentError, "Already running on PID:#{x} " \
 148                                "(or pid=#{path} is stale)"
 149         end
 150       end
 151       unlink_pid_safe(@pid) if @pid
 152       File.open(path, 'wb') { |fp| fp.syswrite("#$$\n") } if path
 153       @pid = path
 154     end
 155
 156     # add a given address to the +listeners+ set, idempotently
 157     # Allows workers to add a private, per-process listener via the
 158     # @after_fork hook.  Very useful for debugging and testing.
 159     def listen(address, opt = {}.merge(@listener_opts[address] || {}))
 160       return if String === address && listener_names.include?(address)
 161
 162       if io = bind_listen(address, opt)
 163         unless TCPServer === io || UNIXServer === io
 164           IO_PURGATORY << io
 165           io = server_cast(io)
 166         end
 167         logger.info "listening on addr=#{sock_name(io)} fd=#{io.fileno}"
 168         LISTENERS << io
 169       else
 170         logger.error "adding listener failed addr=#{address} (in use)"
 171         raise Errno::EADDRINUSE, address
 172       end
 173     end
 174
    # monitors children and receives signals forever
    # (or until a termination signal is sent).  This handles signals
    # one at a time, and we'll happily drop signals in case somebody
    # is signalling us too often.
 179     def join
 180       # this pipe is used to wake us up from select(2) in #join when signals
 181       # are trapped.  See trap_deferred
 182       init_self_pipe!
 183       respawn = true
 184
 185       QUEUE_SIGS.each { |sig| trap_deferred(sig) }
 186       trap(:CHLD) { |sig_nr| awaken_master }
 187       proc_name 'master'
 188       logger.info "master process ready" # test_exec.rb relies on this message
 189       begin
 190         loop do
 191           reap_all_workers
 192           case SIG_QUEUE.shift
 193           when nil
 194             murder_lazy_workers
 195             maintain_worker_count if respawn
 196             master_sleep
 197           when :QUIT # graceful shutdown
 198             break
 199           when :TERM, :INT # immediate shutdown
 200             stop(false)
 201             break
 202           when :USR1 # rotate logs
 203             logger.info "master reopening logs..."
 204             Unicorn::Util.reopen_logs
 205             logger.info "master done reopening logs"
 206             kill_each_worker(:USR1)
 207           when :USR2 # exec binary, stay alive in case something went wrong
 208             reexec
 209           when :WINCH
 210             if Process.ppid == 1 || Process.getpgrp != $$
 211               respawn = false
 212               logger.info "gracefully stopping all workers"
 213               kill_each_worker(:QUIT)
 214             else
 215               logger.info "SIGWINCH ignored because we're not daemonized"
 216             end
 217           when :TTIN
 218             @worker_processes += 1
 219           when :TTOU
 220             @worker_processes -= 1 if @worker_processes > 0
 221           when :HUP
 222             respawn = true
 223             if @config.config_file
 224               load_config!
 225               redo # immediate reaping since we may have QUIT workers
 226             else # exec binary and exit if there's no config file
 227               logger.info "config_file not present, reexecuting binary"
 228               reexec
 229               break
 230             end
 231           end
 232         end
 233       rescue Errno::EINTR
 234         retry
 235       rescue Object => e
 236         logger.error "Unhandled master loop exception #{e.inspect}."
 237         logger.error e.backtrace.join("\n")
 238         retry
 239       end
 240       stop # gracefully shutdown all workers on our way out
 241       logger.info "master complete"
 242       unlink_pid_safe(@pid) if @pid
 243     end
 244
 245     # Terminates all workers, but does not exit master process
 246     def stop(graceful = true)
 247       kill_each_worker(graceful ? :QUIT : :TERM)
 248       timeleft = @timeout
 249       step = 0.2
 250       reap_all_workers
 251       until WORKERS.empty?
 252         sleep(step)
 253         reap_all_workers
 254         (timeleft -= step) > 0 and next
 255         kill_each_worker(:KILL)
 256       end
 257     ensure
 258       self.listeners = []
 259     end
 260
 261     private
 262
 263     # list of signals we care about and trap in master.
 264     QUEUE_SIGS = [ :WINCH, :QUIT, :INT, :TERM, :USR1, :USR2, :HUP,
 265                    :TTIN, :TTOU ].freeze
 266
 267     # defer a signal for later processing in #join (master process)
 268     def trap_deferred(signal)
 269       trap(signal) do |sig_nr|
 270         if SIG_QUEUE.size < 5
 271           SIG_QUEUE << signal
 272           awaken_master
 273         else
 274           logger.error "ignoring SIG#{signal}, queue=#{SIG_QUEUE.inspect}"
 275         end
 276       end
 277     end
 278
    # wait for a signal handler to wake us up and then consume the pipe.
    # Wake up every second anyway to run murder_lazy_workers
 281     def master_sleep
 282       begin
 283         ready = IO.select([SELF_PIPE.first], nil, nil, 1) or return
 284         ready.first && ready.first.first or return
 285         loop { SELF_PIPE.first.read_nonblock(Const::CHUNK_SIZE) }
 286       rescue Errno::EAGAIN, Errno::EINTR
 287       end
 288     end
 289
 290     def awaken_master
 291       begin
 292         SELF_PIPE.last.write_nonblock('.') # wakeup master process from select
 293       rescue Errno::EAGAIN, Errno::EINTR
 294         # pipe is full, master should wake up anyways
 295         retry
 296       end
 297     end
 298
 299     # reaps all unreaped workers
 300     def reap_all_workers
 301       begin
 302         loop do
 303           pid, status = Process.waitpid2(-1, Process::WNOHANG)
 304           pid or break
 305           if @reexec_pid == pid
 306             logger.error "reaped #{status.inspect} exec()-ed"
 307             @reexec_pid = 0
 308             self.pid = @pid.chomp('.oldbin') if @pid
 309             proc_name 'master'
 310           else
 311             worker = WORKERS.delete(pid)
 312             worker.tempfile.close rescue nil
 313             logger.info "reaped #{status.inspect} " \
 314                         "worker=#{worker.nr rescue 'unknown'}"
 315           end
 316         end
 317       rescue Errno::ECHILD
 318       end
 319     end
 320
 321     # reexecutes the START_CTX with a new binary
 322     def reexec
 323       if @reexec_pid > 0
 324         begin
 325           Process.kill(0, @reexec_pid)
 326           logger.error "reexec-ed child already running PID:#{@reexec_pid}"
 327           return
 328         rescue Errno::ESRCH
 329           @reexec_pid = 0
 330         end
 331       end
 332
 333       if @pid
 334         old_pid = "#{@pid}.oldbin"
 335         prev_pid = @pid.dup
 336         begin
 337           self.pid = old_pid  # clear the path for a new pid file
 338         rescue ArgumentError
 339           logger.error "old PID:#{valid_pid?(old_pid)} running with " \
 340                        "existing pid=#{old_pid}, refusing rexec"
 341           return
 342         rescue Object => e
 343           logger.error "error writing pid=#{old_pid} #{e.class} #{e.message}"
 344           return
 345         end
 346       end
 347
 348       @reexec_pid = fork do
 349         listener_fds = LISTENERS.map { |sock| sock.fileno }
 350         ENV['UNICORN_FD'] = listener_fds.join(',')
 351         Dir.chdir(START_CTX[:cwd])
 352         cmd = [ START_CTX[:zero] ] + START_CTX[:argv]
 353
 354         # avoid leaking FDs we don't know about, but let before_exec
 355         # unset FD_CLOEXEC, if anything else in the app eventually
 356         # relies on FD inheritence.
 357         (3..1024).each do |io|
 358           next if listener_fds.include?(io)
 359           io = IO.for_fd(io) rescue nil
 360           io or next
 361           IO_PURGATORY << io
 362           io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 363         end
 364         logger.info "executing #{cmd.inspect} (in #{Dir.pwd})"
 365         @before_exec.call(self)
 366         exec(*cmd)
 367       end
 368       proc_name 'master (old)'
 369     end
 370
 371     # forcibly terminate all workers that haven't checked in in @timeout
 372     # seconds.  The timeout is implemented using an unlinked tempfile
 373     # shared between the parent process and each worker.  The worker
 374     # runs File#chmod to modify the ctime of the tempfile.  If the ctime
 375     # is stale for >@timeout seconds, then we'll kill the corresponding
 376     # worker.
 377     def murder_lazy_workers
 378       now = Time.now
 379       WORKERS.each_pair do |pid, worker|
 380         (now - worker.tempfile.ctime) <= @timeout and next
 381         logger.error "worker=#{worker.nr} PID:#{pid} is too old, killing"
 382         kill_worker(:KILL, pid) # take no prisoners for @timeout violations
 383         worker.tempfile.close rescue nil
 384       end
 385     end
 386
 387     def spawn_missing_workers
 388       (0...@worker_processes).each do |worker_nr|
 389         WORKERS.values.include?(worker_nr) and next
 390         begin
 391           Dir.chdir(START_CTX[:cwd])
 392         rescue Errno::ENOENT => err
 393           logger.fatal "#{err.inspect} (#{START_CTX[:cwd]})"
 394           SIG_QUEUE << :QUIT # forcibly emulate SIGQUIT
 395           return
 396         end
 397         tempfile = Tempfile.new(nil) # as short as possible to save dir space
 398         tempfile.unlink # don't allow other processes to find or see it
 399         worker = Worker.new(worker_nr, tempfile)
 400         @before_fork.call(self, worker)
 401         pid = fork { worker_loop(worker) }
 402         WORKERS[pid] = worker
 403       end
 404     end
 405
 406     def maintain_worker_count
 407       (off = WORKERS.size - @worker_processes) == 0 and return
 408       off < 0 and return spawn_missing_workers
 409       WORKERS.each_pair { |pid,w|
 410         w.nr >= @worker_processes and kill_worker(:QUIT, pid) rescue nil
 411       }
 412     end
 413
 414     # once a client is accepted, it is processed in its entirety here
 415     # in 3 easy steps: read request, call app, write app response
 416     def process_client(client)
 417       # one syscall less than "client.nonblock = false":
 418       client.fcntl(Fcntl::F_SETFL, File::RDWR)
 419       HttpResponse.write(client, @app.call(@request.read(client)))
 420     # if we get any error, try to write something back to the client
 421     # assuming we haven't closed the socket, but don't get hung up
 422     # if the socket is already closed or broken.  We'll always ensure
 423     # the socket is closed at the end of this function
 424     rescue EOFError,Errno::ECONNRESET,Errno::EPIPE,Errno::EINVAL,Errno::EBADF
 425       client.write_nonblock(Const::ERROR_500_RESPONSE) rescue nil
 426     rescue HttpParserError # try to tell the client they're bad
 427       client.write_nonblock(Const::ERROR_400_RESPONSE) rescue nil
 428     rescue Object => e
 429       client.write_nonblock(Const::ERROR_500_RESPONSE) rescue nil
 430       logger.error "Read error: #{e.inspect}"
 431       logger.error e.backtrace.join("\n")
 432     ensure
 433       begin
 434         client.closed? or client.close
 435       rescue Object => e
 436         logger.error "Client error: #{e.inspect}"
 437         logger.error e.backtrace.join("\n")
 438       end
 439       @request.reset
 440     end
 441
 442     # gets rid of stuff the worker has no business keeping track of
 443     # to free some resources and drops all sig handlers.
 444     # traps for USR1, USR2, and HUP may be set in the @after_fork Proc
 445     # by the user.
 446     def init_worker_process(worker)
 447       QUEUE_SIGS.each { |sig| trap(sig, 'DEFAULT') }
 448       trap(:CHLD, 'DEFAULT')
 449       SIG_QUEUE.clear
 450       proc_name "worker[#{worker.nr}]"
 451       START_CTX.clear
 452       init_self_pipe!
 453       WORKERS.values.each { |other| other.tempfile.close! rescue nil }
 454       WORKERS.clear
 455       LISTENERS.each { |sock| sock.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) }
 456       worker.tempfile.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 457       @after_fork.call(self, worker) # can drop perms
 458       @timeout /= 2.0 # halve it for select()
 459       build_app! unless @config[:preload_app]
 460     end
 461
 462     def reopen_worker_logs(worker_nr)
 463       @logger.info "worker=#{worker_nr} reopening logs..."
 464       Unicorn::Util.reopen_logs
 465       @logger.info "worker=#{worker_nr} done reopening logs"
 466       init_self_pipe!
 467     end
 468
 469     # runs inside each forked worker, this sits around and waits
 470     # for connections and doesn't die until the parent dies (or is
 471     # given a INT, QUIT, or TERM signal)
 472     def worker_loop(worker)
 473       master_pid = Process.ppid # slightly racy, but less memory usage
 474       init_worker_process(worker)
 475       nr = 0 # this becomes negative if we need to reopen logs
 476       alive = worker.tempfile # tempfile is our lifeline to the master process
 477       ready = LISTENERS
 478       client = nil
 479
 480       # closing anything we IO.select on will raise EBADF
 481       trap(:USR1) { nr = -65536; SELF_PIPE.first.close rescue nil }
 482       trap(:QUIT) { alive = nil; LISTENERS.each { |s| s.close rescue nil } }
 483       [:TERM, :INT].each { |sig| trap(sig) { exit(0) } } # instant shutdown
 484       @logger.info "worker=#{worker.nr} ready"
 485
 486       while alive
 487         reopen_worker_logs(worker.nr) if nr < 0
 488         # we're a goner in @timeout seconds anyways if alive.chmod
 489         # breaks, so don't trap the exception.  Using fchmod() since
 490         # futimes() is not available in base Ruby and I very strongly
 491         # prefer temporary files to be unlinked for security,
 492         # performance and reliability reasons, so utime is out.  No-op
 493         # changes with chmod doesn't update ctime on all filesystems; so
 494         # we change our counter each and every time (after process_client
 495         # and before IO.select).
 496         alive.chmod(nr = 0)
 497
 498         begin
 499           ready.each do |sock|
 500             begin
 501               client = begin
 502                 sock.accept_nonblock
 503               rescue Errno::EAGAIN
 504                 next
 505               end
 506               process_client(client)
 507               alive.chmod(nr += 1)
 508             rescue Errno::ECONNABORTED
 509               # client closed the socket even before accept
 510               client.close rescue nil
 511             end
 512             break if nr < 0
 513           end
 514           client = nil
 515
 516           # make the following bet: if we accepted clients this round,
 517           # we're probably reasonably busy, so avoid calling select()
 518           # and do a speculative accept_nonblock on every listener
 519           # before we sleep again in select().
 520           if nr == 0 # (nr < 0) => reopen logs
 521             master_pid == Process.ppid or exit(0)
 522             alive.chmod(nr += 1)
 523             begin
 524               # timeout used so we can detect parent death:
 525               ret = IO.select(LISTENERS, nil, SELF_PIPE, @timeout) or next
 526               ready = ret.first
 527             rescue Errno::EINTR
 528               ready = LISTENERS
 529             rescue Errno::EBADF => e
 530               nr < 0 or exit(alive ? 1 : 0)
 531             end
 532           end
 533         rescue SignalException, SystemExit => e
 534           raise e
 535         rescue Object => e
 536           if alive
 537             logger.error "Unhandled listen loop exception #{e.inspect}."
 538             logger.error e.backtrace.join("\n")
 539           end
 540         end
 541       end
 542     end
 543
 544     # delivers a signal to a worker and fails gracefully if the worker
 545     # is no longer running.
 546     def kill_worker(signal, pid)
 547       begin
 548         Process.kill(signal, pid)
 549       rescue Errno::ESRCH
 550         worker = WORKERS.delete(pid) and worker.tempfile.close rescue nil
 551       end
 552     end
 553
 554     # delivers a signal to each worker
 555     def kill_each_worker(signal)
 556       WORKERS.keys.each { |pid| kill_worker(signal, pid) }
 557     end
 558
 559     # unlinks a PID file at given +path+ if it contains the current PID
 560     # useful as an at_exit handler.
 561     def unlink_pid_safe(path)
 562       (File.read(path).to_i == $$ and File.unlink(path)) rescue nil
 563     end
 564
 565     # returns a PID if a given path contains a non-stale PID file,
 566     # nil otherwise.
 567     def valid_pid?(path)
 568       if File.exist?(path) && (pid = File.read(path).to_i) > 1
 569         begin
 570           Process.kill(0, pid)
 571           return pid
 572         rescue Errno::ESRCH
 573         end
 574       end
 575       nil
 576     end
 577
 578     def load_config!
 579       begin
 580         logger.info "reloading config_file=#{@config.config_file}"
 581         @config[:listeners].replace(@init_listeners)
 582         @config.reload
 583         @config.commit!(self)
 584         kill_each_worker(:QUIT)
 585         logger.info "done reloading config_file=#{@config.config_file}"
 586       rescue Object => e
 587         logger.error "error reloading config_file=#{@config.config_file}: " \
 588                      "#{e.class} #{e.message}"
 589       end
 590     end
 591
 592     # returns an array of string names for the given listener array
 593     def listener_names(listeners = LISTENERS)
 594       listeners.map { |io| sock_name(io) }
 595     end
 596
 597     def build_app!
 598       @app = @app.call if @app.respond_to?(:arity) && @app.arity == 0
 599     end
 600
 601     def proc_name(tag)
 602       $0 = ([ File.basename(START_CTX[:zero]), tag ] +
 603               START_CTX[:argv]).join(' ')
 604     end
 605
 606     def redirect_io(io, path)
 607       File.open(path, 'a') { |fp| io.reopen(fp) } if path
 608       io.sync = true
 609     end
 610
 611     def init_self_pipe!
 612       SELF_PIPE.each { |io| io.close rescue nil }
 613       SELF_PIPE.replace(IO.pipe)
 614       SELF_PIPE.each { |io| io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) }
 615     end
 616
 617   end
 618 end