lib/unicorn.rb

   1 require 'logger'
   2 require 'fcntl'
   3
   4 require 'unicorn/socket'
   5 require 'unicorn/const'
   6 require 'unicorn/http_request'
   7 require 'unicorn/http_response'
   8 require 'unicorn/configurator'
   9 require 'unicorn/util'
  10
# Unicorn module containing all of the classes (including C extensions) for
# running a Unicorn web server.  It contains a minimalist HTTP server with
# just enough functionality to service web application requests as fast as
# possible.
  14 module Unicorn
  # Convenience entry point: builds an HttpServer for +app+ with +options+,
  # starts it and then blocks in HttpServer#join.
  def self.run(app, options = {})
    server = HttpServer.new(app, options)
    server.start.join
  end
  20
  21   # This is the process manager of Unicorn. This manages worker
  22   # processes which in turn handle the I/O and application process.
  23   # Listener sockets are started in the master process and shared with
  24   # forked worker children.
  25   class HttpServer
    attr_reader :logger
    include ::Unicorn::SocketHelper

    # Signals trapped in the master are appended here by trap_deferred and
    # taken off one at a time by the loop in #join.
    SIG_QUEUE = []

    # Snapshot of how the process was launched, taken when this file is
    # loaded.  #initialize copies it into @start_ctx, which #reexec and
    # proc_name read.  Only the Hash itself is frozen; the values are not.
    DEFAULT_START_CTX = {
      # duplicate each argument so later changes to ARGV entries don't leak in
      :argv => ARGV.map { |arg| arg.dup },
      # don't rely on Dir.pwd here since it's not symlink-aware, and
      # symlink dirs are the default with Capistrano...
      :cwd => `/bin/sh -c pwd`.chomp("\n"),
      :zero => $0.dup,
    }.freeze
  37
  38     Worker = Struct.new(:nr, :tempfile) unless defined?(Worker)
  39     class Worker
  40       # worker objects may be compared to just plain numbers
  41       def ==(other_nr)
  42         self.nr == other_nr
  43       end
  44     end
  45
    # Creates a server that will run +app+, configured through +options+.
    # Nothing is bound or forked here: call #start to set up the listeners
    # and spawn the workers, then #join to run the master loop
    # (Unicorn.run does all three steps in one call).
  50     def initialize(app, options = {})
  51       start_ctx = options.delete(:start_ctx)
  52       @start_ctx = DEFAULT_START_CTX.dup
  53       @start_ctx.merge!(start_ctx) if start_ctx
  54       @app = app
  55       @master_pid = $$
  56       @workers = Hash.new
  57       @io_purgatory = [] # prevents IO objects in here from being GC-ed
  58       @request = @rd_sig = @wr_sig = nil
  59       @reexec_pid = 0
  60       @init_listeners = options[:listeners] ? options[:listeners].dup : []
  61       @config = Configurator.new(options.merge(:use_defaults => true))
  62       @listener_opts = {}
  63       @config.commit!(self, :skip => [:listeners, :pid])
  64       @listeners = []
  65     end
  66
  67     # Runs the thing.  Returns self so you can run join on it
  68     def start
  69       BasicSocket.do_not_reverse_lookup = true
  70
  71       # inherit sockets from parents, they need to be plain Socket objects
  72       # before they become UNIXServer or TCPServer
  73       inherited = ENV['UNICORN_FD'].to_s.split(/,/).map do |fd|
  74         io = Socket.for_fd(fd.to_i)
  75         set_server_sockopt(io, @listener_opts[sock_name(io)])
  76         @io_purgatory << io
  77         logger.info "inherited addr=#{sock_name(io)} fd=#{fd}"
  78         server_cast(io)
  79       end
  80
  81       config_listeners = @config[:listeners].dup
  82       @listeners.replace(inherited)
  83
  84       # we start out with generic Socket objects that get cast to either
  85       # TCPServer or UNIXServer objects; but since the Socket objects
  86       # share the same OS-level file descriptor as the higher-level *Server
  87       # objects; we need to prevent Socket objects from being garbage-collected
  88       config_listeners -= listener_names
  89       if config_listeners.empty? && @listeners.empty?
  90         config_listeners << Unicorn::Const::DEFAULT_LISTEN
  91       end
  92       config_listeners.each { |addr| listen(addr) }
  93       raise ArgumentError, "no listeners" if @listeners.empty?
  94       self.pid = @config[:pid]
  95       build_app! if @preload_app
  96       File.open(@stderr_path, "a") { |fp| $stderr.reopen(fp) } if @stderr_path
  97       File.open(@stdout_path, "a") { |fp| $stdout.reopen(fp) } if @stdout_path
  98       $stderr.sync = $stdout.sync = true
  99       spawn_missing_workers
 100       self
 101     end
 102
 103     # replaces current listener set with +listeners+.  This will
 104     # close the socket if it will not exist in the new listener set
 105     def listeners=(listeners)
 106       cur_names, dead_names = [], []
 107       listener_names.each do |name|
 108         if "/" == name[0..0]
 109           # mark unlinked sockets as dead so we can rebind them
 110           (File.socket?(name) ? cur_names : dead_names) << name
 111         else
 112           cur_names << name
 113         end
 114       end
 115       set_names = listener_names(listeners)
 116       dead_names += cur_names - set_names
 117       dead_names.uniq!
 118
 119       @listeners.delete_if do |io|
 120         if dead_names.include?(sock_name(io))
 121           @io_purgatory.delete_if do |pio|
 122             pio.fileno == io.fileno && (pio.close rescue nil).nil? # true
 123           end
 124           (io.close rescue nil).nil? # true
 125         else
 126           set_server_sockopt(io, @listener_opts[sock_name(io)])
 127           false
 128         end
 129       end
 130
 131       (set_names - cur_names).each { |addr| listen(addr) }
 132     end
 133
 134     # sets the path for the PID file of the master process
 135     def pid=(path)
 136       if path
 137         if x = valid_pid?(path)
 138           return path if @pid && path == @pid && x == $$
 139           raise ArgumentError, "Already running on PID:#{x} " \
 140                                "(or pid=#{path} is stale)"
 141         end
 142       end
 143       unlink_pid_safe(@pid) if @pid
 144       File.open(path, 'wb') { |fp| fp.syswrite("#$$\n") } if path
 145       @pid = path
 146     end
 147
 148     # add a given address to the +listeners+ set, idempotently
 149     # Allows workers to add a private, per-process listener via the
 150     # @after_fork hook.  Very useful for debugging and testing.
 151     def listen(address, opt = {}.merge(@listener_opts[address] || {}))
 152       return if String === address && listener_names.include?(address)
 153
 154       if io = bind_listen(address, opt)
 155         unless TCPServer === io || UNIXServer === io
 156           @io_purgatory << io
 157           io = server_cast(io)
 158         end
 159         logger.info "listening on addr=#{sock_name(io)} fd=#{io.fileno}"
 160         @listeners << io
 161       else
 162         logger.error "adding listener failed addr=#{address} (in use)"
 163         raise Errno::EADDRINUSE, address
 164       end
 165     end
 166
    # monitors children and receives signals forever
    # (or until a termination signal is sent).  This handles signals
    # one at a time, and we'll happily drop signals in case somebody
    # is signalling us too often.
    #
    # One entry of SIG_QUEUE is handled per loop iteration.  Once the loop
    # ends the remaining workers are stopped gracefully and the PID file,
    # if one is set, is handed to unlink_pid_safe.
    def join
      # this pipe is used to wake us up from select(2) in #join when signals
      # are trapped.  See trap_deferred
      @rd_sig, @wr_sig = IO.pipe unless (@rd_sig && @wr_sig)
      mode = nil
      # cleared by SIGWINCH so that quit workers are not replaced,
      # set again by SIGHUP
      respawn = true

      QUEUE_SIGS.each { |sig| trap_deferred(sig) }
      # a child status change only needs to wake the loop up;
      # reap_all_workers at the top of the loop does the actual work
      trap(:CHLD) { |sig_nr| awaken_master }
      proc_name 'master'
      logger.info "master process ready" # test_exec.rb relies on this message
      begin
        loop do
          reap_all_workers
          case (mode = SIG_QUEUE.shift)
          when nil
            # no signal pending: routine housekeeping, then sleep until
            # woken through the pipe or by master_sleep's timeout
            murder_lazy_workers
            spawn_missing_workers if respawn
            master_sleep
          when :QUIT # graceful shutdown
            # the graceful stop itself happens after the loop
            break
          when :TERM, :INT # immediate shutdown
            stop(false)
            break
          when :USR1 # rotate logs
            logger.info "master rotating logs..."
            Unicorn::Util.reopen_logs
            logger.info "master done rotating logs"
            kill_each_worker(:USR1)
          when :USR2 # exec binary, stay alive in case something went wrong
            reexec
          when :WINCH
            # only honoured when our parent is PID 1 or we are not the
            # process group leader
            if Process.ppid == 1 || Process.getpgrp != $$
              respawn = false
              logger.info "gracefully stopping all workers"
              kill_each_worker(:QUIT)
            else
              logger.info "SIGWINCH ignored because we're not daemonized"
            end
          when :HUP
            respawn = true
            if @config.config_file
              load_config!
              redo # immediate reaping since we may have QUIT workers
            else # exec binary and exit if there's no config file
              logger.info "config_file not present, reexecuting binary"
              reexec
              break
            end
          else
            logger.error "master process in unknown mode: #{mode}"
          end
        end
      rescue Errno::EINTR
        # interrupted system call: start the loop over
        retry
      rescue Object => e
        # anything else is logged and the loop is restarted as well
        logger.error "Unhandled master loop exception #{e.inspect}."
        logger.error e.backtrace.join("\n")
        retry
      end
      stop # gracefully shutdown all workers on our way out
      logger.info "master complete"
      unlink_pid_safe(@pid) if @pid
    end
 235
 236     # Terminates all workers, but does not exit master process
 237     def stop(graceful = true)
 238       kill_each_worker(graceful ? :QUIT : :TERM)
 239       timeleft = @timeout
 240       step = 0.2
 241       reap_all_workers
 242       until @workers.empty?
 243         sleep(step)
 244         reap_all_workers
 245         (timeleft -= step) > 0 and next
 246         kill_each_worker(:KILL)
 247       end
 248     ensure
 249       self.listeners = []
 250     end
 251
 252     private
 253
 254     # list of signals we care about and trap in master.
    # #join installs a trap_deferred handler for each entry, and
    # init_worker_process resets each of them to 'DEFAULT' in the workers.
    QUEUE_SIGS = [ :WINCH, :QUIT, :INT, :TERM, :USR1, :USR2, :HUP ].freeze
 256
 257     # defer a signal for later processing in #join (master process)
 258     def trap_deferred(signal)
 259       trap(signal) do |sig_nr|
 260         if SIG_QUEUE.size < 5
 261           SIG_QUEUE << signal
 262           awaken_master
 263         else
 264           logger.error "ignoring SIG#{signal}, queue=#{SIG_QUEUE.inspect}"
 265         end
 266       end
 267     end
 268
    # wait for a signal handler to wake us up and then consume the pipe.
    # Wake up every second anyway to run murder_lazy_workers
 271     def master_sleep
 272       begin
 273         ready = IO.select([@rd_sig], nil, nil, 1)
 274         ready && ready[0] && ready[0][0] or return
 275         loop { @rd_sig.read_nonblock(Const::CHUNK_SIZE) }
 276       rescue Errno::EAGAIN, Errno::EINTR
 277       end
 278     end
 279
 280     def awaken_master
 281       begin
 282         @wr_sig.write_nonblock('.') # wakeup master process from IO.select
 283       rescue Errno::EAGAIN, Errno::EINTR
 284         # pipe is full, master should wake up anyways
 285         retry
 286       end
 287     end
 288
 289     # reaps all unreaped workers
 290     def reap_all_workers
 291       begin
 292         loop do
 293           pid, status = Process.waitpid2(-1, Process::WNOHANG)
 294           pid or break
 295           if @reexec_pid == pid
 296             logger.error "reaped #{status.inspect} exec()-ed"
 297             @reexec_pid = 0
 298             self.pid = @pid.chomp('.oldbin') if @pid
 299             proc_name 'master'
 300           else
 301             worker = @workers.delete(pid)
 302             worker.tempfile.close rescue nil
 303             logger.info "reaped #{status.inspect} " \
 304                         "worker=#{worker.nr rescue 'unknown'}"
 305           end
 306         end
 307       rescue Errno::ECHILD
 308       end
 309     end
 310
 311     # reexecutes the @start_ctx with a new binary
 312     def reexec
 313       if @reexec_pid > 0
 314         begin
 315           Process.kill(0, @reexec_pid)
 316           logger.error "reexec-ed child already running PID:#{@reexec_pid}"
 317           return
 318         rescue Errno::ESRCH
 319           @reexec_pid = 0
 320         end
 321       end
 322
 323       if @pid
 324         old_pid = "#{@pid}.oldbin"
 325         prev_pid = @pid.dup
 326         begin
 327           self.pid = old_pid  # clear the path for a new pid file
 328         rescue ArgumentError
 329           logger.error "old PID:#{valid_pid?(old_pid)} running with " \
 330                        "existing pid=#{old_pid}, refusing rexec"
 331           return
 332         rescue Object => e
 333           logger.error "error writing pid=#{old_pid} #{e.class} #{e.message}"
 334           return
 335         end
 336       end
 337
 338       @reexec_pid = fork do
 339         listener_fds = @listeners.map { |sock| sock.fileno }
 340         ENV['UNICORN_FD'] = listener_fds.join(',')
 341         Dir.chdir(@start_ctx[:cwd])
 342         cmd = [ @start_ctx[:zero] ] + @start_ctx[:argv]
 343
 344         # avoid leaking FDs we don't know about, but let before_exec
 345         # unset FD_CLOEXEC, if anything else in the app eventually
 346         # relies on FD inheritence.
 347         purgatory = [] # prevent GC of IO objects
 348         (3..1024).each do |io|
 349           next if listener_fds.include?(io)
 350           io = IO.for_fd(io) rescue nil
 351           io or next
 352           purgatory << io
 353           io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 354         end
 355         logger.info "executing #{cmd.inspect} (in #{Dir.pwd})"
 356         @before_exec.call(self) if @before_exec
 357         exec(*cmd)
 358       end
 359       proc_name 'master (old)'
 360     end
 361
 362     # forcibly terminate all workers that haven't checked in in @timeout
 363     # seconds.  The timeout is implemented using an unlinked tempfile
 364     # shared between the parent process and each worker.  The worker
 365     # runs File#chmod to modify the ctime of the tempfile.  If the ctime
 366     # is stale for >@timeout seconds, then we'll kill the corresponding
 367     # worker.
 368     def murder_lazy_workers
 369       now = Time.now
 370       @workers.each_pair do |pid, worker|
 371         (now - worker.tempfile.ctime) <= @timeout and next
 372         logger.error "worker=#{worker.nr} PID:#{pid} is too old, killing"
 373         kill_worker(:KILL, pid) # take no prisoners for @timeout violations
 374         worker.tempfile.close rescue nil
 375       end
 376     end
 377
 378     def spawn_missing_workers
 379       return if @workers.size == @worker_processes
 380       (0...@worker_processes).each do |worker_nr|
 381         @workers.values.include?(worker_nr) and next
 382         begin
 383           Dir.chdir(@start_ctx[:cwd])
 384         rescue Errno::ENOENT => err
 385           logger.fatal "#{err.inspect} (#{@start_ctx[:cwd]})"
 386           SIG_QUEUE << :QUIT # forcibly emulate SIGQUIT
 387           return
 388         end
 389         tempfile = Tempfile.new('') # as short as possible to save dir space
 390         tempfile.unlink # don't allow other processes to find or see it
 391         worker = Worker.new(worker_nr, tempfile)
 392         @before_fork.call(self, worker.nr)
 393         pid = fork { worker_loop(worker) }
 394         @workers[pid] = worker
 395       end
 396     end
 397
 398     # once a client is accepted, it is processed in its entirety here
 399     # in 3 easy steps: read request, call app, write app response
 400     def process_client(client)
 401       # one syscall less than "client.nonblock = false":
 402       client.fcntl(Fcntl::F_SETFL, File::RDWR)
 403       HttpResponse.write(client, @app.call(@request.read(client)))
 404     # if we get any error, try to write something back to the client
 405     # assuming we haven't closed the socket, but don't get hung up
 406     # if the socket is already closed or broken.  We'll always ensure
 407     # the socket is closed at the end of this function
 408     rescue EOFError,Errno::ECONNRESET,Errno::EPIPE,Errno::EINVAL,Errno::EBADF
 409       client.write_nonblock(Const::ERROR_500_RESPONSE) rescue nil
 410     rescue HttpParserError # try to tell the client they're bad
 411       client.write_nonblock(Const::ERROR_400_RESPONSE) rescue nil
 412     rescue Object => e
 413       client.write_nonblock(Const::ERROR_500_RESPONSE) rescue nil
 414       logger.error "Read error: #{e.inspect}"
 415       logger.error e.backtrace.join("\n")
 416     ensure
 417       begin
 418         client.closed? or client.close
 419       rescue Object => e
 420         logger.error "Client error: #{e.inspect}"
 421         logger.error e.backtrace.join("\n")
 422       end
 423       @request.reset
 424     end
 425
 426     # gets rid of stuff the worker has no business keeping track of
 427     # to free some resources and drops all sig handlers.
 428     # traps for USR1, USR2, and HUP may be set in the @after_fork Proc
 429     # by the user.
 430     def init_worker_process(worker)
 431       QUEUE_SIGS.each { |sig| trap(sig, 'DEFAULT') }
 432       trap(:CHLD, 'DEFAULT')
 433       SIG_QUEUE.clear
 434       proc_name "worker[#{worker.nr}]"
 435       @rd_sig.close if @rd_sig
 436       @wr_sig.close if @wr_sig
 437       @workers.values.each { |other| other.tempfile.close rescue nil }
 438       @start_ctx = @workers = @rd_sig = @wr_sig = nil
 439       @listeners.each { |sock| sock.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) }
 440       worker.tempfile.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 441       @after_fork.call(self, worker.nr) if @after_fork # can drop perms
 442       @request = HttpRequest.new(logger)
 443       build_app! unless @preload_app
 444     end
 445
 446     # runs inside each forked worker, this sits around and waits
 447     # for connections and doesn't die until the parent dies (or is
 448     # given a INT, QUIT, or TERM signal)
    #
    # The loop runs while +alive+ is true and our parent is still the
    # master that forked us.  QUIT and USR1 are handled through flags set
    # in the trap blocks; both handlers close a descriptor that IO.select
    # is watching so that a sleeping select returns.
    def worker_loop(worker)
      init_worker_process(worker)
      nr = 0 # counter fed to tempfile.chmod, see below
      tempfile = worker.tempfile
      alive = true
      ready = @listeners # listeners to try accept on in the next round
      client = nil
      trap(:QUIT) do
        alive = false # graceful shutdown
        @listeners.each { |sock| sock.close rescue nil } # break IO.select
      end
      # this pipe exists only so that USR1 has something to close
      reopen_logs, (rd, wr) = false, IO.pipe
      rd.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
      wr.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
      trap(:USR1) { reopen_logs = true; rd.close rescue nil } # break IO.select
      @logger.info "worker=#{worker.nr} ready"

      while alive && @master_pid == Process.ppid
        if reopen_logs
          reopen_logs = false
          @logger.info "worker=#{worker.nr} rotating logs..."
          Unicorn::Util.reopen_logs
          @logger.info "worker=#{worker.nr} done rotating logs"
          # the USR1 handler closed +rd+; replace the whole pipe
          wr.close rescue nil
          rd, wr = IO.pipe
          rd.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
          wr.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
        end
        # we're a goner in @timeout seconds anyways if tempfile.chmod
        # breaks, so don't trap the exception.  Using fchmod() since
        # futimes() is not available in base Ruby and I very strongly
        # prefer temporary files to be unlinked for security,
        # performance and reliability reasons, so utime is out.  No-op
        # changes with chmod doesn't update ctime on all filesystems; so
        # we increment our counter each and every time.
        tempfile.chmod(nr += 1)

        begin
          accepted = false
          ready.each do |sock|
            begin
              client = begin
                sock.accept_nonblock
              rescue Errno::EAGAIN
                # nothing waiting on this listener, move on to the next one
                next
              end
              accepted = true
              process_client(client)
            rescue Errno::ECONNABORTED
              # client closed the socket even before accept
              client.close rescue nil
            end
            tempfile.chmod(nr += 1)
            break if reopen_logs
          end
          client = nil

          # make the following bet: if we accepted clients this round,
          # we're probably reasonably busy, so avoid calling select(2)
          # and try to do a blind non-blocking accept(2) on everything
          # before we sleep again in select
          if accepted || reopen_logs
            ready = @listeners
          else
            begin
              tempfile.chmod(nr += 1)
              # timeout used so we can detect parent death:
              ret = IO.select(@listeners, nil, [rd], @timeout/2.0) or next
              ready = ret[0]
            rescue Errno::EINTR
              ready = @listeners
            rescue Errno::EBADF => e
              # a watched descriptor was closed by one of the traps above:
              # carry on for a log rotation, otherwise leave (status 0
              # when shutting down gracefully, 1 when not)
              reopen_logs or exit(alive ? 1 : 0)
            end
          end
        rescue SystemExit => e
          # let the exit above through instead of logging it below
          exit(e.status)
        rescue Object => e
          # errors during a graceful shutdown are not reported
          if alive
            logger.error "Unhandled listen loop exception #{e.inspect}."
            logger.error e.backtrace.join("\n")
          end
        end
      end
    end
 534
 535     # delivers a signal to a worker and fails gracefully if the worker
 536     # is no longer running.
 537     def kill_worker(signal, pid)
 538       begin
 539         Process.kill(signal, pid)
 540       rescue Errno::ESRCH
 541         worker = @workers.delete(pid) and worker.tempfile.close rescue nil
 542       end
 543     end
 544
 545     # delivers a signal to each worker
 546     def kill_each_worker(signal)
 547       @workers.keys.each { |pid| kill_worker(signal, pid) }
 548     end
 549
 550     # unlinks a PID file at given +path+ if it contains the current PID
 551     # useful as an at_exit handler.
 552     def unlink_pid_safe(path)
 553       (File.read(path).to_i == $$ and File.unlink(path)) rescue nil
 554     end
 555
 556     # returns a PID if a given path contains a non-stale PID file,
 557     # nil otherwise.
 558     def valid_pid?(path)
 559       if File.exist?(path) && (pid = File.read(path).to_i) > 1
 560         begin
 561           Process.kill(0, pid)
 562           return pid
 563         rescue Errno::ESRCH
 564         end
 565       end
 566       nil
 567     end
 568
 569     def load_config!
 570       begin
 571         logger.info "reloading config_file=#{@config.config_file}"
 572         @config[:listeners].replace(@init_listeners)
 573         @config.reload
 574         @config.commit!(self)
 575         kill_each_worker(:QUIT)
 576         logger.info "done reloading config_file=#{@config.config_file}"
 577       rescue Object => e
 578         logger.error "error reloading config_file=#{@config.config_file}: " \
 579                      "#{e.class} #{e.message}"
 580       end
 581     end
 582
 583     # returns an array of string names for the given listener array
 584     def listener_names(listeners = @listeners)
 585       listeners.map { |io| sock_name(io) }
 586     end
 587
 588     def build_app!
 589       @app = @app.call if @app.respond_to?(:arity) && @app.arity == 0
 590     end
 591
 592     def proc_name(tag)
 593       $0 = ([ File.basename(@start_ctx[:zero]), tag ] +
 594               @start_ctx[:argv]).join(' ')
 595     end
 596
 597   end
 598 end