lib/unicorn.rb

   1 require 'logger'
   2 require 'fcntl'
   3
   4 require 'unicorn/socket'
   5 require 'unicorn/const'
   6 require 'unicorn/http_request'
   7 require 'unicorn/http_response'
   8 require 'unicorn/configurator'
   9 require 'unicorn/util'
  10
  11 # The Unicorn module contains all of the classes (including C extensions) for
  12 # running a Unicorn web server.  It contains a minimalist HTTP server with just
  13 # enough functionality to service web application requests as fast as possible.
  14 module Unicorn
  15   class << self
  16     def run(app, options = {})
  17       HttpServer.new(app, options).start.join
  18     end
  19   end
  20
  21   # This is the process manager of Unicorn. This manages worker
  22   # processes which in turn handle the I/O and application process.
  23   # Listener sockets are started in the master process and shared with
  24   # forked worker children.
  25   class HttpServer
  26     attr_reader :logger
  27     include ::Unicorn::SocketHelper
  28
  29     SIG_QUEUE = [] # signals queued by trap_deferred, consumed one at a time in #join
         # Snapshot of how this process was started (arguments, working
         # directory and program name).  #reexec uses it to build the command
         # it exec()s, and proc_name uses :zero and :argv to build $0.
         # Frozen; #initialize works on a dup.
  30     DEFAULT_START_CTX = {
  31       :argv => ARGV.map { |arg| arg.dup },
  32       # don't rely on Dir.pwd here since it's not symlink-aware, and
  33       # symlink dirs are the default with Capistrano...
  34       :cwd => `/bin/sh -c pwd`.chomp("\n"),
  35       :zero => $0.dup,
  36     }.freeze
  37
  38     Worker = Struct.new(:nr, :tempfile) unless defined?(Worker)
  39     class Worker
  40       # worker objects may be compared to just plain numbers
  41       def ==(other_nr)
  42         self.nr == other_nr
  43       end
  44     end
  45
  46     # Creates a working server on host:port (strange things happen if
  47     # port isn't a Number).  Use HttpServer::run to start the server and
  48     # HttpServer.workers.join to join the thread that's processing
  49     # incoming requests on the socket.
  50     def initialize(app, options = {})
  51       start_ctx = options.delete(:start_ctx)
  52       @start_ctx = DEFAULT_START_CTX.dup
  53       @start_ctx.merge!(start_ctx) if start_ctx
  54       @app = app
  55       @workers = Hash.new
  56       @io_purgatory = [] # prevents IO objects in here from being GC-ed
  57       @request = @rd_sig = @wr_sig = nil
  58       @reexec_pid = 0
  59       @init_listeners = options[:listeners] ? options[:listeners].dup : []
  60       @config = Configurator.new(options.merge(:use_defaults => true))
  61       @listener_opts = {}
  62       @config.commit!(self, :skip => [:listeners, :pid])
  63       @listeners = []
  64     end
  65
  66     # Runs the thing.  Returns self so you can run join on it
  67     def start
           # Order of operations: adopt inherited sockets, bind any remaining
           # configured listeners, write the pid file, optionally preload the
           # app, redirect stderr/stdout, then fork the workers.
  68       BasicSocket.do_not_reverse_lookup = true
  69
  70       # inherit sockets from parents, they need to be plain Socket objects
  71       # before they become UNIXServer or TCPServer
           # (UNICORN_FD is a comma-separated list of descriptor numbers; it
           # is set by #reexec in the child just before exec)
  72       inherited = ENV['UNICORN_FD'].to_s.split(/,/).map do |fd|
  73         io = Socket.for_fd(fd.to_i)
  74         set_server_sockopt(io, @listener_opts[sock_name(io)])
  75         @io_purgatory << io
  76         logger.info "inherited addr=#{sock_name(io)} fd=#{fd}"
  77         server_cast(io)
  78       end
  79
  80       config_listeners = @config[:listeners].dup
  81       @listeners.replace(inherited)
  82
  83       # we start out with generic Socket objects that get cast to either
  84       # TCPServer or UNIXServer objects; but since the Socket objects
  85       # share the same OS-level file descriptor as the higher-level *Server
  86       # objects; we need to prevent Socket objects from being garbage-collected
           # (that is what @io_purgatory above is for).
           # Only bind configured addresses that were not already inherited;
           # fall back to the default listen address if nothing is left at all.
  87       config_listeners -= listener_names
  88       if config_listeners.empty? && @listeners.empty?
  89         config_listeners << Unicorn::Const::DEFAULT_LISTEN
  90       end
  91       config_listeners.each { |addr| listen(addr) }
  92       raise ArgumentError, "no listeners" if @listeners.empty?
           # writes the pid file via #pid= (raises ArgumentError if another
           # live process already owns it)
  93       self.pid = @config[:pid]
  94       build_app! if @preload_app
           # redirect the standard streams (append mode) when paths are configured
  95       File.open(@stderr_path, "a") { |fp| $stderr.reopen(fp) } if @stderr_path
  96       File.open(@stdout_path, "a") { |fp| $stdout.reopen(fp) } if @stdout_path
  97       $stderr.sync = $stdout.sync = true
  98       spawn_missing_workers
           # return self so callers can chain #join
  99       self
 100     end
 101
 102     # replaces current listener set with +listeners+.  This will
 103     # close the socket if it will not exist in the new listener set
 104     def listeners=(listeners)
           # cur_names:  names of current listeners that are still usable
           # dead_names: names of current listeners that must be closed
 105       cur_names, dead_names = [], []
 106       listener_names.each do |name|
             # names starting with "/" are treated as filesystem socket paths
 107         if "/" == name[0..0]
 108           # mark unlinked sockets as dead so we can rebind them
 109           (File.socket?(name) ? cur_names : dead_names) << name
 110         else
 111           cur_names << name
 112         end
 113       end
 114       set_names = listener_names(listeners)
           # anything currently open but absent from the requested set is dead too
 115       dead_names += cur_names - set_names
 116       dead_names.uniq!
 117
 118       @listeners.delete_if do |io|
 119         if dead_names.include?(sock_name(io))
               # also close and drop any purgatory object sharing this
               # descriptor; each expression evaluates to true so that
               # delete_if removes the entry
 120           @io_purgatory.delete_if do |pio|
 121             pio.fileno == io.fileno && (pio.close rescue nil).nil? # true
 122           end
 123           (io.close rescue nil).nil? # true
 124         else
               # surviving listener: re-apply its socket options and keep it
 125           set_server_sockopt(io, @listener_opts[sock_name(io)])
 126           false
 127         end
 128       end
 129
           # bind every requested address that is not currently listening
 130       (set_names - cur_names).each { |addr| listen(addr) }
 131     end
 132
 133     # sets the path for the PID file of the master process
 134     def pid=(path)
 135       if path
 136         if x = valid_pid?(path)
 137           return path if @pid && path == @pid && x == $$
 138           raise ArgumentError, "Already running on PID:#{x} " \
 139                                "(or pid=#{path} is stale)"
 140         end
 141       end
 142       unlink_pid_safe(@pid) if @pid
 143       File.open(path, 'wb') { |fp| fp.syswrite("#$$\n") } if path
 144       @pid = path
 145     end
 146
 147     # add a given address to the +listeners+ set, idempotently
 148     # Allows workers to add a private, per-process listener via the
 149     # @after_fork hook.  Very useful for debugging and testing.
 150     def listen(address, opt = {}.merge(@listener_opts[address] || {}))
 151       return if String === address && listener_names.include?(address)
 152
 153       if io = bind_listen(address, opt)
 154         unless TCPServer === io || UNIXServer === io
 155           @io_purgatory << io
 156           io = server_cast(io)
 157         end
 158         logger.info "listening on addr=#{sock_name(io)} fd=#{io.fileno}"
 159         @listeners << io
 160       else
 161         logger.error "adding listener failed addr=#{address} (in use)"
 162         raise Errno::EADDRINUSE, address
 163       end
 164     end
 165
 166     # monitors children and receives signals forever
 167     # (or until a termination signal is sent).  This handles signals
 168     # one at a time and we'll happily drop signals in case somebody
 169     # is signalling us too often.
 170     def join
 171       # this pipe is used to wake us up from select(2) in #join when signals
 172       # are trapped.  See trap_deferred
 173       @rd_sig, @wr_sig = IO.pipe unless (@rd_sig && @wr_sig)
 174       mode = nil
           # when false, dead workers are not replaced (set by WINCH, reset by HUP)
 175       respawn = true
 176
 177       QUEUE_SIGS.each { |sig| trap_deferred(sig) }
           # SIGCHLD is not queued; it only wakes the loop so workers get reaped
 178       trap(:CHLD) { |sig_nr| awaken_master }
 179       proc_name 'master'
 180       logger.info "master process ready" # test_exec.rb relies on this message
 181       begin
 182         loop do
 183           reap_all_workers
               # handle at most one queued signal per iteration
 184           case (mode = SIG_QUEUE.shift)
 185           when nil
                 # idle: enforce timeouts, top up the worker pool, then sleep
 186             murder_lazy_workers
 187             spawn_missing_workers if respawn
 188             master_sleep
 189           when :QUIT # graceful shutdown
 190             break
 191           when :TERM, :INT # immediate shutdown
 192             stop(false)
 193             break
 194           when :USR1 # rotate logs
 195             logger.info "master reopening logs..."
 196             Unicorn::Util.reopen_logs
 197             logger.info "master done reopening logs"
 198             kill_each_worker(:USR1)
 199           when :USR2 # exec binary, stay alive in case something went wrong
 200             reexec
 201           when :WINCH
                 # only honoured when we appear to be daemonized (reparented
                 # to PID 1, or not the leader of our process group)
 202             if Process.ppid == 1 || Process.getpgrp != $$
 203               respawn = false
 204               logger.info "gracefully stopping all workers"
 205               kill_each_worker(:QUIT)
 206             else
 207               logger.info "SIGWINCH ignored because we're not daemonized"
 208             end
 209           when :HUP
 210             respawn = true
 211             if @config.config_file
 212               load_config!
 213               redo # immediate reaping since we may have QUIT workers
 214             else # exec binary and exit if there's no config file
 215               logger.info "config_file not present, reexecuting binary"
 216               reexec
 217               break
 218             end
 219           else
 220             logger.error "master process in unknown mode: #{mode}"
 221           end
 222         end
 223       rescue Errno::EINTR
 224         retry
 225       rescue Object => e
             # the master must keep running: log anything unexpected and
             # re-enter the loop
 226         logger.error "Unhandled master loop exception #{e.inspect}."
 227         logger.error e.backtrace.join("\n")
 228         retry
 229       end
 230       stop # gracefully shutdown all workers on our way out
 231       logger.info "master complete"
 232       unlink_pid_safe(@pid) if @pid
 233     end
 234
 235     # Terminates all workers, but does not exit master process
 236     def stop(graceful = true)
 237       kill_each_worker(graceful ? :QUIT : :TERM)
 238       timeleft = @timeout
 239       step = 0.2
 240       reap_all_workers
 241       until @workers.empty?
 242         sleep(step)
 243         reap_all_workers
 244         (timeleft -= step) > 0 and next
 245         kill_each_worker(:KILL)
 246       end
 247     ensure
 248       self.listeners = []
 249     end
 250
 251     private
 252
 253     # list of signals we care about and trap in master.
         # #join installs a deferred handler for each of these through
         # trap_deferred; init_worker_process resets them to 'DEFAULT'.
 254     QUEUE_SIGS = [ :WINCH, :QUIT, :INT, :TERM, :USR1, :USR2, :HUP ].freeze
 255
 256     # defer a signal for later processing in #join (master process)
 257     def trap_deferred(signal)
 258       trap(signal) do |sig_nr|
 259         if SIG_QUEUE.size < 5
 260           SIG_QUEUE << signal
 261           awaken_master
 262         else
 263           logger.error "ignoring SIG#{signal}, queue=#{SIG_QUEUE.inspect}"
 264         end
 265       end
 266     end
 267
 268     # wait for a signal handler to wake us up and then consume the pipe.
 269     # Wake up every second anyway to run murder_lazy_workers
 270     def master_sleep
 271       begin
 272         ready = IO.select([@rd_sig], nil, nil, 1)
 273         ready && ready[0] && ready[0][0] or return
 274         loop { @rd_sig.read_nonblock(Const::CHUNK_SIZE) }
 275       rescue Errno::EAGAIN, Errno::EINTR
 276       end
 277     end
 278
 279     def awaken_master
 280       begin
 281         @wr_sig.write_nonblock('.') # wakeup master process from IO.select
 282       rescue Errno::EAGAIN, Errno::EINTR
 283         # pipe is full, master should wake up anyways
 284         retry
 285       end
 286     end
 287
 288     # reaps all unreaped workers
 289     def reap_all_workers
 290       begin
 291         loop do
 292           pid, status = Process.waitpid2(-1, Process::WNOHANG)
 293           pid or break
 294           if @reexec_pid == pid
 295             logger.error "reaped #{status.inspect} exec()-ed"
 296             @reexec_pid = 0
 297             self.pid = @pid.chomp('.oldbin') if @pid
 298             proc_name 'master'
 299           else
 300             worker = @workers.delete(pid)
 301             worker.tempfile.close rescue nil
 302             logger.info "reaped #{status.inspect} " \
 303                         "worker=#{worker.nr rescue 'unknown'}"
 304           end
 305         end
 306       rescue Errno::ECHILD
 307       end
 308     end
 309
 310     # reexecutes the @start_ctx with a new binary
 311     def reexec
           # refuse to start a second re-exec child while one is still alive;
           # signal 0 only checks for existence
 312       if @reexec_pid > 0
 313         begin
 314           Process.kill(0, @reexec_pid)
 315           logger.error "reexec-ed child already running PID:#{@reexec_pid}"
 316           return
 317         rescue Errno::ESRCH
 318           @reexec_pid = 0
 319         end
 320       end
 321
           # rename our own PID file to "<pid>.oldbin" so that the new process
           # can create the regular one
 322       if @pid
 323         old_pid = "#{@pid}.oldbin"
             # NOTE(review): prev_pid is assigned but never read in this method
 324         prev_pid = @pid.dup
 325         begin
 326           self.pid = old_pid  # clear the path for a new pid file
 327         rescue ArgumentError
 328           logger.error "old PID:#{valid_pid?(old_pid)} running with " \
 329                        "existing pid=#{old_pid}, refusing rexec"
 330           return
 331         rescue Object => e
 332           logger.error "error writing pid=#{old_pid} #{e.class} #{e.message}"
 333           return
 334         end
 335       end
 336
           # everything inside the block runs in the forked child only
 337       @reexec_pid = fork do
             # hand the listener descriptors to the new process; #start reads
             # them back from UNICORN_FD
 338         listener_fds = @listeners.map { |sock| sock.fileno }
 339         ENV['UNICORN_FD'] = listener_fds.join(',')
 340         Dir.chdir(@start_ctx[:cwd])
 341         cmd = [ @start_ctx[:zero] ] + @start_ctx[:argv]
 342
 343         # avoid leaking FDs we don't know about, but let before_exec
 344         # unset FD_CLOEXEC, if anything else in the app eventually
 345         # relies on FD inheritance.
 346         purgatory = [] # prevent GC of IO objects
             # descriptors 3..1024 that are open and are not listeners get
             # FD_CLOEXEC; numbers that are not open descriptors are skipped
 347         (3..1024).each do |io|
 348           next if listener_fds.include?(io)
 349           io = IO.for_fd(io) rescue nil
 350           io or next
 351           purgatory << io
 352           io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 353         end
 354         logger.info "executing #{cmd.inspect} (in #{Dir.pwd})"
 355         @before_exec.call(self)
 356         exec(*cmd)
 357       end
           # back in the parent: mark ourselves as the old master
 358       proc_name 'master (old)'
 359     end
 360
 361     # forcibly terminate all workers that haven't checked in in @timeout
 362     # seconds.  The timeout is implemented using an unlinked tempfile
 363     # shared between the parent process and each worker.  The worker
 364     # runs File#chmod to modify the ctime of the tempfile.  If the ctime
 365     # is stale for >@timeout seconds, then we'll kill the corresponding
 366     # worker.
 367     def murder_lazy_workers
 368       now = Time.now
 369       @workers.each_pair do |pid, worker|
 370         (now - worker.tempfile.ctime) <= @timeout and next
 371         logger.error "worker=#{worker.nr} PID:#{pid} is too old, killing"
 372         kill_worker(:KILL, pid) # take no prisoners for @timeout violations
 373         worker.tempfile.close rescue nil
 374       end
 375     end
 376
 377     def spawn_missing_workers
 378       return if @workers.size == @worker_processes
 379       (0...@worker_processes).each do |worker_nr|
 380         @workers.values.include?(worker_nr) and next
 381         begin
 382           Dir.chdir(@start_ctx[:cwd])
 383         rescue Errno::ENOENT => err
 384           logger.fatal "#{err.inspect} (#{@start_ctx[:cwd]})"
 385           SIG_QUEUE << :QUIT # forcibly emulate SIGQUIT
 386           return
 387         end
 388         tempfile = Tempfile.new('') # as short as possible to save dir space
 389         tempfile.unlink # don't allow other processes to find or see it
 390         worker = Worker.new(worker_nr, tempfile)
 391         @before_fork.call(self, worker)
 392         pid = fork { worker_loop(worker) }
 393         @workers[pid] = worker
 394       end
 395     end
 396
 397     # once a client is accepted, it is processed in its entirety here
 398     # in 3 easy steps: read request, call app, write app response
 399     def process_client(client)
 400       # one syscall less than "client.nonblock = false":
 401       client.fcntl(Fcntl::F_SETFL, File::RDWR)
           # read the request, run the app on it, and write the app's response
 402       HttpResponse.write(client, @app.call(@request.read(client)))
 403     # if we get any error, try to write something back to the client
 404     # assuming we haven't closed the socket, but don't get hung up
 405     # if the socket is already closed or broken.  We'll always ensure
 406     # the socket is closed at the end of this function
         # connection-level errors: best-effort 500, nothing is logged
 407     rescue EOFError,Errno::ECONNRESET,Errno::EPIPE,Errno::EINVAL,Errno::EBADF
 408       client.write_nonblock(Const::ERROR_500_RESPONSE) rescue nil
 409     rescue HttpParserError # try to tell the client they're bad
 410       client.write_nonblock(Const::ERROR_400_RESPONSE) rescue nil
         # anything else: best-effort 500, and the error is logged with its backtrace
 411     rescue Object => e
 412       client.write_nonblock(Const::ERROR_500_RESPONSE) rescue nil
 413       logger.error "Read error: #{e.inspect}"
 414       logger.error e.backtrace.join("\n")
 415     ensure
           # always close the client and reset the request object for the
           # next client; errors while closing are logged, not raised
 416       begin
 417         client.closed? or client.close
 418       rescue Object => e
 419         logger.error "Client error: #{e.inspect}"
 420         logger.error e.backtrace.join("\n")
 421       end
 422       @request.reset
 423     end
 424
 425     # gets rid of stuff the worker has no business keeping track of
 426     # to free some resources and drops all sig handlers.
 427     # traps for USR1, USR2, and HUP may be set in the @after_fork Proc
 428     # by the user.
 429     def init_worker_process(worker)
           # undo the master's signal handling and forget any queued signals
 430       QUEUE_SIGS.each { |sig| trap(sig, 'DEFAULT') }
 431       trap(:CHLD, 'DEFAULT')
 432       SIG_QUEUE.clear
 433       proc_name "worker[#{worker.nr}]"
           # the master's wake-up pipe is of no use in a worker
 434       @rd_sig.close if @rd_sig
 435       @wr_sig.close if @wr_sig
           # close the tempfiles of the workers recorded in @workers at fork time
 436       @workers.values.each { |other| other.tempfile.close rescue nil }
           # drop master-only state (proc_name above needed @start_ctx, so it
           # has to run before this line)
 437       @start_ctx = @workers = @rd_sig = @wr_sig = nil
           # listeners and our own tempfile are marked close-on-exec
 438       @listeners.each { |sock| sock.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) }
 439       worker.tempfile.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 440       @after_fork.call(self, worker) # can drop perms
 441       @request = HttpRequest.new(logger)
           # when the app was not preloaded by #start, build it here in the worker
 442       build_app! unless @preload_app
 443     end
 444
 445     # runs inside each forked worker, this sits around and waits
 446     # for connections and doesn't die until the parent dies (or is
 447     # given a INT, QUIT, or TERM signal)
 448     def worker_loop(worker)
 449       master_pid = Process.ppid # slightly racy, but less memory usage
 450       init_worker_process(worker)
 451       nr = 0 # this becomes negative if we need to reopen logs
 452       tempfile = worker.tempfile
 453       ready = @listeners
 454       client = nil
 455       rd, wr = IO.pipe
 456       rd.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 457       wr.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 458
 459       # closing anything we IO.select on will raise EBADF
 460       trap(:USR1) { nr = -65536; rd.close rescue nil }
 461       trap(:QUIT) { @listeners.each { |sock| sock.close rescue nil } }
 462       [:TERM, :INT].each { |sig| trap(sig) { exit(0) } } # instant shutdown
 463       @logger.info "worker=#{worker.nr} ready"
 464
 465       while master_pid == Process.ppid
 466         if nr < 0
 467           @logger.info "worker=#{worker.nr} reopening logs..."
 468           Unicorn::Util.reopen_logs
 469           @logger.info "worker=#{worker.nr} done reopening logs"
 470           wr.close rescue nil
 471           rd, wr = IO.pipe
 472           rd.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 473           wr.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 474         end
 475         # we're a goner in @timeout seconds anyways if tempfile.chmod
 476         # breaks, so don't trap the exception.  Using fchmod() since
 477         # futimes() is not available in base Ruby and I very strongly
 478         # prefer temporary files to be unlinked for security,
 479         # performance and reliability reasons, so utime is out.  No-op
 480         # changes with chmod doesn't update ctime on all filesystems; so
 481         # we change our counter each and every time (after process_client
 482         # and before IO.select).
 483         tempfile.chmod(nr = 0)
 484
 485         begin
 486           ready.each do |sock|
 487             begin
 488               client = begin
 489                 sock.accept_nonblock
 490               rescue Errno::EAGAIN
 491                 next
 492               end
 493               process_client(client)
 494             rescue Errno::ECONNABORTED
 495               # client closed the socket even before accept
 496               client.close rescue nil
 497             ensure
 498               tempfile.chmod(nr += 1) if client
 499               break if nr < 0
 500             end
 501           end
 502           client = nil
 503
 504           # make the following bet: if we accepted clients this round,
 505           # we're probably reasonably busy, so avoid calling select()
 506           # and do a speculative accept_nonblock on every listener
 507           # before we sleep again in select().
 508           if nr != 0 # (nr < 0) => reopen logs
 509             ready = @listeners
 510           else
 511             begin
 512               tempfile.chmod(nr += 1)
 513               # timeout used so we can detect parent death:
 514               ret = IO.select(@listeners, nil, [rd], @timeout/2.0) or next
 515               ready = ret[0]
 516             rescue Errno::EINTR
 517               ready = @listeners
 518             rescue Errno::EBADF => e
 519               nr < 0 or exit(@listeners[0].closed? ? 0 : 1)
 520             end
 521           end
 522         rescue SignalException, SystemExit => e
 523           raise e
 524         rescue Object => e
 525           if alive
 526             logger.error "Unhandled listen loop exception #{e.inspect}."
 527             logger.error e.backtrace.join("\n")
 528           end
 529         end
 530       end
 531     end
 532
 533     # delivers a signal to a worker and fails gracefully if the worker
 534     # is no longer running.
 535     def kill_worker(signal, pid)
 536       begin
 537         Process.kill(signal, pid)
 538       rescue Errno::ESRCH
 539         worker = @workers.delete(pid) and worker.tempfile.close rescue nil
 540       end
 541     end
 542
 543     # delivers a signal to each worker
 544     def kill_each_worker(signal)
 545       @workers.keys.each { |pid| kill_worker(signal, pid) }
 546     end
 547
 548     # unlinks a PID file at given +path+ if it contains the current PID
 549     # useful as an at_exit handler.
 550     def unlink_pid_safe(path)
 551       (File.read(path).to_i == $$ and File.unlink(path)) rescue nil
 552     end
 553
 554     # returns a PID if a given path contains a non-stale PID file,
 555     # nil otherwise.
 556     def valid_pid?(path)
 557       if File.exist?(path) && (pid = File.read(path).to_i) > 1
 558         begin
 559           Process.kill(0, pid)
 560           return pid
 561         rescue Errno::ESRCH
 562         end
 563       end
 564       nil
 565     end
 566
 567     def load_config!
 568       begin
 569         logger.info "reloading config_file=#{@config.config_file}"
 570         @config[:listeners].replace(@init_listeners)
 571         @config.reload
 572         @config.commit!(self)
 573         kill_each_worker(:QUIT)
 574         logger.info "done reloading config_file=#{@config.config_file}"
 575       rescue Object => e
 576         logger.error "error reloading config_file=#{@config.config_file}: " \
 577                      "#{e.class} #{e.message}"
 578       end
 579     end
 580
 581     # returns an array of string names for the given listener array
 582     def listener_names(listeners = @listeners)
 583       listeners.map { |io| sock_name(io) }
 584     end
 585
 586     def build_app!
 587       @app = @app.call if @app.respond_to?(:arity) && @app.arity == 0
 588     end
 589
 590     def proc_name(tag)
 591       $0 = ([ File.basename(@start_ctx[:zero]), tag ] +
 592               @start_ctx[:argv]).join(' ')
 593     end
 594
 595   end
 596 end