lib/unicorn.rb

   1 require 'fcntl'
   2
   3 require 'unicorn/socket_helper'
   4 require 'unicorn/const'
   5 require 'unicorn/http_request'
   6 require 'unicorn/http_response'
   7 require 'unicorn/configurator'
   8 require 'unicorn/util'
   9
# Unicorn module containing all of the classes (including C extensions) for
# running a Unicorn web server.  It contains a minimalist HTTP server with just
# enough functionality to service web application requests as fast as possible.
  13 module Unicorn
  # Convenience entry point: builds an HttpServer for +app+ with +options+,
  # starts it and then waits in HttpServer#join.  Returns whatever #join
  # returns.
  def self.run(app, options = {})
    server = HttpServer.new(app, options)
    server.start.join
  end
  19
  20   # This is the process manager of Unicorn. This manages worker
  21   # processes which in turn handle the I/O and application process.
  22   # Listener sockets are started in the master process and shared with
  23   # forked worker children.
  24   class HttpServer
  25     attr_reader :logger
  26     include ::Unicorn::SocketHelper
  27
  28     IO_PURGATORY = [] # prevents IO objects in here from being GC-ed
  29     SIG_QUEUE = []
  30     START_CTX = {
  31       :argv => ARGV.map { |arg| arg.dup },
  32       # don't rely on Dir.pwd here since it's not symlink-aware, and
  33       # symlink dirs are the default with Capistrano...
  34       :cwd => `/bin/sh -c pwd`.chomp("\n"),
  35       :zero => $0.dup,
  36     }
  37
  38     Worker = Struct.new(:nr, :tempfile) unless defined?(Worker)
  39     class Worker
  40       # worker objects may be compared to just plain numbers
  41       def ==(other_nr)
  42         self.nr == other_nr
  43       end
  44     end
  45
    # Creates the master for the given +app+.  Nothing is bound or forked
    # here: +options+ are handed to Configurator and the resulting settings
    # are committed onto this object.  Call #start to set up the listeners
    # and spawn workers, then #join to run the master loop.
  50     def initialize(app, options = {})
  51       @app = app
  52       @workers = Hash.new
  53       @request = @rd_sig = @wr_sig = nil
  54       @reexec_pid = 0
  55       @init_listeners = options[:listeners] ? options[:listeners].dup : []
  56       @config = Configurator.new(options.merge(:use_defaults => true))
  57       @listener_opts = {}
  58       @config.commit!(self, :skip => [:listeners, :pid])
  59       @listeners = []
  60     end
  61
  62     # Runs the thing.  Returns self so you can run join on it
  63     def start
  64       BasicSocket.do_not_reverse_lookup = true
  65
  66       # inherit sockets from parents, they need to be plain Socket objects
  67       # before they become UNIXServer or TCPServer
  68       inherited = ENV['UNICORN_FD'].to_s.split(/,/).map do |fd|
  69         io = Socket.for_fd(fd.to_i)
  70         set_server_sockopt(io, @listener_opts[sock_name(io)])
  71         IO_PURGATORY << io
  72         logger.info "inherited addr=#{sock_name(io)} fd=#{fd}"
  73         server_cast(io)
  74       end
  75
  76       config_listeners = @config[:listeners].dup
  77       @listeners.replace(inherited)
  78
  79       # we start out with generic Socket objects that get cast to either
  80       # TCPServer or UNIXServer objects; but since the Socket objects
  81       # share the same OS-level file descriptor as the higher-level *Server
  82       # objects; we need to prevent Socket objects from being garbage-collected
  83       config_listeners -= listener_names
  84       if config_listeners.empty? && @listeners.empty?
  85         config_listeners << Unicorn::Const::DEFAULT_LISTEN
  86       end
  87       config_listeners.each { |addr| listen(addr) }
  88       raise ArgumentError, "no listeners" if @listeners.empty?
  89       self.pid = @config[:pid]
  90       build_app! if @preload_app
  91       File.open(@stderr_path, "a") { |fp| $stderr.reopen(fp) } if @stderr_path
  92       File.open(@stdout_path, "a") { |fp| $stdout.reopen(fp) } if @stdout_path
  93       $stderr.sync = $stdout.sync = true
  94       spawn_missing_workers
  95       self
  96     end
  97
  98     # replaces current listener set with +listeners+.  This will
  99     # close the socket if it will not exist in the new listener set
 100     def listeners=(listeners)
 101       cur_names, dead_names = [], []
 102       listener_names.each do |name|
 103         if "/" == name[0..0]
 104           # mark unlinked sockets as dead so we can rebind them
 105           (File.socket?(name) ? cur_names : dead_names) << name
 106         else
 107           cur_names << name
 108         end
 109       end
 110       set_names = listener_names(listeners)
 111       dead_names += cur_names - set_names
 112       dead_names.uniq!
 113
 114       @listeners.delete_if do |io|
 115         if dead_names.include?(sock_name(io))
 116           IO_PURGATORY.delete_if do |pio|
 117             pio.fileno == io.fileno && (pio.close rescue nil).nil? # true
 118           end
 119           (io.close rescue nil).nil? # true
 120         else
 121           set_server_sockopt(io, @listener_opts[sock_name(io)])
 122           false
 123         end
 124       end
 125
 126       (set_names - cur_names).each { |addr| listen(addr) }
 127     end
 128
 129     # sets the path for the PID file of the master process
 130     def pid=(path)
 131       if path
 132         if x = valid_pid?(path)
 133           return path if @pid && path == @pid && x == $$
 134           raise ArgumentError, "Already running on PID:#{x} " \
 135                                "(or pid=#{path} is stale)"
 136         end
 137       end
 138       unlink_pid_safe(@pid) if @pid
 139       File.open(path, 'wb') { |fp| fp.syswrite("#$$\n") } if path
 140       @pid = path
 141     end
 142
 143     # add a given address to the +listeners+ set, idempotently
 144     # Allows workers to add a private, per-process listener via the
 145     # @after_fork hook.  Very useful for debugging and testing.
 146     def listen(address, opt = {}.merge(@listener_opts[address] || {}))
 147       return if String === address && listener_names.include?(address)
 148
 149       if io = bind_listen(address, opt)
 150         unless TCPServer === io || UNIXServer === io
 151           IO_PURGATORY << io
 152           io = server_cast(io)
 153         end
 154         logger.info "listening on addr=#{sock_name(io)} fd=#{io.fileno}"
 155         @listeners << io
 156       else
 157         logger.error "adding listener failed addr=#{address} (in use)"
 158         raise Errno::EADDRINUSE, address
 159       end
 160     end
 161
    # monitors children and receives signals forever
    # (or until a termination signal is sent).  This handles signals
    # one at a time, and we'll happily drop signals in case somebody
    # is signalling us too often.
 166     def join
 167       # this pipe is used to wake us up from select(2) in #join when signals
 168       # are trapped.  See trap_deferred
 169       @rd_sig, @wr_sig = IO.pipe unless (@rd_sig && @wr_sig)
 170       mode = nil
 171       respawn = true
 172
 173       QUEUE_SIGS.each { |sig| trap_deferred(sig) }
 174       trap(:CHLD) { |sig_nr| awaken_master }
 175       proc_name 'master'
 176       logger.info "master process ready" # test_exec.rb relies on this message
 177       begin
 178         loop do
 179           reap_all_workers
 180           case (mode = SIG_QUEUE.shift)
 181           when nil
 182             murder_lazy_workers
 183             spawn_missing_workers if respawn
 184             master_sleep
 185           when :QUIT # graceful shutdown
 186             break
 187           when :TERM, :INT # immediate shutdown
 188             stop(false)
 189             break
 190           when :USR1 # rotate logs
 191             logger.info "master reopening logs..."
 192             Unicorn::Util.reopen_logs
 193             logger.info "master done reopening logs"
 194             kill_each_worker(:USR1)
 195           when :USR2 # exec binary, stay alive in case something went wrong
 196             reexec
 197           when :WINCH
 198             if Process.ppid == 1 || Process.getpgrp != $$
 199               respawn = false
 200               logger.info "gracefully stopping all workers"
 201               kill_each_worker(:QUIT)
 202             else
 203               logger.info "SIGWINCH ignored because we're not daemonized"
 204             end
 205           when :HUP
 206             respawn = true
 207             if @config.config_file
 208               load_config!
 209               redo # immediate reaping since we may have QUIT workers
 210             else # exec binary and exit if there's no config file
 211               logger.info "config_file not present, reexecuting binary"
 212               reexec
 213               break
 214             end
 215           else
 216             logger.error "master process in unknown mode: #{mode}"
 217           end
 218         end
 219       rescue Errno::EINTR
 220         retry
 221       rescue Object => e
 222         logger.error "Unhandled master loop exception #{e.inspect}."
 223         logger.error e.backtrace.join("\n")
 224         retry
 225       end
 226       stop # gracefully shutdown all workers on our way out
 227       logger.info "master complete"
 228       unlink_pid_safe(@pid) if @pid
 229     end
 230
 231     # Terminates all workers, but does not exit master process
 232     def stop(graceful = true)
 233       kill_each_worker(graceful ? :QUIT : :TERM)
 234       timeleft = @timeout
 235       step = 0.2
 236       reap_all_workers
 237       until @workers.empty?
 238         sleep(step)
 239         reap_all_workers
 240         (timeleft -= step) > 0 and next
 241         kill_each_worker(:KILL)
 242       end
 243     ensure
 244       self.listeners = []
 245     end
 246
 247     private
 248
 249     # list of signals we care about and trap in master.
 250     QUEUE_SIGS = [ :WINCH, :QUIT, :INT, :TERM, :USR1, :USR2, :HUP ].freeze
 251
 252     # defer a signal for later processing in #join (master process)
 253     def trap_deferred(signal)
 254       trap(signal) do |sig_nr|
 255         if SIG_QUEUE.size < 5
 256           SIG_QUEUE << signal
 257           awaken_master
 258         else
 259           logger.error "ignoring SIG#{signal}, queue=#{SIG_QUEUE.inspect}"
 260         end
 261       end
 262     end
 263
    # wait for a signal handler to wake us up and then consume the pipe.
    # Wake up every second anyway to run murder_lazy_workers
 266     def master_sleep
 267       begin
 268         ready = IO.select([@rd_sig], nil, nil, 1)
 269         ready && ready[0] && ready[0][0] or return
 270         loop { @rd_sig.read_nonblock(Const::CHUNK_SIZE) }
 271       rescue Errno::EAGAIN, Errno::EINTR
 272       end
 273     end
 274
 275     def awaken_master
 276       begin
 277         @wr_sig.write_nonblock('.') # wakeup master process from IO.select
 278       rescue Errno::EAGAIN, Errno::EINTR
 279         # pipe is full, master should wake up anyways
 280         retry
 281       end
 282     end
 283
 284     # reaps all unreaped workers
 285     def reap_all_workers
 286       begin
 287         loop do
 288           pid, status = Process.waitpid2(-1, Process::WNOHANG)
 289           pid or break
 290           if @reexec_pid == pid
 291             logger.error "reaped #{status.inspect} exec()-ed"
 292             @reexec_pid = 0
 293             self.pid = @pid.chomp('.oldbin') if @pid
 294             proc_name 'master'
 295           else
 296             worker = @workers.delete(pid)
 297             worker.tempfile.close rescue nil
 298             logger.info "reaped #{status.inspect} " \
 299                         "worker=#{worker.nr rescue 'unknown'}"
 300           end
 301         end
 302       rescue Errno::ECHILD
 303       end
 304     end
 305
 306     # reexecutes the START_CTX with a new binary
 307     def reexec
 308       if @reexec_pid > 0
 309         begin
 310           Process.kill(0, @reexec_pid)
 311           logger.error "reexec-ed child already running PID:#{@reexec_pid}"
 312           return
 313         rescue Errno::ESRCH
 314           @reexec_pid = 0
 315         end
 316       end
 317
 318       if @pid
 319         old_pid = "#{@pid}.oldbin"
 320         prev_pid = @pid.dup
 321         begin
 322           self.pid = old_pid  # clear the path for a new pid file
 323         rescue ArgumentError
 324           logger.error "old PID:#{valid_pid?(old_pid)} running with " \
 325                        "existing pid=#{old_pid}, refusing rexec"
 326           return
 327         rescue Object => e
 328           logger.error "error writing pid=#{old_pid} #{e.class} #{e.message}"
 329           return
 330         end
 331       end
 332
 333       @reexec_pid = fork do
 334         listener_fds = @listeners.map { |sock| sock.fileno }
 335         ENV['UNICORN_FD'] = listener_fds.join(',')
 336         Dir.chdir(START_CTX[:cwd])
 337         cmd = [ START_CTX[:zero] ] + START_CTX[:argv]
 338
 339         # avoid leaking FDs we don't know about, but let before_exec
 340         # unset FD_CLOEXEC, if anything else in the app eventually
 341         # relies on FD inheritence.
 342         (3..1024).each do |io|
 343           next if listener_fds.include?(io)
 344           io = IO.for_fd(io) rescue nil
 345           io or next
 346           IO_PURGATORY << io
 347           io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 348         end
 349         logger.info "executing #{cmd.inspect} (in #{Dir.pwd})"
 350         @before_exec.call(self)
 351         exec(*cmd)
 352       end
 353       proc_name 'master (old)'
 354     end
 355
 356     # forcibly terminate all workers that haven't checked in in @timeout
 357     # seconds.  The timeout is implemented using an unlinked tempfile
 358     # shared between the parent process and each worker.  The worker
 359     # runs File#chmod to modify the ctime of the tempfile.  If the ctime
 360     # is stale for >@timeout seconds, then we'll kill the corresponding
 361     # worker.
 362     def murder_lazy_workers
 363       now = Time.now
 364       @workers.each_pair do |pid, worker|
 365         (now - worker.tempfile.ctime) <= @timeout and next
 366         logger.error "worker=#{worker.nr} PID:#{pid} is too old, killing"
 367         kill_worker(:KILL, pid) # take no prisoners for @timeout violations
 368         worker.tempfile.close rescue nil
 369       end
 370     end
 371
 372     def spawn_missing_workers
 373       return if @workers.size == @worker_processes
 374       (0...@worker_processes).each do |worker_nr|
 375         @workers.values.include?(worker_nr) and next
 376         begin
 377           Dir.chdir(START_CTX[:cwd])
 378         rescue Errno::ENOENT => err
 379           logger.fatal "#{err.inspect} (#{START_CTX[:cwd]})"
 380           SIG_QUEUE << :QUIT # forcibly emulate SIGQUIT
 381           return
 382         end
 383         tempfile = Tempfile.new('') # as short as possible to save dir space
 384         tempfile.unlink # don't allow other processes to find or see it
 385         worker = Worker.new(worker_nr, tempfile)
 386         @before_fork.call(self, worker)
 387         pid = fork { worker_loop(worker) }
 388         @workers[pid] = worker
 389       end
 390     end
 391
 392     # once a client is accepted, it is processed in its entirety here
 393     # in 3 easy steps: read request, call app, write app response
 394     def process_client(client)
 395       # one syscall less than "client.nonblock = false":
 396       client.fcntl(Fcntl::F_SETFL, File::RDWR)
 397       HttpResponse.write(client, @app.call(@request.read(client)))
 398     # if we get any error, try to write something back to the client
 399     # assuming we haven't closed the socket, but don't get hung up
 400     # if the socket is already closed or broken.  We'll always ensure
 401     # the socket is closed at the end of this function
 402     rescue EOFError,Errno::ECONNRESET,Errno::EPIPE,Errno::EINVAL,Errno::EBADF
 403       client.write_nonblock(Const::ERROR_500_RESPONSE) rescue nil
 404     rescue HttpParserError # try to tell the client they're bad
 405       client.write_nonblock(Const::ERROR_400_RESPONSE) rescue nil
 406     rescue Object => e
 407       client.write_nonblock(Const::ERROR_500_RESPONSE) rescue nil
 408       logger.error "Read error: #{e.inspect}"
 409       logger.error e.backtrace.join("\n")
 410     ensure
 411       begin
 412         client.closed? or client.close
 413       rescue Object => e
 414         logger.error "Client error: #{e.inspect}"
 415         logger.error e.backtrace.join("\n")
 416       end
 417       @request.reset
 418     end
 419
 420     # gets rid of stuff the worker has no business keeping track of
 421     # to free some resources and drops all sig handlers.
 422     # traps for USR1, USR2, and HUP may be set in the @after_fork Proc
 423     # by the user.
 424     def init_worker_process(worker)
 425       QUEUE_SIGS.each { |sig| trap(sig, 'DEFAULT') }
 426       trap(:CHLD, 'DEFAULT')
 427       SIG_QUEUE.clear
 428       proc_name "worker[#{worker.nr}]"
 429       START_CTX.clear
 430       @rd_sig.close if @rd_sig
 431       @wr_sig.close if @wr_sig
 432       @workers.values.each { |other| other.tempfile.close rescue nil }
 433       @workers = @rd_sig = @wr_sig = nil
 434       @listeners.each { |sock| sock.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) }
 435       worker.tempfile.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 436       @after_fork.call(self, worker) # can drop perms
 437       @request = HttpRequest.new(logger)
 438       build_app! unless @preload_app
 439     end
 440
 441     # runs inside each forked worker, this sits around and waits
 442     # for connections and doesn't die until the parent dies (or is
 443     # given a INT, QUIT, or TERM signal)
 444     def worker_loop(worker)
 445       master_pid = Process.ppid # slightly racy, but less memory usage
 446       init_worker_process(worker)
 447       nr = 0 # this becomes negative if we need to reopen logs
 448       tempfile = worker.tempfile
 449       ready = @listeners
 450       client = nil
 451       rd, wr = IO.pipe
 452       rd.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 453       wr.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 454
 455       # closing anything we IO.select on will raise EBADF
 456       trap(:USR1) { nr = -65536; rd.close rescue nil }
 457       trap(:QUIT) { @listeners.each { |sock| sock.close rescue nil } }
 458       [:TERM, :INT].each { |sig| trap(sig) { exit(0) } } # instant shutdown
 459       @logger.info "worker=#{worker.nr} ready"
 460
 461       while master_pid == Process.ppid
 462         if nr < 0
 463           @logger.info "worker=#{worker.nr} reopening logs..."
 464           Unicorn::Util.reopen_logs
 465           @logger.info "worker=#{worker.nr} done reopening logs"
 466           wr.close rescue nil
 467           rd, wr = IO.pipe
 468           rd.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 469           wr.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 470         end
 471         # we're a goner in @timeout seconds anyways if tempfile.chmod
 472         # breaks, so don't trap the exception.  Using fchmod() since
 473         # futimes() is not available in base Ruby and I very strongly
 474         # prefer temporary files to be unlinked for security,
 475         # performance and reliability reasons, so utime is out.  No-op
 476         # changes with chmod doesn't update ctime on all filesystems; so
 477         # we change our counter each and every time (after process_client
 478         # and before IO.select).
 479         tempfile.chmod(nr = 0)
 480
 481         begin
 482           ready.each do |sock|
 483             begin
 484               client = begin
 485                 sock.accept_nonblock
 486               rescue Errno::EAGAIN
 487                 next
 488               end
 489               process_client(client)
 490             rescue Errno::ECONNABORTED
 491               # client closed the socket even before accept
 492               client.close rescue nil
 493             ensure
 494               tempfile.chmod(nr += 1) if client
 495               break if nr < 0
 496             end
 497           end
 498           client = nil
 499
 500           # make the following bet: if we accepted clients this round,
 501           # we're probably reasonably busy, so avoid calling select()
 502           # and do a speculative accept_nonblock on every listener
 503           # before we sleep again in select().
 504           if nr != 0 # (nr < 0) => reopen logs
 505             ready = @listeners
 506           else
 507             begin
 508               tempfile.chmod(nr += 1)
 509               # timeout used so we can detect parent death:
 510               ret = IO.select(@listeners, nil, [rd], @timeout/2.0) or next
 511               ready = ret[0]
 512             rescue Errno::EINTR
 513               ready = @listeners
 514             rescue Errno::EBADF => e
 515               nr < 0 or exit(@listeners[0].closed? ? 0 : 1)
 516             end
 517           end
 518         rescue SignalException, SystemExit => e
 519           raise e
 520         rescue Object => e
 521           if alive
 522             logger.error "Unhandled listen loop exception #{e.inspect}."
 523             logger.error e.backtrace.join("\n")
 524           end
 525         end
 526       end
 527     end
 528
 529     # delivers a signal to a worker and fails gracefully if the worker
 530     # is no longer running.
 531     def kill_worker(signal, pid)
 532       begin
 533         Process.kill(signal, pid)
 534       rescue Errno::ESRCH
 535         worker = @workers.delete(pid) and worker.tempfile.close rescue nil
 536       end
 537     end
 538
 539     # delivers a signal to each worker
 540     def kill_each_worker(signal)
 541       @workers.keys.each { |pid| kill_worker(signal, pid) }
 542     end
 543
 544     # unlinks a PID file at given +path+ if it contains the current PID
 545     # useful as an at_exit handler.
 546     def unlink_pid_safe(path)
 547       (File.read(path).to_i == $$ and File.unlink(path)) rescue nil
 548     end
 549
 550     # returns a PID if a given path contains a non-stale PID file,
 551     # nil otherwise.
 552     def valid_pid?(path)
 553       if File.exist?(path) && (pid = File.read(path).to_i) > 1
 554         begin
 555           Process.kill(0, pid)
 556           return pid
 557         rescue Errno::ESRCH
 558         end
 559       end
 560       nil
 561     end
 562
 563     def load_config!
 564       begin
 565         logger.info "reloading config_file=#{@config.config_file}"
 566         @config[:listeners].replace(@init_listeners)
 567         @config.reload
 568         @config.commit!(self)
 569         kill_each_worker(:QUIT)
 570         logger.info "done reloading config_file=#{@config.config_file}"
 571       rescue Object => e
 572         logger.error "error reloading config_file=#{@config.config_file}: " \
 573                      "#{e.class} #{e.message}"
 574       end
 575     end
 576
 577     # returns an array of string names for the given listener array
 578     def listener_names(listeners = @listeners)
 579       listeners.map { |io| sock_name(io) }
 580     end
 581
 582     def build_app!
 583       @app = @app.call if @app.respond_to?(:arity) && @app.arity == 0
 584     end
 585
 586     def proc_name(tag)
 587       $0 = ([ File.basename(START_CTX[:zero]), tag ] +
 588               START_CTX[:argv]).join(' ')
 589     end
 590
 591   end
 592 end