lib/unicorn.rb

   1 require 'fcntl'
   2
   3 require 'unicorn/socket_helper'
   4 require 'unicorn/const'
   5 require 'unicorn/http_request'
   6 require 'unicorn/http_response'
   7 require 'unicorn/configurator'
   8 require 'unicorn/util'
   9
  10 # Unicorn module containing all of the classes (including C extensions) for
  11 # running a Unicorn web server.  It contains a minimalist HTTP server with just
  12 # enough functionality to service web application requests as fast as possible.
  13 module Unicorn
  # Convenience entry point: builds an HttpServer for +app+ with +options+,
  # starts it, and then blocks in HttpServer#join until that returns.
  def self.run(app, options = {})
    server = HttpServer.new(app, options)
    server.start.join
  end
  19
  20   # This is the process manager of Unicorn. This manages worker
  21   # processes which in turn handle the I/O and application process.
  22   # Listener sockets are started in the master process and shared with
  23   # forked worker children.
  24   class HttpServer
  25     attr_reader :logger
  26     include ::Unicorn::SocketHelper
  27
  28     # prevents IO objects in here from being GC-ed
  29     IO_PURGATORY = []
  30
  31     # all bound listener sockets
  32     LISTENERS = []
  33
  34     # This hash maps PIDs to Workers
  35     WORKERS = {}
  36
  37     # See: http://cr.yp.to/docs/selfpipe.html
  38     SELF_PIPE = []
  39
  40     # signal queue used for self-piping
  41     SIG_QUEUE = []
  42
  43     # We populate this at startup so we can figure out how to reexecute
  44     # and upgrade the currently running instance of Unicorn
  45     START_CTX = {
  46       :argv => ARGV.map { |arg| arg.dup },
  47       # don't rely on Dir.pwd here since it's not symlink-aware, and
  48       # symlink dirs are the default with Capistrano...
  49       :cwd => `/bin/sh -c pwd`.chomp("\n"),
  50       :zero => $0.dup,
  51     }
  52
  53     Worker = Struct.new(:nr, :tempfile) unless defined?(Worker)
  54     class Worker
  55       # worker objects may be compared to just plain numbers
  56       def ==(other_nr)
  57         self.nr == other_nr
  58       end
  59     end
  60
  61     # Creates a working server on host:port (strange things happen if
  62     # port isn't a Number).  Use HttpServer::run to start the server and
  63     # HttpServer.run.join to join the thread that's processing
  64     # incoming requests on the socket.
  65     def initialize(app, options = {})
  66       @app = app
  67       @pid = nil
  68       @reexec_pid = 0
  69       @init_listeners = options[:listeners] ? options[:listeners].dup : []
  70       @config = Configurator.new(options.merge(:use_defaults => true))
  71       @listener_opts = {}
  72       @config.commit!(self, :skip => [:listeners, :pid])
  73       @request = HttpRequest.new(@logger)
  74     end
  75
  76     # Runs the thing.  Returns self so you can run join on it
  77     def start
  78       BasicSocket.do_not_reverse_lookup = true
  79
  80       # inherit sockets from parents, they need to be plain Socket objects
  81       # before they become UNIXServer or TCPServer
  82       inherited = ENV['UNICORN_FD'].to_s.split(/,/).map do |fd|
  83         io = Socket.for_fd(fd.to_i)
  84         set_server_sockopt(io, @listener_opts[sock_name(io)])
  85         IO_PURGATORY << io
  86         logger.info "inherited addr=#{sock_name(io)} fd=#{fd}"
  87         server_cast(io)
  88       end
  89
  90       config_listeners = @config[:listeners].dup
  91       LISTENERS.replace(inherited)
  92
  93       # we start out with generic Socket objects that get cast to either
  94       # TCPServer or UNIXServer objects; but since the Socket objects
  95       # share the same OS-level file descriptor as the higher-level *Server
  96       # objects; we need to prevent Socket objects from being garbage-collected
  97       config_listeners -= listener_names
  98       if config_listeners.empty? && LISTENERS.empty?
  99         config_listeners << Unicorn::Const::DEFAULT_LISTEN
 100       end
 101       config_listeners.each { |addr| listen(addr) }
 102       raise ArgumentError, "no listeners" if LISTENERS.empty?
 103       self.pid = @config[:pid]
 104       build_app! if @preload_app
 105       maintain_worker_count
 106       self
 107     end
 108
 109     # replaces current listener set with +listeners+.  This will
 110     # close the socket if it will not exist in the new listener set
 111     def listeners=(listeners)
 112       cur_names, dead_names = [], []
 113       listener_names.each do |name|
 114         if "/" == name[0..0]
 115           # mark unlinked sockets as dead so we can rebind them
 116           (File.socket?(name) ? cur_names : dead_names) << name
 117         else
 118           cur_names << name
 119         end
 120       end
 121       set_names = listener_names(listeners)
 122       dead_names += cur_names - set_names
 123       dead_names.uniq!
 124
 125       LISTENERS.delete_if do |io|
 126         if dead_names.include?(sock_name(io))
 127           IO_PURGATORY.delete_if do |pio|
 128             pio.fileno == io.fileno && (pio.close rescue nil).nil? # true
 129           end
 130           (io.close rescue nil).nil? # true
 131         else
 132           set_server_sockopt(io, @listener_opts[sock_name(io)])
 133           false
 134         end
 135       end
 136
 137       (set_names - cur_names).each { |addr| listen(addr) }
 138     end
 139
 140     def stdout_path=(path); redirect_io($stdout, path); end
 141     def stderr_path=(path); redirect_io($stderr, path); end
 142
 143     # sets the path for the PID file of the master process
 144     def pid=(path)
 145       if path
 146         if x = valid_pid?(path)
 147           return path if @pid && path == @pid && x == $$
 148           raise ArgumentError, "Already running on PID:#{x} " \
 149                                "(or pid=#{path} is stale)"
 150         end
 151       end
 152       unlink_pid_safe(@pid) if @pid
 153       File.open(path, 'wb') { |fp| fp.syswrite("#$$\n") } if path
 154       @pid = path
 155     end
 156
 157     # add a given address to the +listeners+ set, idempotently
 158     # Allows workers to add a private, per-process listener via the
 159     # @after_fork hook.  Very useful for debugging and testing.
 160     def listen(address, opt = {}.merge(@listener_opts[address] || {}))
 161       return if String === address && listener_names.include?(address)
 162
 163       if io = bind_listen(address, opt)
 164         unless TCPServer === io || UNIXServer === io
 165           IO_PURGATORY << io
 166           io = server_cast(io)
 167         end
 168         logger.info "listening on addr=#{sock_name(io)} fd=#{io.fileno}"
 169         LISTENERS << io
 170       else
 171         logger.error "adding listener failed addr=#{address} (in use)"
 172         raise Errno::EADDRINUSE, address
 173       end
 174     end
 175
 176     # monitors children and receives signals forever
 177     # (or until a termination signal is sent).  This handles signals
 178     # one-at-a-time and we'll happily drop signals in case somebody
 179     # is signalling us too often.
 180     def join
 181       # this pipe is used to wake us up from select(2) in #join when signals
 182       # are trapped.  See trap_deferred
 183       init_self_pipe!
 184       respawn = true
 185
 186       QUEUE_SIGS.each { |sig| trap_deferred(sig) }
 187       trap(:CHLD) { |sig_nr| awaken_master }
 188       proc_name 'master'
 189       logger.info "master process ready" # test_exec.rb relies on this message
 190       begin
 191         loop do
 192           reap_all_workers
 193           case SIG_QUEUE.shift
 194           when nil
 195             murder_lazy_workers
 196             maintain_worker_count if respawn
 197             master_sleep
 198           when :QUIT # graceful shutdown
 199             break
 200           when :TERM, :INT # immediate shutdown
 201             stop(false)
 202             break
 203           when :USR1 # rotate logs
 204             logger.info "master reopening logs..."
 205             Unicorn::Util.reopen_logs
 206             logger.info "master done reopening logs"
 207             kill_each_worker(:USR1)
 208           when :USR2 # exec binary, stay alive in case something went wrong
 209             reexec
 210           when :WINCH
 211             if Process.ppid == 1 || Process.getpgrp != $$
 212               respawn = false
 213               logger.info "gracefully stopping all workers"
 214               kill_each_worker(:QUIT)
 215             else
 216               logger.info "SIGWINCH ignored because we're not daemonized"
 217             end
 218           when :TTIN
 219             @worker_processes += 1
 220           when :TTOU
 221             @worker_processes -= 1 if @worker_processes > 0
 222           when :HUP
 223             respawn = true
 224             if @config.config_file
 225               load_config!
 226               redo # immediate reaping since we may have QUIT workers
 227             else # exec binary and exit if there's no config file
 228               logger.info "config_file not present, reexecuting binary"
 229               reexec
 230               break
 231             end
 232           end
 233         end
 234       rescue Errno::EINTR
 235         retry
 236       rescue Object => e
 237         logger.error "Unhandled master loop exception #{e.inspect}."
 238         logger.error e.backtrace.join("\n")
 239         retry
 240       end
 241       stop # gracefully shutdown all workers on our way out
 242       logger.info "master complete"
 243       unlink_pid_safe(@pid) if @pid
 244     end
 245
 246     # Terminates all workers, but does not exit master process
 247     def stop(graceful = true)
 248       kill_each_worker(graceful ? :QUIT : :TERM)
 249       timeleft = @timeout
 250       step = 0.2
 251       reap_all_workers
 252       until WORKERS.empty?
 253         sleep(step)
 254         reap_all_workers
 255         (timeleft -= step) > 0 and next
 256         kill_each_worker(:KILL)
 257       end
 258     ensure
 259       self.listeners = []
 260     end
 261
 262     private
 263
 264     # list of signals we care about and trap in master.
 265     QUEUE_SIGS = [ :WINCH, :QUIT, :INT, :TERM, :USR1, :USR2, :HUP,
 266                    :TTIN, :TTOU ].freeze
 267
 268     # defer a signal for later processing in #join (master process)
 269     def trap_deferred(signal)
 270       trap(signal) do |sig_nr|
 271         if SIG_QUEUE.size < 5
 272           SIG_QUEUE << signal
 273           awaken_master
 274         else
 275           logger.error "ignoring SIG#{signal}, queue=#{SIG_QUEUE.inspect}"
 276         end
 277       end
 278     end
 279
 280     # wait for a signal handler to wake us up and then consume the pipe
 281     # Wake up every second anyways to run murder_lazy_workers
 282     def master_sleep
 283       begin
 284         ready = IO.select([SELF_PIPE.first], nil, nil, 1) or return
 285         ready.first && ready.first.first or return
 286         loop { SELF_PIPE.first.read_nonblock(Const::CHUNK_SIZE) }
 287       rescue Errno::EAGAIN, Errno::EINTR
 288       end
 289     end
 290
 291     def awaken_master
 292       begin
 293         SELF_PIPE.last.write_nonblock('.') # wakeup master process from select
 294       rescue Errno::EAGAIN, Errno::EINTR
 295         # pipe is full, master should wake up anyways
 296         retry
 297       end
 298     end
 299
 300     # reaps all unreaped workers
 301     def reap_all_workers
 302       begin
 303         loop do
 304           pid, status = Process.waitpid2(-1, Process::WNOHANG)
 305           pid or break
 306           if @reexec_pid == pid
 307             logger.error "reaped #{status.inspect} exec()-ed"
 308             @reexec_pid = 0
 309             self.pid = @pid.chomp('.oldbin') if @pid
 310             proc_name 'master'
 311           else
 312             worker = WORKERS.delete(pid)
 313             worker.tempfile.close rescue nil
 314             logger.info "reaped #{status.inspect} " \
 315                         "worker=#{worker.nr rescue 'unknown'}"
 316           end
 317         end
 318       rescue Errno::ECHILD
 319       end
 320     end
 321
 322     # reexecutes the START_CTX with a new binary
 323     def reexec
 324       if @reexec_pid > 0
 325         begin
 326           Process.kill(0, @reexec_pid)
 327           logger.error "reexec-ed child already running PID:#{@reexec_pid}"
 328           return
 329         rescue Errno::ESRCH
 330           @reexec_pid = 0
 331         end
 332       end
 333
 334       if @pid
 335         old_pid = "#{@pid}.oldbin"
 336         prev_pid = @pid.dup
 337         begin
 338           self.pid = old_pid  # clear the path for a new pid file
 339         rescue ArgumentError
 340           logger.error "old PID:#{valid_pid?(old_pid)} running with " \
 341                        "existing pid=#{old_pid}, refusing rexec"
 342           return
 343         rescue Object => e
 344           logger.error "error writing pid=#{old_pid} #{e.class} #{e.message}"
 345           return
 346         end
 347       end
 348
 349       @reexec_pid = fork do
 350         listener_fds = LISTENERS.map { |sock| sock.fileno }
 351         ENV['UNICORN_FD'] = listener_fds.join(',')
 352         Dir.chdir(START_CTX[:cwd])
 353         cmd = [ START_CTX[:zero] ] + START_CTX[:argv]
 354
 355         # avoid leaking FDs we don't know about, but let before_exec
 356         # unset FD_CLOEXEC, if anything else in the app eventually
 357         # relies on FD inheritence.
 358         (3..1024).each do |io|
 359           next if listener_fds.include?(io)
 360           io = IO.for_fd(io) rescue nil
 361           io or next
 362           IO_PURGATORY << io
 363           io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 364         end
 365         logger.info "executing #{cmd.inspect} (in #{Dir.pwd})"
 366         @before_exec.call(self)
 367         exec(*cmd)
 368       end
 369       proc_name 'master (old)'
 370     end
 371
 372     # forcibly terminate all workers that haven't checked in in @timeout
 373     # seconds.  The timeout is implemented using an unlinked tempfile
 374     # shared between the parent process and each worker.  The worker
 375     # runs File#chmod to modify the ctime of the tempfile.  If the ctime
 376     # is stale for >@timeout seconds, then we'll kill the corresponding
 377     # worker.
 378     def murder_lazy_workers
 379       WORKERS.each_pair do |pid, worker|
 380         Time.now - worker.tempfile.ctime <= @timeout and next
 381         logger.error "worker=#{worker.nr} PID:#{pid} is too old, killing"
 382         kill_worker(:KILL, pid) # take no prisoners for @timeout violations
 383         worker.tempfile.close rescue nil
 384       end
 385     end
 386
 387     def spawn_missing_workers
 388       (0...@worker_processes).each do |worker_nr|
 389         WORKERS.values.include?(worker_nr) and next
 390         begin
 391           Dir.chdir(START_CTX[:cwd])
 392         rescue Errno::ENOENT => err
 393           logger.fatal "#{err.inspect} (#{START_CTX[:cwd]})"
 394           SIG_QUEUE << :QUIT # forcibly emulate SIGQUIT
 395           return
 396         end
 397         tempfile = Tempfile.new(nil) # as short as possible to save dir space
 398         tempfile.unlink # don't allow other processes to find or see it
 399         worker = Worker.new(worker_nr, tempfile)
 400         @before_fork.call(self, worker)
 401         pid = fork { worker_loop(worker) }
 402         WORKERS[pid] = worker
 403       end
 404     end
 405
 406     def maintain_worker_count
 407       (off = WORKERS.size - @worker_processes) == 0 and return
 408       off < 0 and return spawn_missing_workers
 409       WORKERS.each_pair { |pid,w|
 410         w.nr >= @worker_processes and kill_worker(:QUIT, pid) rescue nil
 411       }
 412     end
 413
 414     # once a client is accepted, it is processed in its entirety here
 415     # in 3 easy steps: read request, call app, write app response
 416     def process_client(client)
 417       HttpResponse.write(client, @app.call(@request.read(client)))
 418     # if we get any error, try to write something back to the client
 419     # assuming we haven't closed the socket, but don't get hung up
 420     # if the socket is already closed or broken.  We'll always ensure
 421     # the socket is closed at the end of this function
 422     rescue EOFError,Errno::ECONNRESET,Errno::EPIPE,Errno::EINVAL,Errno::EBADF
 423       client.write_nonblock(Const::ERROR_500_RESPONSE) rescue nil
 424       client.close rescue nil
 425     rescue HttpParserError # try to tell the client they're bad
 426       client.write_nonblock(Const::ERROR_400_RESPONSE) rescue nil
 427       client.close rescue nil
 428     rescue Object => e
 429       client.write_nonblock(Const::ERROR_500_RESPONSE) rescue nil
 430       client.close rescue nil
 431       logger.error "Read error: #{e.inspect}"
 432       logger.error e.backtrace.join("\n")
 433     end
 434
 435     # gets rid of stuff the worker has no business keeping track of
 436     # to free some resources and drops all sig handlers.
 437     # traps for USR1, USR2, and HUP may be set in the @after_fork Proc
 438     # by the user.
 439     def init_worker_process(worker)
 440       QUEUE_SIGS.each { |sig| trap(sig, 'IGNORE') }
 441       trap(:CHLD, 'DEFAULT')
 442       SIG_QUEUE.clear
 443       proc_name "worker[#{worker.nr}]"
 444       START_CTX.clear
 445       init_self_pipe!
 446       WORKERS.values.each { |other| other.tempfile.close! rescue nil }
 447       WORKERS.clear
 448       LISTENERS.each { |sock| sock.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) }
 449       worker.tempfile.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
 450       @after_fork.call(self, worker) # can drop perms
 451       @timeout /= 2.0 # halve it for select()
 452       build_app! unless @config[:preload_app]
 453     end
 454
 455     def reopen_worker_logs(worker_nr)
 456       @logger.info "worker=#{worker_nr} reopening logs..."
 457       Unicorn::Util.reopen_logs
 458       @logger.info "worker=#{worker_nr} done reopening logs"
 459       init_self_pipe!
 460     end
 461
 462     # runs inside each forked worker, this sits around and waits
 463     # for connections and doesn't die until the parent dies (or is
 464     # given a INT, QUIT, or TERM signal)
 465     def worker_loop(worker)
 466       master_pid = Process.ppid # slightly racy, but less memory usage
 467       init_worker_process(worker)
 468       nr = 0 # this becomes negative if we need to reopen logs
 469       alive = worker.tempfile # tempfile is our lifeline to the master process
 470       ready = LISTENERS
 471       t = ti = Time.now.to_i
 472
 473       # closing anything we IO.select on will raise EBADF
 474       trap(:USR1) { nr = -65536; SELF_PIPE.first.close rescue nil }
 475       trap(:QUIT) { alive = nil; LISTENERS.each { |s| s.close rescue nil } }
 476       [:TERM, :INT].each { |sig| trap(sig) { exit!(0) } } # instant shutdown
 477       @logger.info "worker=#{worker.nr} ready"
 478
 479       begin
 480         nr < 0 and reopen_worker_logs(worker.nr)
 481         nr = 0
 482
 483         # we're a goner in @timeout seconds anyways if alive.chmod
 484         # breaks, so don't trap the exception.  Using fchmod() since
 485         # futimes() is not available in base Ruby and I very strongly
 486         # prefer temporary files to be unlinked for security,
 487         # performance and reliability reasons, so utime is out.  No-op
 488         # changes with chmod doesn't update ctime on all filesystems; so
 489         # we change our counter each and every time (after process_client
 490         # and before IO.select).
 491         t == (ti = Time.now.to_i) or alive.chmod(t = ti)
 492
 493         ready.each do |sock|
 494           begin
 495             process_client(sock.accept_nonblock)
 496             nr += 1
 497             t == (ti = Time.now.to_i) or alive.chmod(t = ti)
 498           rescue Errno::EAGAIN, Errno::ECONNABORTED
 499           end
 500           break if nr < 0
 501         end
 502
 503         # make the following bet: if we accepted clients this round,
 504         # we're probably reasonably busy, so avoid calling select()
 505         # and do a speculative accept_nonblock on every listener
 506         # before we sleep again in select().
 507         redo unless nr == 0 # (nr < 0) => reopen logs
 508
 509         master_pid == Process.ppid or return
 510         t == (ti = Time.now.to_i) or alive.chmod(t = ti)
 511         begin
 512           # timeout used so we can detect parent death:
 513           ret = IO.select(LISTENERS, nil, SELF_PIPE, @timeout) or redo
 514           ready = ret.first
 515         rescue Errno::EINTR
 516           ready = LISTENERS
 517         rescue Errno::EBADF
 518           nr < 0 or return
 519         end
 520       rescue Object => e
 521         if alive
 522           logger.error "Unhandled listen loop exception #{e.inspect}."
 523           logger.error e.backtrace.join("\n")
 524         end
 525       end while alive
 526     end
 527
 528     # delivers a signal to a worker and fails gracefully if the worker
 529     # is no longer running.
 530     def kill_worker(signal, pid)
 531       begin
 532         Process.kill(signal, pid)
 533       rescue Errno::ESRCH
 534         worker = WORKERS.delete(pid) and worker.tempfile.close rescue nil
 535       end
 536     end
 537
 538     # delivers a signal to each worker
 539     def kill_each_worker(signal)
 540       WORKERS.keys.each { |pid| kill_worker(signal, pid) }
 541     end
 542
 543     # unlinks a PID file at given +path+ if it contains the current PID
 544     # useful as an at_exit handler.
 545     def unlink_pid_safe(path)
 546       (File.read(path).to_i == $$ and File.unlink(path)) rescue nil
 547     end
 548
 549     # returns a PID if a given path contains a non-stale PID file,
 550     # nil otherwise.
 551     def valid_pid?(path)
 552       if File.exist?(path) && (pid = File.read(path).to_i) > 1
 553         begin
 554           Process.kill(0, pid)
 555           return pid
 556         rescue Errno::ESRCH
 557         end
 558       end
 559       nil
 560     end
 561
 562     def load_config!
 563       begin
 564         logger.info "reloading config_file=#{@config.config_file}"
 565         @config[:listeners].replace(@init_listeners)
 566         @config.reload
 567         @config.commit!(self)
 568         kill_each_worker(:QUIT)
 569         logger.info "done reloading config_file=#{@config.config_file}"
 570       rescue Object => e
 571         logger.error "error reloading config_file=#{@config.config_file}: " \
 572                      "#{e.class} #{e.message}"
 573       end
 574     end
 575
 576     # returns an array of string names for the given listener array
 577     def listener_names(listeners = LISTENERS)
 578       listeners.map { |io| sock_name(io) }
 579     end
 580
 581     def build_app!
 582       @app = @app.call if @app.respond_to?(:arity) && @app.arity == 0
 583     end
 584
 585     def proc_name(tag)
 586       $0 = ([ File.basename(START_CTX[:zero]), tag ] +
 587               START_CTX[:argv]).join(' ')
 588     end
 589
 590     def redirect_io(io, path)
 591       File.open(path, 'a') { |fp| io.reopen(fp) } if path
 592       io.sync = true
 593     end
 594
 595     def init_self_pipe!
 596       SELF_PIPE.each { |io| io.close rescue nil }
 597       SELF_PIPE.replace(IO.pipe)
 598       SELF_PIPE.each { |io| io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) }
 599     end
 600
 601   end
 602 end