lib/rainbows/thread_timeout.rb

   1 # -*- encoding: binary -*-
   2 require 'thread'
   3
   4 # Soft timeout middleware for thread-based concurrency models in \Rainbows!
   5 # This timeout only includes application dispatch, and will not take into
   6 # account the (rare) response bodies that are dynamically generated while
   7 # they are being written out to the client.
   8 #
   9 # In your rackup config file (config.ru), the following line will
  10 # cause execution to timeout in 1.5 seconds.
  11 #
  12 #    use Rainbows::ThreadTimeout, :timeout => 1.5
  13 #    run MyApplication.new
  14 #
  15 # You may also specify a threshold, so the timeout does not take
  16 # effect until there are enough active clients.  It does not make
  17 # sense to set a +:threshold+ higher than or equal to the
  18 # +worker_connections+ \Rainbows! configuration parameter.
  19 # You may specify a negative threshold to make it relative to
  20 # the +worker_connections+ parameter: thus,
  21 # if you specify a threshold of -1, and have 100 worker_connections,
  22 # ThreadTimeout will only activate when there are 99 active requests.
  23 #
  24 #    use Rainbows::ThreadTimeout, :timeout => 1.5, :threshold => -1
  25 #    run MyApplication.new
  26 #
  27 # This middleware only affects elements below it in the stack, so
  28 # it can be configured to ignore certain endpoints or middlewares.
  29 #
  30 # Timed-out requests will cause this middleware to return with a
  31 # "408 Request Timeout" response.
  32 #
  33 # == Caveats
  34 #
  35 # Badly-written C extensions may not be timed out.  Audit and fix
  36 # (or remove) those extensions before relying on this module.
  37 #
  38 # Do NOT, under any circumstances, nest and load this in
  39 # the same middleware stack.  You may load this in parallel in the
  40 # same process in completely independent middleware stacks, but DO NOT
  41 # load this twice so it nests.  Things will break!
  42 #
  43 # This will behave badly if system time is changed since Ruby
  44 # does not expose a monotonic clock for users, so don't change
  45 # the system time while this is running.  All servers should be
  46 # running ntpd anyways.
  47 class Rainbows::ThreadTimeout
  48
  49   # :stopdoc:
  50   #
  51   # we subclass Exception to get rid of normal StandardError rescues
  52   # in app-level code.  timeout.rb does something similar
  53   ExecutionExpired = Class.new(Exception)
  54
  55   # The MRI 1.8 won't be usable in January 2038, we'll raise this
  56   # when we eventually drop support for 1.8 (before 2038, hopefully)
  57   NEVER = Time.at(0x7fffffff)
  58
  59   def initialize(app, opts)
  60     # @timeout must be Numeric since we add this to Time
  61     @timeout = opts[:timeout]
  62     Numeric === @timeout or
  63       raise TypeError, "timeout=#{@timeout.inspect} is not numeric"
  64
  65     if @threshold = opts[:threshold]
  66       Integer === @threshold or
  67         raise TypeError, "threshold=#{@threshold.inspect} is not an integer"
  68       @threshold == 0 and raise ArgumentError, "threshold=0 does not make sense"
  69       @threshold < 0 and @threshold += Rainbows.server.worker_connections
  70     end
  71     @app = app
  72
  73     # This is the main datastructure for communicating Threads eligible
  74     # for expiration to the watchdog thread.  If the eligible thread
  75     # completes its job before its expiration time, it will delete itself
  76     # @active.  If the watchdog thread notices the thread is timed out,
  77     # the watchdog thread will delete the thread from this hash as it
  78     # raises the exception.
  79     #
  80     # key: Thread to be timed out
  81     # value: Time of expiration
  82     @active = {}
  83
  84     # Protects all access to @active.  It is important since it also limits
  85     # safe points for asynchronously raising exceptions.
  86     @lock = Mutex.new
  87
  88     # There is one long-running watchdog thread that watches @active and
  89     # kills threads that have been running too long
  90     # see start_watchdog
  91     @watchdog = nil
  92   end
  93
  94   # entry point for Rack middleware
  95   def call(env)
  96     # Once we have this lock, we ensure two things:
  97     # 1) there is only one watchdog thread started
  98     # 2) we can't be killed once we have this lock, it's unlikely
  99     #    to happen unless @timeout is really low and the machine
 100     #    is very slow.
 101     @lock.lock
 102
 103     # we're dead if anything in the next two lines raises, but it's
 104     # highly unlikely that they will, and anything such as NoMemoryError
 105     # is hopeless and we might as well just die anyways.
 106     # initialize guarantees @timeout will be Numeric
 107     start_watchdog(env) unless @watchdog
 108     @active[Thread.current] = Time.now + @timeout
 109
 110     begin
 111       # It is important to unlock inside this begin block
 112       # Mutex#unlock really can't fail here since we did a successful
 113       # Mutex#lock before
 114       @lock.unlock
 115
 116       # Once the Mutex was unlocked, we're open to Thread#raise from
 117       # the watchdog process.  This is the main place we expect to receive
 118       # Thread#raise.  @app is of course the next layer of the Rack
 119       # application stack
 120       @app.call(env)
 121     ensure
 122       # I's still possible to receive a Thread#raise here from
 123       # the watchdog, but that's alright, the "rescue ExecutionExpired"
 124       # line will catch that.
 125       @lock.synchronize { @active.delete(Thread.current) }
 126       # Thread#raise no longer possible here
 127     end
 128     rescue ExecutionExpired
 129       # If we got here, it's because the watchdog thread raised an exception
 130       # here to kill us.  The watchdog uses @active.delete_if with a lock,
 131       # so we guaranteed it's
 132       [ 408, { 'Content-Type' => 'text/plain', 'Content-Length' => '0' }, [] ]
 133   end
 134
 135   # The watchdog thread is the one that does the job of killing threads
 136   # that have expired.
 137   def start_watchdog(env)
 138     @watchdog = Thread.new(env["rack.logger"]) do |logger|
 139       begin
 140         if @threshold
 141           # Hash#size is atomic in MRI 1.8 and 1.9 and we
 142           # expect that from other implementations.
 143           #
 144           # Even without a memory barrier, sleep(@timeout) vs
 145           # sleep(@timeout - time-for-SMP-to-synchronize-a-word)
 146           # is too trivial to worry about here.
 147           sleep(@timeout) while @active.size < @threshold
 148         end
 149
 150         next_expiry = NEVER
 151
 152         # We always lock access to @active, so we can't kill threads
 153         # that are about to release themselves from the eye of the
 154         # watchdog thread.
 155         @lock.synchronize do
 156           now = Time.now
 157           @active.delete_if do |thread, expire_at|
 158             # We also use this loop to get the maximum possible time to
 159             # sleep for if we're not killing the thread.
 160             if expire_at > now
 161               next_expiry = expire_at if next_expiry > expire_at
 162               false
 163             else
 164               # Terminate execution and delete this from the @active
 165               thread.raise(ExecutionExpired)
 166               true
 167             end
 168           end
 169         end
 170
 171         # We always try to sleep as long as possible to avoid consuming
 172         # resources from the app.  So that's the user-configured @timeout
 173         # value.
 174         if next_expiry == NEVER
 175           sleep(@timeout)
 176         else
 177           # sleep until the next known thread is about to expire.
 178           sec = next_expiry - Time.now
 179           sec > 0.0 ? sleep(sec) : Thread.pass # give other threads a chance
 180         end
 181       rescue => e
 182         # just in case
 183         logger.error e
 184       end while true # we run this forever
 185     end
 186   end
 187   # :startdoc:
 188 end