1 /**********************************************************************
7 Copyright (C) 2020 Samuel Grant Dawson Williams
9 **********************************************************************/
12 #include "ruby/fiber/scheduler.h"
14 #include "ruby/io/buffer.h"
16 #include "internal/thread.h"
19 static ID id_scheduler_close
;
24 static ID id_timeout_after
;
25 static ID id_kernel_sleep
;
26 static ID id_process_wait
;
28 static ID id_io_read
, id_io_pread
;
29 static ID id_io_write
, id_io_pwrite
;
31 static ID id_io_select
;
32 static ID id_io_close
;
34 static ID id_address_resolve
;
36 static ID id_fiber_schedule
;
39 * Document-class: Fiber::Scheduler
41 * This is not an existing class, but documentation of the interface that a Scheduler
42 * object should comply with in order to be used as the argument to Fiber.set_scheduler and handle non-blocking
43 * fibers. See also the "Non-blocking fibers" section in Fiber class docs for explanations
46 * Scheduler's behavior and usage are expected to be as follows:
48 * * When the execution in the non-blocking Fiber reaches some blocking operation (like
49 * sleep, wait for a process, or a non-ready I/O), it calls some of the scheduler's
50 * hook methods, listed below.
51 * * Scheduler somehow registers what the current fiber is waiting on, and yields control
52 * to other fibers with Fiber.yield (so the fiber would be suspended while expecting its
53 * wait to end, and other fibers in the same thread can perform)
54 * * At the end of the current thread execution, the scheduler's method #scheduler_close is called
55 * * The scheduler runs into a wait loop, checking all the blocked fibers (which it has
56 * registered on hook calls) and resuming them when the awaited resource is ready
57 * (e.g. I/O ready or sleep time elapsed).
59 * This way concurrent execution will be achieved transparently for every
60 * individual Fiber's code.
62 * Scheduler implementations are provided by gems, like
63 * Async[https://github.com/socketry/async].
67 * * #io_wait, #io_read, #io_write, #io_pread, #io_pwrite, #io_select, and #io_close
72 * * #block and #unblock
73 * * (the list is expanded as Ruby developers make more methods having non-blocking calls)
75 * When not specified otherwise, the hook implementations are mandatory: if they are not
76 * implemented, the methods trying to call hook will fail. To provide backward compatibility,
77 * in the future hooks will be optional (if they are not implemented, due to the scheduler
78 * being created for the older Ruby version, the code which needs this hook will not fail,
79 * and will just behave in a blocking fashion).
81 * It is also strongly recommended that the scheduler implements the #fiber method, which is
82 * delegated to by Fiber.schedule.
84 * Sample _toy_ implementation of the scheduler can be found in Ruby's code, in
85 * <tt>test/fiber/scheduler.rb</tt>
89 Init_Fiber_Scheduler(void)
91 id_close
= rb_intern_const("close");
92 id_scheduler_close
= rb_intern_const("scheduler_close");
94 id_block
= rb_intern_const("block");
95 id_unblock
= rb_intern_const("unblock");
97 id_timeout_after
= rb_intern_const("timeout_after");
98 id_kernel_sleep
= rb_intern_const("kernel_sleep");
99 id_process_wait
= rb_intern_const("process_wait");
101 id_io_read
= rb_intern_const("io_read");
102 id_io_pread
= rb_intern_const("io_pread");
103 id_io_write
= rb_intern_const("io_write");
104 id_io_pwrite
= rb_intern_const("io_pwrite");
106 id_io_wait
= rb_intern_const("io_wait");
107 id_io_select
= rb_intern_const("io_select");
108 id_io_close
= rb_intern_const("io_close");
110 id_address_resolve
= rb_intern_const("address_resolve");
112 id_fiber_schedule
= rb_intern_const("fiber");
115 rb_cFiberScheduler
= rb_define_class_under(rb_cFiber
, "Scheduler", rb_cObject
);
116 rb_define_method(rb_cFiberScheduler
, "close", rb_fiber_scheduler_close
, 0);
117 rb_define_method(rb_cFiberScheduler
, "process_wait", rb_fiber_scheduler_process_wait
, 2);
118 rb_define_method(rb_cFiberScheduler
, "io_wait", rb_fiber_scheduler_io_wait
, 3);
119 rb_define_method(rb_cFiberScheduler
, "io_read", rb_fiber_scheduler_io_read
, 4);
120 rb_define_method(rb_cFiberScheduler
, "io_write", rb_fiber_scheduler_io_write
, 4);
121 rb_define_method(rb_cFiberScheduler
, "io_pread", rb_fiber_scheduler_io_pread
, 5);
122 rb_define_method(rb_cFiberScheduler
, "io_pwrite", rb_fiber_scheduler_io_pwrite
, 5);
123 rb_define_method(rb_cFiberScheduler
, "io_select", rb_fiber_scheduler_io_select
, 4);
124 rb_define_method(rb_cFiberScheduler
, "kernel_sleep", rb_fiber_scheduler_kernel_sleep
, 1);
125 rb_define_method(rb_cFiberScheduler
, "address_resolve", rb_fiber_scheduler_address_resolve
, 1);
126 rb_define_method(rb_cFiberScheduler
, "timeout_after", rb_fiber_scheduler_timeout_after
, 3);
127 rb_define_method(rb_cFiberScheduler
, "block", rb_fiber_scheduler_block
, 2);
128 rb_define_method(rb_cFiberScheduler
, "unblock", rb_fiber_scheduler_unblock
, 2);
129 rb_define_method(rb_cFiberScheduler
, "fiber", rb_fiber_scheduler
, -2);
134 rb_fiber_scheduler_get(void)
136 VM_ASSERT(ruby_thread_has_gvl_p());
138 rb_thread_t
*thread
= GET_THREAD();
141 return thread
->scheduler
;
145 verify_interface(VALUE scheduler
)
147 if (!rb_respond_to(scheduler
, id_block
)) {
148 rb_raise(rb_eArgError
, "Scheduler must implement #block");
151 if (!rb_respond_to(scheduler
, id_unblock
)) {
152 rb_raise(rb_eArgError
, "Scheduler must implement #unblock");
155 if (!rb_respond_to(scheduler
, id_kernel_sleep
)) {
156 rb_raise(rb_eArgError
, "Scheduler must implement #kernel_sleep");
159 if (!rb_respond_to(scheduler
, id_io_wait
)) {
160 rb_raise(rb_eArgError
, "Scheduler must implement #io_wait");
165 fiber_scheduler_close(VALUE scheduler
)
167 return rb_fiber_scheduler_close(scheduler
);
171 fiber_scheduler_close_ensure(VALUE _thread
)
173 rb_thread_t
*thread
= (rb_thread_t
*)_thread
;
174 thread
->scheduler
= Qnil
;
180 rb_fiber_scheduler_set(VALUE scheduler
)
182 VM_ASSERT(ruby_thread_has_gvl_p());
184 rb_thread_t
*thread
= GET_THREAD();
187 if (scheduler
!= Qnil
) {
188 verify_interface(scheduler
);
191 // We invoke Scheduler#close when setting it to something else, to ensure
192 // the previous scheduler runs to completion before changing the scheduler.
193 // That way, we do not need to consider interactions, e.g., of a Fiber from
194 // the previous scheduler with the new scheduler.
195 if (thread
->scheduler
!= Qnil
) {
196 // rb_fiber_scheduler_close(thread->scheduler);
197 rb_ensure(fiber_scheduler_close
, thread
->scheduler
, fiber_scheduler_close_ensure
, (VALUE
)thread
);
200 thread
->scheduler
= scheduler
;
202 return thread
->scheduler
;
206 rb_fiber_scheduler_current_for_threadptr(rb_thread_t
*thread
)
210 if (thread
->blocking
== 0) {
211 return thread
->scheduler
;
219 rb_fiber_scheduler_current(void)
221 return rb_fiber_scheduler_current_for_threadptr(GET_THREAD());
224 VALUE
rb_fiber_scheduler_current_for_thread(VALUE thread
)
226 return rb_fiber_scheduler_current_for_threadptr(rb_thread_ptr(thread
));
231 * Document-method: Fiber::Scheduler#close
233 * Called when the current thread exits. The scheduler is expected to implement this
234 * method in order to allow all waiting fibers to finalize their execution.
236 * The suggested pattern is to implement the main event loop in the #close method.
240 rb_fiber_scheduler_close(VALUE scheduler
)
242 VM_ASSERT(ruby_thread_has_gvl_p());
246 // The reason for calling `scheduler_close` before calling `close` is for
247 // legacy schedulers which implement `close` and expect the user to call
248 // it. Subsequently, that method would call `Fiber.set_scheduler(nil)`
249 // which should call `scheduler_close`. If it were to call `close`, it
250 // would create an infinite loop.
252 result
= rb_check_funcall(scheduler
, id_scheduler_close
, 0, NULL
);
253 if (!UNDEF_P(result
)) return result
;
255 result
= rb_check_funcall(scheduler
, id_close
, 0, NULL
);
256 if (!UNDEF_P(result
)) return result
;
262 rb_fiber_scheduler_make_timeout(struct timeval
*timeout
)
265 return rb_float_new((double)timeout
->tv_sec
+ (0.000001f
* timeout
->tv_usec
));
272 * Document-method: Fiber::Scheduler#kernel_sleep
273 * call-seq: kernel_sleep(duration = nil)
275 * Invoked by Kernel#sleep and Mutex#sleep and is expected to provide
276 * an implementation of sleeping in a non-blocking way. Implementation might
277 * register the current fiber in some list of "which fiber waits until what
278 * moment", call Fiber.yield to pass control, and then in #close resume
279 * the fibers whose wait period has elapsed.
283 rb_fiber_scheduler_kernel_sleep(VALUE scheduler
, VALUE timeout
)
285 return rb_funcall(scheduler
, id_kernel_sleep
, 1, timeout
);
289 rb_fiber_scheduler_kernel_sleepv(VALUE scheduler
, int argc
, VALUE
* argv
)
291 return rb_funcallv(scheduler
, id_kernel_sleep
, argc
, argv
);
296 * Document-method: Fiber::Scheduler#timeout_after
297 * call-seq: timeout_after(duration, exception_class, *exception_arguments, &block) -> result of block
299 * Invoked by Timeout.timeout to execute the given +block+ within the given
300 * +duration+. It can also be invoked directly by the scheduler or user code.
302 * Attempt to limit the execution time of a given +block+ to the given
303 * +duration+ if possible. When a non-blocking operation causes the +block+'s
304 * execution time to exceed the specified +duration+, that non-blocking
305 * operation should be interrupted by raising the specified +exception_class+
306 * constructed with the given +exception_arguments+.
308 * General execution timeouts are often considered risky. This implementation
309 * will only interrupt non-blocking operations. This is by design because it's
310 * expected that non-blocking operations can fail for a variety of
311 * unpredictable reasons, so applications should already be robust in handling
312 * these conditions and by implication timeouts.
314 * However, as a result of this design, if the +block+ does not invoke any
315 * non-blocking operations, it will be impossible to interrupt it. If you
316 * desire to provide predictable points for timeouts, consider adding
319 * If the block is executed successfully, its result will be returned.
321 * The exception will typically be raised using Fiber#raise.
324 rb_fiber_scheduler_timeout_after(VALUE scheduler
, VALUE timeout
, VALUE exception
, VALUE message
)
326 VALUE arguments
[] = {
327 timeout
, exception
, message
330 return rb_check_funcall(scheduler
, id_timeout_after
, 3, arguments
);
334 rb_fiber_scheduler_timeout_afterv(VALUE scheduler
, int argc
, VALUE
* argv
)
336 return rb_check_funcall(scheduler
, id_timeout_after
, argc
, argv
);
341 * Document-method: Fiber::Scheduler#process_wait
342 * call-seq: process_wait(pid, flags)
344 * Invoked by Process::Status.wait in order to wait for a specified process.
345 * See that method description for arguments description.
347 * Suggested minimal implementation:
350 * Process::Status.wait(pid, flags)
353 * This hook is optional: if it is not present in the current scheduler,
354 * Process::Status.wait will behave as a blocking method.
356 * Expected to return a Process::Status instance.
359 rb_fiber_scheduler_process_wait(VALUE scheduler
, rb_pid_t pid
, int flags
)
361 VALUE arguments
[] = {
362 PIDT2NUM(pid
), RB_INT2NUM(flags
)
365 return rb_check_funcall(scheduler
, id_process_wait
, 2, arguments
);
369 * Document-method: Fiber::Scheduler#block
370 * call-seq: block(blocker, timeout = nil)
372 * Invoked by methods like Thread.join, and by Mutex, to signify that current
373 * Fiber is blocked until further notice (e.g. #unblock) or until +timeout+ has
376 * +blocker+ is what we are waiting on, informational only (for debugging and
377 * logging). There is no guarantee about its value.
379 * Expected to return boolean, specifying whether the blocking operation was
383 rb_fiber_scheduler_block(VALUE scheduler
, VALUE blocker
, VALUE timeout
)
385 return rb_funcall(scheduler
, id_block
, 2, blocker
, timeout
);
389 * Document-method: Fiber::Scheduler#unblock
390 * call-seq: unblock(blocker, fiber)
392 * Invoked to wake up Fiber previously blocked with #block (for example, Mutex#lock
393 * calls #block and Mutex#unlock calls #unblock). The scheduler should use
394 * the +fiber+ parameter to understand which fiber is unblocked.
396 * +blocker+ is what was awaited for, but it is informational only (for debugging
397 * and logging), and it is not guaranteed to be the same value as the +blocker+ for
402 rb_fiber_scheduler_unblock(VALUE scheduler
, VALUE blocker
, VALUE fiber
)
404 VM_ASSERT(rb_obj_is_fiber(fiber
));
406 return rb_funcall(scheduler
, id_unblock
, 2, blocker
, fiber
);
410 * Document-method: Fiber::Scheduler#io_wait
411 * call-seq: io_wait(io, events, timeout)
413 * Invoked by IO#wait, IO#wait_readable, IO#wait_writable to ask whether the
414 * specified descriptor is ready for specified events within
415 * the specified +timeout+.
417 * +events+ is a bit mask of <tt>IO::READABLE</tt>, <tt>IO::WRITABLE</tt>, and
418 * <tt>IO::PRIORITY</tt>.
420 * Suggested implementation should register which Fiber is waiting for which
421 * resources and immediately call Fiber.yield to pass control to other
422 * fibers. Then, in the #close method, the scheduler might dispatch all the
423 * I/O resources to fibers waiting for it.
425 * Expected to return the subset of events that are ready immediately.
429 rb_fiber_scheduler_io_wait(VALUE scheduler
, VALUE io
, VALUE events
, VALUE timeout
)
431 return rb_funcall(scheduler
, id_io_wait
, 3, io
, events
, timeout
);
435 rb_fiber_scheduler_io_wait_readable(VALUE scheduler
, VALUE io
)
437 return rb_fiber_scheduler_io_wait(scheduler
, io
, RB_UINT2NUM(RUBY_IO_READABLE
), rb_io_timeout(io
));
441 rb_fiber_scheduler_io_wait_writable(VALUE scheduler
, VALUE io
)
443 return rb_fiber_scheduler_io_wait(scheduler
, io
, RB_UINT2NUM(RUBY_IO_WRITABLE
), rb_io_timeout(io
));
447 * Document-method: Fiber::Scheduler#io_select
448 * call-seq: io_select(readables, writables, exceptables, timeout)
450 * Invoked by IO.select to ask whether the specified descriptors are ready for
451 * specified events within the specified +timeout+.
453 * Expected to return the 3-tuple of Array of IOs that are ready.
456 VALUE
rb_fiber_scheduler_io_select(VALUE scheduler
, VALUE readables
, VALUE writables
, VALUE exceptables
, VALUE timeout
)
458 VALUE arguments
[] = {
459 readables
, writables
, exceptables
, timeout
462 return rb_fiber_scheduler_io_selectv(scheduler
, 4, arguments
);
465 VALUE
rb_fiber_scheduler_io_selectv(VALUE scheduler
, int argc
, VALUE
*argv
)
467 // I wondered about extracting argv, and checking if there is only a single
468 // IO instance, and instead calling `io_wait`. However, it would require a
469 // decent amount of work and it would be hard to preserve the exact
470 // semantics of IO.select.
472 return rb_check_funcall(scheduler
, id_io_select
, argc
, argv
);
476 * Document-method: Fiber::Scheduler#io_read
477 * call-seq: io_read(io, buffer, length, offset) -> read length or -errno
479 * Invoked by IO#read or IO::Buffer#read to read +length+ bytes from +io+ into a
480 * specified +buffer+ (see IO::Buffer) at the given +offset+.
482 * The +length+ argument is the "minimum length to be read". If the IO buffer
483 * size is 8KiB, but the +length+ is +1024+ (1KiB), up to 8KiB might be read,
484 * but at least 1KiB will be. Generally, the only case where less data than
485 * +length+ will be read is if there is an error reading the data.
487 * Specifying a +length+ of 0 is valid and means try reading at least once and
488 * return any available data.
490 * Suggested implementation should try to read from +io+ in a non-blocking
491 * manner and call #io_wait if the +io+ is not ready (which will yield control
494 * See IO::Buffer for an interface available to return data.
496 * Expected to return number of bytes read, or, in case of an error,
497 * <tt>-errno</tt> (negated number corresponding to system's error code).
499 * The method should be considered _experimental_.
502 rb_fiber_scheduler_io_read(VALUE scheduler
, VALUE io
, VALUE buffer
, size_t length
, size_t offset
)
504 VALUE arguments
[] = {
505 io
, buffer
, SIZET2NUM(length
), SIZET2NUM(offset
)
508 return rb_check_funcall(scheduler
, id_io_read
, 4, arguments
);
512 * Document-method: Fiber::Scheduler#io_pread
513 * call-seq: io_pread(io, buffer, from, length, offset) -> read length or -errno
515 * Invoked by IO#pread or IO::Buffer#pread to read +length+ bytes from +io+
516 * at offset +from+ into a specified +buffer+ (see IO::Buffer) at the given
519 * This method is semantically the same as #io_read, but it allows to specify
520 * the offset to read from and is often better for asynchronous IO on the same
523 * The method should be considered _experimental_.
526 rb_fiber_scheduler_io_pread(VALUE scheduler
, VALUE io
, rb_off_t from
, VALUE buffer
, size_t length
, size_t offset
)
528 VALUE arguments
[] = {
529 io
, buffer
, OFFT2NUM(from
), SIZET2NUM(length
), SIZET2NUM(offset
)
532 return rb_check_funcall(scheduler
, id_io_pread
, 5, arguments
);
536 * Document-method: Fiber::Scheduler#io_write
537 * call-seq: io_write(io, buffer, length, offset) -> written length or -errno
539 * Invoked by IO#write or IO::Buffer#write to write +length+ bytes to +io+
540 * from a specified +buffer+ (see IO::Buffer) at the given +offset+.
542 * The +length+ argument is the "minimum length to be written". If the IO
543 * buffer size is 8KiB, but the +length+ specified is 1024 (1KiB), at most 8KiB
544 * will be written, but at least 1KiB will be. Generally, the only case where
545 * less data than +length+ will be written is if there is an error writing the
548 * Specifying a +length+ of 0 is valid and means try writing at least once, as
549 * much data as possible.
551 * Suggested implementation should try to write to +io+ in a non-blocking
552 * manner and call #io_wait if the +io+ is not ready (which will yield control
555 * See IO::Buffer for an interface available to get data from buffer
558 * Expected to return number of bytes written, or, in case of an error,
559 * <tt>-errno</tt> (negated number corresponding to system's error code).
561 * The method should be considered _experimental_.
564 rb_fiber_scheduler_io_write(VALUE scheduler
, VALUE io
, VALUE buffer
, size_t length
, size_t offset
)
566 VALUE arguments
[] = {
567 io
, buffer
, SIZET2NUM(length
), SIZET2NUM(offset
)
570 return rb_check_funcall(scheduler
, id_io_write
, 4, arguments
);
574 * Document-method: Fiber::Scheduler#io_pwrite
575 * call-seq: io_pwrite(io, buffer, from, length, offset) -> written length or -errno
577 * Invoked by IO#pwrite or IO::Buffer#pwrite to write +length+ bytes to +io+
578 * at offset +from+, taken from a specified +buffer+ (see IO::Buffer) at the given
581 * This method is semantically the same as #io_write, but it allows to specify
582 * the offset to write to and is often better for asynchronous IO on the same
585 * The method should be considered _experimental_.
589 rb_fiber_scheduler_io_pwrite(VALUE scheduler
, VALUE io
, rb_off_t from
, VALUE buffer
, size_t length
, size_t offset
)
591 VALUE arguments
[] = {
592 io
, buffer
, OFFT2NUM(from
), SIZET2NUM(length
), SIZET2NUM(offset
)
595 return rb_check_funcall(scheduler
, id_io_pwrite
, 5, arguments
);
599 rb_fiber_scheduler_io_read_memory(VALUE scheduler
, VALUE io
, void *base
, size_t size
, size_t length
)
601 VALUE buffer
= rb_io_buffer_new(base
, size
, RB_IO_BUFFER_LOCKED
);
603 VALUE result
= rb_fiber_scheduler_io_read(scheduler
, io
, buffer
, length
, 0);
605 rb_io_buffer_free_locked(buffer
);
611 rb_fiber_scheduler_io_write_memory(VALUE scheduler
, VALUE io
, const void *base
, size_t size
, size_t length
)
613 VALUE buffer
= rb_io_buffer_new((void*)base
, size
, RB_IO_BUFFER_LOCKED
|RB_IO_BUFFER_READONLY
);
615 VALUE result
= rb_fiber_scheduler_io_write(scheduler
, io
, buffer
, length
, 0);
617 rb_io_buffer_free_locked(buffer
);
623 rb_fiber_scheduler_io_pread_memory(VALUE scheduler
, VALUE io
, rb_off_t from
, void *base
, size_t size
, size_t length
)
625 VALUE buffer
= rb_io_buffer_new(base
, size
, RB_IO_BUFFER_LOCKED
);
627 VALUE result
= rb_fiber_scheduler_io_pread(scheduler
, io
, from
, buffer
, length
, 0);
629 rb_io_buffer_free_locked(buffer
);
635 rb_fiber_scheduler_io_pwrite_memory(VALUE scheduler
, VALUE io
, rb_off_t from
, const void *base
, size_t size
, size_t length
)
637 VALUE buffer
= rb_io_buffer_new((void*)base
, size
, RB_IO_BUFFER_LOCKED
|RB_IO_BUFFER_READONLY
);
639 VALUE result
= rb_fiber_scheduler_io_pwrite(scheduler
, io
, from
, buffer
, length
, 0);
641 rb_io_buffer_free_locked(buffer
);
647 rb_fiber_scheduler_io_close(VALUE scheduler
, VALUE io
)
649 VALUE arguments
[] = {io
};
651 return rb_check_funcall(scheduler
, id_io_close
, 1, arguments
);
655 * Document-method: Fiber::Scheduler#address_resolve
656 * call-seq: address_resolve(hostname) -> array_of_strings or nil
658 * Invoked by any method that performs a non-reverse DNS lookup. The most
659 * notable method is Addrinfo.getaddrinfo, but there are many others.
661 * The method is expected to return an array of strings corresponding to the IP
662 * addresses the +hostname+ is resolved to, or +nil+ if it cannot be resolved.
664 * Fairly exhaustive list of all possible call-sites:
666 * - Addrinfo.getaddrinfo
671 * - Addrinfo.marshal_load
675 * - IPSocket.getaddress
676 * - TCPSocket.gethostbyname
677 * - UDPSocket#connect
680 * - Socket.getaddrinfo
681 * - Socket.gethostbyname
682 * - Socket.pack_sockaddr_in
683 * - Socket.sockaddr_in
684 * - Socket.unpack_sockaddr_in
687 rb_fiber_scheduler_address_resolve(VALUE scheduler
, VALUE hostname
)
689 VALUE arguments
[] = {
693 return rb_check_funcall(scheduler
, id_address_resolve
, 1, arguments
);
697 * Document-method: Fiber::Scheduler#fiber
698 * call-seq: fiber(&block)
700 * Implementation of the Fiber.schedule. The method is <em>expected</em> to immediately
701 * run the given block of code in a separate non-blocking fiber, and to return that Fiber.
703 * Minimal suggested implementation is:
706 * fiber = Fiber.new(blocking: false, &block)
712 rb_fiber_scheduler_fiber(VALUE scheduler
, int argc
, VALUE
*argv
, int kw_splat
)
714 return rb_funcall_passing_block_kw(scheduler
, id_fiber_schedule
, argc
, argv
, kw_splat
);