detect closed descriptors on EINTR
[ruby_io_splice.git] / ext / io_splice / io_splice_ext.c
blob11817972813d0f1799567b0985d2cb534484a6fe
1 #include "ruby.h"
2 #ifdef HAVE_RUBY_IO_H
3 # include "ruby/io.h"
4 #else
5 # include "rubyio.h"
6 #endif
7 #include <errno.h>
8 #include <fcntl.h>
9 #include <assert.h>
10 #include <sys/uio.h>
11 #include <limits.h>
12 #include <alloca.h>
13 #include <sys/utsname.h>
15 static VALUE sym_EAGAIN;
/*
 * fcntl() commands for querying/changing pipe capacity, supplied here
 * for system headers that do not define them.  Each one is guarded
 * separately so a header providing only one of the pair cannot
 * trigger a macro redefinition.
 */
#ifndef F_LINUX_SPECIFIC_BASE
#  define F_LINUX_SPECIFIC_BASE 1024
#endif

#ifndef F_SETPIPE_SZ
#  define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
#endif
#ifndef F_GETPIPE_SZ
#  define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
#endif

/* fall back to the OpenFile name when the rb_io_t type is unavailable */
#if ! HAVE_RB_IO_T
#  define rb_io_t OpenFile
#endif

/*
 * FPTR_TO_FD(fptr) yields the file descriptor behind an rb_io_t pointer.
 * The argument and the whole expansion are parenthesized so the macro
 * stays correct when handed an arbitrary expression or used inside one.
 */
#ifdef GetReadFile
#  define FPTR_TO_FD(fptr) (fileno(GetReadFile(fptr)))
#else
#  if !HAVE_RB_IO_T || (RUBY_VERSION_MAJOR == 1 && RUBY_VERSION_MINOR == 8)
#    define FPTR_TO_FD(fptr) (fileno((fptr)->f))
#  else
#    define FPTR_TO_FD(fptr) ((fptr)->fd)
#  endif
#endif
40 static int my_fileno(VALUE io)
42 rb_io_t *fptr;
44 for (;;) {
45 switch (TYPE(io)) {
46 case T_FIXNUM: return FIX2INT(io);
47 case T_FILE: {
48 GetOpenFile(io, fptr);
49 return FPTR_TO_FD(fptr);
51 default:
52 io = rb_convert_type(io, T_FILE, "IO", "to_io");
53 /* retry */
58 static int check_fileno(VALUE io)
60 int saved_errno = errno;
61 int fd = my_fileno(io);
62 errno = saved_errno;
63 return fd;
65 #ifndef HAVE_RB_THREAD_BLOCKING_REGION
66 /* partial emulation of the 1.9 rb_thread_blocking_region under 1.8 */
67 # include <rubysig.h>
68 # define RUBY_UBF_IO ((rb_unblock_function_t *)-1)
69 typedef void rb_unblock_function_t(void *);
70 typedef VALUE rb_blocking_function_t(void *);
71 static VALUE
72 rb_thread_blocking_region(
73 rb_blocking_function_t *fn, void *data1,
74 rb_unblock_function_t *ubf, void *data2)
76 VALUE rv;
78 assert(RUBY_UBF_IO == ubf && "RUBY_UBF_IO required for emulation");
80 TRAP_BEG;
81 rv = fn(data1);
82 TRAP_END;
84 return rv;
86 #endif /* ! HAVE_RB_THREAD_BLOCKING_REGION */
/*
 * String/Array accessor macros, defined here only when the Ruby
 * headers in use do not already provide them.
 */
#if !defined(RSTRING_PTR)
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif
#if !defined(RSTRING_LEN)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
#if !defined(RARRAY_PTR)
#  define RARRAY_PTR(s) (RARRAY(s)->ptr)
#endif
#if !defined(RARRAY_LEN)
#  define RARRAY_LEN(s) (RARRAY(s)->len)
#endif
101 static VALUE io_run(rb_blocking_function_t *fn, void *data)
103 return rb_thread_blocking_region(fn, data, RUBY_UBF_IO, 0);
/* the arguments of one splice(2) call, bundled behind a single pointer */
struct splice_args {
	int fd_in;		/* descriptor data is taken from */
	off_t *off_in;		/* offset for fd_in, NULL if none was given */
	int fd_out;		/* descriptor data is moved to */
	off_t *off_out;		/* offset for fd_out, NULL if none was given */
	size_t len;		/* maximum number of bytes to move */
	unsigned flags;		/* SPLICE_F_* bitmask */
};
115 static VALUE nogvl_splice(void *ptr)
117 struct splice_args *a = ptr;
119 return (VALUE)splice(a->fd_in, a->off_in, a->fd_out, a->off_out,
120 a->len, a->flags);
123 static long do_splice(int argc, VALUE *argv, unsigned dflags)
125 off_t i, o;
126 VALUE fd_in, off_in, fd_out, off_out, len, flags;
127 struct splice_args a;
128 long bytes;
130 rb_scan_args(argc, argv, "51",
131 &fd_in, &off_in, &fd_out, &off_out, &len, &flags);
133 a.off_in = NIL_P(off_in) ? NULL : (i = NUM2OFFT(off_in), &i);
134 a.off_out = NIL_P(off_out) ? NULL : (o = NUM2OFFT(off_out), &o);
135 a.len = (size_t)NUM2ULONG(len);
136 a.flags = NIL_P(flags) ? dflags : NUM2UINT(flags) | dflags;
138 do {
139 a.fd_in = check_fileno(fd_in);
140 a.fd_out = check_fileno(fd_out);
141 bytes = (long)io_run(nogvl_splice, &a);
142 } while (bytes == -1 && errno == EINTR);
144 return bytes;
148 * call-seq:
149 * IO.splice(fd_in, off_in, fd_out, off_out, len) => integer
150 * IO.splice(fd_in, off_in, fd_out, off_out, len, flags) => integer
152 * Splice +len+ bytes from/to a pipe. Either +fd_in+ or +fd_out+
153 * MUST be a pipe. +fd_in+ and +fd_out+ may BOTH be pipes as of
154 * Linux 2.6.31 or later.
156 * +off_in+ and +off_out+ if non-nil may be used to
157 * specify an offset for the non-pipe file descriptor.
159 * +flags+ defaults to zero if unspecified.
160 * +flags+ may be a bitmask of the following flags:
162 * IO::Splice::F_MOVE, IO::Splice::F_NONBLOCK, IO::Splice::F_MORE
164 * Returns the number of bytes spliced.
165 * Raises EOFError when +fd_in+ has reached end of file.
166 * Raises Errno::EAGAIN if the IO::Splice::F_NONBLOCK flag is set
167 * and the pipe has no data to read from or space to write to. May
168 * also raise Errno::EAGAIN if the non-pipe descriptor has no data
169 * to read from or space to write to.
171 * rd, wr = (pipe = IO.pipe).map { |io| io.fileno }
172 * src_io, dst_io = File.open("/path/to/src"), File.open("/path/to/dst", "w")
173 * src, dst = src_io.fileno, dst_io.fileno
175 * nr = IO.splice(src, nil, wr, nil, IO::Splice::PIPE_CAPA, 0)
176 * IO.splice(rd, nil, dst, nil, nr, 0)
178 * As splice never exposes buffers to userspace, it will not take
179 * into account userspace buffering done by Ruby or stdio. It is
180 * also not subject to encoding/decoding filters under Ruby 1.9.
182 * Consider using IO.trysplice if you are using non-blocking I/O on
183 * both descriptors as it avoids the cost of raising common Errno::EAGAIN
184 * exceptions.
186 * See manpage for full documentation:
187 * http://kernel.org/doc/man-pages/online/pages/man2/splice.2.html
189 static VALUE my_splice(int argc, VALUE *argv, VALUE self)
191 long n = do_splice(argc, argv, 0);
193 if (n == 0)
194 rb_eof_error();
195 if (n < 0)
196 rb_sys_fail("splice");
197 return LONG2NUM(n);
201 * call-seq:
202 * IO.trysplice(fd_in, off_in, fd_out, off_out, len) => integer
203 * IO.trysplice(fd_in, off_in, fd_out, off_out, len, flags) => integer
205 * Exactly like IO.splice, except +:EAGAIN+ is returned instead of raising
206 * Errno::EAGAIN when either the read or write end would block, and +nil+ is returned instead of raising EOFError at end of file.
208 * IO::Splice::F_NONBLOCK is always passed for the pipe descriptor,
209 * but this can still block if the non-pipe descriptor is blocking.
211 * See IO.splice documentation for more details.
213 static VALUE trysplice(int argc, VALUE *argv, VALUE self)
215 long n = do_splice(argc, argv, SPLICE_F_NONBLOCK);
217 if (n == 0)
218 return Qnil;
219 if (n < 0) {
220 if (errno == EAGAIN)
221 return sym_EAGAIN;
222 rb_sys_fail("splice");
224 return LONG2NUM(n);
/* the arguments of one tee(2) call, bundled behind a single pointer */
struct tee_args {
	int fd_in;		/* descriptor data is duplicated from */
	int fd_out;		/* descriptor data is duplicated to */
	size_t len;		/* maximum number of bytes to duplicate */
	unsigned flags;		/* SPLICE_F_* bitmask */
};
234 /* runs without GVL */
235 static VALUE nogvl_tee(void *ptr)
237 struct tee_args *a = ptr;
239 return (VALUE)tee(a->fd_in, a->fd_out, a->len, a->flags);
242 static long do_tee(int argc, VALUE *argv, unsigned dflags)
244 VALUE fd_in, fd_out, len, flags;
245 struct tee_args a;
246 long bytes;
248 rb_scan_args(argc, argv, "31", &fd_in, &fd_out, &len, &flags);
249 a.len = (size_t)NUM2ULONG(len);
250 a.flags = NIL_P(flags) ? dflags : NUM2UINT(flags) | dflags;
252 do {
253 a.fd_in = check_fileno(fd_in);
254 a.fd_out = check_fileno(fd_out);
255 bytes = (long)io_run(nogvl_tee, &a);
256 } while (bytes == -1 && errno == EINTR);
258 return bytes;
262 * call-seq:
263 * IO.tee(fd_in, fd_out, len) => integer
264 * IO.tee(fd_in, fd_out, len, flags) => integer
266 * Copies up to +len+ bytes of data from +fd_in+ to +fd_out+. +fd_in+
267 * and +fd_out+ must both refer to pipe descriptors. +fd_in+ and +fd_out+
268 * may not be endpoints of the same pipe.
270 * +flags+ may be zero (the default) or IO::Splice::F_NONBLOCK
271 * Other IO::Splice flags are currently unimplemented or have no effect.
273 * Returns the number of bytes duplicated if successful.
274 * Raises EOFError when +fd_in+ is closed and emptied.
275 * Raises Errno::EAGAIN when +fd_in+ is empty and/or +fd_out+ is full
276 * and +flags+ contains IO::Splice::F_NONBLOCK
278 * Consider using IO.trytee if you are using IO::Splice::F_NONBLOCK
279 * as it avoids the cost of raising common Errno::EAGAIN exceptions.
281 * See manpage for full documentation:
282 * http://kernel.org/doc/man-pages/online/pages/man2/tee.2.html
284 static VALUE my_tee(int argc, VALUE *argv, VALUE self)
286 long n = do_tee(argc, argv, 0);
288 if (n == 0)
289 rb_eof_error();
290 if (n < 0)
291 rb_sys_fail("tee");
293 return LONG2NUM(n);
297 * call-seq:
298 * IO.trytee(fd_in, fd_out, len) => integer
299 * IO.trytee(fd_in, fd_out, len, flags) => integer
301 * Exactly like IO.tee, except +:EAGAIN+ is returned instead of raising
302 * Errno::EAGAIN when either the read or write end would block, and +nil+ is returned instead of raising EOFError at end of file.
304 * IO::Splice::F_NONBLOCK is always passed. As both descriptors must be
305 * pipes, there is no non-pipe descriptor that could still block.
307 * See IO.tee documentation for more details.
309 static VALUE trytee(int argc, VALUE *argv, VALUE self)
311 long n = do_tee(argc, argv, SPLICE_F_NONBLOCK);
313 if (n == 0)
314 return Qnil;
315 if (n < 0) {
316 if (errno == EAGAIN)
317 return sym_EAGAIN;
318 rb_sys_fail("tee");
321 return LONG2NUM(n);
/* the arguments of one vmsplice(2) call, bundled behind a single pointer */
struct vmsplice_args {
	int fd;				/* descriptor being written to */
	struct iovec *iov;		/* first iovec still to be written */
	unsigned long nr_segs;		/* number of iovecs starting at iov */
	unsigned flags;			/* SPLICE_F_* bitmask */
};
331 static VALUE nogvl_vmsplice(void *ptr)
333 struct vmsplice_args *a = ptr;
335 return (VALUE)vmsplice(a->fd, a->iov, a->nr_segs, a->flags);
/*
 * this can't be a function since we use alloca()
 *
 * Builds an iovec array on the caller's stack from the Ruby Array
 * +ary+, which must contain only Strings (Check_Type raises
 * otherwise) and at most IOV_MAX of them.
 *   iov    - receives the address of the iovec array
 *   iovcnt - receives the number of elements
 *   expect - receives the sum of all string lengths
 */
#define ARY2IOVEC(iov,iovcnt,expect,ary) \
do { \
	VALUE *str = RARRAY_PTR(ary); \
	long nr = RARRAY_LEN(ary); \
	struct iovec *vec; \
	long idx; \
	if (nr > IOV_MAX) \
		rb_raise(rb_eArgError, "array is larger than IOV_MAX"); \
	vec = alloca(sizeof(struct iovec) * nr); \
	iov = vec; \
	expect = 0; \
	iovcnt = nr; \
	for (idx = 0; idx < nr; idx++) { \
		Check_Type(str[idx], T_STRING); \
		vec[idx].iov_base = RSTRING_PTR(str[idx]); \
		vec[idx].iov_len = RSTRING_LEN(str[idx]); \
		expect += vec[idx].iov_len; \
	} \
} while (0)
359 static void advance_vmsplice_args(struct vmsplice_args *a, long n)
361 struct iovec *new_iov = a->iov;
362 unsigned long i;
364 /* skip over iovecs we've already written completely */
365 for (i = 0; i < a->nr_segs; i++, new_iov++) {
366 if (n == 0)
367 break;
369 * partially written iov,
370 * modify and retry with current iovec in
371 * front
373 if (new_iov->iov_len > (size_t)n) {
374 VALUE base = (VALUE)new_iov->iov_base;
376 new_iov->iov_len -= n;
377 new_iov->iov_base = (void *)(base + n);
378 break;
381 n -= new_iov->iov_len;
384 /* setup to retry without the already-written iovecs */
385 a->nr_segs -= i;
386 a->iov = new_iov;
390 * call-seq:
391 * IO.vmsplice(fd, string_array) => integer
392 * IO.vmsplice(fd, string_array, flags) => integer
393 * IO.vmsplice(fd, string) => integer
394 * IO.vmsplice(fd, string, flags) => integer
396 * Transfers an array of strings into the pipe descriptor given by fd.
397 * +fd+ must be the writable end of a pipe.
399 * This may allow the kernel to avoid data copies in some cases,
400 * but is (probably) of limited usefulness in Ruby. If you have
401 * use cases or ideas for making this more useful for Ruby users,
402 * please tell us at ruby.io.splice@librelist.com!
404 * Also consider the "sendfile" RubyGem or IO.copy_stream in Ruby 1.9
405 * if you want to do zero-copy file transfers to pipes or sockets. As
406 * of Linux 2.6.33, sendfile(2) can copy to any output descriptor,
407 * not just sockets.
409 * See manpage for full documentation:
410 * http://kernel.org/doc/man-pages/online/pages/man2/vmsplice.2.html
412 static VALUE my_vmsplice(int argc, VALUE * argv, VALUE self)
414 long rv = 0;
415 ssize_t left;
416 struct vmsplice_args a;
417 VALUE fd, data, flags;
419 rb_scan_args(argc, argv, "21", &fd, &data, &flags);
421 switch (TYPE(data)) {
422 case T_STRING: {
423 struct iovec iov;
425 iov.iov_base = RSTRING_PTR(data);
426 iov.iov_len = (size_t)(left = (ssize_t)RSTRING_LEN(data));
427 a.iov = &iov;
428 a.nr_segs = 1;
430 break;
431 case T_ARRAY:
432 ARY2IOVEC(a.iov, a.nr_segs, left, data);
433 break;
434 default:
435 rb_raise(rb_eTypeError, "wrong argument type %s "
436 "(expected a String or Array of strings)",
437 rb_obj_classname(data));
439 a.fd = my_fileno(fd);
440 a.flags = NIL_P(flags) ? 0 : NUM2UINT(flags);
442 for (;;) {
443 long n = (long)io_run(nogvl_vmsplice, &a);
445 if (n < 0) {
446 if (errno == EAGAIN) {
447 if (a.flags & SPLICE_F_NONBLOCK) {
448 rb_sys_fail("vmsplice");
449 } else {
450 a.fd = check_fileno(fd);
451 if (rb_io_wait_writable(a.fd))
452 continue;
454 /* fall through on error */
457 * unlikely to hit this case, return the
458 * already written bytes, we'll let the next
459 * write (or close) fail instead
461 if (rv > 0)
462 break;
463 if (errno == EINTR) {
464 a.fd = check_fileno(fd);
465 continue;
467 rb_sys_fail("vmsplice");
470 rv += n;
471 left -= n;
472 if (left == 0)
473 break;
474 advance_vmsplice_args(&a, n);
477 return LONG2NUM(rv);
481 * call-seq:
482 * reader, writer = IO.pipe
483 * reader.pipe_size => integer
485 * Returns the pipe capacity of the underlying pipe in bytes. The
486 * default capacity is 65536 bytes since Linux 2.6.11, and 4096 bytes
487 * in previous kernels.
489 * Since the pipe is a circular buffer in the same kernel, the size
490 * of the reader is exactly the same as the size of the writer.
492 * This method is only exposed on Linux 2.6.35 or later.
494 static VALUE pipe_size(VALUE self)
496 int size = fcntl(my_fileno(self), F_GETPIPE_SZ);
498 if (size < 0)
499 rb_sys_fail("fcntl(F_GETPIPE_SZ)");
501 return INT2NUM(size);
505 * call-seq:
506 * reader, writer = IO.pipe
507 * reader.pipe_size = integer
509 * Sets and returns the pipe capacity of the underlying pipe in bytes.
511 * This MUST be a power-of-two, or Errno::EINVAL will be raised.
512 * Linux will silently increase this to be equal to the page size
513 * (4096 bytes on most architectures) if the specified value is
514 * less than the size of a page.
516 * For users without CAP_SYS_RESOURCE, this raises Errno::EPERM when
517 * attempting to specify a value greater than the value in
518 * /proc/sys/fs/pipe-max-size.
520 * Since the pipe is a circular buffer in the same kernel, the size
521 * of the reader is exactly the same as the size of the writer.
523 * Raises Errno::EBUSY if the assigned value is less than
524 * the currently filled portion of the pipe.
526 * This method is only exposed on Linux 2.6.35 or later.
528 static VALUE set_pipe_size(VALUE self, VALUE size)
530 int fd = my_fileno(self);
531 int bytes = NUM2INT(size);
532 int rv = fcntl(fd, F_SETPIPE_SZ, bytes);
534 if (rv < 0) {
535 if (errno == ENOMEM) {
536 rb_gc();
537 rv = fcntl(fd, F_SETPIPE_SZ, bytes);
539 if (rv < 0)
540 rb_sys_fail("fcntl(F_SETPIPE_SZ)");
543 return size;
546 void Init_io_splice_ext(void)
548 VALUE mSplice = rb_define_module_under(rb_cIO, "Splice");
549 struct utsname utsname;
551 rb_define_singleton_method(rb_cIO, "splice", my_splice, -1);
552 rb_define_singleton_method(rb_cIO, "trysplice", trysplice, -1);
553 rb_define_singleton_method(rb_cIO, "tee", my_tee, -1);
554 rb_define_singleton_method(rb_cIO, "trytee", trytee, -1);
555 rb_define_singleton_method(rb_cIO, "vmsplice", my_vmsplice, -1);
558 * Attempt to move pages instead of copying. This is only a hint
559 * and support for it was removed in Linux 2.6.21. It will be
560 * re-added for FUSE filesystems only in Linux 2.6.35.
562 rb_define_const(mSplice, "F_MOVE", UINT2NUM(SPLICE_F_MOVE));
565 * Do not block on pipe I/O. This flag only affects the pipe(s)
566 * being spliced from/to and has no effect on the non-pipe
567 * descriptor (which requires non-blocking operation to be set
568 * explicitly).
570 * The non-blocking flag (O_NONBLOCK) on the pipe descriptors
571 * themselves are ignored by this family of functions, and
572 * using this flag is the only way to get non-blocking operation
573 * out of them.
575 rb_define_const(mSplice, "F_NONBLOCK", UINT2NUM(SPLICE_F_NONBLOCK));
578 * Indicate that there may be more data coming into the outbound
579 * descriptor. This can allow the kernel to avoid sending partial
580 * frames from sockets. Currently only used with splice.
582 rb_define_const(mSplice, "F_MORE", UINT2NUM(SPLICE_F_MORE));
585 * Only usable by vmsplice. This flag probably not useful in the
586 * context of Ruby applications which cannot control alignment.
588 rb_define_const(mSplice, "F_GIFT", UINT2NUM(SPLICE_F_GIFT));
591 * The maximum size of an atomic write to a pipe
592 * POSIX requires this to be at least 512 bytes.
593 * Under Linux, this is 4096 bytes.
595 rb_define_const(mSplice, "PIPE_BUF", UINT2NUM(PIPE_BUF));
597 if (uname(&utsname) == -1)
598 rb_sys_fail("uname");
600 /* includes 2.6.35-rc[1-6] */
601 if (strcmp(utsname.release, "2.6.35") >= 0) {
602 rb_define_method(rb_cIO, "pipe_size", pipe_size, 0);
603 rb_define_method(rb_cIO, "pipe_size=", set_pipe_size, 1);
606 * fcntl() command constant used to return the size of a pipe.
607 * This constant is only defined when running Linux 2.6.35
608 * or later. For convenience, use IO#pipe_size instead.
610 rb_define_const(mSplice, "F_GETPIPE_SZ",
611 UINT2NUM(F_GETPIPE_SZ));
614 * fcntl() command constant used to set the size of a pipe.
615 * This constant is only defined when running Linux 2.6.35
616 * or later. For convenience, use IO#pipe_size= instead.
618 rb_define_const(mSplice, "F_SETPIPE_SZ",
619 UINT2NUM(F_SETPIPE_SZ));
622 sym_EAGAIN = ID2SYM(rb_intern("EAGAIN"));