ext/kgio/writev.c

   1 /*
   2  * we're currently too lazy to use rb_ensure to free an allocation, so we
   3  * abuse the rb_str_* API for a temporary buffer
   4  */
   5 #define RSTRING_MODIFIED 1
   6
   7 #include "kgio.h"
   8 #include "my_fileno.h"
   9 #include "nonblock.h"
  10 #ifdef HAVE_WRITEV
  11 #  include <sys/uio.h>
  12 #  define USE_WRITEV 1
  13 #else
  14 #  define USE_WRITEV 0
  /*
   * Stand-in for writev(2) on platforms that lack it.  USE_WRITEV is 0 in
   * this configuration, so ending up here means a logic error in the caller:
   * trip the assertion (or report failure when assertions are compiled out).
   */
  static ssize_t assert_writev(int fd, void *iov, int len)
  {
          (void)fd;
          (void)iov;
          (void)len;

          assert(0 && "you should not try to call writev");
          return -1;
  }
  20 #  define writev assert_writev
  21 #endif
  22
  23 #ifndef HAVE_RB_ARY_SUBSEQ
  24 static inline VALUE my_ary_subseq(VALUE ary, long idx, long len)
  25 {
  26        VALUE args[2] = { LONG2FIX(idx), LONG2FIX(len) };
  27
  28        return rb_ary_aref(2, args, ary);
  29 }
  30 #define rb_ary_subseq my_ary_subseq
  31 #endif
  32
  33 static VALUE sym_wait_writable;
  34
  35 #ifndef HAVE_WRITEV
  36 #define iovec my_iovec
  37 struct my_iovec { /* local stand-in for struct iovec when writev is unavailable */
  38         void  *iov_base; /* start of the data to write */
  39         size_t iov_len; /* number of bytes at iov_base */
  40 };
  41 #endif
  42
  43 /* tests for choosing following constants were done on Linux 3.0 x86_64
  44  * (Ubuntu 12.04) Core i3 i3-2330M slowed to 1600MHz
  45  * testing script https://gist.github.com/2850641
  46  * feel free to do more thorough testing and choose a better value
  47  */
  48
  49 /* tests show that it's meaningless to set WRITEV_MEMLIMIT to more than 1M
  50  * even when tcp_wmem is set to a relatively high value (2M) (in fact, it
  51  * becomes even slower). 512K performs a bit better in the average case. */
  52 #define WRITEV_MEMLIMIT (512*1024)
  53 /* the same test shows that custom_writev is faster than glibc writev when
  54  * the average string is smaller than ~500 bytes and slower when the average
  55  * string is greater than ~600 bytes. 512 bytes was chosen because current
  56  * compilers turn x/512 into x>>9 */
  57 #define WRITEV_IMPL_THRESHOLD 512
  58
  59 static int iov_max = 1024; /* cap on iovec entries per batch; may be lowered in init_kgio_writev */
  60
  61 struct wrv_args { /* state carried through one my_writev call */
  62         VALUE io; /* IO object being written to */
  63         VALUE buf; /* Array of data still to write; ends up nil or :wait_writable */
  64         VALUE vec_buf; /* Ruby string backing vec; FIXME: this requires RSTRING_MODIFY for rbx */
  65         struct iovec *vec; /* iovec array of the current batch, stored inside vec_buf */
  66         int iov_cnt; /* number of vec entries used by the current batch */
  67         size_t batch_len; /* total number of bytes described by the current batch */
  68         int something_written; /* set to 1 once any write returned more than 0 bytes */
  69         int fd; /* descriptor obtained from my_fileno(io) */
  70 };
  71
  /*
   * Emulate writev(2) by gathering every iovec into one temporary buffer and
   * issuing a single write(2).
   *
   * fd        - descriptor to write to
   * vec       - array of iov_cnt buffers to concatenate
   * iov_cnt   - number of entries in vec
   * total_len - sum of all vec[i].iov_len, i.e. the size of the gathered data
   *
   * Returns whatever write(2) returns, or -1 with errno set to ENOMEM when
   * the temporary buffer cannot be allocated.  errno as left by write(2) is
   * preserved across the release of the temporary buffer.
   */
  static ssize_t custom_writev(int fd, const struct iovec *vec, int iov_cnt, size_t total_len)
  {
          char *buf, *dst;
          ssize_t result;
          int saved_errno;
          int i;

          /*
           * we do not want to use ruby's xmalloc because it can fire GC,
           * and we'll free the buffer shortly anyway.
           *
           * malloc(0) is allowed to return NULL, which must not be mistaken
           * for an allocation failure when every string in the batch is
           * empty, so always request at least one byte.
           */
          buf = malloc(total_len ? total_len : 1);
          if (buf == NULL) {
                  errno = ENOMEM;
                  return -1;
          }

          dst = buf;
          for (i = 0; i < iov_cnt; i++) {
                  /* skip empty entries: memcpy must not see a NULL source */
                  if (vec[i].iov_len == 0)
                          continue;
                  memcpy(dst, vec[i].iov_base, vec[i].iov_len);
                  dst += vec[i].iov_len;
          }

          result = write(fd, buf, total_len);

          /* free() should not touch errno, but keep the value safe anyway */
          saved_errno = errno;
          free(buf);
          errno = saved_errno;

          return result;
  }
  99
 100 static void prepare_writev(struct wrv_args *a, VALUE io, VALUE ary)
 101 {
 102         a->io = io;
 103         a->fd = my_fileno(io);
 104         a->something_written = 0;
 105
 106         if (TYPE(ary) == T_ARRAY)
 107                 /* rb_ary_subseq will not copy array unless it modified */
 108                 a->buf = rb_ary_subseq(ary, 0, RARRAY_LEN(ary));
 109         else
 110                 a->buf = rb_Array(ary);
 111
 112         a->vec_buf = rb_str_new(0, 0);
 113         a->vec = NULL;
 114 }
 115
 116 #ifndef RARRAY_LENINT
 117 static inline int rarray_int(VALUE val)
 118 {
 119         long num = RARRAY_LEN(val);
 120
 121         if ((long)(int)num != num)
 122                 rb_raise(rb_eRangeError, "%ld cannot to be an int", num);
 123
 124         return (int)num;
 125 }
 126 #define RARRAY_LENINT(n) rarray_int(n)
 127 #endif
 128
 129 static void fill_iovec(struct wrv_args *a)
 130 {
 131         int i;
 132         struct iovec *curvec;
 133
 134         a->iov_cnt = RARRAY_LENINT(a->buf);
 135         a->batch_len = 0;
 136         if (a->iov_cnt == 0) return;
 137         if (a->iov_cnt > iov_max) a->iov_cnt = iov_max;
 138         rb_str_resize(a->vec_buf, sizeof(struct iovec) * a->iov_cnt);
 139         curvec = a->vec = (struct iovec*)RSTRING_PTR(a->vec_buf);
 140
 141         for (i=0; i < a->iov_cnt; i++, curvec++) {
 142                 VALUE str = rb_ary_entry(a->buf, i);
 143                 long str_len, next_len;
 144
 145                 if (TYPE(str) != T_STRING) {
 146                         str = rb_obj_as_string(str);
 147                         rb_ary_store(a->buf, i, str);
 148                 }
 149
 150                 str_len = RSTRING_LEN(str);
 151
 152                 /* lets limit total memory to write,
 153                  * but always take first string */
 154                 next_len = a->batch_len + str_len;
 155                 if (i && next_len > WRITEV_MEMLIMIT) {
 156                         a->iov_cnt = i;
 157                         break;
 158                 }
 159                 a->batch_len = next_len;
 160
 161                 curvec->iov_base = RSTRING_PTR(str);
 162                 curvec->iov_len = str_len;
 163         }
 164 }
 165
 166 static long trim_writev_buffer(struct wrv_args *a, ssize_t n)
 167 {
 168         long i;
 169         long ary_len = RARRAY_LEN(a->buf);
 170
 171         if (n == (ssize_t)a->batch_len) {
 172                 i = a->iov_cnt;
 173                 n = 0;
 174         } else {
 175                 for (i = 0; n && i < ary_len; i++) {
 176                         VALUE entry = rb_ary_entry(a->buf, i);
 177                         n -= (ssize_t)RSTRING_LEN(entry);
 178                         if (n < 0) break;
 179                 }
 180         }
 181
 182         /* all done */
 183         if (i == ary_len) {
 184                 assert(n == 0 && "writev system call is broken");
 185                 a->buf = Qnil;
 186                 return 0;
 187         }
 188
 189         /* partially done, remove fully-written buffers */
 190         if (i > 0)
 191                 a->buf = rb_ary_subseq(a->buf, i, ary_len - i);
 192
 193         /* setup+replace partially written buffer */
 194         if (n < 0) {
 195                 VALUE str = rb_ary_entry(a->buf, 0);
 196                 long str_len = RSTRING_LEN(str);
 197                 str = MY_STR_SUBSEQ(str, str_len + n, -n);
 198                 rb_ary_store(a->buf, 0, str);
 199         }
 200         return RARRAY_LEN(a->buf);
 201 }
 202
 203 static long
 204 writev_check(struct wrv_args *a, ssize_t n, const char *msg, int io_wait)
 205 {
 206         if (n >= 0) {
 207                 if (n > 0) a->something_written = 1;
 208                 return trim_writev_buffer(a, n);
 209         } else if (n < 0) {
 210                 if (errno == EINTR) {
 211                         a->fd = my_fileno(a->io);
 212                         return -1;
 213                 }
 214                 if (errno == EAGAIN) {
 215                         if (io_wait) {
 216                                 (void)kgio_call_wait_writable(a->io);
 217                                 return -1;
 218                         } else if (!a->something_written) {
 219                                 a->buf = sym_wait_writable;
 220                         }
 221                         return 0;
 222                 }
 223                 kgio_wr_sys_fail(msg);
 224         }
 225         return 0;
 226 }
 227
 228 static VALUE my_writev(VALUE io, VALUE ary, int io_wait)
 229 {
 230         struct wrv_args a;
 231         ssize_t n;
 232
 233         prepare_writev(&a, io, ary);
 234         set_nonblocking(a.fd);
 235
 236         do {
 237                 fill_iovec(&a);
 238                 if (a.iov_cnt == 0)
 239                         n = 0;
 240                 else if (a.iov_cnt == 1)
 241                         n = write(a.fd, a.vec[0].iov_base, a.vec[0].iov_len);
 242                 /* for big strings use library function */
 243                 else if (USE_WRITEV &&
 244                         ((long)(a.batch_len/WRITEV_IMPL_THRESHOLD) > a.iov_cnt))
 245                         n = writev(a.fd, a.vec, a.iov_cnt);
 246                 else
 247                         n = custom_writev(a.fd, a.vec, a.iov_cnt, a.batch_len);
 248         } while (writev_check(&a, n, "writev", io_wait) != 0);
 249         rb_str_resize(a.vec_buf, 0);
 250
 251         if (TYPE(a.buf) != T_SYMBOL)
 252                 kgio_autopush_write(io);
 253         return a.buf;
 254 }
 255
 256 /*
 257  * call-seq:
 258  *
 259  *      io.kgio_writev(array)   -> nil
 260  *
 261  * Returns nil when the write completes.
 262  *
 263  * This may block and call any method defined to +kgio_wait_writable+
 264  * for the class.
 265  *
 266  * Note: it uses +Array()+ semantic for converting argument, so that
 267  * it will succeed if you pass something else.
 268  */
 269 static VALUE kgio_writev(VALUE io, VALUE ary)
 270 {
 271         return my_writev(io, ary, 1);
 272 }
 273
 274 /*
 275  * call-seq:
 276  *
 277  *      io.kgio_trywritev(array)        -> nil, Array or :wait_writable
 278  *
 279  * Returns nil if the write was completed in full.
 280  *
 281  * Returns an Array of strings containing the unwritten portion
 282  * if EAGAIN was encountered, but some portion was successfully written.
 283  *
 284  * Returns :wait_writable if EAGAIN is encountered and nothing
 285  * was written.
 286  *
 287  * Note: it uses +Array()+ semantic for converting argument, so that
 288  * it will succeed if you pass something else.
 289  */
 290 static VALUE kgio_trywritev(VALUE io, VALUE ary)
 291 {
 292         return my_writev(io, ary, 0);
 293 }
 294
 295 /*
 296  * call-seq:
 297  *
 298  *      Kgio.trywritev(io, array)    -> nil, Array or :wait_writable
 299  *
 300  * Returns nil if the write was completed in full.
 301  *
 302  * Returns a Array of strings containing the unwritten portion if EAGAIN
 303  * was encountered, but some portion was successfully written.
 304  *
 305  * Returns :wait_writable if EAGAIN is encountered and nothing
 306  * was written.
 307  *
 308  * Maybe used in place of PipeMethods#kgio_trywritev for non-Kgio objects
 309  */
 310 static VALUE s_trywritev(VALUE mod, VALUE io, VALUE ary)
 311 {
 312         return kgio_trywritev(io, ary);
 313 }
 314
 315 void init_kgio_writev(void)
 316 {
 317 #ifdef IOV_MAX
 318         int sys_iov_max = IOV_MAX;
 319 #else
 320         int sys_iov_max = (int)sysconf(_SC_IOV_MAX);
 321 #endif
 322
 323         VALUE mPipeMethods, mSocketMethods;
 324         VALUE mKgio = rb_define_module("Kgio");
 325
 326         if (sys_iov_max < iov_max)
 327                 iov_max = sys_iov_max;
 328
 329         sym_wait_writable = ID2SYM(rb_intern("wait_writable"));
 330
 331         rb_define_singleton_method(mKgio, "trywritev", s_trywritev, 2);
 332
 333         mPipeMethods = rb_define_module_under(mKgio, "PipeMethods");
 334         rb_define_method(mPipeMethods, "kgio_writev", kgio_writev, 1);
 335         rb_define_method(mPipeMethods, "kgio_trywritev", kgio_trywritev, 1);
 336
 337         mSocketMethods = rb_define_module_under(mKgio, "SocketMethods");
 338         rb_define_method(mSocketMethods, "kgio_writev", kgio_writev, 1);
 339         rb_define_method(mSocketMethods, "kgio_trywritev", kgio_trywritev, 1);
 340 }