Add regression test for mutating pack's format string
[ruby.git] / util.c
blob8a188149378cb6102b9051dacd3942ad0206e544
1 /**********************************************************************
3 util.c -
5 $Author$
6 created at: Fri Mar 10 17:22:34 JST 1995
8 Copyright (C) 1993-2008 Yukihiro Matsumoto
10 **********************************************************************/
12 #if defined __MINGW32__ || defined __MINGW64__
13 # define MINGW_HAS_SECURE_API 1
14 #endif
16 #ifndef __STDC_WANT_LIB_EXT1__
17 #define __STDC_WANT_LIB_EXT1__ 1 /* for qsort_s() */
18 #endif
20 #include "ruby/internal/config.h"
22 #include <ctype.h>
23 #include <errno.h>
24 #include <float.h>
25 #include <math.h>
26 #include <stdio.h>
28 #ifdef _WIN32
29 # include "missing/file.h"
30 #endif
32 #include "internal.h"
33 #include "internal/sanitizers.h"
34 #include "internal/imemo.h"
35 #include "internal/util.h"
36 #include "ruby/util.h"
37 #include "ruby_atomic.h"
39 const char ruby_hexdigits[] = "0123456789abcdef0123456789ABCDEF";
40 #define hexdigit ruby_hexdigits
42 unsigned long
43 ruby_scan_oct(const char *start, size_t len, size_t *retlen)
45 register const char *s = start;
46 register unsigned long retval = 0;
47 size_t i;
49 for (i = 0; i < len; i++) {
50 if ((s[0] < '0') || ('7' < s[0])) {
51 break;
53 retval <<= 3;
54 retval |= *s++ - '0';
56 *retlen = (size_t)(s - start);
57 return retval;
60 unsigned long
61 ruby_scan_hex(const char *start, size_t len, size_t *retlen)
63 register const char *s = start;
64 register unsigned long retval = 0;
65 signed char d;
66 size_t i = 0;
68 for (i = 0; i < len; i++) {
69 d = ruby_digit36_to_number_table[(unsigned char)*s];
70 if (d < 0 || 15 < d) {
71 break;
73 retval <<= 4;
74 retval |= d;
75 s++;
77 *retlen = (size_t)(s - start);
78 return retval;
81 const signed char ruby_digit36_to_number_table[] = {
82 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
83 /*0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
84 /*1*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
85 /*2*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
86 /*3*/ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
87 /*4*/ -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
88 /*5*/ 25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
89 /*6*/ -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
90 /*7*/ 25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
91 /*8*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
92 /*9*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
93 /*a*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
94 /*b*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
95 /*c*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
96 /*d*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
97 /*e*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
98 /*f*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
101 NO_SANITIZE("unsigned-integer-overflow", extern unsigned long ruby_scan_digits(const char *str, ssize_t len, int base, size_t *retlen, int *overflow));
102 unsigned long
103 ruby_scan_digits(const char *str, ssize_t len, int base, size_t *retlen, int *overflow)
105 RBIMPL_ASSERT_OR_ASSUME(base >= 2);
106 RBIMPL_ASSERT_OR_ASSUME(base <= 36);
108 const char *start = str;
109 unsigned long ret = 0, x;
110 unsigned long mul_overflow = (~(unsigned long)0) / base;
112 *overflow = 0;
114 if (!len) {
115 *retlen = 0;
116 return 0;
119 do {
120 int d = ruby_digit36_to_number_table[(unsigned char)*str++];
121 if (d == -1 || base <= d) {
122 --str;
123 break;
125 if (mul_overflow < ret)
126 *overflow = 1;
127 ret *= base;
128 x = ret;
129 ret += d;
130 if (ret < x)
131 *overflow = 1;
132 } while (len < 0 || --len);
133 *retlen = str - start;
134 return ret;
137 unsigned long
138 ruby_strtoul(const char *str, char **endptr, int base)
140 int c, b, overflow;
141 int sign = 0;
142 size_t len;
143 unsigned long ret;
144 const char *subject_found = str;
146 if (base < 0) {
147 errno = EINVAL;
148 return 0;
151 if (base == 1 || 36 < base) {
152 errno = EINVAL;
153 return 0;
156 while ((c = *str) && ISSPACE(c))
157 str++;
159 if (c == '+') {
160 sign = 1;
161 str++;
163 else if (c == '-') {
164 sign = -1;
165 str++;
168 if (str[0] == '0') {
169 subject_found = str+1;
170 if (base == 0 || base == 16) {
171 if (str[1] == 'x' || str[1] == 'X') {
172 b = 16;
173 str += 2;
175 else {
176 b = base == 0 ? 8 : 16;
177 str++;
180 else {
181 b = base;
182 str++;
185 else {
186 b = base == 0 ? 10 : base;
189 ret = ruby_scan_digits(str, -1, b, &len, &overflow);
191 if (0 < len)
192 subject_found = str+len;
194 if (endptr)
195 *endptr = (char*)subject_found;
197 if (overflow) {
198 errno = ERANGE;
199 return ULONG_MAX;
202 if (sign < 0) {
203 ret = (unsigned long)(-(long)ret);
204 return ret;
206 else {
207 return ret;
211 #if !defined HAVE_GNU_QSORT_R
212 #include <sys/types.h>
213 #include <stdint.h>
214 #ifdef HAVE_UNISTD_H
215 #include <unistd.h>
216 #endif
218 typedef int (cmpfunc_t)(const void*, const void*, void*);
220 #if defined HAVE_QSORT_S && defined RUBY_MSVCRT_VERSION
221 /* In contrast to its name, Visual Studio qsort_s is incompatible with
222 * C11 in the order of the comparison function's arguments, and same
223 * as BSD qsort_r rather. */
224 # define qsort_r(base, nel, size, arg, cmp) qsort_s(base, nel, size, cmp, arg)
225 # define cmp_bsd_qsort cmp_ms_qsort
226 # define HAVE_BSD_QSORT_R 1
227 #endif
229 #if defined HAVE_BSD_QSORT_R
230 struct bsd_qsort_r_args {
231 cmpfunc_t *cmp;
232 void *arg;
235 static int
236 cmp_bsd_qsort(void *d, const void *a, const void *b)
238 const struct bsd_qsort_r_args *args = d;
239 return (*args->cmp)(a, b, args->arg);
242 void
243 ruby_qsort(void* base, const size_t nel, const size_t size, cmpfunc_t *cmp, void *d)
245 struct bsd_qsort_r_args args;
246 args.cmp = cmp;
247 args.arg = d;
248 qsort_r(base, nel, size, &args, cmp_bsd_qsort);
250 #elif defined HAVE_QSORT_S
251 /* C11 qsort_s has the same arguments as GNU's, but uses
252 * runtime-constraints handler. */
253 void
254 ruby_qsort(void* base, const size_t nel, const size_t size, cmpfunc_t *cmp, void *d)
256 if (!nel || !size) return; /* nothing to sort */
258 /* get rid of runtime-constraints handler for MT-safeness */
259 if (!base || !cmp) return;
260 if (nel > RSIZE_MAX || size > RSIZE_MAX) return;
262 qsort_s(base, nel, size, cmp, d);
264 # define HAVE_GNU_QSORT_R 1
265 #else
266 /* mm.c */
268 #define mmtype long
269 #define mmcount (16 / SIZEOF_LONG)
270 #define A ((mmtype*)a)
271 #define B ((mmtype*)b)
272 #define C ((mmtype*)c)
273 #define D ((mmtype*)d)
275 #define mmstep (sizeof(mmtype) * mmcount)
276 #define mmprepare(base, size) do {\
277 if (((VALUE)(base) % sizeof(mmtype)) == 0 && ((size) % sizeof(mmtype)) == 0) \
278 if ((size) >= mmstep) mmkind = 1;\
279 else mmkind = 0;\
280 else mmkind = -1;\
281 high = ((size) / mmstep) * mmstep;\
282 low = ((size) % mmstep);\
283 } while (0)\
285 #define mmarg mmkind, size, high, low
286 #define mmargdecl int mmkind, size_t size, size_t high, size_t low
288 static void mmswap_(register char *a, register char *b, mmargdecl)
290 if (a == b) return;
291 if (mmkind >= 0) {
292 register mmtype s;
293 #if mmcount > 1
294 if (mmkind > 0) {
295 register char *t = a + high;
296 do {
297 s = A[0]; A[0] = B[0]; B[0] = s;
298 s = A[1]; A[1] = B[1]; B[1] = s;
299 #if mmcount > 2
300 s = A[2]; A[2] = B[2]; B[2] = s;
301 #if mmcount > 3
302 s = A[3]; A[3] = B[3]; B[3] = s;
303 #endif
304 #endif
305 a += mmstep; b += mmstep;
306 } while (a < t);
308 #endif
309 if (low != 0) { s = A[0]; A[0] = B[0]; B[0] = s;
310 #if mmcount > 2
311 if (low >= 2 * sizeof(mmtype)) { s = A[1]; A[1] = B[1]; B[1] = s;
312 #if mmcount > 3
313 if (low >= 3 * sizeof(mmtype)) {s = A[2]; A[2] = B[2]; B[2] = s;}
314 #endif
316 #endif
319 else {
320 register char *t = a + size, s;
321 do {s = *a; *a++ = *b; *b++ = s;} while (a < t);
324 #define mmswap(a,b) mmswap_((a),(b),mmarg)
326 /* a, b, c = b, c, a */
327 static void mmrot3_(register char *a, register char *b, register char *c, mmargdecl)
329 if (mmkind >= 0) {
330 register mmtype s;
331 #if mmcount > 1
332 if (mmkind > 0) {
333 register char *t = a + high;
334 do {
335 s = A[0]; A[0] = B[0]; B[0] = C[0]; C[0] = s;
336 s = A[1]; A[1] = B[1]; B[1] = C[1]; C[1] = s;
337 #if mmcount > 2
338 s = A[2]; A[2] = B[2]; B[2] = C[2]; C[2] = s;
339 #if mmcount > 3
340 s = A[3]; A[3] = B[3]; B[3] = C[3]; C[3] = s;
341 #endif
342 #endif
343 a += mmstep; b += mmstep; c += mmstep;
344 } while (a < t);
346 #endif
347 if (low != 0) { s = A[0]; A[0] = B[0]; B[0] = C[0]; C[0] = s;
348 #if mmcount > 2
349 if (low >= 2 * sizeof(mmtype)) { s = A[1]; A[1] = B[1]; B[1] = C[1]; C[1] = s;
350 #if mmcount > 3
351 if (low == 3 * sizeof(mmtype)) {s = A[2]; A[2] = B[2]; B[2] = C[2]; C[2] = s;}
352 #endif
354 #endif
357 else {
358 register char *t = a + size, s;
359 do {s = *a; *a++ = *b; *b++ = *c; *c++ = s;} while (a < t);
362 #define mmrot3(a,b,c) mmrot3_((a),(b),(c),mmarg)
364 /* qs6.c */
365 /*****************************************************/
366 /* */
367 /* qs6 (Quick sort function) */
368 /* */
369 /* by Tomoyuki Kawamura 1995.4.21 */
370 /* kawamura@tokuyama.ac.jp */
371 /*****************************************************/
373 typedef struct { char *LL, *RR; } stack_node; /* Stack structure for L,l,R,r */
374 #define PUSH(ll,rr) do { top->LL = (ll); top->RR = (rr); ++top; } while (0) /* Push L,l,R,r */
375 #define POP(ll,rr) do { --top; (ll) = top->LL; (rr) = top->RR; } while (0) /* Pop L,l,R,r */
377 #define med3(a,b,c) ((*cmp)((a),(b),d)<0 ? \
378 ((*cmp)((b),(c),d)<0 ? (b) : ((*cmp)((a),(c),d)<0 ? (c) : (a))) : \
379 ((*cmp)((b),(c),d)>0 ? (b) : ((*cmp)((a),(c),d)<0 ? (a) : (c))))
381 void
382 ruby_qsort(void* base, const size_t nel, const size_t size, cmpfunc_t *cmp, void *d)
384 register char *l, *r, *m; /* l,r:left,right group m:median point */
385 register int t, eq_l, eq_r; /* eq_l: all items in left group are equal to S */
386 char *L = base; /* left end of current region */
387 char *R = (char*)base + size*(nel-1); /* right end of current region */
388 size_t chklim = 63; /* threshold of ordering element check */
389 enum {size_bits = sizeof(size) * CHAR_BIT};
390 stack_node stack[size_bits]; /* enough for size_t size */
391 stack_node *top = stack;
392 int mmkind;
393 size_t high, low, n;
395 if (nel <= 1) return; /* need not to sort */
396 mmprepare(base, size);
397 goto start;
399 nxt:
400 if (stack == top) return; /* return if stack is empty */
401 POP(L,R);
403 for (;;) {
404 start:
405 if (L + size == R) { /* 2 elements */
406 if ((*cmp)(L,R,d) > 0) mmswap(L,R);
407 goto nxt;
410 l = L; r = R;
411 n = (r - l + size) / size; /* number of elements */
412 m = l + size * (n >> 1); /* calculate median value */
414 if (n >= 60) {
415 register char *m1;
416 register char *m3;
417 if (n >= 200) {
418 n = size*(n>>3); /* number of bytes in splitting 8 */
420 register char *p1 = l + n;
421 register char *p2 = p1 + n;
422 register char *p3 = p2 + n;
423 m1 = med3(p1, p2, p3);
424 p1 = m + n;
425 p2 = p1 + n;
426 p3 = p2 + n;
427 m3 = med3(p1, p2, p3);
430 else {
431 n = size*(n>>2); /* number of bytes in splitting 4 */
432 m1 = l + n;
433 m3 = m + n;
435 m = med3(m1, m, m3);
438 if ((t = (*cmp)(l,m,d)) < 0) { /*3-5-?*/
439 if ((t = (*cmp)(m,r,d)) < 0) { /*3-5-7*/
440 if (chklim && nel >= chklim) { /* check if already ascending order */
441 char *p;
442 chklim = 0;
443 for (p=l; p<r; p+=size) if ((*cmp)(p,p+size,d) > 0) goto fail;
444 goto nxt;
446 fail: goto loopA; /*3-5-7*/
448 if (t > 0) {
449 if ((*cmp)(l,r,d) <= 0) {mmswap(m,r); goto loopA;} /*3-5-4*/
450 mmrot3(r,m,l); goto loopA; /*3-5-2*/
452 goto loopB; /*3-5-5*/
455 if (t > 0) { /*7-5-?*/
456 if ((t = (*cmp)(m,r,d)) > 0) { /*7-5-3*/
457 if (chklim && nel >= chklim) { /* check if already ascending order */
458 char *p;
459 chklim = 0;
460 for (p=l; p<r; p+=size) if ((*cmp)(p,p+size,d) < 0) goto fail2;
461 while (l<r) {mmswap(l,r); l+=size; r-=size;} /* reverse region */
462 goto nxt;
464 fail2: mmswap(l,r); goto loopA; /*7-5-3*/
466 if (t < 0) {
467 if ((*cmp)(l,r,d) <= 0) {mmswap(l,m); goto loopB;} /*7-5-8*/
468 mmrot3(l,m,r); goto loopA; /*7-5-6*/
470 mmswap(l,r); goto loopA; /*7-5-5*/
473 if ((t = (*cmp)(m,r,d)) < 0) {goto loopA;} /*5-5-7*/
474 if (t > 0) {mmswap(l,r); goto loopB;} /*5-5-3*/
476 /* determining splitting type in case 5-5-5 */ /*5-5-5*/
477 for (;;) {
478 if ((l += size) == r) goto nxt; /*5-5-5*/
479 if (l == m) continue;
480 if ((t = (*cmp)(l,m,d)) > 0) {mmswap(l,r); l = L; goto loopA;}/*575-5*/
481 if (t < 0) {mmswap(L,l); l = L; goto loopB;} /*535-5*/
484 loopA: eq_l = 1; eq_r = 1; /* splitting type A */ /* left <= median < right */
485 for (;;) {
486 for (;;) {
487 if ((l += size) == r)
488 {l -= size; if (l != m) mmswap(m,l); l -= size; goto fin;}
489 if (l == m) continue;
490 if ((t = (*cmp)(l,m,d)) > 0) {eq_r = 0; break;}
491 if (t < 0) eq_l = 0;
493 for (;;) {
494 if (l == (r -= size))
495 {l -= size; if (l != m) mmswap(m,l); l -= size; goto fin;}
496 if (r == m) {m = l; break;}
497 if ((t = (*cmp)(r,m,d)) < 0) {eq_l = 0; break;}
498 if (t == 0) break;
500 mmswap(l,r); /* swap left and right */
503 loopB: eq_l = 1; eq_r = 1; /* splitting type B */ /* left < median <= right */
504 for (;;) {
505 for (;;) {
506 if (l == (r -= size))
507 {r += size; if (r != m) mmswap(r,m); r += size; goto fin;}
508 if (r == m) continue;
509 if ((t = (*cmp)(r,m,d)) < 0) {eq_l = 0; break;}
510 if (t > 0) eq_r = 0;
512 for (;;) {
513 if ((l += size) == r)
514 {r += size; if (r != m) mmswap(r,m); r += size; goto fin;}
515 if (l == m) {m = r; break;}
516 if ((t = (*cmp)(l,m,d)) > 0) {eq_r = 0; break;}
517 if (t == 0) break;
519 mmswap(l,r); /* swap left and right */
522 fin:
523 if (eq_l == 0) /* need to sort left side */
524 if (eq_r == 0) /* need to sort right side */
525 if (l-L < R-r) {PUSH(r,R); R = l;} /* sort left side first */
526 else {PUSH(L,l); L = r;} /* sort right side first */
527 else R = l; /* need to sort left side only */
528 else if (eq_r == 0) L = r; /* need to sort right side only */
529 else goto nxt; /* need not to sort both sides */
532 #endif
533 #endif /* !HAVE_GNU_QSORT_R */
535 char *
536 ruby_strdup(const char *str)
538 char *tmp;
539 size_t len = strlen(str) + 1;
541 tmp = xmalloc(len);
542 memcpy(tmp, str, len);
544 return tmp;
547 #if defined HAVE_GETCWD
548 # if defined NO_GETCWD_MALLOC
550 char *
551 ruby_getcwd(void)
553 VALUE guard = rb_imemo_tmpbuf_auto_free_pointer();
554 int size = 200;
555 char *buf = xmalloc(size);
557 while (!getcwd(buf, size)) {
558 int e = errno;
559 if (e != ERANGE) {
560 rb_free_tmp_buffer(&guard);
561 rb_syserr_fail(e, "getcwd");
563 size *= 2;
564 rb_imemo_tmpbuf_set_ptr(guard, buf);
565 buf = xrealloc(buf, size);
567 rb_free_tmp_buffer(&guard);
568 return buf;
571 # else
573 static const rb_data_type_t getcwd_buffer_guard_type = {
574 .wrap_struct_name = "ruby_getcwd_guard",
575 .function = {
576 .dfree = free // not xfree.
578 .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
581 char *
582 ruby_getcwd(void)
584 VALUE guard = TypedData_Wrap_Struct((VALUE)0, &getcwd_buffer_guard_type, NULL);
585 char *buf, *cwd = getcwd(NULL, 0);
586 RTYPEDDATA_DATA(guard) = cwd;
587 if (!cwd) rb_sys_fail("getcwd");
588 buf = ruby_strdup(cwd); /* allocate by xmalloc */
589 free(cwd);
590 RTYPEDDATA_DATA(RB_GC_GUARD(guard)) = NULL;
591 return buf;
594 # endif
595 #else
597 # ifndef PATH_MAX
598 # define PATH_MAX 8192
599 # endif
601 char *
602 ruby_getcwd(void)
604 char *buf = xmalloc(PATH_MAX+1);
606 if (!getwd(buf)) {
607 int e = errno;
608 xfree(buf);
609 rb_syserr_fail(e, "getwd");
611 return buf;
614 #endif
616 void
617 ruby_each_words(const char *str, void (*func)(const char*, int, void*), void *arg)
619 const char *end;
620 int len;
622 if (!str) return;
623 for (; *str; str = end) {
624 while (ISSPACE(*str) || *str == ',') str++;
625 if (!*str) break;
626 end = str;
627 while (*end && !ISSPACE(*end) && *end != ',') end++;
628 len = (int)(end - str); /* assume no string exceeds INT_MAX */
629 (*func)(str, len, arg);
633 #undef strtod
634 #define strtod ruby_strtod
635 #undef dtoa
636 #define dtoa ruby_dtoa
637 #undef hdtoa
638 #define hdtoa ruby_hdtoa
639 #include "missing/dtoa.c"