3 ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
13 #include "lj_strscan.h"
15 /* -- Scanning numbers ---------------------------------------------------- */
18 ** Rationale for the builtin string to number conversion library:
20 ** It removes a dependency on libc's strtod(), which is a true portability
21 ** nightmare. Mainly due to the plethora of supported OS and toolchain
22 ** combinations. Sadly, the various implementations
23 ** a) are often buggy, incomplete (no hex floats) and/or imprecise,
24 ** b) sometimes crash or hang on certain inputs,
25 ** c) return non-standard NaNs that need to be filtered out, and
26 ** d) fail if the locale-specific decimal separator is not a dot,
27 ** which can only be fixed with atrocious workarounds.
29 ** Also, most of the strtod() implementations are hopelessly bloated,
30 ** which is not just an I-cache hog, but a problem for static linkage
31 ** on embedded systems, too.
33 ** OTOH the builtin conversion function is very compact. Even though it
34 ** does a lot more, like parsing long longs, octal or imaginary numbers
35 ** and returning the result in different formats:
36 ** a) It needs less than 3 KB (!) of machine code (on x64 with -Os),
37 ** b) it doesn't perform any dynamic allocation and,
38 ** c) it needs only around 600 bytes of stack space.
40 ** The builtin function is faster than strtod() for typical inputs, e.g.
41 ** "123", "1.5" or "1e6". Arguably, it's slower for very large exponents,
42 ** which are not very common (this could be fixed, if needed).
44 ** And most importantly, the builtin function is equally precise on all
45 ** platforms. It correctly converts and rounds any input to a double.
46 ** If this is not the case, please send a bug report -- but PLEASE verify
47 ** that the implementation you're comparing to is not the culprit!
49 ** The implementation quickly pre-scans the entire string first and
50 ** handles simple integers on-the-fly. Otherwise, it dispatches to the
51 ** base-specific parser. Hex and octal is straightforward.
53 ** Decimal to binary conversion uses a fixed-length circular buffer in
54 ** base 100. Some simple cases are handled directly. For other cases, the
55 ** number in the buffer is up-scaled or down-scaled until the integer part
56 ** is in the proper range. Then the integer part is rounded and converted
57 ** to a double which is finally rescaled to the result. Denormals need
58 ** special treatment to prevent incorrect 'double rounding'.
61 /* Definitions for circular decimal digit buffer (base 100 = 2 digits/byte). */
62 #define STRSCAN_DIG 1024
63 #define STRSCAN_MAXDIG 800 /* 772 + extra are sufficient. */
64 #define STRSCAN_DDIG (STRSCAN_DIG/2)
65 #define STRSCAN_DMASK (STRSCAN_DDIG-1)
67 /* Helpers for circular buffer. */
68 #define DNEXT(a) (((a)+1) & STRSCAN_DMASK)
69 #define DPREV(a) (((a)-1) & STRSCAN_DMASK)
70 #define DLEN(lo, hi) ((int32_t)(((lo)-(hi)) & STRSCAN_DMASK))
72 #define casecmp(c, k) (((c) | 0x20) == k)
74 /* Final conversion to double. */
75 static void strscan_double(uint64_t x
, TValue
*o
, int32_t ex2
, int32_t neg
)
79 /* Avoid double rounding for denormals. */
80 if (LJ_UNLIKELY(ex2
<= -1075 && x
!= 0)) {
81 /* NYI: all of this generates way too much code on 32 bit CPUs. */
82 #if defined(__GNUC__) && LJ_64
83 int32_t b
= (int32_t)(__builtin_clzll(x
)^63);
85 int32_t b
= (x
>>32) ? 32+(int32_t)lj_fls((uint32_t)(x
>>32)) :
86 (int32_t)lj_fls((uint32_t)x
);
88 if ((int32_t)b
+ ex2
<= -1023 && (int32_t)b
+ ex2
>= -1075) {
89 uint64_t rb
= (uint64_t)1 << (-1075-ex2
);
90 if ((x
& rb
) && ((x
& (rb
+rb
+rb
-1)))) x
+= rb
+rb
;
95 /* Convert to double using a signed int64_t conversion, then rescale. */
96 lua_assert((int64_t)x
>= 0);
97 n
= (double)(int64_t)x
;
99 if (ex2
) n
= ldexp(n
, ex2
);
103 /* Parse hexadecimal number. */
104 static StrScanFmt
strscan_hex(const uint8_t *p
, TValue
*o
,
105 StrScanFmt fmt
, uint32_t opt
,
106 int32_t ex2
, int32_t neg
, uint32_t dig
)
111 /* Scan hex digits. */
112 for (i
= dig
> 16 ? 16 : dig
; i
; i
--, p
++) {
113 uint32_t d
= (*p
!= '.' ? *p
: *++p
); if (d
> '9') d
+= 9;
114 x
= (x
<< 4) + (d
& 15);
117 /* Summarize rounding-effect of excess digits. */
118 for (i
= 16; i
< dig
; i
++, p
++)
119 x
|= ((*p
!= '.' ? *p
: *++p
) != '0'), ex2
+= 4;
121 /* Format-specific handling. */
124 if (!(opt
& STRSCAN_OPT_TONUM
) && x
< 0x80000000u
+neg
) {
125 o
->i
= neg
? -(int32_t)x
: (int32_t)x
;
126 return STRSCAN_INT
; /* Fast path for 32 bit integers. */
128 if (!(opt
& STRSCAN_OPT_C
)) { fmt
= STRSCAN_NUM
; break; }
131 if (dig
> 8) return STRSCAN_ERROR
;
132 o
->i
= neg
? -(int32_t)x
: (int32_t)x
;
136 if (dig
> 16) return STRSCAN_ERROR
;
137 o
->u64
= neg
? (uint64_t)-(int64_t)x
: x
;
143 /* Reduce range then convert to double. */
144 if ((x
& U64x(c0000000
,0000000))) { x
= (x
>> 2) | (x
& 3); ex2
+= 2; }
145 strscan_double(x
, o
, ex2
, neg
);
149 /* Parse octal number. */
150 static StrScanFmt
strscan_oct(const uint8_t *p
, TValue
*o
,
151 StrScanFmt fmt
, int32_t neg
, uint32_t dig
)
155 /* Scan octal digits. */
156 if (dig
> 22 || (dig
== 22 && *p
> '1')) return STRSCAN_ERROR
;
158 if (!(*p
>= '0' && *p
<= '7')) return STRSCAN_ERROR
;
159 x
= (x
<< 3) + (*p
++ & 7);
162 /* Format-specific handling. */
165 if (x
>= 0x80000000u
+neg
) fmt
= STRSCAN_U32
;
168 if ((x
>> 32)) return STRSCAN_ERROR
;
169 o
->i
= neg
? -(int32_t)x
: (int32_t)x
;
174 o
->u64
= neg
? (uint64_t)-(int64_t)x
: x
;
180 /* Parse decimal number. */
181 static StrScanFmt
strscan_dec(const uint8_t *p
, TValue
*o
,
182 StrScanFmt fmt
, uint32_t opt
,
183 int32_t ex10
, int32_t neg
, uint32_t dig
)
185 uint8_t xi
[STRSCAN_DDIG
], *xip
= xi
;
189 if (i
> STRSCAN_MAXDIG
) {
190 ex10
+= (int32_t)(i
- STRSCAN_MAXDIG
);
193 /* Scan unaligned leading digit. */
195 *xip
++ = ((*p
!= '.' ? *p
: *++p
) & 15), i
--, p
++;
196 /* Scan aligned double-digits. */
197 for ( ; i
> 1; i
-= 2) {
198 uint32_t d
= 10 * ((*p
!= '.' ? *p
: *++p
) & 15); p
++;
199 *xip
++ = d
+ ((*p
!= '.' ? *p
: *++p
) & 15); p
++;
201 /* Scan and realign trailing digit. */
202 if (i
) *xip
++ = 10 * ((*p
!= '.' ? *p
: *++p
) & 15), ex10
--, dig
++, p
++;
204 /* Summarize rounding-effect of excess digits. */
205 if (dig
> STRSCAN_MAXDIG
) {
207 if ((*p
!= '.' ? *p
: *++p
) != '0') { xip
[-1] |= 1; break; }
209 } while (--dig
> STRSCAN_MAXDIG
);
210 dig
= STRSCAN_MAXDIG
;
211 } else { /* Simplify exponent. */
212 while (ex10
> 0 && dig
<= 18) *xip
++ = 0, ex10
-= 2, dig
+= 2;
214 } else { /* Only got zeros. */
219 /* Fast path for numbers in integer format (but handles e.g. 1e6, too). */
220 if (dig
<= 20 && ex10
== 0) {
224 for (xis
= xi
+1; xis
< xip
; xis
++) x
= x
* 100 + *xis
;
225 if (!(dig
== 20 && (xi
[0] > 18 || (int64_t)x
>= 0))) { /* No overflow? */
226 /* Format-specific handling. */
229 if (!(opt
& STRSCAN_OPT_TONUM
) && x
< 0x80000000u
+neg
) {
230 o
->i
= neg
? -(int32_t)x
: (int32_t)x
;
231 return STRSCAN_INT
; /* Fast path for 32 bit integers. */
233 if (!(opt
& STRSCAN_OPT_C
)) { fmt
= STRSCAN_NUM
; goto plainnumber
; }
236 if ((x
>> 32) != 0) return STRSCAN_ERROR
;
237 o
->i
= neg
? -(int32_t)x
: (int32_t)x
;
241 o
->u64
= neg
? (uint64_t)-(int64_t)x
: x
;
244 plainnumber
: /* Fast path for plain numbers < 2^63. */
245 if ((int64_t)x
< 0) break;
246 n
= (double)(int64_t)x
;
254 /* Slow non-integer path. */
255 if (fmt
== STRSCAN_INT
) {
256 if ((opt
& STRSCAN_OPT_C
)) return STRSCAN_ERROR
;
258 } else if (fmt
> STRSCAN_INT
) {
259 return STRSCAN_ERROR
;
262 uint32_t hi
= 0, lo
= (uint32_t)(xip
-xi
);
263 int32_t ex2
= 0, idig
= (int32_t)lo
+ (ex10
>> 1);
265 lua_assert(lo
> 0 && (ex10
& 1) == 0);
267 /* Handle simple overflow/underflow. */
268 if (idig
> 310/2) { if (neg
) setminfV(o
); else setpinfV(o
); return fmt
; }
269 else if (idig
< -326/2) { o
->n
= neg
? -0.0 : 0.0; return fmt
; }
271 /* Scale up until we have at least 17 or 18 integer part digits. */
272 while (idig
< 9 && idig
< DLEN(lo
, hi
)) {
275 for (i
= DPREV(lo
); ; i
= DPREV(i
)) {
276 uint32_t d
= (xi
[i
] << 6) + cy
;
277 cy
= (((d
>> 2) * 5243) >> 17); d
= d
- cy
* 100; /* Div/mod 100. */
280 if (d
== 0 && i
== DPREV(lo
)) lo
= i
;
284 if (xi
[DPREV(lo
)] == 0) lo
= DPREV(lo
);
285 else if (hi
== lo
) { lo
= DPREV(lo
); xi
[DPREV(lo
)] |= xi
[lo
]; }
286 xi
[hi
] = (uint8_t)cy
; idig
++;
290 /* Scale down until no more than 17 or 18 integer part digits remain. */
292 uint32_t i
= hi
, cy
= 0;
297 cy
= 100 * (cy
& 0x3f);
298 if (xi
[i
] == 0 && i
== hi
) hi
= DNEXT(hi
), idig
--;
302 if (hi
== lo
) { xi
[DPREV(lo
)] |= 1; break; }
303 xi
[lo
] = (cy
>> 6); lo
= DNEXT(lo
);
304 cy
= 100 * (cy
& 0x3f);
308 /* Collect integer part digits and convert to rescaled double. */
312 for (i
= DNEXT(hi
); --idig
> 0 && i
!= lo
; i
= DNEXT(i
))
315 while (--idig
>= 0) x
= x
* 100;
316 } else { /* Gather round bit from remaining digits. */
319 if (xi
[i
]) { x
|= 1; break; }
323 strscan_double(x
, o
, ex2
, neg
);
329 /* Scan string containing a number. Returns format. Returns value in o. */
330 StrScanFmt
lj_strscan_scan(const uint8_t *p
, TValue
*o
, uint32_t opt
)
334 /* Remove leading space, parse sign and non-numbers. */
335 if (LJ_UNLIKELY(!lj_char_isdigit(*p
))) {
336 while (lj_char_isspace(*p
)) p
++;
337 if (*p
== '+' || *p
== '-') neg
= (*p
++ == '-');
338 if (LJ_UNLIKELY(*p
>= 'A')) { /* Parse "inf", "infinity" or "nan". */
341 if (casecmp(p
[0],'i') && casecmp(p
[1],'n') && casecmp(p
[2],'f')) {
342 if (neg
) setminfV(&tmp
); else setpinfV(&tmp
);
344 if (casecmp(p
[0],'i') && casecmp(p
[1],'n') && casecmp(p
[2],'i') &&
345 casecmp(p
[3],'t') && casecmp(p
[4],'y')) p
+= 5;
346 } else if (casecmp(p
[0],'n') && casecmp(p
[1],'a') && casecmp(p
[2],'n')) {
349 while (lj_char_isspace(*p
)) p
++;
350 if (*p
) return STRSCAN_ERROR
;
356 /* Parse regular number. */
358 StrScanFmt fmt
= STRSCAN_INT
;
359 int cmask
= LJ_CHAR_DIGIT
;
360 int base
= (opt
& STRSCAN_OPT_C
) && *p
== '0' ? 0 : 10;
361 const uint8_t *sp
, *dp
= NULL
;
362 uint32_t dig
= 0, hasdig
= 0, x
= 0;
365 /* Determine base and skip leading zeros. */
366 if (LJ_UNLIKELY(*p
<= '0')) {
367 if (*p
== '0' && casecmp(p
[1], 'x'))
368 base
= 16, cmask
= LJ_CHAR_XDIGIT
, p
+= 2;
372 } else if (*p
== '.') {
373 if (dp
) return STRSCAN_ERROR
;
381 /* Preliminary digit and decimal point scan. */
382 for (sp
= p
; ; p
++) {
383 if (LJ_LIKELY(lj_char_isa(*p
, cmask
))) {
384 x
= x
* 10 + (*p
& 15); /* For fast path below. */
386 } else if (*p
== '.') {
387 if (dp
) return STRSCAN_ERROR
;
393 if (!(hasdig
| dig
)) return STRSCAN_ERROR
;
395 /* Handle decimal point. */
399 ex
= (int32_t)(dp
-(p
-1)); dp
= p
-1;
400 while (ex
< 0 && *dp
-- == '0') ex
++, dig
--; /* Skip trailing zeros. */
401 if (base
== 16) ex
*= 4;
405 /* Parse exponent. */
406 if (casecmp(*p
, (uint32_t)(base
== 16 ? 'p' : 'e'))) {
409 fmt
= STRSCAN_NUM
; p
++;
410 if (*p
== '+' || *p
== '-') negx
= (*p
++ == '-');
411 if (!lj_char_isdigit(*p
)) return STRSCAN_ERROR
;
413 while (lj_char_isdigit(*p
)) {
414 if (xx
< 65536) xx
= xx
* 10 + (*p
& 15);
417 ex
+= negx
? -(int32_t)xx
: (int32_t)xx
;
422 /* I (IMAG), U (U32), LL (I64), ULL/LLU (U64), L (long), UL/LU (ulong). */
423 /* NYI: f (float). Not needed until cp_number() handles non-integers. */
424 if (casecmp(*p
, 'i')) {
425 if (!(opt
& STRSCAN_OPT_IMAG
)) return STRSCAN_ERROR
;
426 p
++; fmt
= STRSCAN_IMAG
;
427 } else if (fmt
== STRSCAN_INT
) {
428 if (casecmp(*p
, 'u')) p
++, fmt
= STRSCAN_U32
;
429 if (casecmp(*p
, 'l')) {
431 if (casecmp(*p
, 'l')) p
++, fmt
+= STRSCAN_I64
- STRSCAN_INT
;
432 else if (!(opt
& STRSCAN_OPT_C
)) return STRSCAN_ERROR
;
433 else if (sizeof(long) == 8) fmt
+= STRSCAN_I64
- STRSCAN_INT
;
435 if (casecmp(*p
, 'u') && (fmt
== STRSCAN_INT
|| fmt
== STRSCAN_I64
))
436 p
++, fmt
+= STRSCAN_U32
- STRSCAN_INT
;
437 if ((fmt
== STRSCAN_U32
&& !(opt
& STRSCAN_OPT_C
)) ||
438 (fmt
>= STRSCAN_I64
&& !(opt
& STRSCAN_OPT_LL
)))
439 return STRSCAN_ERROR
;
441 while (lj_char_isspace(*p
)) p
++;
442 if (*p
) return STRSCAN_ERROR
;
445 /* Fast path for decimal 32 bit integers. */
446 if (fmt
== STRSCAN_INT
&& base
== 10 &&
447 (dig
< 10 || (dig
== 10 && *sp
<= '2' && x
< 0x80000000u
+neg
))) {
448 int32_t y
= neg
? -(int32_t)x
: (int32_t)x
;
449 if ((opt
& STRSCAN_OPT_TONUM
)) {
458 /* Dispatch to base-specific parser. */
459 if (base
== 0 && !(fmt
== STRSCAN_NUM
|| fmt
== STRSCAN_IMAG
))
460 return strscan_oct(sp
, o
, fmt
, neg
, dig
);
462 fmt
= strscan_hex(sp
, o
, fmt
, opt
, ex
, neg
, dig
);
464 fmt
= strscan_dec(sp
, o
, fmt
, opt
, ex
, neg
, dig
);
466 /* Try to convert number to integer, if requested. */
467 if (fmt
== STRSCAN_NUM
&& (opt
& STRSCAN_OPT_TOINT
)) {
469 int32_t i
= lj_num2int(n
);
470 if (n
== (lua_Number
)i
) { o
->i
= i
; return STRSCAN_INT
; }
476 int LJ_FASTCALL
lj_strscan_num(GCstr
*str
, TValue
*o
)
478 StrScanFmt fmt
= lj_strscan_scan((const uint8_t *)strdata(str
), o
,
480 lua_assert(fmt
== STRSCAN_ERROR
|| fmt
== STRSCAN_NUM
);
481 return (fmt
!= STRSCAN_ERROR
);
485 int LJ_FASTCALL
lj_strscan_number(GCstr
*str
, TValue
*o
)
487 StrScanFmt fmt
= lj_strscan_scan((const uint8_t *)strdata(str
), o
,
489 lua_assert(fmt
== STRSCAN_ERROR
|| fmt
== STRSCAN_NUM
|| fmt
== STRSCAN_INT
);
490 if (fmt
== STRSCAN_INT
) setitype(o
, LJ_TISNUM
);
491 return (fmt
!= STRSCAN_ERROR
);