OSX/iOS: Fix SDK incompatibility.
[luajit-2.0.git] / src / lj_strfmt.c
blob909255db8c6c91f8db5060ce2f2c426966865815
1 /*
2 ** String formatting.
3 ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4 */
6 #include <stdio.h>
8 #define lj_strfmt_c
9 #define LUA_CORE
11 #include "lj_obj.h"
12 #include "lj_err.h"
13 #include "lj_buf.h"
14 #include "lj_str.h"
15 #include "lj_meta.h"
16 #include "lj_state.h"
17 #include "lj_char.h"
18 #include "lj_strfmt.h"
19 #if LJ_HASFFI
20 #include "lj_ctype.h"
21 #endif
22 #include "lj_lib.h"
24 /* -- Format parser ------------------------------------------------------- */
26 static const uint8_t strfmt_map[('x'-'A')+1] = {
27 STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0,
28 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0,
29 0,0,0,0,0,0,
30 STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
31 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
34 SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
36 const uint8_t *p = fs->p, *e = fs->e;
37 fs->str = (const char *)p;
38 for (; p < e; p++) {
39 if (*p == '%') { /* Escape char? */
40 if (p[1] == '%') { /* '%%'? */
41 fs->p = ++p+1;
42 goto retlit;
43 } else {
44 SFormat sf = 0;
45 uint32_t c;
46 if (p != (const uint8_t *)fs->str)
47 break;
48 for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
49 /* Parse flags. */
50 if (*p == '-') sf |= STRFMT_F_LEFT;
51 else if (*p == '+') sf |= STRFMT_F_PLUS;
52 else if (*p == '0') sf |= STRFMT_F_ZERO;
53 else if (*p == ' ') sf |= STRFMT_F_SPACE;
54 else if (*p == '#') sf |= STRFMT_F_ALT;
55 else break;
57 if ((uint32_t)*p - '0' < 10) { /* Parse width. */
58 uint32_t width = (uint32_t)*p++ - '0';
59 if ((uint32_t)*p - '0' < 10)
60 width = (uint32_t)*p++ - '0' + width*10;
61 sf |= (width << STRFMT_SH_WIDTH);
63 if (*p == '.') { /* Parse precision. */
64 uint32_t prec = 0;
65 p++;
66 if ((uint32_t)*p - '0' < 10) {
67 prec = (uint32_t)*p++ - '0';
68 if ((uint32_t)*p - '0' < 10)
69 prec = (uint32_t)*p++ - '0' + prec*10;
71 sf |= ((prec+1) << STRFMT_SH_PREC);
73 /* Parse conversion. */
74 c = (uint32_t)*p - 'A';
75 if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
76 uint32_t sx = strfmt_map[c];
77 if (sx) {
78 fs->p = p+1;
79 return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
82 /* Return error location. */
83 if (*p >= 32) p++;
84 fs->len = (MSize)(p - (const uint8_t *)fs->str);
85 fs->p = fs->e;
86 return STRFMT_ERR;
90 fs->p = p;
91 retlit:
92 fs->len = (MSize)(p - (const uint8_t *)fs->str);
93 return fs->len ? STRFMT_LIT : STRFMT_EOF;
96 /* -- Raw conversions ----------------------------------------------------- */
98 #define WINT_R(x, sh, sc) \
99 { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }
101 /* Write integer to buffer. */
102 char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
104 uint32_t u = (uint32_t)k;
105 if (k < 0) { u = ~u+1u; *p++ = '-'; }
106 if (u < 10000) {
107 if (u < 10) goto dig1;
108 if (u < 100) goto dig2;
109 if (u < 1000) goto dig3;
110 } else {
111 uint32_t v = u / 10000; u -= v * 10000;
112 if (v < 10000) {
113 if (v < 10) goto dig5;
114 if (v < 100) goto dig6;
115 if (v < 1000) goto dig7;
116 } else {
117 uint32_t w = v / 10000; v -= w * 10000;
118 if (w >= 10) WINT_R(w, 10, 10)
119 *p++ = (char)('0'+w);
121 WINT_R(v, 23, 1000)
122 dig7: WINT_R(v, 12, 100)
123 dig6: WINT_R(v, 10, 10)
124 dig5: *p++ = (char)('0'+v);
126 WINT_R(u, 23, 1000)
127 dig3: WINT_R(u, 12, 100)
128 dig2: WINT_R(u, 10, 10)
129 dig1: *p++ = (char)('0'+u);
130 return p;
132 #undef WINT_R
134 /* Write pointer to buffer. */
135 char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v)
137 ptrdiff_t x = (ptrdiff_t)v;
138 MSize i, n = STRFMT_MAXBUF_PTR;
139 if (x == 0) {
140 *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L';
141 return p;
143 #if LJ_64
144 /* Shorten output for 64 bit pointers. */
145 n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
146 #endif
147 p[0] = '0';
148 p[1] = 'x';
149 for (i = n-1; i >= 2; i--, x >>= 4)
150 p[i] = "0123456789abcdef"[(x & 15)];
151 return p+n;
154 /* Write ULEB128 to buffer. */
155 char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v)
157 for (; v >= 0x80; v >>= 7)
158 *p++ = (char)((v & 0x7f) | 0x80);
159 *p++ = (char)v;
160 return p;
163 /* Return string or write number to tmp buffer and return pointer to start. */
164 const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
166 SBuf *sb;
167 if (tvisstr(o)) {
168 *lenp = strV(o)->len;
169 return strVdata(o);
170 } else if (tvisbuf(o)) {
171 SBufExt *sbx = bufV(o);
172 *lenp = sbufxlen(sbx);
173 return sbx->r;
174 } else if (tvisint(o)) {
175 sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o));
176 } else if (tvisnum(o)) {
177 sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n);
178 } else {
179 return NULL;
181 *lenp = sbuflen(sb);
182 return sb->b;
185 /* -- Unformatted conversions to buffer ----------------------------------- */
187 /* Add integer to buffer. */
188 SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
190 sb->w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k);
191 return sb;
#if LJ_HASJIT
/* Add number to buffer. Formats o->n with the STRFMT_G14 format. */
SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
{
  return lj_strfmt_putfnum(sb, STRFMT_G14, o->n);
}
#endif
202 SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
204 sb->w = lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v);
205 return sb;
208 /* Add quoted string to buffer. */
209 static SBuf *strfmt_putquotedlen(SBuf *sb, const char *s, MSize len)
211 lj_buf_putb(sb, '"');
212 while (len--) {
213 uint32_t c = (uint32_t)(uint8_t)*s++;
214 char *w = lj_buf_more(sb, 4);
215 if (c == '"' || c == '\\' || c == '\n') {
216 *w++ = '\\';
217 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
218 uint32_t d;
219 *w++ = '\\';
220 if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
221 *w++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
222 goto tens;
223 } else if (c >= 10) {
224 tens:
225 d = (c * 205) >> 11; c -= d * 10; *w++ = (char)('0'+d);
227 c += '0';
229 *w++ = (char)c;
230 sb->w = w;
232 lj_buf_putb(sb, '"');
233 return sb;
#if LJ_HASJIT
/* Add quoted string object to buffer. Wrapper for strfmt_putquotedlen(). */
SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
{
  return strfmt_putquotedlen(sb, strdata(str), str->len);
}
#endif
243 /* -- Formatted conversions to buffer ------------------------------------- */
245 /* Add formatted char to buffer. */
246 SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
248 MSize width = STRFMT_WIDTH(sf);
249 char *w = lj_buf_more(sb, width > 1 ? width : 1);
250 if ((sf & STRFMT_F_LEFT)) *w++ = (char)c;
251 while (width-- > 1) *w++ = ' ';
252 if (!(sf & STRFMT_F_LEFT)) *w++ = (char)c;
253 sb->w = w;
254 return sb;
257 /* Add formatted string to buffer. */
258 static SBuf *strfmt_putfstrlen(SBuf *sb, SFormat sf, const char *s, MSize len)
260 MSize width = STRFMT_WIDTH(sf);
261 char *w;
262 if (len > STRFMT_PREC(sf)) len = STRFMT_PREC(sf);
263 w = lj_buf_more(sb, width > len ? width : len);
264 if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
265 while (width-- > len) *w++ = ' ';
266 if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
267 sb->w = w;
268 return sb;
#if LJ_HASJIT
/* Add formatted string object to buffer. Wrapper for strfmt_putfstrlen(). */
SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
{
  return strfmt_putfstrlen(sb, sf, strdata(str), str->len);
}
#endif
278 /* Add formatted signed/unsigned integer to buffer. */
279 SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
281 char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *w;
282 #ifdef LUA_USE_ASSERT
283 char *ws;
284 #endif
285 MSize prefix = 0, len, prec, pprec, width, need;
287 /* Figure out signed prefixes. */
288 if (STRFMT_TYPE(sf) == STRFMT_INT) {
289 if ((int64_t)k < 0) {
290 k = ~k+1u;
291 prefix = 256 + '-';
292 } else if ((sf & STRFMT_F_PLUS)) {
293 prefix = 256 + '+';
294 } else if ((sf & STRFMT_F_SPACE)) {
295 prefix = 256 + ' ';
299 /* Convert number and store to fixed-size buffer in reverse order. */
300 prec = STRFMT_PREC(sf);
301 if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
302 if (k == 0) { /* Special-case zero argument. */
303 if (prec != 0 ||
304 (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
305 *--q = '0';
306 } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */
307 uint32_t k2;
308 while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
309 k2 = (uint32_t)k;
310 do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
311 } else if ((sf & STRFMT_T_HEX)) { /* Hex. */
312 const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
313 "0123456789abcdef";
314 do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
315 if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
316 } else { /* Octal. */
317 do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
318 if ((sf & STRFMT_F_ALT)) *--q = '0';
321 /* Calculate sizes. */
322 len = (MSize)(buf + sizeof(buf) - q);
323 if ((int32_t)len >= (int32_t)prec) prec = len;
324 width = STRFMT_WIDTH(sf);
325 pprec = prec + (prefix >> 8);
326 need = width > pprec ? width : pprec;
327 w = lj_buf_more(sb, need);
328 #ifdef LUA_USE_ASSERT
329 ws = w;
330 #endif
332 /* Format number with leading/trailing whitespace and zeros. */
333 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
334 while (width-- > pprec) *w++ = ' ';
335 if (prefix) {
336 if ((char)prefix >= 'X') *w++ = '0';
337 *w++ = (char)prefix;
339 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
340 while (width-- > pprec) *w++ = '0';
341 while (prec-- > len) *w++ = '0';
342 while (q < buf + sizeof(buf)) *w++ = *q++; /* Add number itself. */
343 if ((sf & STRFMT_F_LEFT))
344 while (width-- > pprec) *w++ = ' ';
346 lj_assertX(need == (MSize)(w - ws), "miscalculated format size");
347 sb->w = w;
348 return sb;
351 /* Add number formatted as signed integer to buffer. */
352 SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
354 int64_t k = (int64_t)n;
355 if (checki32(k) && sf == STRFMT_INT)
356 return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */
357 else
358 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
361 /* Add number formatted as unsigned integer to buffer. */
362 SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
364 int64_t k;
365 if (n >= 9223372036854775808.0)
366 k = (int64_t)(n - 18446744073709551616.0);
367 else
368 k = (int64_t)n;
369 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
372 /* Format stack arguments to buffer. */
373 int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry)
375 int narg = (int)(L->top - L->base);
376 GCstr *fmt = lj_lib_checkstr(L, arg);
377 FormatState fs;
378 SFormat sf;
379 lj_strfmt_init(&fs, strdata(fmt), fmt->len);
380 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
381 if (sf == STRFMT_LIT) {
382 lj_buf_putmem(sb, fs.str, fs.len);
383 } else if (sf == STRFMT_ERR) {
384 lj_err_callerv(L, LJ_ERR_STRFMT,
385 strdata(lj_str_new(L, fs.str, fs.len)));
386 } else {
387 TValue *o = &L->base[arg++];
388 if (arg > narg)
389 lj_err_arg(L, arg, LJ_ERR_NOVAL);
390 switch (STRFMT_TYPE(sf)) {
391 case STRFMT_INT:
392 if (tvisint(o)) {
393 int32_t k = intV(o);
394 if (sf == STRFMT_INT)
395 lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */
396 else
397 lj_strfmt_putfxint(sb, sf, k);
398 break;
400 #if LJ_HASFFI
401 if (tviscdata(o)) {
402 GCcdata *cd = cdataV(o);
403 if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
404 lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
405 break;
408 #endif
409 lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
410 break;
411 case STRFMT_UINT:
412 if (tvisint(o)) {
413 lj_strfmt_putfxint(sb, sf, intV(o));
414 break;
416 #if LJ_HASFFI
417 if (tviscdata(o)) {
418 GCcdata *cd = cdataV(o);
419 if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
420 lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
421 break;
424 #endif
425 lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
426 break;
427 case STRFMT_NUM:
428 lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
429 break;
430 case STRFMT_STR: {
431 MSize len;
432 const char *s;
433 cTValue *mo;
434 if (LJ_UNLIKELY(!tvisstr(o) && !tvisbuf(o)) && retry >= 0 &&
435 !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
436 /* Call __tostring metamethod once. */
437 copyTV(L, L->top++, mo);
438 copyTV(L, L->top++, o);
439 lua_call(L, 1, 1);
440 o = &L->base[arg-1]; /* Stack may have been reallocated. */
441 copyTV(L, o, --L->top); /* Replace inline for retry. */
442 if (retry < 2) { /* Global buffer may have been overwritten. */
443 retry = 1;
444 break;
447 if (LJ_LIKELY(tvisstr(o))) {
448 len = strV(o)->len;
449 s = strVdata(o);
450 #if LJ_HASBUFFER
451 } else if (tvisbuf(o)) {
452 SBufExt *sbx = bufV(o);
453 if (sbx == (SBufExt *)sb) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF);
454 len = sbufxlen(sbx);
455 s = sbx->r;
456 #endif
457 } else {
458 GCstr *str = lj_strfmt_obj(L, o);
459 len = str->len;
460 s = strdata(str);
462 if ((sf & STRFMT_T_QUOTED))
463 strfmt_putquotedlen(sb, s, len); /* No formatting. */
464 else
465 strfmt_putfstrlen(sb, sf, s, len);
466 break;
468 case STRFMT_CHAR:
469 lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
470 break;
471 case STRFMT_PTR: /* No formatting. */
472 lj_strfmt_putptr(sb, lj_obj_ptr(G(L), o));
473 break;
474 default:
475 lj_assertL(0, "bad string format type");
476 break;
480 return retry;
483 /* -- Conversions to strings ---------------------------------------------- */
485 /* Convert integer to string. */
486 GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k)
488 char buf[STRFMT_MAXBUF_INT];
489 MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf);
490 return lj_str_new(L, buf, len);
493 /* Convert integer or number to string. */
494 GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o)
496 return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o);
#if LJ_HASJIT
/* Convert char value to string. The result is a one-character string. */
GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
{
  char buf[1];
  buf[0] = c;
  return lj_str_new(L, buf, 1);
}
#endif
509 /* Raw conversion of object to string. */
510 GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
512 if (tvisstr(o)) {
513 return strV(o);
514 } else if (tvisnumber(o)) {
515 return lj_strfmt_number(L, o);
516 } else if (tvisnil(o)) {
517 return lj_str_newlit(L, "nil");
518 } else if (tvisfalse(o)) {
519 return lj_str_newlit(L, "false");
520 } else if (tvistrue(o)) {
521 return lj_str_newlit(L, "true");
522 } else {
523 char buf[8+2+2+16], *p = buf;
524 p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o)));
525 *p++ = ':'; *p++ = ' ';
526 if (tvisfunc(o) && isffunc(funcV(o))) {
527 p = lj_buf_wmem(p, "builtin#", 8);
528 p = lj_strfmt_wint(p, funcV(o)->c.ffid);
529 } else {
530 p = lj_strfmt_wptr(p, lj_obj_ptr(G(L), o));
532 return lj_str_new(L, buf, (size_t)(p - buf));
536 /* -- Internal string formatting ------------------------------------------ */
/*
** These functions are only used for lua_pushfstring(), lua_pushvfstring()
** and for internal string formatting (e.g. error messages). Caveat: unlike
** string.format(), only a limited subset of formats and flags is supported!
**
** LuaJIT has support for a couple more formats than Lua 5.1/5.2:
** - %d %u %o %x with full formatting, 32 bit integers only.
** - %f and other FP formats are really %.14g.
** - %s %c %p without formatting.
*/
549 /* Push formatted message as a string object to Lua stack. va_list variant. */
550 const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
552 SBuf *sb = lj_buf_tmp_(L);
553 FormatState fs;
554 SFormat sf;
555 GCstr *str;
556 lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt));
557 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
558 switch (STRFMT_TYPE(sf)) {
559 case STRFMT_LIT:
560 lj_buf_putmem(sb, fs.str, fs.len);
561 break;
562 case STRFMT_INT:
563 lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t));
564 break;
565 case STRFMT_UINT:
566 lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t));
567 break;
568 case STRFMT_NUM:
569 lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number));
570 break;
571 case STRFMT_STR: {
572 const char *s = va_arg(argp, char *);
573 if (s == NULL) s = "(null)";
574 lj_buf_putmem(sb, s, (MSize)strlen(s));
575 break;
577 case STRFMT_CHAR:
578 lj_buf_putb(sb, va_arg(argp, int));
579 break;
580 case STRFMT_PTR:
581 lj_strfmt_putptr(sb, va_arg(argp, void *));
582 break;
583 case STRFMT_ERR:
584 default:
585 lj_buf_putb(sb, '?');
586 lj_assertL(0, "bad string format near offset %d", fs.len);
587 break;
590 str = lj_buf_str(L, sb);
591 setstrV(L, L->top, str);
592 incr_top(L);
593 return strdata(str);
596 /* Push formatted message as a string object to Lua stack. Vararg variant. */
597 const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
599 const char *msg;
600 va_list argp;
601 va_start(argp, fmt);
602 msg = lj_strfmt_pushvf(L, fmt, argp);
603 va_end(argp);
604 return msg;