3 ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
5 ** Major portions taken verbatim or adapted from the Lua interpreter.
6 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
25 #include "lj_bcdump.h"
27 #include "lj_strfmt.h"
30 /* ------------------------------------------------------------------------ */
32 #define LJLIB_MODULE_string
34 LJLIB_LUA(string_len
) /*
41 LJLIB_ASM(string_byte
) LJLIB_REC(string_range
0)
43 GCstr
*s
= lj_lib_checkstr(L
, 1);
44 int32_t len
= (int32_t)s
->len
;
45 int32_t start
= lj_lib_optint(L
, 2, 1);
46 int32_t stop
= lj_lib_optint(L
, 3, start
);
48 const unsigned char *p
;
49 if (stop
< 0) stop
+= len
+1;
50 if (start
< 0) start
+= len
+1;
51 if (start
<= 0) start
= 1;
52 if (stop
> len
) stop
= len
;
53 if (start
> stop
) return FFH_RES(0); /* Empty interval: return no results. */
56 if ((uint32_t)n
> LUAI_MAXCSTACK
)
57 lj_err_caller(L
, LJ_ERR_STRSLC
);
58 lj_state_checkstack(L
, (MSize
)n
);
59 p
= (const unsigned char *)strdata(s
) + start
;
60 for (i
= 0; i
< n
; i
++)
61 setintV(L
->base
+ i
-1-LJ_FR2
, p
[i
]);
65 LJLIB_ASM(string_char
) LJLIB_REC(.)
67 int i
, nargs
= (int)(L
->top
- L
->base
);
68 char *buf
= lj_buf_tmp(L
, (MSize
)nargs
);
69 for (i
= 1; i
<= nargs
; i
++) {
70 int32_t k
= lj_lib_checkint(L
, i
);
72 lj_err_arg(L
, i
, LJ_ERR_BADVAL
);
75 setstrV(L
, L
->base
-1-LJ_FR2
, lj_str_new(L
, buf
, (size_t)nargs
));
79 LJLIB_ASM(string_sub
) LJLIB_REC(string_range
1)
81 lj_lib_checkstr(L
, 1);
82 lj_lib_checkint(L
, 2);
83 setintV(L
->base
+2, lj_lib_optint(L
, 3, -1));
87 LJLIB_CF(string_rep
) LJLIB_REC(.)
89 GCstr
*s
= lj_lib_checkstr(L
, 1);
90 int32_t rep
= lj_lib_checkint(L
, 2);
91 GCstr
*sep
= lj_lib_optstr(L
, 3);
92 SBuf
*sb
= lj_buf_tmp_(L
);
94 GCstr
*s2
= lj_buf_cat2str(L
, sep
, s
);
100 sb
= lj_buf_putstr_rep(sb
, s
, rep
);
101 setstrV(L
, L
->top
-1, lj_buf_str(L
, sb
));
106 LJLIB_ASM(string_reverse
) LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse
)
108 lj_lib_checkstr(L
, 1);
111 LJLIB_ASM_(string_lower
) LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower
)
112 LJLIB_ASM_(string_upper
) LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper
)
114 /* ------------------------------------------------------------------------ */
116 static int writer_buf(lua_State
*L
, const void *p
, size_t size
, void *sb
)
118 lj_buf_putmem((SBuf
*)sb
, p
, (MSize
)size
);
123 LJLIB_CF(string_dump
)
125 GCfunc
*fn
= lj_lib_checkfunc(L
, 1);
126 int strip
= L
->base
+1 < L
->top
&& tvistruecond(L
->base
+1);
127 SBuf
*sb
= lj_buf_tmp_(L
); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
129 if (!isluafunc(fn
) || lj_bcwrite(L
, funcproto(fn
), writer_buf
, sb
, strip
))
130 lj_err_caller(L
, LJ_ERR_STRDUMP
);
131 setstrV(L
, L
->top
-1, lj_buf_str(L
, sb
));
136 /* ------------------------------------------------------------------------ */
138 /* macro to `unsign' a character */
139 #define uchar(c) ((unsigned char)(c))
141 #define CAP_UNFINISHED (-1)
142 #define CAP_POSITION (-2)
144 typedef struct MatchState
{
145 const char *src_init
; /* init of source string */
146 const char *src_end
; /* end (`\0') of source string */
148 int level
; /* total number of captures (finished or unfinished) */
153 } capture
[LUA_MAXCAPTURES
];
158 static int check_capture(MatchState
*ms
, int l
)
161 if (l
< 0 || l
>= ms
->level
|| ms
->capture
[l
].len
== CAP_UNFINISHED
)
162 lj_err_caller(ms
->L
, LJ_ERR_STRCAPI
);
166 static int capture_to_close(MatchState
*ms
)
168 int level
= ms
->level
;
169 for (level
--; level
>=0; level
--)
170 if (ms
->capture
[level
].len
== CAP_UNFINISHED
) return level
;
171 lj_err_caller(ms
->L
, LJ_ERR_STRPATC
);
172 return 0; /* unreachable */
175 static const char *classend(MatchState
*ms
, const char *p
)
180 lj_err_caller(ms
->L
, LJ_ERR_STRPATE
);
184 do { /* look for a `]' */
186 lj_err_caller(ms
->L
, LJ_ERR_STRPATM
);
187 if (*(p
++) == L_ESC
&& *p
!= '\0')
188 p
++; /* skip escapes (e.g. `%]') */
196 static const unsigned char match_class_map
[32] = {
197 0,LJ_CHAR_ALPHA
,0,LJ_CHAR_CNTRL
,LJ_CHAR_DIGIT
,0,0,LJ_CHAR_GRAPH
,0,0,0,0,
198 LJ_CHAR_LOWER
,0,0,0,LJ_CHAR_PUNCT
,0,0,LJ_CHAR_SPACE
,0,
199 LJ_CHAR_UPPER
,0,LJ_CHAR_ALNUM
,LJ_CHAR_XDIGIT
,0,0,0,0,0,0,0
202 static int match_class(int c
, int cl
)
204 if ((cl
& 0xc0) == 0x40) {
205 int t
= match_class_map
[(cl
&0x1f)];
207 t
= lj_char_isa(c
, t
);
208 return (cl
& 0x20) ? t
: !t
;
210 if (cl
== 'z') return c
== 0;
211 if (cl
== 'Z') return c
!= 0;
216 static int matchbracketclass(int c
, const char *p
, const char *ec
)
221 p
++; /* skip the `^' */
226 if (match_class(c
, uchar(*p
)))
229 else if ((*(p
+1) == '-') && (p
+2 < ec
)) {
231 if (uchar(*(p
-2)) <= c
&& c
<= uchar(*p
))
234 else if (uchar(*p
) == c
) return sig
;
239 static int singlematch(int c
, const char *p
, const char *ep
)
242 case '.': return 1; /* matches any char */
243 case L_ESC
: return match_class(c
, uchar(*(p
+1)));
244 case '[': return matchbracketclass(c
, p
, ep
-1);
245 default: return (uchar(*p
) == c
);
249 static const char *match(MatchState
*ms
, const char *s
, const char *p
);
251 static const char *matchbalance(MatchState
*ms
, const char *s
, const char *p
)
253 if (*p
== 0 || *(p
+1) == 0)
254 lj_err_caller(ms
->L
, LJ_ERR_STRPATU
);
261 while (++s
< ms
->src_end
) {
263 if (--cont
== 0) return s
+1;
264 } else if (*s
== b
) {
269 return NULL
; /* string ends out of balance */
272 static const char *max_expand(MatchState
*ms
, const char *s
,
273 const char *p
, const char *ep
)
275 ptrdiff_t i
= 0; /* counts maximum expand for item */
276 while ((s
+i
)<ms
->src_end
&& singlematch(uchar(*(s
+i
)), p
, ep
))
278 /* keeps trying to match with the maximum repetitions */
280 const char *res
= match(ms
, (s
+i
), ep
+1);
282 i
--; /* else didn't match; reduce 1 repetition to try again */
287 static const char *min_expand(MatchState
*ms
, const char *s
,
288 const char *p
, const char *ep
)
291 const char *res
= match(ms
, s
, ep
+1);
294 else if (s
<ms
->src_end
&& singlematch(uchar(*s
), p
, ep
))
295 s
++; /* try with one more repetition */
301 static const char *start_capture(MatchState
*ms
, const char *s
,
302 const char *p
, int what
)
305 int level
= ms
->level
;
306 if (level
>= LUA_MAXCAPTURES
) lj_err_caller(ms
->L
, LJ_ERR_STRCAPN
);
307 ms
->capture
[level
].init
= s
;
308 ms
->capture
[level
].len
= what
;
310 if ((res
=match(ms
, s
, p
)) == NULL
) /* match failed? */
311 ms
->level
--; /* undo capture */
315 static const char *end_capture(MatchState
*ms
, const char *s
,
318 int l
= capture_to_close(ms
);
320 ms
->capture
[l
].len
= s
- ms
->capture
[l
].init
; /* close capture */
321 if ((res
= match(ms
, s
, p
)) == NULL
) /* match failed? */
322 ms
->capture
[l
].len
= CAP_UNFINISHED
; /* undo capture */
326 static const char *match_capture(MatchState
*ms
, const char *s
, int l
)
329 l
= check_capture(ms
, l
);
330 len
= (size_t)ms
->capture
[l
].len
;
331 if ((size_t)(ms
->src_end
-s
) >= len
&&
332 memcmp(ms
->capture
[l
].init
, s
, len
) == 0)
338 static const char *match(MatchState
*ms
, const char *s
, const char *p
)
340 if (++ms
->depth
> LJ_MAX_XLEVEL
)
341 lj_err_caller(ms
->L
, LJ_ERR_STRPATX
);
342 init
: /* using goto's to optimize tail recursion */
344 case '(': /* start capture */
345 if (*(p
+1) == ')') /* position capture? */
346 s
= start_capture(ms
, s
, p
+2, CAP_POSITION
);
348 s
= start_capture(ms
, s
, p
+1, CAP_UNFINISHED
);
350 case ')': /* end capture */
351 s
= end_capture(ms
, s
, p
+1);
355 case 'b': /* balanced string? */
356 s
= matchbalance(ms
, s
, p
+2);
357 if (s
== NULL
) break;
359 goto init
; /* else s = match(ms, s, p+4); */
360 case 'f': { /* frontier? */
361 const char *ep
; char previous
;
364 lj_err_caller(ms
->L
, LJ_ERR_STRPATB
);
365 ep
= classend(ms
, p
); /* points to what is next */
366 previous
= (s
== ms
->src_init
) ? '\0' : *(s
-1);
367 if (matchbracketclass(uchar(previous
), p
, ep
-1) ||
368 !matchbracketclass(uchar(*s
), p
, ep
-1)) { s
= NULL
; break; }
370 goto init
; /* else s = match(ms, s, ep); */
373 if (lj_char_isdigit(uchar(*(p
+1)))) { /* capture results (%0-%9)? */
374 s
= match_capture(ms
, s
, uchar(*(p
+1)));
375 if (s
== NULL
) break;
377 goto init
; /* else s = match(ms, s, p+2) */
379 goto dflt
; /* case default */
382 case '\0': /* end of pattern */
383 break; /* match succeeded */
385 /* is the `$' the last char in pattern? */
386 if (*(p
+1) != '\0') goto dflt
;
387 if (s
!= ms
->src_end
) s
= NULL
; /* check end of string */
389 default: dflt
: { /* it is a pattern item */
390 const char *ep
= classend(ms
, p
); /* points to what is next */
391 int m
= s
<ms
->src_end
&& singlematch(uchar(*s
), p
, ep
);
393 case '?': { /* optional */
395 if (m
&& ((res
=match(ms
, s
+1, ep
+1)) != NULL
)) {
400 goto init
; /* else s = match(ms, s, ep+1); */
402 case '*': /* 0 or more repetitions */
403 s
= max_expand(ms
, s
, p
, ep
);
405 case '+': /* 1 or more repetitions */
406 s
= (m
? max_expand(ms
, s
+1, p
, ep
) : NULL
);
408 case '-': /* 0 or more repetitions (minimum) */
409 s
= min_expand(ms
, s
, p
, ep
);
412 if (m
) { s
++; p
=ep
; goto init
; } /* else s = match(ms, s+1, ep); */
423 static void push_onecapture(MatchState
*ms
, int i
, const char *s
, const char *e
)
425 if (i
>= ms
->level
) {
426 if (i
== 0) /* ms->level == 0, too */
427 lua_pushlstring(ms
->L
, s
, (size_t)(e
- s
)); /* add whole match */
429 lj_err_caller(ms
->L
, LJ_ERR_STRCAPI
);
431 ptrdiff_t l
= ms
->capture
[i
].len
;
432 if (l
== CAP_UNFINISHED
) lj_err_caller(ms
->L
, LJ_ERR_STRCAPU
);
433 if (l
== CAP_POSITION
)
434 lua_pushinteger(ms
->L
, ms
->capture
[i
].init
- ms
->src_init
+ 1);
436 lua_pushlstring(ms
->L
, ms
->capture
[i
].init
, (size_t)l
);
440 static int push_captures(MatchState
*ms
, const char *s
, const char *e
)
443 int nlevels
= (ms
->level
== 0 && s
) ? 1 : ms
->level
;
444 luaL_checkstack(ms
->L
, nlevels
, "too many captures");
445 for (i
= 0; i
< nlevels
; i
++)
446 push_onecapture(ms
, i
, s
, e
);
447 return nlevels
; /* number of strings pushed */
450 static int str_find_aux(lua_State
*L
, int find
)
452 GCstr
*s
= lj_lib_checkstr(L
, 1);
453 GCstr
*p
= lj_lib_checkstr(L
, 2);
454 int32_t start
= lj_lib_optint(L
, 3, 1);
456 if (start
< 0) start
+= (int32_t)s
->len
; else start
--;
457 if (start
< 0) start
= 0;
467 if (find
&& ((L
->base
+3 < L
->top
&& tvistruecond(L
->base
+3)) ||
468 !lj_str_haspattern(p
))) { /* Search for fixed string. */
469 const char *q
= lj_str_find(strdata(s
)+st
, strdata(p
), s
->len
-st
, p
->len
);
471 setintV(L
->top
-2, (int32_t)(q
-strdata(s
)) + 1);
472 setintV(L
->top
-1, (int32_t)(q
-strdata(s
)) + (int32_t)p
->len
);
475 } else { /* Search for pattern. */
477 const char *pstr
= strdata(p
);
478 const char *sstr
= strdata(s
) + st
;
480 if (*pstr
== '^') { pstr
++; anchor
= 1; }
482 ms
.src_init
= strdata(s
);
483 ms
.src_end
= strdata(s
) + s
->len
;
484 do { /* Loop through string and try to match the pattern. */
486 ms
.level
= ms
.depth
= 0;
487 q
= match(&ms
, sstr
, pstr
);
490 setintV(L
->top
++, (int32_t)(sstr
-(strdata(s
)-1)));
491 setintV(L
->top
++, (int32_t)(q
-strdata(s
)));
492 return push_captures(&ms
, NULL
, NULL
) + 2;
494 return push_captures(&ms
, sstr
, q
);
497 } while (sstr
++ < ms
.src_end
&& !anchor
);
499 setnilV(L
->top
-1); /* Not found. */
503 LJLIB_CF(string_find
) LJLIB_REC(.)
505 return str_find_aux(L
, 1);
508 LJLIB_CF(string_match
)
510 return str_find_aux(L
, 0);
513 LJLIB_NOREG
LJLIB_CF(string_gmatch_aux
)
515 const char *p
= strVdata(lj_lib_upvalue(L
, 2));
516 GCstr
*str
= strV(lj_lib_upvalue(L
, 1));
517 const char *s
= strdata(str
);
518 TValue
*tvpos
= lj_lib_upvalue(L
, 3);
519 const char *src
= s
+ tvpos
->u32
.lo
;
523 ms
.src_end
= s
+ str
->len
;
524 for (; src
<= ms
.src_end
; src
++) {
526 ms
.level
= ms
.depth
= 0;
527 if ((e
= match(&ms
, src
, p
)) != NULL
) {
528 int32_t pos
= (int32_t)(e
- s
);
529 if (e
== src
) pos
++; /* Ensure progress for empty match. */
530 tvpos
->u32
.lo
= (uint32_t)pos
;
531 return push_captures(&ms
, src
, e
);
534 return 0; /* not found */
537 LJLIB_CF(string_gmatch
)
539 lj_lib_checkstr(L
, 1);
540 lj_lib_checkstr(L
, 2);
543 lj_lib_pushcc(L
, lj_cf_string_gmatch_aux
, FF_string_gmatch_aux
, 3);
547 static void add_s(MatchState
*ms
, luaL_Buffer
*b
, const char *s
, const char *e
)
550 const char *news
= lua_tolstring(ms
->L
, 3, &l
);
551 for (i
= 0; i
< l
; i
++) {
552 if (news
[i
] != L_ESC
) {
553 luaL_addchar(b
, news
[i
]);
556 if (!lj_char_isdigit(uchar(news
[i
]))) {
557 luaL_addchar(b
, news
[i
]);
558 } else if (news
[i
] == '0') {
559 luaL_addlstring(b
, s
, (size_t)(e
- s
));
561 push_onecapture(ms
, news
[i
] - '1', s
, e
);
562 luaL_addvalue(b
); /* add capture to accumulated result */
568 static void add_value(MatchState
*ms
, luaL_Buffer
*b
,
569 const char *s
, const char *e
)
571 lua_State
*L
= ms
->L
;
572 switch (lua_type(L
, 3)) {
578 case LUA_TFUNCTION
: {
581 n
= push_captures(ms
, s
, e
);
586 push_onecapture(ms
, 0, s
, e
);
591 if (!lua_toboolean(L
, -1)) { /* nil or false? */
593 lua_pushlstring(L
, s
, (size_t)(e
- s
)); /* keep original text */
594 } else if (!lua_isstring(L
, -1)) {
595 lj_err_callerv(L
, LJ_ERR_STRGSRV
, luaL_typename(L
, -1));
597 luaL_addvalue(b
); /* add result to accumulator */
600 LJLIB_CF(string_gsub
)
603 const char *src
= luaL_checklstring(L
, 1, &srcl
);
604 const char *p
= luaL_checkstring(L
, 2);
605 int tr
= lua_type(L
, 3);
606 int max_s
= luaL_optint(L
, 4, (int)(srcl
+1));
607 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
611 if (!(tr
== LUA_TNUMBER
|| tr
== LUA_TSTRING
||
612 tr
== LUA_TFUNCTION
|| tr
== LUA_TTABLE
))
613 lj_err_arg(L
, 3, LJ_ERR_NOSFT
);
614 luaL_buffinit(L
, &b
);
617 ms
.src_end
= src
+srcl
;
620 ms
.level
= ms
.depth
= 0;
621 e
= match(&ms
, src
, p
);
624 add_value(&ms
, &b
, src
, e
);
626 if (e
&& e
>src
) /* non empty match? */
627 src
= e
; /* skip it */
628 else if (src
< ms
.src_end
)
629 luaL_addchar(&b
, *src
++);
635 luaL_addlstring(&b
, src
, (size_t)(ms
.src_end
-src
));
637 lua_pushinteger(L
, n
); /* number of substitutions */
641 /* ------------------------------------------------------------------------ */
643 /* Emulate tostring() inline. */
644 static GCstr
*string_fmt_tostring(lua_State
*L
, int arg
, int retry
)
646 TValue
*o
= L
->base
+arg
-1;
648 lua_assert(o
< L
->top
); /* Caller already checks for existence. */
649 if (LJ_LIKELY(tvisstr(o
)))
651 if (retry
!= 2 && !tvisnil(mo
= lj_meta_lookup(L
, o
, MM_tostring
))) {
652 copyTV(L
, L
->top
++, mo
);
653 copyTV(L
, L
->top
++, o
);
655 copyTV(L
, L
->base
+arg
-1, --L
->top
);
656 return NULL
; /* Buffer may be overwritten, retry. */
658 return lj_strfmt_obj(L
, o
);
661 LJLIB_CF(string_format
) LJLIB_REC(.)
663 int arg
, top
= (int)(L
->top
- L
->base
);
672 fmt
= lj_lib_checkstr(L
, arg
);
673 lj_strfmt_init(&fs
, strdata(fmt
), fmt
->len
);
674 while ((sf
= lj_strfmt_parse(&fs
)) != STRFMT_EOF
) {
675 if (sf
== STRFMT_LIT
) {
676 lj_buf_putmem(sb
, fs
.str
, fs
.len
);
677 } else if (sf
== STRFMT_ERR
) {
678 lj_err_callerv(L
, LJ_ERR_STRFMT
, strdata(lj_str_new(L
, fs
.str
, fs
.len
)));
681 luaL_argerror(L
, arg
, lj_obj_typename
[0]);
682 switch (STRFMT_TYPE(sf
)) {
684 if (tvisint(L
->base
+arg
-1)) {
685 int32_t k
= intV(L
->base
+arg
-1);
686 if (sf
== STRFMT_INT
)
687 lj_strfmt_putint(sb
, k
); /* Shortcut for plain %d. */
689 lj_strfmt_putfxint(sb
, sf
, k
);
691 lj_strfmt_putfnum_int(sb
, sf
, lj_lib_checknum(L
, arg
));
695 if (tvisint(L
->base
+arg
-1))
696 lj_strfmt_putfxint(sb
, sf
, intV(L
->base
+arg
-1));
698 lj_strfmt_putfnum_uint(sb
, sf
, lj_lib_checknum(L
, arg
));
701 lj_strfmt_putfnum(sb
, sf
, lj_lib_checknum(L
, arg
));
704 GCstr
*str
= string_fmt_tostring(L
, arg
, retry
);
707 else if ((sf
& STRFMT_T_QUOTED
))
708 lj_strfmt_putquoted(sb
, str
); /* No formatting. */
710 lj_strfmt_putfstr(sb
, sf
, str
);
714 lj_strfmt_putfchar(sb
, sf
, lj_lib_checkint(L
, arg
));
716 case STRFMT_PTR
: /* No formatting. */
717 lj_strfmt_putptr(sb
, lj_obj_ptr(L
->base
+arg
-1));
725 if (retry
++ == 1) goto again
;
726 setstrV(L
, L
->top
-1, lj_buf_str(L
, sb
));
731 /* ------------------------------------------------------------------------ */
733 #include "lj_libdef.h"
735 LUALIB_API
int luaopen_string(lua_State
*L
)
739 LJ_LIB_REG(L
, LUA_STRLIBNAME
, string
);
740 #if defined(LUA_COMPAT_GFIND) && !LJ_52
741 lua_getfield(L
, -1, "gmatch");
742 lua_setfield(L
, -2, "gfind");
744 mt
= lj_tab_new(L
, 0, 1);
745 /* NOBARRIER: basemt is a GC root. */
747 setgcref(basemt_it(g
, LJ_TSTR
), obj2gco(mt
));
748 settabV(L
, lj_tab_setstr(L
, mt
, mmname_str(g
, MM_index
)), tabV(L
->top
-1));
749 mt
->nomm
= (uint8_t)(~(1u<<MM_index
));