3 ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
5 ** Major portions taken verbatim or adapted from the Lua interpreter.
6 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
25 #include "lj_bcdump.h"
27 #include "lj_strfmt.h"
30 /* ------------------------------------------------------------------------ */
32 #define LJLIB_MODULE_string
34 LJLIB_LUA(string_len
) /*
/*
** string.byte(s [, start [, stop]]): write the byte values of a range of s
** as integers into consecutive slots starting at L->base-1-LJ_FR2.
** start defaults to 1 and stop defaults to start. An empty range yields
** no results.
*/
41 LJLIB_ASM(string_byte
) LJLIB_REC(string_range
0)
43 GCstr
*s
= lj_lib_checkstr(L
, 1);
44 int32_t len
= (int32_t)s
->len
;
45 int32_t start
= lj_lib_optint(L
, 2, 1);
46 int32_t stop
= lj_lib_optint(L
, 3, start
);
48 const unsigned char *p
;
/* Negative positions are taken relative to the end of the string. */
49 if (stop
< 0) stop
+= len
+1;
50 if (start
< 0) start
+= len
+1;
/* Clamp the range to the bounds of the string. */
51 if (start
<= 0) start
= 1;
52 if (stop
> len
) stop
= len
;
53 if (start
> stop
) return FFH_RES(0); /* Empty interval: return no results. */
/* Raise LJ_ERR_STRSLC if n exceeds LUAI_MAXCSTACK, else reserve n stack slots. */
56 if ((uint32_t)n
> LUAI_MAXCSTACK
)
57 lj_err_caller(L
, LJ_ERR_STRSLC
);
58 lj_state_checkstack(L
, (MSize
)n
);
59 p
= (const unsigned char *)strdata(s
) + start
;
/* Bytes are read as unsigned char, so each stored integer is non-negative. */
60 for (i
= 0; i
< n
; i
++)
61 setintV(L
->base
+ i
-1-LJ_FR2
, p
[i
]);
/*
** string.char(...): check every argument with lj_lib_checkint() and store
** a new string of nargs bytes, taken from the temporary buffer, at
** L->base-1-LJ_FR2.
*/
65 LJLIB_ASM(string_char
) LJLIB_REC(.)
67 int i
, nargs
= (int)(L
->top
- L
->base
);
68 char *buf
= lj_buf_tmp(L
, (MSize
)nargs
);
69 for (i
= 1; i
<= nargs
; i
++) {
70 int32_t k
= lj_lib_checkint(L
, i
);
/* NOTE(review): presumably raised only when k does not fit in a byte -- confirm. */
72 lj_err_arg(L
, i
, LJ_ERR_BADVAL
);
75 setstrV(L
, L
->base
-1-LJ_FR2
, lj_str_new(L
, buf
, (size_t)nargs
));
/*
** string.sub argument handling: argument 1 must be a string and argument 2
** an integer. The third argument slot is overwritten with its integer
** value, which defaults to -1 when the argument is absent.
*/
79 LJLIB_ASM(string_sub
) LJLIB_REC(string_range
1)
81 lj_lib_checkstr(L
, 1);
82 lj_lib_checkint(L
, 2);
83 setintV(L
->base
+2, lj_lib_optint(L
, 3, -1));
/*
** string.rep(s, rep [, sep]): build the repeated string in the temporary
** buffer with lj_buf_putstr_rep() and store the result at L->top-1.
*/
87 LJLIB_CF(string_rep
) LJLIB_REC(.)
89 GCstr
*s
= lj_lib_checkstr(L
, 1);
90 int32_t rep
= lj_lib_checkint(L
, 2);
91 GCstr
*sep
= lj_lib_optstr(L
, 3);
92 SBuf
*sb
= lj_buf_tmp_(L
);
/* NOTE(review): s2 is presumably sep followed by s, used when a separator is given -- confirm. */
94 GCstr
*s2
= lj_buf_cat2str(L
, sep
, s
);
100 sb
= lj_buf_putstr_rep(sb
, s
, rep
);
101 setstrV(L
, L
->top
-1, lj_buf_str(L
, sb
));
/*
** string.reverse argument handling: argument 1 must be a string. The
** recorder annotation names IRCALL_lj_buf_putstr_reverse as the operation.
*/
106 LJLIB_ASM(string_reverse
) LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse
)
108 lj_lib_checkstr(L
, 1);
/*
** string.lower and string.upper are declared with LJLIB_ASM_ and have no C
** body here. Their recorder annotations name the lj_buf_putstr_lower and
** lj_buf_putstr_upper IR calls.
*/
111 LJLIB_ASM_(string_lower
) LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower
)
112 LJLIB_ASM_(string_upper
) LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper
)
114 /* ------------------------------------------------------------------------ */
/*
** Writer callback handed to lj_bcwrite() by string_dump: appends the size
** bytes at p to the SBuf passed through the opaque sb pointer.
*/
116 static int writer_buf(lua_State
*L
, const void *p
, size_t size
, void *sb
)
118 lj_buf_putmem((SBuf
*)sb
, p
, (MSize
)size
);
/*
** string.dump(func [, mode]): write the bytecode of the function prototype
** at argument 1 into the temporary buffer with lj_bcwrite() and store the
** resulting string at L->top-1.
** The second argument selects flags: its string data is scanned for 's'
** (BCDUMP_F_STRIP) and 'd' (BCDUMP_F_DETERMINISTIC); in the else branch any
** true value sets BCDUMP_F_STRIP.
** Raises LJ_ERR_STRDUMP if there is no prototype or lj_bcwrite() returns
** non-zero.
*/
123 LJLIB_CF(string_dump
)
125 GCproto
*pt
= lj_lib_checkLproto(L
, 1, 1);
128 TValue
*o
= L
->base
+1;
131 const char *mode
= strVdata(o
);
/* Unrecognised mode characters are skipped without any effect. */
133 while ((c
= *mode
++)) {
134 if (c
== 's') flags
|= BCDUMP_F_STRIP
;
135 if (c
== 'd') flags
|= BCDUMP_F_DETERMINISTIC
;
137 } else if (tvistruecond(o
)) {
138 flags
|= BCDUMP_F_STRIP
;
141 sb
= lj_buf_tmp_(L
); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
143 if (!pt
|| lj_bcwrite(L
, pt
, writer_buf
, sb
, flags
))
144 lj_err_caller(L
, LJ_ERR_STRDUMP
);
145 setstrV(L
, L
->top
-1, lj_buf_str(L
, sb
));
150 /* ------------------------------------------------------------------------ */
152 /* macro to `unsign' a character */
153 #define uchar(c) ((unsigned char)(c))
/*
** Sentinel values stored in a capture's len field instead of a length:
** CAP_UNFINISHED marks a capture that has been opened but not yet closed,
** CAP_POSITION marks a position capture `()'.
*/
155 #define CAP_UNFINISHED (-1)
156 #define CAP_POSITION (-2)
/*
** State shared by the pattern matching functions during one match attempt:
** the bounds of the subject string and the captures recorded so far.
*/
158 typedef struct MatchState
{
159 const char *src_init
; /* init of source string */
160 const char *src_end
; /* end (`\0') of source string */
162 int level
; /* total number of captures (finished or unfinished) */
/* One entry per capture; at most LUA_MAXCAPTURES captures can be recorded. */
167 } capture
[LUA_MAXCAPTURES
];
/*
** Validate a capture index. Raises LJ_ERR_STRCAPI if l is negative, is not
** below ms->level, or refers to a capture that is still unfinished.
*/
172 static int check_capture(MatchState
*ms
, int l
)
175 if (l
< 0 || l
>= ms
->level
|| ms
->capture
[l
].len
== CAP_UNFINISHED
)
176 lj_err_caller(ms
->L
, LJ_ERR_STRCAPI
);
/*
** Return the index of the most recently opened capture that is still
** unfinished. Raises LJ_ERR_STRPATC if no capture is open.
*/
180 static int capture_to_close(MatchState
*ms
)
182 int level
= ms
->level
;
/* Search from the newest capture down to the oldest. */
183 for (level
--; level
>=0; level
--)
184 if (ms
->capture
[level
].len
== CAP_UNFINISHED
) return level
;
185 lj_err_caller(ms
->L
, LJ_ERR_STRPATC
);
186 return 0; /* unreachable */
/*
** Find the end of the single pattern item starting at p. Callers in match()
** treat the result as a pointer to what comes next in the pattern.
** The error paths raise LJ_ERR_STRPATE and, while looking for the closing
** `]' of a set, LJ_ERR_STRPATM.
*/
189 static const char *classend(MatchState
*ms
, const char *p
)
194 lj_err_caller(ms
->L
, LJ_ERR_STRPATE
);
198 do { /* look for a `]' */
200 lj_err_caller(ms
->L
, LJ_ERR_STRPATM
);
201 if (*(p
++) == L_ESC
&& *p
!= '\0')
202 p
++; /* skip escapes (e.g. `%]') */
/*
** Character class lookup table, indexed by the low 5 bits of the class
** letter (cl & 0x1f) in match_class(). For example, index 1 ('a') holds
** LJ_CHAR_ALPHA and index 4 ('d') holds LJ_CHAR_DIGIT.
** NOTE(review): 0 entries presumably mark letters that name no class -- confirm.
*/
210 static const unsigned char match_class_map
[32] = {
211 0,LJ_CHAR_ALPHA
,0,LJ_CHAR_CNTRL
,LJ_CHAR_DIGIT
,0,0,LJ_CHAR_GRAPH
,0,0,0,0,
212 LJ_CHAR_LOWER
,0,0,0,LJ_CHAR_PUNCT
,0,0,LJ_CHAR_SPACE
,0,
213 LJ_CHAR_UPPER
,0,LJ_CHAR_ALNUM
,LJ_CHAR_XDIGIT
,0,0,0,0,0,0,0
/*
** Test whether character c belongs to the class named by cl.
** For cl in the range 0x40..0x7f the class mask comes from match_class_map.
** If bit 0x20 of cl is set (lower-case letter) the test result is returned
** as is, otherwise (upper-case letter) it is negated.
** 'z' matches the NUL character and 'Z' matches any other character.
*/
216 static int match_class(int c
, int cl
)
218 if ((cl
& 0xc0) == 0x40) {
219 int t
= match_class_map
[(cl
&0x1f)];
221 t
= lj_char_isa(c
, t
);
222 return (cl
& 0x20) ? t
: !t
;
224 if (cl
== 'z') return c
== 0;
225 if (cl
== 'Z') return c
!= 0;
/*
** Test whether character c matches the bracket set that starts at p and is
** bounded by ec. The visible cases are a class tested with match_class(),
** a range written as `x-y', and a plain character compared directly.
** sig is the value returned when c is found in the set.
*/
230 static int matchbracketclass(int c
, const char *p
, const char *ec
)
235 p
++; /* skip the `^' */
240 if (match_class(c
, uchar(*p
)))
/* A '-' counts as a range operator only if a character follows it before ec. */
243 else if ((*(p
+1) == '-') && (p
+2 < ec
)) {
245 if (uchar(*(p
-2)) <= c
&& c
<= uchar(*p
))
248 else if (uchar(*p
) == c
) return sig
;
/*
** Test whether character c matches the single pattern item at p:
** '.' matches anything, L_ESC introduces a class letter, '[' starts a set
** (whose last character is at ep-1), anything else is compared literally.
*/
253 static int singlematch(int c
, const char *p
, const char *ep
)
256 case '.': return 1; /* matches any char */
257 case L_ESC
: return match_class(c
, uchar(*(p
+1)));
258 case '[': return matchbracketclass(c
, p
, ep
-1);
259 default: return (uchar(*p
) == c
);
/* Forward declaration: the helpers below call match() before it is defined. */
263 static const char *match(MatchState
*ms
, const char *s
, const char *p
);
/*
** Match a balanced substring. p must point at two delimiter characters,
** otherwise LJ_ERR_STRPATU is raised. The subject is scanned up to
** ms->src_end while cont tracks the nesting depth. Returns a pointer just
** past the delimiter that brings the depth to zero, or NULL if the string
** ends first.
*/
265 static const char *matchbalance(MatchState
*ms
, const char *s
, const char *p
)
267 if (*p
== 0 || *(p
+1) == 0)
268 lj_err_caller(ms
->L
, LJ_ERR_STRPATU
);
275 while (++s
< ms
->src_end
) {
277 if (--cont
== 0) return s
+1;
278 } else if (*s
== b
) {
283 return NULL
; /* string ends out of balance */
/*
** Greedy repetition: count how many consecutive characters from s match
** the item p..ep, then try to match the rest of the pattern (ep+1) with the
** longest run first, giving back one character at a time.
*/
286 static const char *max_expand(MatchState
*ms
, const char *s
,
287 const char *p
, const char *ep
)
289 ptrdiff_t i
= 0; /* counts maximum expand for item */
290 while ((s
+i
)<ms
->src_end
&& singlematch(uchar(*(s
+i
)), p
, ep
))
292 /* keeps trying to match with the maximum repetitions */
294 const char *res
= match(ms
, (s
+i
), ep
+1);
296 i
--; /* else didn't match; reduce 1 repetition to try again */
/*
** Lazy repetition: try to match the rest of the pattern (ep+1) at s first,
** and consume one more character matching the item p..ep only when needed.
*/
301 static const char *min_expand(MatchState
*ms
, const char *s
,
302 const char *p
, const char *ep
)
305 const char *res
= match(ms
, s
, ep
+1);
308 else if (s
<ms
->src_end
&& singlematch(uchar(*s
), p
, ep
))
309 s
++; /* try with one more repetition */
/*
** Open a new capture at s, recording what (CAP_UNFINISHED or CAP_POSITION)
** as its length, then continue matching with pattern p. The capture is
** dropped again if the rest of the match fails.
** Raises LJ_ERR_STRCAPN when LUA_MAXCAPTURES captures already exist.
*/
315 static const char *start_capture(MatchState
*ms
, const char *s
,
316 const char *p
, int what
)
319 int level
= ms
->level
;
320 if (level
>= LUA_MAXCAPTURES
) lj_err_caller(ms
->L
, LJ_ERR_STRCAPN
);
321 ms
->capture
[level
].init
= s
;
322 ms
->capture
[level
].len
= what
;
324 if ((res
=match(ms
, s
, p
)) == NULL
) /* match failed? */
325 ms
->level
--; /* undo capture */
/*
** Close the innermost open capture at s by storing its length, then
** continue matching with pattern p. The capture is marked unfinished again
** if the rest of the match fails.
*/
329 static const char *end_capture(MatchState
*ms
, const char *s
,
332 int l
= capture_to_close(ms
);
334 ms
->capture
[l
].len
= s
- ms
->capture
[l
].init
; /* close capture */
335 if ((res
= match(ms
, s
, p
)) == NULL
) /* match failed? */
336 ms
->capture
[l
].len
= CAP_UNFINISHED
; /* undo capture */
/*
** Match a back-reference: after validating l with check_capture(), compare
** the text of that capture against the subject at s. The comparison only
** happens if at least len bytes remain before ms->src_end.
*/
340 static const char *match_capture(MatchState
*ms
, const char *s
, int l
)
343 l
= check_capture(ms
, l
);
344 len
= (size_t)ms
->capture
[l
].len
;
345 if ((size_t)(ms
->src_end
-s
) >= len
&&
346 memcmp(ms
->capture
[l
].init
, s
, len
) == 0)
/*
** Core pattern matcher: try to match pattern p against the subject at s.
** A NULL value of s denotes a failed match in the branches below.
** Recursion is bounded: LJ_ERR_STRPATX is raised once ms->depth exceeds
** LJ_MAX_XLEVEL. Items that need no backtracking loop back to the init
** label instead of recursing.
*/
352 static const char *match(MatchState
*ms
, const char *s
, const char *p
)
354 if (++ms
->depth
> LJ_MAX_XLEVEL
)
355 lj_err_caller(ms
->L
, LJ_ERR_STRPATX
);
356 init
: /* using goto's to optimize tail recursion */
358 case '(': /* start capture */
359 if (*(p
+1) == ')') /* position capture? */
360 s
= start_capture(ms
, s
, p
+2, CAP_POSITION
);
362 s
= start_capture(ms
, s
, p
+1, CAP_UNFINISHED
);
364 case ')': /* end capture */
365 s
= end_capture(ms
, s
, p
+1);
369 case 'b': /* balanced string? */
370 s
= matchbalance(ms
, s
, p
+2);
371 if (s
== NULL
) break;
373 goto init
; /* else s = match(ms, s, p+4); */
374 case 'f': { /* frontier? */
375 const char *ep
; char previous
;
378 lj_err_caller(ms
->L
, LJ_ERR_STRPATB
);
379 ep
= classend(ms
, p
); /* points to what is next */
/* At the start of the subject the previous character is taken to be NUL. */
380 previous
= (s
== ms
->src_init
) ? '\0' : *(s
-1);
/* Fail unless the previous char is outside the set and the current one inside. */
381 if (matchbracketclass(uchar(previous
), p
, ep
-1) ||
382 !matchbracketclass(uchar(*s
), p
, ep
-1)) { s
= NULL
; break; }
384 goto init
; /* else s = match(ms, s, ep); */
387 if (lj_char_isdigit(uchar(*(p
+1)))) { /* capture results (%0-%9)? */
388 s
= match_capture(ms
, s
, uchar(*(p
+1)));
389 if (s
== NULL
) break;
391 goto init
; /* else s = match(ms, s, p+2) */
393 goto dflt
; /* case default */
396 case '\0': /* end of pattern */
397 break; /* match succeeded */
399 /* is the `$' the last char in pattern? */
400 if (*(p
+1) != '\0') goto dflt
;
401 if (s
!= ms
->src_end
) s
= NULL
; /* check end of string */
403 default: dflt
: { /* it is a pattern item */
404 const char *ep
= classend(ms
, p
); /* points to what is next */
/* m is non-zero if the current subject character matches the item. */
405 int m
= s
<ms
->src_end
&& singlematch(uchar(*s
), p
, ep
);
407 case '?': { /* optional */
409 if (m
&& ((res
=match(ms
, s
+1, ep
+1)) != NULL
)) {
414 goto init
; /* else s = match(ms, s, ep+1); */
416 case '*': /* 0 or more repetitions */
417 s
= max_expand(ms
, s
, p
, ep
);
419 case '+': /* 1 or more repetitions */
420 s
= (m
? max_expand(ms
, s
+1, p
, ep
) : NULL
);
422 case '-': /* 0 or more repetitions (minimum) */
423 s
= min_expand(ms
, s
, p
, ep
);
426 if (m
) { s
++; p
=ep
; goto init
; } /* else s = match(ms, s+1, ep); */
/*
** Push the value of capture i onto the Lua stack.
** For an index that is not below ms->level, index 0 pushes the whole match
** s..e, and LJ_ERR_STRCAPI is the error raised on that path.
** Otherwise a position capture pushes its 1-based offset in the subject and
** any other capture pushes its text. An unfinished capture raises
** LJ_ERR_STRCAPU.
*/
437 static void push_onecapture(MatchState
*ms
, int i
, const char *s
, const char *e
)
439 if (i
>= ms
->level
) {
440 if (i
== 0) /* ms->level == 0, too */
441 lua_pushlstring(ms
->L
, s
, (size_t)(e
- s
)); /* add whole match */
443 lj_err_caller(ms
->L
, LJ_ERR_STRCAPI
);
445 ptrdiff_t l
= ms
->capture
[i
].len
;
446 if (l
== CAP_UNFINISHED
) lj_err_caller(ms
->L
, LJ_ERR_STRCAPU
);
447 if (l
== CAP_POSITION
)
448 lua_pushinteger(ms
->L
, ms
->capture
[i
].init
- ms
->src_init
+ 1);
450 lua_pushlstring(ms
->L
, ms
->capture
[i
].init
, (size_t)l
);
/*
** Push all captures of the current match and return how many values were
** pushed. If the pattern has no captures and s is non-NULL, the whole match
** s..e is pushed as the single result.
*/
454 static int push_captures(MatchState
*ms
, const char *s
, const char *e
)
457 int nlevels
= (ms
->level
== 0 && s
) ? 1 : ms
->level
;
458 luaL_checkstack(ms
->L
, nlevels
, "too many captures");
459 for (i
= 0; i
< nlevels
; i
++)
460 push_onecapture(ms
, i
, s
, e
);
461 return nlevels
; /* number of strings pushed */
/*
** Shared implementation of string.find (find = 1) and string.match
** (find = 0). Argument 1 is the subject, argument 2 the pattern and
** argument 3 an optional 1-based start position that defaults to 1.
** With find set, a fixed-string search via lj_str_find() is used when the
** fourth argument is true or the pattern has no special characters.
** Otherwise the pattern is tried with match() at successive positions, or
** at the first position only when it starts with '^'.
** When nothing is found, nil is stored at L->top-1.
*/
464 static int str_find_aux(lua_State
*L
, int find
)
466 GCstr
*s
= lj_lib_checkstr(L
, 1);
467 GCstr
*p
= lj_lib_checkstr(L
, 2);
468 int32_t start
= lj_lib_optint(L
, 3, 1);
/* Convert to a 0-based offset; a negative start counts from the end. */
470 if (start
< 0) start
+= (int32_t)s
->len
; else start
--;
471 if (start
< 0) start
= 0;
481 if (find
&& ((L
->base
+3 < L
->top
&& tvistruecond(L
->base
+3)) ||
482 !lj_str_haspattern(p
))) { /* Search for fixed string. */
483 const char *q
= lj_str_find(strdata(s
)+st
, strdata(p
), s
->len
-st
, p
->len
);
/* Results are the 1-based start and end positions of the occurrence. */
485 setintV(L
->top
-2, (int32_t)(q
-strdata(s
)) + 1);
486 setintV(L
->top
-1, (int32_t)(q
-strdata(s
)) + (int32_t)p
->len
);
489 } else { /* Search for pattern. */
491 const char *pstr
= strdata(p
);
492 const char *sstr
= strdata(s
) + st
;
494 if (*pstr
== '^') { pstr
++; anchor
= 1; }
496 ms
.src_init
= strdata(s
);
497 ms
.src_end
= strdata(s
) + s
->len
;
498 do { /* Loop through string and try to match the pattern. */
500 ms
.level
= ms
.depth
= 0;
501 q
= match(&ms
, sstr
, pstr
);
/* NOTE(review): this position-pushing path is presumably the find case -- confirm. */
504 setintV(L
->top
++, (int32_t)(sstr
-(strdata(s
)-1)));
505 setintV(L
->top
++, (int32_t)(q
-strdata(s
)));
506 return push_captures(&ms
, NULL
, NULL
) + 2;
508 return push_captures(&ms
, sstr
, q
);
511 } while (sstr
++ < ms
.src_end
&& !anchor
);
513 setnilV(L
->top
-1); /* Not found. */
/* string.find: delegates to str_find_aux() with find = 1. */
517 LJLIB_CF(string_find
) LJLIB_REC(.)
519 return str_find_aux(L
, 1);
/* string.match: delegates to str_find_aux() with find = 0. */
522 LJLIB_CF(string_match
)
524 return str_find_aux(L
, 0);
/*
** Iterator function behind string.gmatch. Upvalue 1 is the subject string,
** upvalue 2 the pattern and upvalue 3 holds the current offset in u32.lo.
** The pattern is tried at each position from that offset up to and
** including the end of the subject. On a match the offset is updated and
** the captures are returned. Returns 0 results when no match is left.
*/
527 LJLIB_NOREG
LJLIB_CF(string_gmatch_aux
)
529 const char *p
= strVdata(lj_lib_upvalue(L
, 2));
530 GCstr
*str
= strV(lj_lib_upvalue(L
, 1));
531 const char *s
= strdata(str
);
532 TValue
*tvpos
= lj_lib_upvalue(L
, 3);
533 const char *src
= s
+ tvpos
->u32
.lo
;
537 ms
.src_end
= s
+ str
->len
;
538 for (; src
<= ms
.src_end
; src
++) {
540 ms
.level
= ms
.depth
= 0;
541 if ((e
= match(&ms
, src
, p
)) != NULL
) {
542 int32_t pos
= (int32_t)(e
- s
);
543 if (e
== src
) pos
++; /* Ensure progress for empty match. */
544 tvpos
->u32
.lo
= (uint32_t)pos
;
545 return push_captures(&ms
, src
, e
);
548 return 0; /* not found */
/*
** string.gmatch(s, pattern): check that both arguments are strings and
** push lj_cf_string_gmatch_aux as a C closure with 3 upvalues.
*/
551 LJLIB_CF(string_gmatch
)
553 lj_lib_checkstr(L
, 1);
554 lj_lib_checkstr(L
, 2);
557 lj_lib_pushcc(L
, lj_cf_string_gmatch_aux
, FF_string_gmatch_aux
, 3);
/*
** Append the replacement string (argument 3) for the match s..e to b.
** Characters other than L_ESC are copied as is. In the escape branch a
** non-digit is copied literally, '0' appends the whole match and any other
** digit appends the corresponding capture.
*/
561 static void add_s(MatchState
*ms
, luaL_Buffer
*b
, const char *s
, const char *e
)
564 const char *news
= lua_tolstring(ms
->L
, 3, &l
);
565 for (i
= 0; i
< l
; i
++) {
566 if (news
[i
] != L_ESC
) {
567 luaL_addchar(b
, news
[i
]);
570 if (!lj_char_isdigit(uchar(news
[i
]))) {
571 luaL_addchar(b
, news
[i
]);
572 } else if (news
[i
] == '0') {
573 luaL_addlstring(b
, s
, (size_t)(e
- s
));
/* Digit '1' maps to capture index 0. */
575 push_onecapture(ms
, news
[i
] - '1', s
, e
);
576 luaL_addvalue(b
); /* add capture to accumulated result */
/*
** Append the replacement for the match s..e to b, chosen by the type of
** argument 3. The function case pushes all captures via push_captures().
** A nil or false result keeps the original matched text. A result that is
** not a string raises LJ_ERR_STRGSRV with its type name.
*/
582 static void add_value(MatchState
*ms
, luaL_Buffer
*b
,
583 const char *s
, const char *e
)
585 lua_State
*L
= ms
->L
;
586 switch (lua_type(L
, 3)) {
592 case LUA_TFUNCTION
: {
595 n
= push_captures(ms
, s
, e
);
/* NOTE(review): presumably the table case, keyed by the first capture -- confirm. */
600 push_onecapture(ms
, 0, s
, e
);
605 if (!lua_toboolean(L
, -1)) { /* nil or false? */
607 lua_pushlstring(L
, s
, (size_t)(e
- s
)); /* keep original text */
608 } else if (!lua_isstring(L
, -1)) {
609 lj_err_callerv(L
, LJ_ERR_STRGSRV
, luaL_typename(L
, -1));
611 luaL_addvalue(b
); /* add result to accumulator */
/*
** string.gsub(s, pattern, repl [, n]): build a copy of s in which matches
** of the pattern are replaced through add_value(), and push the number of
** substitutions. repl must be a number, string, function or table,
** otherwise LJ_ERR_NOSFT is raised for argument 3. The optional fourth
** argument defaults to the subject length plus one.
*/
614 LJLIB_CF(string_gsub
)
617 const char *src
= luaL_checklstring(L
, 1, &srcl
);
618 const char *p
= luaL_checkstring(L
, 2);
619 int tr
= lua_type(L
, 3);
620 int max_s
= luaL_optint(L
, 4, (int)(srcl
+1));
/* A leading '^' is skipped and remembered in anchor. */
621 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
625 if (!(tr
== LUA_TNUMBER
|| tr
== LUA_TSTRING
||
626 tr
== LUA_TFUNCTION
|| tr
== LUA_TTABLE
))
627 lj_err_arg(L
, 3, LJ_ERR_NOSFT
);
628 luaL_buffinit(L
, &b
);
631 ms
.src_end
= src
+srcl
;
634 ms
.level
= ms
.depth
= 0;
635 e
= match(&ms
, src
, p
);
638 add_value(&ms
, &b
, src
, e
);
640 if (e
&& e
>src
) /* non empty match? */
641 src
= e
; /* skip it */
/* Otherwise copy one subject character and advance, so the scan progresses. */
642 else if (src
< ms
.src_end
)
643 luaL_addchar(&b
, *src
++);
/* Append whatever is left of the subject after the last position handled. */
649 luaL_addlstring(&b
, src
, (size_t)(ms
.src_end
-src
));
651 lua_pushinteger(L
, n
); /* number of substitutions */
655 /* ------------------------------------------------------------------------ */
/*
** string.format: format the arguments into the buffer sb with
** lj_strfmt_putarg() and store the resulting string at L->top-1. The value
** returned by lj_strfmt_putarg() is passed back, negated, on the next call.
*/
657 LJLIB_CF(string_format
) LJLIB_REC(.)
663 retry
= lj_strfmt_putarg(L
, sb
, 1, -retry
);
665 setstrV(L
, L
->top
-1, lj_buf_str(L
, sb
));
670 /* ------------------------------------------------------------------------ */
672 #include "lj_libdef.h"
/*
** Open the string library: register its functions under LUA_STRLIBNAME,
** install a new table as the base metatable for LJ_TSTR with its MM_index
** entry set to the table at L->top-1, and pre-register the
** LUA_STRLIBNAME ".buffer" loader.
** NOTE(review): the table at L->top-1 is presumably the library table left
** by LJ_LIB_REG -- confirm.
*/
674 LUALIB_API
int luaopen_string(lua_State
*L
)
678 LJ_LIB_REG(L
, LUA_STRLIBNAME
, string
);
679 mt
= lj_tab_new(L
, 0, 1);
680 /* NOBARRIER: basemt is a GC root. */
682 setgcref(basemt_it(g
, LJ_TSTR
), obj2gco(mt
));
683 settabV(L
, lj_tab_setstr(L
, mt
, mmname_str(g
, MM_index
)), tabV(L
->top
-1));
/* Every bit of nomm is set except the one for MM_index. */
684 mt
->nomm
= (uint8_t)(~(1u<<MM_index
));
686 lj_lib_prereg(L
, LUA_STRLIBNAME
".buffer", luaopen_string_buffer
, tabV(L
->top
-1));