2 ** $Id: lstrlib.c,v 1.132a 2006/04/26 20:41:19 roberto Exp $
3 ** Standard library for string operations and pattern-matching
4 ** See Copyright Notice in lua.h
23 /* macro to `unsign' a character */
24 #define uchar(c) ((unsigned char)(c))
28 static int str_len (lua_State
*L
) {
30 luaL_checklstring(L
, 1, &l
);
31 lua_pushinteger(L
, l
);
/*
** Translate a possibly relative string position into an absolute one.
** A non-negative `pos' is returned unchanged.  A negative `pos' counts
** back from the end of a string of length `len' (-1 names the last
** character).  When a negative `pos' reaches back past the start of the
** string the result is 0 instead of a negative number, so callers that
** convert the result to `size_t' never see it wrap to a huge offset.
*/
static ptrdiff_t posrelat (ptrdiff_t pos, size_t len) {
  if (pos < 0)
    pos = (ptrdiff_t)len + pos + 1;  /* count back from the end */
  return (pos >= 0) ? pos : 0;  /* still negative: before the start */
}
42 static int str_sub (lua_State
*L
) {
44 const char *s
= luaL_checklstring(L
, 1, &l
);
45 ptrdiff_t start
= posrelat(luaL_checkinteger(L
, 2), l
);
46 ptrdiff_t end
= posrelat(luaL_optinteger(L
, 3, -1), l
);
47 if (start
< 1) start
= 1;
48 if (end
> (ptrdiff_t)l
) end
= (ptrdiff_t)l
;
50 lua_pushlstring(L
, s
+start
-1, end
-start
+1);
51 else lua_pushliteral(L
, "");
56 static int str_reverse (lua_State
*L
) {
59 const char *s
= luaL_checklstring(L
, 1, &l
);
61 while (l
--) luaL_addchar(&b
, s
[l
]);
67 static int str_lower (lua_State
*L
) {
71 const char *s
= luaL_checklstring(L
, 1, &l
);
74 luaL_addchar(&b
, tolower(uchar(s
[i
])));
80 static int str_upper (lua_State
*L
) {
84 const char *s
= luaL_checklstring(L
, 1, &l
);
87 luaL_addchar(&b
, toupper(uchar(s
[i
])));
92 static int str_rep (lua_State
*L
) {
95 const char *s
= luaL_checklstring(L
, 1, &l
);
96 int n
= luaL_checkint(L
, 2);
99 luaL_addlstring(&b
, s
, l
);
105 static int str_byte (lua_State
*L
) {
107 const char *s
= luaL_checklstring(L
, 1, &l
);
108 ptrdiff_t posi
= posrelat(luaL_optinteger(L
, 2, 1), l
);
109 ptrdiff_t pose
= posrelat(luaL_optinteger(L
, 3, posi
), l
);
111 if (posi
<= 0) posi
= 1;
112 if ((size_t)pose
> l
) pose
= l
;
113 if (posi
> pose
) return 0; /* empty interval; return no values */
114 n
= (int)(pose
- posi
+ 1);
115 if (posi
+ n
<= pose
) /* overflow? */
116 luaL_error(L
, "string slice too long");
117 luaL_checkstack(L
, n
, "string slice too long");
119 lua_pushinteger(L
, uchar(s
[posi
+i
-1]));
124 static int str_char (lua_State
*L
) {
125 int n
= lua_gettop(L
); /* number of arguments */
128 luaL_buffinit(L
, &b
);
129 for (i
=1; i
<=n
; i
++) {
130 int c
= luaL_checkint(L
, i
);
131 luaL_argcheck(L
, uchar(c
) == c
, i
, "invalid value");
132 luaL_addchar(&b
, uchar(c
));
139 static int writer (lua_State
*L
, const void* b
, size_t size
, void* B
) {
141 luaL_addlstring((luaL_Buffer
*) B
, (const char *)b
, size
);
146 static int str_dump (lua_State
*L
) {
148 luaL_checktype(L
, 1, LUA_TFUNCTION
);
151 if (lua_dump(L
, writer
, &b
) != 0)
152 luaL_error(L
, "unable to dump given function");
160 ** {======================================================
162 ** =======================================================
166 #define CAP_UNFINISHED (-1)
167 #define CAP_POSITION (-2)
169 typedef struct MatchState
{
170 const char *src_init
; /* init of source string */
171 const char *src_end
; /* end (`\0') of source string */
173 int level
; /* total number of captures (finished or unfinished) */
177 } capture
[LUA_MAXCAPTURES
];
182 #define SPECIALS "^$*+?.([%-"
185 static int check_capture (MatchState
*ms
, int l
) {
187 if (l
< 0 || l
>= ms
->level
|| ms
->capture
[l
].len
== CAP_UNFINISHED
)
188 return luaL_error(ms
->L
, "invalid capture index");
193 static int capture_to_close (MatchState
*ms
) {
194 int level
= ms
->level
;
195 for (level
--; level
>=0; level
--)
196 if (ms
->capture
[level
].len
== CAP_UNFINISHED
) return level
;
197 return luaL_error(ms
->L
, "invalid pattern capture");
201 static const char *classend (MatchState
*ms
, const char *p
) {
205 luaL_error(ms
->L
, "malformed pattern (ends with " LUA_QL("%%") ")");
210 do { /* look for a `]' */
212 luaL_error(ms
->L
, "malformed pattern (missing " LUA_QL("]") ")");
213 if (*(p
++) == L_ESC
&& *p
!= '\0')
214 p
++; /* skip escapes (e.g. `%]') */
225 static int match_class (int c
, int cl
) {
227 switch (tolower(cl
)) {
228 case 'a' : res
= isalpha(c
); break;
229 case 'c' : res
= iscntrl(c
); break;
230 case 'd' : res
= isdigit(c
); break;
231 case 'l' : res
= islower(c
); break;
232 case 'p' : res
= ispunct(c
); break;
233 case 's' : res
= isspace(c
); break;
234 case 'u' : res
= isupper(c
); break;
235 case 'w' : res
= isalnum(c
); break;
236 case 'x' : res
= isxdigit(c
); break;
237 case 'z' : res
= (c
== 0); break;
238 default: return (cl
== c
);
240 return (islower(cl
) ? res
: !res
);
244 static int matchbracketclass (int c
, const char *p
, const char *ec
) {
248 p
++; /* skip the `^' */
253 if (match_class(c
, uchar(*p
)))
256 else if ((*(p
+1) == '-') && (p
+2 < ec
)) {
258 if (uchar(*(p
-2)) <= c
&& c
<= uchar(*p
))
261 else if (uchar(*p
) == c
) return sig
;
267 static int singlematch (int c
, const char *p
, const char *ep
) {
269 case '.': return 1; /* matches any char */
270 case L_ESC
: return match_class(c
, uchar(*(p
+1)));
271 case '[': return matchbracketclass(c
, p
, ep
-1);
272 default: return (uchar(*p
) == c
);
277 static const char *match (MatchState
*ms
, const char *s
, const char *p
);
280 static const char *matchbalance (MatchState
*ms
, const char *s
,
282 if (*p
== 0 || *(p
+1) == 0)
283 luaL_error(ms
->L
, "unbalanced pattern");
284 if (*s
!= *p
) return NULL
;
289 while (++s
< ms
->src_end
) {
291 if (--cont
== 0) return s
+1;
293 else if (*s
== b
) cont
++;
296 return NULL
; /* string ends out of balance */
300 static const char *max_expand (MatchState
*ms
, const char *s
,
301 const char *p
, const char *ep
) {
302 ptrdiff_t i
= 0; /* counts maximum expand for item */
303 while ((s
+i
)<ms
->src_end
&& singlematch(uchar(*(s
+i
)), p
, ep
))
305 /* keeps trying to match with the maximum repetitions */
307 const char *res
= match(ms
, (s
+i
), ep
+1);
309 i
--; /* else didn't match; reduce 1 repetition to try again */
315 static const char *min_expand (MatchState
*ms
, const char *s
,
316 const char *p
, const char *ep
) {
318 const char *res
= match(ms
, s
, ep
+1);
321 else if (s
<ms
->src_end
&& singlematch(uchar(*s
), p
, ep
))
322 s
++; /* try with one more repetition */
328 static const char *start_capture (MatchState
*ms
, const char *s
,
329 const char *p
, int what
) {
331 int level
= ms
->level
;
332 if (level
>= LUA_MAXCAPTURES
) luaL_error(ms
->L
, "too many captures");
333 ms
->capture
[level
].init
= s
;
334 ms
->capture
[level
].len
= what
;
336 if ((res
=match(ms
, s
, p
)) == NULL
) /* match failed? */
337 ms
->level
--; /* undo capture */
342 static const char *end_capture (MatchState
*ms
, const char *s
,
344 int l
= capture_to_close(ms
);
346 ms
->capture
[l
].len
= s
- ms
->capture
[l
].init
; /* close capture */
347 if ((res
= match(ms
, s
, p
)) == NULL
) /* match failed? */
348 ms
->capture
[l
].len
= CAP_UNFINISHED
; /* undo capture */
353 static const char *match_capture (MatchState
*ms
, const char *s
, int l
) {
355 l
= check_capture(ms
, l
);
356 len
= ms
->capture
[l
].len
;
357 if ((size_t)(ms
->src_end
-s
) >= len
&&
358 memcmp(ms
->capture
[l
].init
, s
, len
) == 0)
364 static const char *match (MatchState
*ms
, const char *s
, const char *p
) {
365 init
: /* using goto's to optimize tail recursion */
367 case '(': { /* start capture */
368 if (*(p
+1) == ')') /* position capture? */
369 return start_capture(ms
, s
, p
+2, CAP_POSITION
);
371 return start_capture(ms
, s
, p
+1, CAP_UNFINISHED
);
373 case ')': { /* end capture */
374 return end_capture(ms
, s
, p
+1);
378 case 'b': { /* balanced string? */
379 s
= matchbalance(ms
, s
, p
+2);
380 if (s
== NULL
) return NULL
;
381 p
+=4; goto init
; /* else return match(ms, s, p+4); */
383 case 'f': { /* frontier? */
384 const char *ep
; char previous
;
387 luaL_error(ms
->L
, "missing " LUA_QL("[") " after "
388 LUA_QL("%%f") " in pattern");
389 ep
= classend(ms
, p
); /* points to what is next */
390 previous
= (s
== ms
->src_init
) ? '\0' : *(s
-1);
391 if (matchbracketclass(uchar(previous
), p
, ep
-1) ||
392 !matchbracketclass(uchar(*s
), p
, ep
-1)) return NULL
;
393 p
=ep
; goto init
; /* else return match(ms, s, ep); */
396 if (isdigit(uchar(*(p
+1)))) { /* capture results (%0-%9)? */
397 s
= match_capture(ms
, s
, uchar(*(p
+1)));
398 if (s
== NULL
) return NULL
;
399 p
+=2; goto init
; /* else return match(ms, s, p+2) */
401 goto dflt
; /* case default */
405 case '\0': { /* end of pattern */
406 return s
; /* match succeeded */
409 if (*(p
+1) == '\0') /* is the `$' the last char in pattern? */
410 return (s
== ms
->src_end
) ? s
: NULL
; /* check end of string */
413 default: dflt
: { /* it is a pattern item */
414 const char *ep
= classend(ms
, p
); /* points to what is next */
415 int m
= s
<ms
->src_end
&& singlematch(uchar(*s
), p
, ep
);
417 case '?': { /* optional */
419 if (m
&& ((res
=match(ms
, s
+1, ep
+1)) != NULL
))
421 p
=ep
+1; goto init
; /* else return match(ms, s, ep+1); */
423 case '*': { /* 0 or more repetitions */
424 return max_expand(ms
, s
, p
, ep
);
426 case '+': { /* 1 or more repetitions */
427 return (m
? max_expand(ms
, s
+1, p
, ep
) : NULL
);
429 case '-': { /* 0 or more repetitions (minimum) */
430 return min_expand(ms
, s
, p
, ep
);
434 s
++; p
=ep
; goto init
; /* else return match(ms, s+1, ep); */
443 static const char *lmemfind (const char *s1
, size_t l1
,
444 const char *s2
, size_t l2
) {
445 if (l2
== 0) return s1
; /* empty strings are everywhere */
446 else if (l2
> l1
) return NULL
; /* avoids a negative `l1' */
448 const char *init
; /* to search for a `*s2' inside `s1' */
449 l2
--; /* 1st char will be checked by `memchr' */
450 l1
= l1
-l2
; /* `s2' cannot be found after that */
451 while (l1
> 0 && (init
= (const char *)memchr(s1
, *s2
, l1
)) != NULL
) {
452 init
++; /* 1st char is already checked */
453 if (memcmp(init
, s2
+1, l2
) == 0)
455 else { /* correct `l1' and `s1' to try again */
460 return NULL
; /* not found */
465 static void push_onecapture (MatchState
*ms
, int i
, const char *s
,
467 if (i
>= ms
->level
) {
468 if (i
== 0) /* ms->level == 0, too */
469 lua_pushlstring(ms
->L
, s
, e
- s
); /* add whole match */
471 luaL_error(ms
->L
, "invalid capture index");
474 ptrdiff_t l
= ms
->capture
[i
].len
;
475 if (l
== CAP_UNFINISHED
) luaL_error(ms
->L
, "unfinished capture");
476 if (l
== CAP_POSITION
)
477 lua_pushinteger(ms
->L
, ms
->capture
[i
].init
- ms
->src_init
+ 1);
479 lua_pushlstring(ms
->L
, ms
->capture
[i
].init
, l
);
484 static int push_captures (MatchState
*ms
, const char *s
, const char *e
) {
486 int nlevels
= (ms
->level
== 0 && s
) ? 1 : ms
->level
;
487 luaL_checkstack(ms
->L
, nlevels
, "too many captures");
488 for (i
= 0; i
< nlevels
; i
++)
489 push_onecapture(ms
, i
, s
, e
);
490 return nlevels
; /* number of strings pushed */
494 static int str_find_aux (lua_State
*L
, int find
) {
496 const char *s
= luaL_checklstring(L
, 1, &l1
);
497 const char *p
= luaL_checklstring(L
, 2, &l2
);
498 ptrdiff_t init
= posrelat(luaL_optinteger(L
, 3, 1), l1
) - 1;
499 if (init
< 0) init
= 0;
500 else if ((size_t)(init
) > l1
) init
= (ptrdiff_t)l1
;
501 if (find
&& (lua_toboolean(L
, 4) || /* explicit request? */
502 strpbrk(p
, SPECIALS
) == NULL
)) { /* or no special characters? */
503 /* do a plain search */
504 const char *s2
= lmemfind(s
+init
, l1
-init
, p
, l2
);
506 lua_pushinteger(L
, s2
-s
+1);
507 lua_pushinteger(L
, s2
-s
+l2
);
513 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
514 const char *s1
=s
+init
;
521 if ((res
=match(&ms
, s1
, p
)) != NULL
) {
523 lua_pushinteger(L
, s1
-s
+1); /* start */
524 lua_pushinteger(L
, res
-s
); /* end */
525 return push_captures(&ms
, NULL
, 0) + 2;
528 return push_captures(&ms
, s1
, res
);
530 } while (s1
++ < ms
.src_end
&& !anchor
);
532 lua_pushnil(L
); /* not found */
537 static int str_find (lua_State
*L
) {
538 return str_find_aux(L
, 1);
542 static int str_match (lua_State
*L
) {
543 return str_find_aux(L
, 0);
547 static int gmatch_aux (lua_State
*L
) {
550 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
551 const char *p
= lua_tostring(L
, lua_upvalueindex(2));
556 for (src
= s
+ (size_t)lua_tointeger(L
, lua_upvalueindex(3));
561 if ((e
= match(&ms
, src
, p
)) != NULL
) {
562 lua_Integer newstart
= e
-s
;
563 if (e
== src
) newstart
++; /* empty match? go at least one position */
564 lua_pushinteger(L
, newstart
);
565 lua_replace(L
, lua_upvalueindex(3));
566 return push_captures(&ms
, src
, e
);
569 return 0; /* not found */
573 static int gmatch (lua_State
*L
) {
574 luaL_checkstring(L
, 1);
575 luaL_checkstring(L
, 2);
577 lua_pushinteger(L
, 0);
578 lua_pushcclosure(L
, gmatch_aux
, 3);
583 static int gfind_nodef (lua_State
*L
) {
584 return luaL_error(L
, LUA_QL("string.gfind") " was renamed to "
585 LUA_QL("string.gmatch"));
589 static void add_s (MatchState
*ms
, luaL_Buffer
*b
, const char *s
,
592 const char *news
= lua_tolstring(ms
->L
, 3, &l
);
593 for (i
= 0; i
< l
; i
++) {
594 if (news
[i
] != L_ESC
)
595 luaL_addchar(b
, news
[i
]);
598 if (!isdigit(uchar(news
[i
])))
599 luaL_addchar(b
, news
[i
]);
600 else if (news
[i
] == '0')
601 luaL_addlstring(b
, s
, e
- s
);
603 push_onecapture(ms
, news
[i
] - '1', s
, e
);
604 luaL_addvalue(b
); /* add capture to accumulated result */
611 static void add_value (MatchState
*ms
, luaL_Buffer
*b
, const char *s
,
613 lua_State
*L
= ms
->L
;
614 switch (lua_type(L
, 3)) {
620 case LUA_TFUNCTION
: {
623 n
= push_captures(ms
, s
, e
);
628 push_onecapture(ms
, 0, s
, e
);
633 luaL_argerror(L
, 3, "string/function/table expected");
637 if (!lua_toboolean(L
, -1)) { /* nil or false? */
639 lua_pushlstring(L
, s
, e
- s
); /* keep original text */
641 else if (!lua_isstring(L
, -1))
642 luaL_error(L
, "invalid replacement value (a %s)", luaL_typename(L
, -1));
643 luaL_addvalue(b
); /* add result to accumulator */
647 static int str_gsub (lua_State
*L
) {
649 const char *src
= luaL_checklstring(L
, 1, &srcl
);
650 const char *p
= luaL_checkstring(L
, 2);
651 int max_s
= luaL_optint(L
, 4, srcl
+1);
652 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
656 luaL_buffinit(L
, &b
);
659 ms
.src_end
= src
+srcl
;
663 e
= match(&ms
, src
, p
);
666 add_value(&ms
, &b
, src
, e
);
668 if (e
&& e
>src
) /* non empty match? */
669 src
= e
; /* skip it */
670 else if (src
< ms
.src_end
)
671 luaL_addchar(&b
, *src
++);
675 luaL_addlstring(&b
, src
, ms
.src_end
-src
);
677 lua_pushinteger(L
, n
); /* number of substitutions */
681 /* }====================================================== */
684 /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
686 /* valid flags in a format specification */
687 #define FLAGS "-+ #0"
689 ** maximum size of each format specification (such as '%-099.99d')
690 ** (+10 accounts for %99.99x plus margin of error)
692 #define MAX_FORMAT (sizeof(FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
695 static void addquoted (lua_State
*L
, luaL_Buffer
*b
, int arg
) {
697 const char *s
= luaL_checklstring(L
, arg
, &l
);
698 luaL_addchar(b
, '"');
701 case '"': case '\\': case '\n': {
702 luaL_addchar(b
, '\\');
707 luaL_addlstring(b
, "\\r", 2);
711 luaL_addlstring(b
, "\\000", 4);
721 luaL_addchar(b
, '"');
724 static const char *scanformat (lua_State
*L
, const char *strfrmt
, char *form
) {
725 const char *p
= strfrmt
;
726 while (*p
!= '\0' && strchr(FLAGS
, *p
) != NULL
) p
++; /* skip flags */
727 if ((size_t)(p
- strfrmt
) >= sizeof(FLAGS
))
728 luaL_error(L
, "invalid format (repeated flags)");
729 if (isdigit(uchar(*p
))) p
++; /* skip width */
730 if (isdigit(uchar(*p
))) p
++; /* (2 digits at most) */
733 if (isdigit(uchar(*p
))) p
++; /* skip precision */
734 if (isdigit(uchar(*p
))) p
++; /* (2 digits at most) */
736 if (isdigit(uchar(*p
)))
737 luaL_error(L
, "invalid format (width or precision too long)");
739 strncpy(form
, strfrmt
, p
- strfrmt
+ 1);
740 form
+= p
- strfrmt
+ 1;
746 static void addintlen (char *form
) {
747 size_t l
= strlen(form
);
748 char spec
= form
[l
- 1];
749 strcpy(form
+ l
- 1, LUA_INTFRMLEN
);
750 form
[l
+ sizeof(LUA_INTFRMLEN
) - 2] = spec
;
751 form
[l
+ sizeof(LUA_INTFRMLEN
) - 1] = '\0';
755 static int str_format (lua_State
*L
) {
758 const char *strfrmt
= luaL_checklstring(L
, arg
, &sfl
);
759 const char *strfrmt_end
= strfrmt
+sfl
;
761 luaL_buffinit(L
, &b
);
762 while (strfrmt
< strfrmt_end
) {
763 if (*strfrmt
!= L_ESC
)
764 luaL_addchar(&b
, *strfrmt
++);
765 else if (*++strfrmt
== L_ESC
)
766 luaL_addchar(&b
, *strfrmt
++); /* %% */
767 else { /* format item */
768 char form
[MAX_FORMAT
]; /* to store the format (`%...') */
769 char buff
[MAX_ITEM
]; /* to store the formatted item */
771 strfrmt
= scanformat(L
, strfrmt
, form
);
772 switch (*strfrmt
++) {
774 sprintf(buff
, form
, (int)luaL_checknumber(L
, arg
));
777 case 'd': case 'i': {
779 sprintf(buff
, form
, (LUA_INTFRM_T
)luaL_checknumber(L
, arg
));
782 case 'o': case 'u': case 'x': case 'X': {
784 sprintf(buff
, form
, (unsigned LUA_INTFRM_T
)luaL_checknumber(L
, arg
));
787 case 'e': case 'E': case 'f':
788 case 'g': case 'G': {
789 sprintf(buff
, form
, (double)luaL_checknumber(L
, arg
));
793 addquoted(L
, &b
, arg
);
794 continue; /* skip the 'addsize' at the end */
798 const char *s
= luaL_checklstring(L
, arg
, &l
);
799 if (!strchr(form
, '.') && l
>= 100) {
800 /* no precision and string is too long to be formatted;
801 keep original string */
802 lua_pushvalue(L
, arg
);
804 continue; /* skip the `addsize' at the end */
807 sprintf(buff
, form
, s
);
811 default: { /* also treat cases `pnLlh' */
812 return luaL_error(L
, "invalid option " LUA_QL("%%%c") " to "
813 LUA_QL("format"), *(strfrmt
- 1));
816 luaL_addlstring(&b
, buff
, strlen(buff
));
824 static const luaL_Reg strlib
[] = {
829 {"format", str_format
},
830 {"gfind", gfind_nodef
},
834 {"lower", str_lower
},
835 {"match", str_match
},
837 {"reverse", str_reverse
},
839 {"upper", str_upper
},
/*
** Build a metatable, install it as the metatable of a string value, and
** set its "__index" field.
** The statements below are order-dependent stack manipulation:
**   1. a new table is created and left on top of the stack;
**   2. an empty string is pushed as a dummy value, a copy of the table
**      is pushed above it, and lua_setmetatable attaches that copy to
**      the dummy string, which is then popped;
**   3. the value just below the new table is copied to the top and
**      stored in the table's "__index" field, after which the table is
**      popped.
** NOTE(review): step 3 assumes the caller left the string library table
** directly on top of the stack before calling -- confirm at the call
** site.
*/
844 static void createmetatable (lua_State
*L
) {
845 lua_createtable(L
, 0, 1); /* create metatable for strings */
846 lua_pushliteral(L
, ""); /* dummy string */
847 lua_pushvalue(L
, -2); /* copy of the new metatable, above the dummy string */
848 lua_setmetatable(L
, -2); /* set string metatable */
849 lua_pop(L
, 1); /* pop dummy string */
850 lua_pushvalue(L
, -2); /* string library... */
851 lua_setfield(L
, -2, "__index"); /* ...is the __index metamethod */
852 lua_pop(L
, 1); /* pop metatable */
857 ** Open string library
859 LUALIB_API
int luaopen_string (lua_State
*L
) {
860 luaL_register(L
, LUA_STRLIBNAME
, strlib
);
861 #if defined(LUA_COMPAT_GFIND)
862 lua_getfield(L
, -1, "gmatch");
863 lua_setfield(L
, -2, "gfind");