2 ** $Id: lstrlib.c,v 1.56 2000/10/27 16:15:53 roberto Exp $
3 ** Standard library for string operations and pattern-matching
4 ** See Copyright Notice in lua.h
21 static int str_len (lua_State
*L
) {
23 luaL_check_lstr(L
, 1, &l
);
29 static long posrelat (long pos
, size_t len
) {
30 /* relative string position: negative means back from end */
31 return (pos
>=0) ? pos
: (long)len
+pos
+1;
35 static int str_sub (lua_State
*L
) {
37 const char *s
= luaL_check_lstr(L
, 1, &l
);
38 long start
= posrelat(luaL_check_long(L
, 2), l
);
39 long end
= posrelat(luaL_opt_long(L
, 3, -1), l
);
40 if (start
< 1) start
= 1;
41 if (end
> (long)l
) end
= l
;
43 lua_pushlstring(L
, s
+start
-1, end
-start
+1);
44 else lua_pushstring(L
, "");
49 static int str_lower (lua_State
*L
) {
53 const char *s
= luaL_check_lstr(L
, 1, &l
);
56 luaL_putchar(&b
, tolower((unsigned char)(s
[i
])));
62 static int str_upper (lua_State
*L
) {
66 const char *s
= luaL_check_lstr(L
, 1, &l
);
69 luaL_putchar(&b
, toupper((unsigned char)(s
[i
])));
74 static int str_rep (lua_State
*L
) {
77 const char *s
= luaL_check_lstr(L
, 1, &l
);
78 int n
= luaL_check_int(L
, 2);
81 luaL_addlstring(&b
, s
, l
);
87 static int str_byte (lua_State
*L
) {
89 const char *s
= luaL_check_lstr(L
, 1, &l
);
90 long pos
= posrelat(luaL_opt_long(L
, 2, 1), l
);
91 luaL_arg_check(L
, 0<pos
&& (size_t)pos
<=l
, 2, "out of range");
92 lua_pushnumber(L
, (unsigned char)s
[pos
-1]);
97 static int str_char (lua_State
*L
) {
98 int n
= lua_gettop(L
); /* number of arguments */
101 luaL_buffinit(L
, &b
);
102 for (i
=1; i
<=n
; i
++) {
103 int c
= luaL_check_int(L
, i
);
104 luaL_arg_check(L
, (unsigned char)c
== c
, i
, "invalid value");
105 luaL_putchar(&b
, (unsigned char)c
);
114 ** {======================================================
116 ** =======================================================
120 #define MAX_CAPTURES 32 /* arbitrary limit */
125 const char *src_end
; /* end ('\0') of source string */
126 int level
; /* total number of captures (finished or unfinished) */
129 long len
; /* -1 signals unfinished capture */
130 } capture
[MAX_CAPTURES
];
135 #define SPECIALS "^$*+?.([%-"
138 static int check_capture (lua_State
*L
, int l
, struct Capture
*cap
) {
140 if (!(0 <= l
&& l
< cap
->level
&& cap
->capture
[l
].len
!= -1))
141 lua_error(L
, "invalid capture index");
146 static int capture_to_close (lua_State
*L
, struct Capture
*cap
) {
147 int level
= cap
->level
;
148 for (level
--; level
>=0; level
--)
149 if (cap
->capture
[level
].len
== -1) return level
;
150 lua_error(L
, "invalid pattern capture");
151 return 0; /* to avoid warnings */
155 const char *luaI_classend (lua_State
*L
, const char *p
) {
158 if (*p
== '\0') lua_error(L
, "malformed pattern (ends with `%')");
162 do { /* look for a ']' */
163 if (*p
== '\0') lua_error(L
, "malformed pattern (missing `]')");
164 if (*(p
++) == ESC
&& *p
!= '\0') p
++; /* skip escapes (e.g. '%]') */
173 static int match_class (int c
, int cl
) {
175 switch (tolower(cl
)) {
176 case 'a' : res
= isalpha(c
); break;
177 case 'c' : res
= iscntrl(c
); break;
178 case 'd' : res
= isdigit(c
); break;
179 case 'l' : res
= islower(c
); break;
180 case 'p' : res
= ispunct(c
); break;
181 case 's' : res
= isspace(c
); break;
182 case 'u' : res
= isupper(c
); break;
183 case 'w' : res
= isalnum(c
); break;
184 case 'x' : res
= isxdigit(c
); break;
185 case 'z' : res
= (c
== '\0'); break;
186 default: return (cl
== c
);
188 return (islower(cl
) ? res
: !res
);
193 static int matchbracketclass (int c
, const char *p
, const char *endclass
) {
197 p
++; /* skip the '^' */
199 while (++p
< endclass
) {
202 if (match_class(c
, (unsigned char)*p
))
205 else if ((*(p
+1) == '-') && (p
+2 < endclass
)) {
207 if ((int)(unsigned char)*(p
-2) <= c
&& c
<= (int)(unsigned char)*p
)
210 else if ((int)(unsigned char)*p
== c
) return sig
;
217 int luaI_singlematch (int c
, const char *p
, const char *ep
) {
219 case '.': /* matches any char */
222 return match_class(c
, (unsigned char)*(p
+1));
224 return matchbracketclass(c
, p
, ep
-1);
226 return ((unsigned char)*p
== c
);
231 static const char *match (lua_State
*L
, const char *s
, const char *p
,
232 struct Capture
*cap
);
235 static const char *matchbalance (lua_State
*L
, const char *s
, const char *p
,
236 struct Capture
*cap
) {
237 if (*p
== 0 || *(p
+1) == 0)
238 lua_error(L
, "unbalanced pattern");
239 if (*s
!= *p
) return NULL
;
244 while (++s
< cap
->src_end
) {
246 if (--cont
== 0) return s
+1;
248 else if (*s
== b
) cont
++;
251 return NULL
; /* string ends out of balance */
255 static const char *max_expand (lua_State
*L
, const char *s
, const char *p
,
256 const char *ep
, struct Capture
*cap
) {
257 long i
= 0; /* counts maximum expand for item */
258 while ((s
+i
)<cap
->src_end
&& luaI_singlematch((unsigned char)*(s
+i
), p
, ep
))
260 /* keeps trying to match with the maximum repetitions */
262 const char *res
= match(L
, (s
+i
), ep
+1, cap
);
264 i
--; /* else didn't match; reduce 1 repetition to try again */
270 static const char *min_expand (lua_State
*L
, const char *s
, const char *p
,
271 const char *ep
, struct Capture
*cap
) {
273 const char *res
= match(L
, s
, ep
+1, cap
);
276 else if (s
<cap
->src_end
&& luaI_singlematch((unsigned char)*s
, p
, ep
))
277 s
++; /* try with one more repetition */
283 static const char *start_capture (lua_State
*L
, const char *s
, const char *p
,
284 struct Capture
*cap
) {
286 int level
= cap
->level
;
287 if (level
>= MAX_CAPTURES
) lua_error(L
, "too many captures");
288 cap
->capture
[level
].init
= s
;
289 cap
->capture
[level
].len
= -1;
290 cap
->level
= level
+1;
291 if ((res
=match(L
, s
, p
+1, cap
)) == NULL
) /* match failed? */
292 cap
->level
--; /* undo capture */
297 static const char *end_capture (lua_State
*L
, const char *s
, const char *p
,
298 struct Capture
*cap
) {
299 int l
= capture_to_close(L
, cap
);
301 cap
->capture
[l
].len
= s
- cap
->capture
[l
].init
; /* close capture */
302 if ((res
= match(L
, s
, p
+1, cap
)) == NULL
) /* match failed? */
303 cap
->capture
[l
].len
= -1; /* undo capture */
308 static const char *match_capture (lua_State
*L
, const char *s
, int level
,
309 struct Capture
*cap
) {
310 int l
= check_capture(L
, level
, cap
);
311 size_t len
= cap
->capture
[l
].len
;
312 if ((size_t)(cap
->src_end
-s
) >= len
&&
313 memcmp(cap
->capture
[l
].init
, s
, len
) == 0)
319 static const char *match (lua_State
*L
, const char *s
, const char *p
,
320 struct Capture
*cap
) {
321 init
: /* using goto's to optimize tail recursion */
323 case '(': /* start capture */
324 return start_capture(L
, s
, p
, cap
);
325 case ')': /* end capture */
326 return end_capture(L
, s
, p
, cap
);
327 case ESC
: /* may be %[0-9] or %b */
328 if (isdigit((unsigned char)(*(p
+1)))) { /* capture? */
329 s
= match_capture(L
, s
, *(p
+1), cap
);
330 if (s
== NULL
) return NULL
;
331 p
+=2; goto init
; /* else return match(L, s, p+2, cap) */
333 else if (*(p
+1) == 'b') { /* balanced string? */
334 s
= matchbalance(L
, s
, p
+2, cap
);
335 if (s
== NULL
) return NULL
;
336 p
+=4; goto init
; /* else return match(L, s, p+4, cap); */
338 else goto dflt
; /* case default */
339 case '\0': /* end of pattern */
340 return s
; /* match succeeded */
342 if (*(p
+1) == '\0') /* is the '$' the last char in pattern? */
343 return (s
== cap
->src_end
) ? s
: NULL
; /* check end of string */
345 default: dflt
: { /* it is a pattern item */
346 const char *ep
= luaI_classend(L
, p
); /* points to what is next */
347 int m
= s
<cap
->src_end
&& luaI_singlematch((unsigned char)*s
, p
, ep
);
349 case '?': { /* optional */
351 if (m
&& ((res
=match(L
, s
+1, ep
+1, cap
)) != NULL
))
353 p
=ep
+1; goto init
; /* else return match(L, s, ep+1, cap); */
355 case '*': /* 0 or more repetitions */
356 return max_expand(L
, s
, p
, ep
, cap
);
357 case '+': /* 1 or more repetitions */
358 return (m
? max_expand(L
, s
+1, p
, ep
, cap
) : NULL
);
359 case '-': /* 0 or more repetitions (minimum) */
360 return min_expand(L
, s
, p
, ep
, cap
);
363 s
++; p
=ep
; goto init
; /* else return match(L, s+1, ep, cap); */
371 static const char *lmemfind (const char *s1
, size_t l1
,
372 const char *s2
, size_t l2
) {
373 if (l2
== 0) return s1
; /* empty strings are everywhere */
374 else if (l2
> l1
) return NULL
; /* avoids a negative `l1' */
376 const char *init
; /* to search for a `*s2' inside `s1' */
377 l2
--; /* 1st char will be checked by `memchr' */
378 l1
= l1
-l2
; /* `s2' cannot be found after that */
379 while (l1
> 0 && (init
= (const char *)memchr(s1
, *s2
, l1
)) != NULL
) {
380 init
++; /* 1st char is already checked */
381 if (memcmp(init
, s2
+1, l2
) == 0)
383 else { /* correct `l1' and `s1' to try again */
388 return NULL
; /* not found */
393 static int push_captures (lua_State
*L
, struct Capture
*cap
) {
395 luaL_checkstack(L
, cap
->level
, "too many captures");
396 for (i
=0; i
<cap
->level
; i
++) {
397 int l
= cap
->capture
[i
].len
;
398 if (l
== -1) lua_error(L
, "unfinished capture");
399 lua_pushlstring(L
, cap
->capture
[i
].init
, l
);
401 return cap
->level
; /* number of strings pushed */
405 static int str_find (lua_State
*L
) {
407 const char *s
= luaL_check_lstr(L
, 1, &l1
);
408 const char *p
= luaL_check_lstr(L
, 2, &l2
);
409 long init
= posrelat(luaL_opt_long(L
, 3, 1), l1
) - 1;
411 luaL_arg_check(L
, 0 <= init
&& (size_t)init
<= l1
, 3, "out of range");
412 if (lua_gettop(L
) > 3 || /* extra argument? */
413 strpbrk(p
, SPECIALS
) == NULL
) { /* or no special characters? */
414 const char *s2
= lmemfind(s
+init
, l1
-init
, p
, l2
);
416 lua_pushnumber(L
, s2
-s
+1);
417 lua_pushnumber(L
, s2
-s
+l2
);
422 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
423 const char *s1
=s
+init
;
428 if ((res
=match(L
, s1
, p
, &cap
)) != NULL
) {
429 lua_pushnumber(L
, s1
-s
+1); /* start */
430 lua_pushnumber(L
, res
-s
); /* end */
431 return push_captures(L
, &cap
) + 2;
433 } while (s1
++<cap
.src_end
&& !anchor
);
435 lua_pushnil(L
); /* not found */
440 static void add_s (lua_State
*L
, luaL_Buffer
*b
, struct Capture
*cap
) {
441 if (lua_isstring(L
, 3)) {
442 const char *news
= lua_tostring(L
, 3);
443 size_t l
= lua_strlen(L
, 3);
445 for (i
=0; i
<l
; i
++) {
447 luaL_putchar(b
, news
[i
]);
450 if (!isdigit((unsigned char)news
[i
]))
451 luaL_putchar(b
, news
[i
]);
453 int level
= check_capture(L
, news
[i
], cap
);
454 luaL_addlstring(b
, cap
->capture
[level
].init
, cap
->capture
[level
].len
);
459 else { /* is a function */
462 n
= push_captures(L
, cap
);
463 lua_rawcall(L
, n
, 1);
464 if (lua_isstring(L
, -1))
465 luaL_addvalue(b
); /* add return to accumulated result */
467 lua_pop(L
, 1); /* function result is not a string: pop it */
472 static int str_gsub (lua_State
*L
) {
474 const char *src
= luaL_check_lstr(L
, 1, &srcl
);
475 const char *p
= luaL_check_string(L
, 2);
476 int max_s
= luaL_opt_int(L
, 4, srcl
+1);
477 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
482 lua_gettop(L
) >= 3 && (lua_isstring(L
, 3) || lua_isfunction(L
, 3)),
483 3, "string or function expected");
484 luaL_buffinit(L
, &b
);
485 cap
.src_end
= src
+srcl
;
489 e
= match(L
, src
, p
, &cap
);
494 if (e
&& e
>src
) /* non empty match? */
495 src
= e
; /* skip it */
496 else if (src
< cap
.src_end
)
497 luaL_putchar(&b
, *src
++);
501 luaL_addlstring(&b
, src
, cap
.src_end
-src
);
503 lua_pushnumber(L
, n
); /* number of substitutions */
507 /* }====================================================== */
510 static void luaI_addquoted (lua_State
*L
, luaL_Buffer
*b
, int arg
) {
512 const char *s
= luaL_check_lstr(L
, arg
, &l
);
513 luaL_putchar(b
, '"');
516 case '"': case '\\': case '\n':
517 luaL_putchar(b
, '\\');
520 case '\0': luaL_addlstring(b
, "\\000", 4); break;
521 default: luaL_putchar(b
, *s
);
525 luaL_putchar(b
, '"');
528 /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
530 /* maximum size of each format specification (such as '%-099.99d') */
531 #define MAX_FORMAT 20
533 static int str_format (lua_State
*L
) {
535 const char *strfrmt
= luaL_check_string(L
, arg
);
537 luaL_buffinit(L
, &b
);
540 luaL_putchar(&b
, *strfrmt
++);
541 else if (*++strfrmt
== '%')
542 luaL_putchar(&b
, *strfrmt
++); /* %% */
543 else { /* format item */
545 char form
[MAX_FORMAT
]; /* to store the format ('%...') */
546 char buff
[MAX_ITEM
]; /* to store the formatted item */
547 const char *initf
= strfrmt
;
549 if (isdigit((unsigned char)*initf
) && *(initf
+1) == '$') {
551 initf
+= 2; /* skip the 'n$' */
554 cap
.src_end
= strfrmt
+strlen(strfrmt
)+1;
556 strfrmt
= match(L
, initf
, "[-+ #0]*(%d*)%.?(%d*)", &cap
);
557 if (cap
.capture
[0].len
> 2 || cap
.capture
[1].len
> 2 || /* < 100? */
558 strfrmt
-initf
> MAX_FORMAT
-2)
559 lua_error(L
, "invalid format (width or precision too long)");
560 strncpy(form
+1, initf
, strfrmt
-initf
+1); /* +1 to include conversion */
561 form
[strfrmt
-initf
+2] = 0;
562 switch (*strfrmt
++) {
563 case 'c': case 'd': case 'i':
564 sprintf(buff
, form
, luaL_check_int(L
, arg
));
566 case 'o': case 'u': case 'x': case 'X':
567 sprintf(buff
, form
, (unsigned int)luaL_check_number(L
, arg
));
569 case 'e': case 'E': case 'f': case 'g': case 'G':
570 sprintf(buff
, form
, luaL_check_number(L
, arg
));
573 luaI_addquoted(L
, &b
, arg
);
574 continue; /* skip the "addsize" at the end */
577 const char *s
= luaL_check_lstr(L
, arg
, &l
);
578 if (cap
.capture
[1].len
== 0 && l
>= 100) {
579 /* no precision and string is too long to be formatted;
580 keep original string */
581 lua_pushvalue(L
, arg
);
583 continue; /* skip the "addsize" at the end */
586 sprintf(buff
, form
, s
);
590 default: /* also treat cases 'pnLlh' */
591 lua_error(L
, "invalid option in `format'");
593 luaL_addlstring(&b
, buff
, strlen(buff
));
601 static const struct luaL_reg strlib
[] = {
604 {"strlower", str_lower
},
605 {"strupper", str_upper
},
606 {"strchar", str_char
},
608 {"ascii", str_byte
}, /* for compatibility with 3.0 and earlier */
609 {"strbyte", str_byte
},
610 {"format", str_format
},
611 {"strfind", str_find
},
617 ** Open string library
619 LUALIB_API
void lua_strlibopen (lua_State
*L
) {
620 luaL_openl(L
, strlib
);