2 ** $Id: lstrlib.c,v 1.32 1999/06/17 17:04:03 roberto Exp $
3 ** Standard library for strings and pattern-matching
4 ** See Copyright Notice in lua.h
/*
** addnchar: receives a char pointer `s` and a count `n`.  The visible part
** asks luaL_openspace for `n` (presumably bytes of room in the shared
** luaL buffer -- confirm against the auxiliary library).
** NOTE(review): the remainder of the body is not present in this listing.
*/
19 static void addnchar (char *s
, int n
)
21 char *b
= luaL_openspace(n
);
/*
** str_len: reads argument 1 with luaL_check_lstr, which also receives the
** address of `l` (presumably filled with the string length -- confirm).
** NOTE(review): what is done with the result is not visible in this listing.
*/
27 static void str_len (void)
30 luaL_check_lstr(1, &l
);
/*
** closeandpush: hands luaL_buffer() together with luaL_getsize() to
** lua_pushlstring, i.e. pushes the accumulated buffer as a counted string.
*/
35 static void closeandpush (void) {
36 lua_pushlstring(luaL_buffer(), luaL_getsize());
/*
** posrelat: normalizes a string position against a string of length `len`.
** A `pos` >= 0 is returned unchanged; a negative `pos` yields len+pos+1,
** so -1 maps to `len` (the last character).  The result is not clamped
** here; the callers visible in this file range-check or clamp it.
*/
40 static long posrelat (long pos
, long len
) {
41 /* relative string position: negative means back from end */
42 return (pos
>=0) ? pos
: len
+pos
+1;
/*
** str_sub: substring extraction.  Argument 1 is the subject (its length is
** received in `l`), argument 2 the start, argument 3 the end; both
** positions are run through posrelat.  The end is read with
** luaL_opt_long(3, -1), i.e. -1 is the fallback value handed to the
** optional-argument reader.
** NOTE(review): the condition that selects between the two pushes below is
** not present in this listing.
*/
46 static void str_sub (void) {
48 char *s
= luaL_check_lstr(1, &l
);
49 long start
= posrelat(luaL_check_long(2), l
);
50 long end
= posrelat(luaL_opt_long(3, -1), l
);
/* a start that falls before the first character is moved up to 1 */
51 if (start
< 1) start
= 1;
/* s+start-1 is the address of 1-based position `start`;
   end-start+1 is the inclusive character count */
54 lua_pushlstring(s
+start
-1, end
-start
+1);
55 else lua_pushstring("");
/*
** str_lower: reads argument 1 (length received in `l`) and appends
** tolower(s[i]) to the luaL buffer.
** NOTE(review): the loop over `i` and the final push are not visible here.
*/
59 static void str_lower (void) {
62 char *s
= luaL_check_lstr(1, &l
);
/* the unsigned char cast keeps the value in the range <ctype.h> accepts */
65 luaL_addchar(tolower((unsigned char)(s
[i
])));
/*
** str_upper: reads argument 1 (length received in `l`) and appends
** toupper(s[i]) to the luaL buffer.
** NOTE(review): the loop over `i` and the final push are not visible here.
*/
70 static void str_upper (void) {
73 char *s
= luaL_check_lstr(1, &l
);
/* the unsigned char cast keeps the value in the range <ctype.h> accepts */
76 luaL_addchar(toupper((unsigned char)(s
[i
])));
/*
** str_rep: reads a string from argument 1 (length received in `l`) and an
** int `n` from argument 2.
** NOTE(review): the code that uses `s` and `n` is not present in this
** listing; presumably it repeats the string n times -- confirm.
*/
80 static void str_rep (void)
83 char *s
= luaL_check_lstr(1, &l
);
84 int n
= luaL_check_int(2);
/*
** str_byte: pushes the numeric code of one character of argument 1.
** The position comes from optional argument 2 (fallback value 1) and is
** normalized by posrelat, so negative positions count from the end.
** Raises an "out of range" argument error unless 0 < pos <= l.
*/
92 static void str_byte (void) {
94 char *s
= luaL_check_lstr(1, &l
);
95 long pos
= posrelat(luaL_opt_long(2, 1), l
);
96 luaL_arg_check(0<pos
&& pos
<=l
, 2, "out of range");
/* pos is 1-based; the cast makes the pushed code non-negative */
97 lua_pushnumber((unsigned char)s
[pos
-1]);
/*
** str_char: walks the arguments until lua_getparam returns LUA_NOOBJECT,
** reads each as a number and appends it to the luaL buffer as one char.
** An argument whose value does not survive a round trip through
** unsigned char is rejected with "invalid value".
** NOTE(review): in ISO C, converting a double whose truncated value does
** not fit in unsigned char is undefined behavior, so the round-trip test
** relies on the platform producing some value different from `c`;
** consider an explicit range/integrality test before the cast.
*/
101 static void str_char (void) {
104 while (lua_getparam(++i
) != LUA_NOOBJECT
) {
105 double c
= luaL_check_number(i
);
106 luaL_arg_check((unsigned char)c
== c
, i
, "invalid value");
107 luaL_addchar((unsigned char)c
);
115 ** {======================================================
117 ** =======================================================
121 #define MAX_CAPT 32 /* arbitrary limit */
126 char *src_end
; /* end ('\0') of source string */
127 int level
; /* total number of captures (finished or unfinished) */
130 int len
; /* -1 signals unfinished capture */
136 #define SPECIALS "^$*+?.([%-"
/*
** push_captures: pushes every capture recorded in `cap`, in index order
** 0 .. cap->level-1, each as a counted string (init, len).
** Raises "unfinished capture" if any capture still has len == -1.
*/
139 static void push_captures (struct Capture
*cap
) {
141 for (i
=0; i
<cap
->level
; i
++) {
142 int l
= cap
->capture
[i
].len
;
143 if (l
== -1) lua_error("unfinished capture");
144 lua_pushlstring(cap
->capture
[i
].init
, l
);
/*
** check_cap: validates a capture index.  Raises "invalid capture index"
** unless 0 <= l < cap->level and capture l is finished (len != -1).
** NOTE(review): callers in this file pass a pattern character (e.g.
** *(p+1), news[i]); any conversion of that character to an index, and the
** returned value, are on lines not present in this listing.
*/
149 static int check_cap (int l
, struct Capture
*cap
) {
151 if (!(0 <= l
&& l
< cap
->level
&& cap
->capture
[l
].len
!= -1))
152 lua_error("invalid capture index");
/*
** capture_to_close: returns the index of the capture that a ')' should
** close.  Scans from the highest index (cap->level-1) down to 0 and
** returns the first one whose len is still -1, i.e. the most recently
** opened unfinished capture.  Raises "invalid pattern capture" when every
** capture is already closed (or there are none).
*/
157 static int capture_to_close (struct Capture
*cap
) {
158 int level
= cap
->level
;
159 for (level
--; level
>=0; level
--)
160 if (cap
->capture
[level
].len
== -1) return level
;
161 lua_error("invalid pattern capture");
162 return 0; /* to avoid warnings */
/*
** luaI_classend: takes a pointer into a pattern and returns a char pointer;
** `match` uses the result as "what is next" after the current item.
** Two malformed-pattern errors are visible: one when the pattern ends
** with the escape character, and one when `p` becomes NULL while
** (presumably) searching for the closing ']' of a set.
** NOTE(review): the scanning logic itself is not present in this listing.
*/
166 char *luaI_classend (char *p
) {
170 luaL_verror("incorrect pattern (ends with `%c')", ESC
);
176 if (!p
) lua_error("incorrect pattern (missing `]')");
/*
** matchclass: tests character `c` against the class letter `cl`.
** The letter is looked up case-insensitively (switch on tolower(cl)):
**   a alpha, c control, d digit, l lower, p punctuation, s space,
**   u upper, w alphanumeric, x hex digit, z the zero character.
** Any other `cl` is treated as a literal and compared with `c` directly.
** For a class letter, a lowercase `cl` returns the test result and an
** uppercase `cl` returns its negation.
*/
184 static int matchclass (int c
, int cl
) {
186 switch (tolower(cl
)) {
187 case 'a' : res
= isalpha(c
); break;
188 case 'c' : res
= iscntrl(c
); break;
189 case 'd' : res
= isdigit(c
); break;
190 case 'l' : res
= islower(c
); break;
191 case 'p' : res
= ispunct(c
); break;
192 case 's' : res
= isspace(c
); break;
193 case 'u' : res
= isupper(c
); break;
194 case 'w' : res
= isalnum(c
); break;
195 case 'x' : res
= isxdigit(c
); break;
196 case 'z' : res
= (c
== '\0'); break;
197 default: return (cl
== c
);
/* uppercase class letter means the complement of the class */
199 return (islower(cl
) ? res
: !res
);
/*
** matchbracketclass: tests character `c` against the set that starts at
** `p` and is bounded by `end`.  Visible pieces:
**   - a leading '^' is skipped (presumably flipping `sig` -- confirm);
**   - a class escape inside the set is delegated to matchclass;
**   - a range "x-y" is tested as x <= c <= y on unsigned char values
**     (by then `p` has evidently been advanced past the '-', since the
**     lower bound is read from p-2);
**   - a plain set member is compared with `c` and returns `sig`.
** NOTE(review): the loop, the definition of `sig` and the remaining
** returns are not present in this listing.
*/
204 static int matchbracketclass (int c
, char *p
, char *end
) {
208 p
++; /* skip the '^' */
213 if ((p
< end
) && matchclass(c
, (unsigned char)*p
))
216 else if ((*(p
+1) == '-') && (p
+2 < end
)) {
218 if ((int)(unsigned char)*(p
-2) <= c
&& c
<= (int)(unsigned char)*p
)
221 else if ((unsigned char)*p
== c
) return sig
;
/*
** luaI_singlematch: tests whether character `c` matches the single pattern
** item that starts at `p` and ends just before `ep`.  Visible outcomes:
**   - '.' matches any character;
**   - one branch delegates to matchclass with the character after `p`
**     (presumably the escape case -- its label is not in this listing);
**   - one branch delegates to matchbracketclass with ep-1 as the bound
**     (presumably the '[' case -- label not in this listing);
**   - otherwise the pattern character is compared literally with `c`.
*/
228 int luaI_singlematch (int c
, char *p
, char *ep
) {
230 case '.': /* matches any char */
233 return matchclass(c
, (unsigned char)*(p
+1));
235 return matchbracketclass(c
, p
, ep
-1);
237 return ((unsigned char)*p
== c
);
/* forward declaration: the expansion and capture helpers below call match,
   and match in turn calls them */
242 static char *match (char *s
, char *p
, struct Capture
*cap
);
/*
** matchbalance: matches a balanced run in the subject.  `p` must point at
** two delimiter characters; if either is missing the pattern is rejected
** with "unbalanced pattern".  Fails (NULL) immediately when the subject
** does not start with the first delimiter.  Otherwise scans forward up to
** cap->src_end keeping a nesting counter `cont`; when the counter drops
** to zero the position just past the closing delimiter is returned.
** Returns NULL if the subject ends before the run is balanced.
** NOTE(review): the initialisation of `b`, `cont` and the test that
** precedes `--cont` are not present in this listing.
*/
245 static char *matchbalance (char *s
, char *p
, struct Capture
*cap
) {
246 if (*p
== 0 || *(p
+1) == 0)
247 lua_error("unbalanced pattern");
248 if (*s
!= *p
) return NULL
;
253 while (++s
< cap
->src_end
) {
255 if (--cont
== 0) return s
+1;
257 else if (*s
== b
) cont
++;
260 return NULL
; /* string ends out of balance */
/*
** max_expand: greedy repetition of the single item [p, ep).  First counts
** in `i` how many consecutive subject characters starting at `s` match the
** item (never reading at or past cap->src_end), then tries to match the
** rest of the pattern (ep+1) at s+i, giving back one repetition at a time
** when that fails.
** NOTE(review): the loop that drives the back-off and the return
** statements are not present in this listing.
*/
264 static char *max_expand (char *s
, char *p
, char *ep
, struct Capture
*cap
) {
265 int i
= 0; /* counts maximum expand for item */
266 while ((s
+i
)<cap
->src_end
&& luaI_singlematch((unsigned char)*(s
+i
), p
, ep
))
268 /* keeps trying to match with the maximum repetitions */
270 char *res
= match((s
+i
), ep
+1, cap
);
272 i
--; /* else didn't match; reduce 1 repetition to try again */
/*
** min_expand: lazy repetition of the single item [p, ep).  At each step it
** first tries to match the rest of the pattern (ep+1) at the current `s`;
** only if that fails, and the current subject character (before
** cap->src_end) matches the item, does it consume one more character.
** NOTE(review): the enclosing loop and the return statements are not
** present in this listing.
*/
278 static char *min_expand (char *s
, char *p
, char *ep
, struct Capture
*cap
) {
280 char *res
= match(s
, ep
+1, cap
);
283 else if (s
<cap
->src_end
&& luaI_singlematch((unsigned char)*s
, p
, ep
))
284 s
++; /* try with one more repetition */
/*
** start_capt: handles '(' in a pattern.  Opens a new capture at index
** cap->level: records the subject position `s` as its start, marks it
** unfinished (len = -1) and bumps cap->level, then matches the rest of
** the pattern (p+1).  If that fails the level is decremented again so the
** capture disappears.  Raises "too many captures" when MAX_CAPT captures
** are already open or recorded.
** NOTE(review): the declaration of `res` and the return are not present
** in this listing.
*/
290 static char *start_capt (char *s
, char *p
, struct Capture
*cap
) {
292 int level
= cap
->level
;
293 if (level
>= MAX_CAPT
) lua_error("too many captures");
294 cap
->capture
[level
].init
= s
;
295 cap
->capture
[level
].len
= -1;
296 cap
->level
= level
+1;
297 if ((res
=match(s
, p
+1, cap
)) == NULL
) /* match failed? */
298 cap
->level
--; /* undo capture */
/*
** end_capt: handles ')' in a pattern.  Picks the capture to close with
** capture_to_close, sets its length to the distance from its start to the
** current subject position `s`, then matches the rest of the pattern
** (p+1).  If that fails the capture is marked unfinished again (len = -1)
** so that backtracking sees the previous state.
** NOTE(review): the declaration of `res` and the return are not present
** in this listing.
*/
303 static char *end_capt (char *s
, char *p
, struct Capture
*cap
) {
304 int l
= capture_to_close(cap
);
306 cap
->capture
[l
].len
= s
- cap
->capture
[l
].init
; /* close capture */
307 if ((res
= match(s
, p
+1, cap
)) == NULL
) /* match failed? */
308 cap
->capture
[l
].len
= -1; /* undo capture */
/*
** match_capture: back-reference to an earlier capture.  `level` is
** validated through check_cap; the test below then requires that at least
** `len` characters remain before cap->src_end and that they are
** byte-for-byte equal (memcmp) to the text of that capture.
** NOTE(review): the values returned on success and failure are not
** present in this listing.
*/
313 static char *match_capture (char *s
, int level
, struct Capture
*cap
) {
314 int l
= check_cap(level
, cap
);
315 int len
= cap
->capture
[l
].len
;
/* length test first, so memcmp never reads past the end of the subject */
316 if (cap
->src_end
-s
>= len
&&
317 memcmp(cap
->capture
[l
].init
, s
, len
) == 0)
/*
** match: core matcher.  Tries to match pattern `p` against the subject at
** `s`, recording captures in `cap`.  Returns the subject position reached
** when the end of the pattern is hit (the '\0' case returns `s`), or NULL
** when a branch fails.  Where a recursive tail call would do, the code
** updates `s`/`p` and jumps back to `init` instead.
** Dispatch on the current pattern character:
**   '('   open a capture (start_capt)
**   ')'   close a capture (end_capt)
**   ESC   digit => back-reference (match_capture), consumes 2 pattern chars;
**         'b'   => balanced match (matchbalance), consumes 4 pattern chars;
**         anything else is handled as an ordinary item
**   '\0'  end of pattern: success
**   '$'   as last pattern char: succeeds only at cap->src_end
**   other a single item, optionally followed by ?, *, + or -
** NOTE(review): the switch headers, the '$' case label and several
** returns are not present in this listing.
*/
323 static char *match (char *s
, char *p
, struct Capture
*cap
) {
324 init
: /* using goto's to optimize tail recursion */
326 case '(': /* start capture */
327 return start_capt(s
, p
, cap
);
328 case ')': /* end capture */
329 return end_capt(s
, p
, cap
);
330 case ESC
: /* may be %[0-9] or %b */
331 if (isdigit((unsigned char)(*(p
+1)))) { /* capture? */
332 s
= match_capture(s
, *(p
+1), cap
);
333 if (s
== NULL
) return NULL
;
334 p
+=2; goto init
; /* else return match(p+2, s, cap) */
336 else if (*(p
+1) == 'b') { /* balanced string? */
337 s
= matchbalance(s
, p
+2, cap
);
338 if (s
== NULL
) return NULL
;
339 p
+=4; goto init
; /* else return match(p+4, s, cap); */
341 else goto dflt
; /* case default */
342 case '\0': /* end of pattern */
343 return s
; /* match succeeded */
345 if (*(p
+1) == '\0') /* is the '$' the last char in pattern? */
346 return (s
== cap
->src_end
) ? s
: NULL
; /* check end of string */
348 default: dflt
: { /* it is a pattern item */
349 char *ep
= luaI_classend(p
); /* points to what is next */
/* m: does the current subject char (if any is left) match the item? */
350 int m
= s
<cap
->src_end
&& luaI_singlematch((unsigned char)*s
, p
, ep
);
352 case '?': { /* optional */
/* first try with the item consumed; otherwise continue without it */
354 if (m
&& ((res
=match(s
+1, ep
+1, cap
)) != NULL
))
356 p
=ep
+1; goto init
; /* else return match(s, ep+1, cap); */
358 case '*': /* 0 or more repetitions */
359 return max_expand(s
, p
, ep
, cap
);
360 case '+': /* 1 or more repetitions */
361 return (m
? max_expand(s
+1, p
, ep
, cap
) : NULL
);
362 case '-': /* 0 or more repetitions (minimum) */
363 return min_expand(s
, p
, ep
, cap
);
366 s
++; p
=ep
; goto init
; /* else return match(s+1, ep, cap); */
/*
** str_find: searches argument 1 (subject, length received in `l`) for
** argument 2 (pattern).  Optional argument 3 is the 1-based start position
** (fallback 1, normalized by posrelat, then made 0-based); it must satisfy
** 0 <= init <= l or an "out of range" argument error is raised.
** Two modes:
**   - plain search, used when a 4th argument is present or the pattern
**     contains none of the SPECIALS characters: strstr from s+init;
**     pushes the 1-based start and end of the occurrence;
**   - pattern search: an initial '^' anchors the match; otherwise every
**     start position s1 up to cap.src_end is tried.  Pushes the 1-based
**     start and end of the match.
** Pushes nil when the pattern-search loop ends without a match.
** NOTE(review): strstr works on NUL-terminated strings, so in plain mode
** nothing beyond an embedded zero in the subject can be found even though
** the length `l` is known -- confirm whether that is acceptable.
** NOTE(review): the not-found handling of the plain branch and the
** capture pushes are not present in this listing.
*/
373 static void str_find (void) {
375 char *s
= luaL_check_lstr(1, &l
);
376 char *p
= luaL_check_string(2);
377 long init
= posrelat(luaL_opt_long(3, 1), l
) - 1;
379 luaL_arg_check(0 <= init
&& init
<= l
, 3, "out of range");
380 if (lua_getparam(4) != LUA_NOOBJECT
||
381 strpbrk(p
, SPECIALS
) == NULL
) { /* no special characters? */
382 char *s2
= strstr(s
+init
, p
);
384 lua_pushnumber(s2
-s
+1);
385 lua_pushnumber(s2
-s
+strlen(p
));
/* the comma expression skips the '^' and yields 1 when the pattern is anchored */
390 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
396 if ((res
=match(s1
, p
, &cap
)) != NULL
) {
397 lua_pushnumber(s1
-s
+1); /* start */
398 lua_pushnumber(res
-s
); /* end */
/* an anchored pattern is tried at the first position only */
402 } while (s1
++<cap
.src_end
&& !anchor
);
404 lua_pushnil(); /* if arrives here, it didn't find */
/*
** add_s: appends the replacement for one match to the luaL buffer.
** `newp` is either a string or a function:
**   - string: its characters are copied to the buffer; in one branch a
**     character that is not a digit is copied as is, while a digit is
**     validated by check_cap and replaced by the text of that capture
**     (presumably this branch handles the character after an escape --
**     the test that selects it is not in this listing);
**   - function: it is called with a fresh buffer installed (the callee may
**     itself use the buffer), the previous buffer is restored afterwards,
**     and if the first result is a string it is appended.
** NOTE(review): argument setup for the call and the handling of `status`
** are not present in this listing.
*/
408 static void add_s (lua_Object newp
, struct Capture
*cap
) {
409 if (lua_isstring(newp
)) {
410 char *news
= lua_getstring(newp
);
411 int l
= lua_strlen(newp
);
413 for (i
=0; i
<l
; i
++) {
415 luaL_addchar(news
[i
]);
418 if (!isdigit((unsigned char)news
[i
]))
419 luaL_addchar(news
[i
]);
421 int level
= check_cap(news
[i
], cap
);
422 addnchar(cap
->capture
[level
].init
, cap
->capture
[level
].len
);
427 else { /* is a function */
433 /* function may use buffer, so save it and create a new one */
434 oldbuff
= luaL_newbuffer(0);
435 status
= lua_callfunction(newp
);
436 /* restore old buffer */
437 luaL_oldbuffer(oldbuff
);
442 res
= lua_getresult(1);
443 if (lua_isstring(res
))
444 addnchar(lua_getstring(res
), lua_strlen(res
));
/*
** str_gsub: global substitution.  Argument 1 is the subject (length
** received in `srcl`), argument 2 the pattern, argument 3 the replacement
** (must be a string or a function, otherwise an argument error is raised)
** and optional argument 4 a limit, with srcl+1 as its fallback value.
** A leading '^' in the pattern anchors it.
** Visible per-iteration logic: after trying a match at `src`, a non-empty
** match moves `src` to its end; otherwise, if any subject is left, one
** character is copied to the buffer and `src` advances by one (this
** guarantees progress on empty or failed matches).  Whatever remains of
** the subject is appended at the end, and `n` is pushed as the number of
** substitutions.
** NOTE(review): the loop header, the call that adds the replacement, the
** counting of `n` and the push of the result string are not present in
** this listing.
*/
450 static void str_gsub (void) {
452 char *src
= luaL_check_lstr(1, &srcl
);
453 char *p
= luaL_check_string(2);
454 lua_Object newp
= lua_getparam(3);
455 int max_s
= luaL_opt_int(4, srcl
+1);
456 int anchor
= (*p
== '^') ? (p
++, 1) : 0;
459 luaL_arg_check(lua_isstring(newp
) || lua_isfunction(newp
), 3,
460 "string or function expected");
462 cap
.src_end
= src
+srcl
;
466 e
= match(src
, p
, &cap
);
471 if (e
&& e
>src
) /* non empty match? */
472 src
= e
; /* skip it */
473 else if (src
< cap
.src_end
)
474 luaL_addchar(*src
++);
/* copy the unprocessed tail of the subject */
478 addnchar(src
, cap
.src_end
-src
);
480 lua_pushnumber(n
); /* number of substitutions */
483 /* }====================================================== */
/*
** luaI_addquoted: appends argument `arg` (a string, length received in
** `l`) to the luaL buffer in quoted form.  Visible cases: the double
** quote, backslash and newline share one branch (its body is not in this
** listing -- presumably they are backslash-escaped); a zero byte is
** written as the four characters \000; every other character is copied
** unchanged.
*/
486 static void luaI_addquoted (int arg
) {
488 char *s
= luaL_check_lstr(arg
, &l
);
492 case '"': case '\\': case '\n':
496 case '\0': addnchar("\\000", 4); break;
497 default: luaL_addchar(*s
);
504 /* maximum size of each format specification (such as '%-099.99d') */
505 #define MAX_FORMAT 20 /* arbitrary limit */
/*
** str_format: printf-like formatting into the luaL buffer; the result is
** pushed by closeandpush at the end.
** Ordinary characters and "%%" are copied through.  For a format item:
**   - an optional "n$" prefix (single digit) is skipped;
**   - the flags/width/precision are parsed by running `match` with the
**     pattern "[-+ #0]*(%d*)%.?(%d*)"; capture 0 is the width, capture 1
**     the precision, and each may have at most 2 digits;
**   - the spec plus its conversion character is copied into `form`
**     and handed to sprintf with an argument converted according to the
**     conversion: c/d/i take an int, o/u/x/X an unsigned int, e/E/f/g/G a
**     number; a string conversion with no precision and length >= 100 is
**     kept as the original string instead of going through sprintf;
**   - any other conversion raises "invalid option in `format'".
** NOTE(review): possible off-by-one on `form`.  The length check rejects
** only strfrmt-initf > MAX_FORMAT-2, so a spec of exactly MAX_FORMAT-2
** characters (flags may repeat) passes; strncpy then fills
** form[1..MAX_FORMAT-1] and the terminator is stored at
** form[MAX_FORMAT], one past the end of the array.  Tightening the check
** to MAX_FORMAT-3 (or enlarging `form` by one) would close this.
** NOTE(review): the outer loop, the handling of `arg`, and several case
** bodies are not present in this listing.
*/
507 static void str_format (void) {
509 char *strfrmt
= luaL_check_string(arg
);
513 luaL_addchar(*strfrmt
++);
514 else if (*++strfrmt
== '%')
515 luaL_addchar(*strfrmt
++); /* %% */
516 else { /* format item */
518 char form
[MAX_FORMAT
]; /* to store the format ('%...') */
519 char *buff
; /* to store the formatted item */
520 char *initf
= strfrmt
;
522 if (isdigit((unsigned char)*initf
) && *(initf
+1) == '$') {
524 initf
+= 2; /* skip the 'n$' */
/* subject end for the matcher: one past the terminating zero of the format */
527 cap
.src_end
= strfrmt
+strlen(strfrmt
)+1;
529 strfrmt
= match(initf
, "[-+ #0]*(%d*)%.?(%d*)", &cap
);
530 if (cap
.capture
[0].len
> 2 || cap
.capture
[1].len
> 2 || /* < 100? */
531 strfrmt
-initf
> MAX_FORMAT
-2)
532 lua_error("invalid format (width or precision too long)");
533 strncpy(form
+1, initf
, strfrmt
-initf
+1); /* +1 to include conversion */
/* strncpy does not terminate here, so the zero is stored explicitly */
534 form
[strfrmt
-initf
+2] = 0;
535 buff
= luaL_openspace(512); /* 512 > size of format('%99.99f', -1e308) */
536 switch (*strfrmt
++) {
537 case 'c': case 'd': case 'i':
538 sprintf(buff
, form
, luaL_check_int(arg
));
540 case 'o': case 'u': case 'x': case 'X':
541 sprintf(buff
, form
, (unsigned int)luaL_check_number(arg
));
543 case 'e': case 'E': case 'f': case 'g': case 'G':
544 sprintf(buff
, form
, luaL_check_number(arg
));
548 continue; /* skip the "addsize" at the end */
551 char *s
= luaL_check_lstr(arg
, &l
);
552 if (cap
.capture
[1].len
== 0 && l
>= 100) {
553 /* no precision and string is too big to be formatted;
554 keep original string */
556 continue; /* skip the "addsize" at the end */
559 sprintf(buff
, form
, s
);
563 default: /* also treat cases 'pnLlh' */
564 lua_error("invalid option in `format'");
/* commit what sprintf wrote into the space opened above */
566 luaL_addsize(strlen(buff
));
569 closeandpush(); /* push the result */
/*
** strlib: registration table pairing each Lua-visible function name with
** its C implementation.  "ascii" and "strbyte" both map to str_byte.
** NOTE(review): only some of the entries are present in this listing.
*/
573 static struct luaL_reg strlib
[] = {
576 {"strlower", str_lower
},
577 {"strupper", str_upper
},
578 {"strchar", str_char
},
580 {"ascii", str_byte
}, /* for compatibility with 3.0 and earlier */
581 {"strbyte", str_byte
},
582 {"format", str_format
},
583 {"strfind", str_find
},
589 ** Open string library
/*
** strlib_open: registers the `strlib` table through luaL_openlib; the
** entry count is derived from the array itself with
** sizeof(strlib)/sizeof(strlib[0]), so it tracks additions to the table.
*/
591 void strlib_open (void)
593 luaL_openlib(strlib
, (sizeof(strlib
)/sizeof(strlib
[0])));