Imported from ../lua-3.2.tar.gz.
[lua.git] / src / lib / lstrlib.c
blobb47e21d3af2009e0a433ffb05870c7061e6b298c
1 /*
2 ** $Id: lstrlib.c,v 1.32 1999/06/17 17:04:03 roberto Exp $
3 ** Standard library for strings and pattern-matching
4 ** See Copyright Notice in lua.h
5 */
8 #include <ctype.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
13 #include "lauxlib.h"
14 #include "lua.h"
15 #include "lualib.h"
/*
** Copy the `n' bytes starting at `s' into the space returned by
** luaL_openspace, then account for them with luaL_addsize.
*/
static void addnchar (char *s, int n) {
  memcpy(luaL_openspace(n), s, n);
  luaL_addsize(n);
}
/*
** Lua function `strlen': pushes the length reported by luaL_check_lstr
** for argument 1.
*/
static void str_len (void) {
  long len;
  luaL_check_lstr(1, &len);  /* only the length is needed */
  lua_pushnumber(len);
}
/*
** Push the current contents of the auxiliary buffer (luaL_buffer,
** luaL_getsize) as a string with explicit length.
*/
static void closeandpush (void) {
  lua_pushlstring(luaL_buffer(),
                  luaL_getsize());
}
/*
** Translate a relative string position: a negative `pos' counts back
** from the end of a string of length `len' (-1 is the last character);
** a non-negative `pos' is returned unchanged.
*/
static long posrelat (long pos, long len) {
  if (pos < 0)
    return len+pos+1;
  return pos;
}
/*
** Lua function `strsub': pushes the substring of argument 1 between the
** positions given by argument 2 and (optional, default -1) argument 3.
** Positions are translated with posrelat and clipped to [1, length];
** an empty range yields the empty string.
*/
static void str_sub (void) {
  long len;
  char *str = luaL_check_lstr(1, &len);
  long first = posrelat(luaL_check_long(2), len);
  long last = posrelat(luaL_opt_long(3, -1), len);
  if (first < 1)
    first = 1;
  if (last > len)
    last = len;
  if (first > last) {  /* nothing left after clipping */
    lua_pushstring("");
    return;
  }
  lua_pushlstring(str+first-1, last-first+1);
}
/*
** Lua function `strlower': pushes a copy of argument 1 in which every
** character has been passed through tolower().
*/
static void str_lower (void) {
  long l;
  long i;  /* same type as `l': an int index could overflow before reaching it */
  char *s = luaL_check_lstr(1, &l);
  luaL_resetbuffer();
  for (i=0; i<l; i++)
    luaL_addchar(tolower((unsigned char)(s[i])));  /* cast: ctype needs 0..UCHAR_MAX */
  closeandpush();
}
/*
** Lua function `strupper': pushes a copy of argument 1 in which every
** character has been passed through toupper().
*/
static void str_upper (void) {
  long l;
  long i;  /* same type as `l': an int index could overflow before reaching it */
  char *s = luaL_check_lstr(1, &l);
  luaL_resetbuffer();
  for (i=0; i<l; i++)
    luaL_addchar(toupper((unsigned char)(s[i])));  /* cast: ctype needs 0..UCHAR_MAX */
  closeandpush();
}
/*
** Lua function `strrep': pushes argument 1 repeated `n' times, where
** `n' is argument 2.  A count of zero or less gives the empty string.
*/
static void str_rep (void) {
  long len;
  char *str = luaL_check_lstr(1, &len);
  int times = luaL_check_int(2);
  int k;
  luaL_resetbuffer();
  for (k = 0; k < times; k++)
    addnchar(str, len);
  closeandpush();
}
/*
** Lua function `strbyte' (also registered as `ascii'): pushes the
** numeric code of the character at the position given by optional
** argument 2 (default 1, negative counts from the end).
*/
static void str_byte (void) {
  long len;
  char *str = luaL_check_lstr(1, &len);
  long where = posrelat(luaL_opt_long(2, 1), len);
  luaL_arg_check(1 <= where && where <= len, 2, "out of range");
  lua_pushnumber((unsigned char)*(str + where - 1));
}
101 static void str_char (void) {
102 int i = 0;
103 luaL_resetbuffer();
104 while (lua_getparam(++i) != LUA_NOOBJECT) {
105 double c = luaL_check_number(i);
106 luaL_arg_check((unsigned char)c == c, i, "invalid value");
107 luaL_addchar((unsigned char)c);
109 closeandpush();
/*
** {======================================================
** PATTERN MATCHING
** =======================================================
*/

#ifndef MAX_CAPT
#define MAX_CAPT 32  /* arbitrary limit */
#endif


/*
** State carried through one matching attempt: the end of the subject
** string and the list of captures opened so far.
*/
struct Capture {
  char *src_end;  /* end ('\0') of source string */
  int level;  /* total number of captures (finished or unfinished) */
  struct {
    char *init;  /* position in the source where the capture begins */
    int len;  /* -1 signals unfinished capture */
  } capture[MAX_CAPT];
};


#define ESC		'%'  /* escape character in patterns */
#define SPECIALS	"^$*+?.([%-"  /* characters str_find looks for to decide whether a pattern is plain text */
139 static void push_captures (struct Capture *cap) {
140 int i;
141 for (i=0; i<cap->level; i++) {
142 int l = cap->capture[i].len;
143 if (l == -1) lua_error("unfinished capture");
144 lua_pushlstring(cap->capture[i].init, l);
149 static int check_cap (int l, struct Capture *cap) {
150 l -= '1';
151 if (!(0 <= l && l < cap->level && cap->capture[l].len != -1))
152 lua_error("invalid capture index");
153 return l;
157 static int capture_to_close (struct Capture *cap) {
158 int level = cap->level;
159 for (level--; level>=0; level--)
160 if (cap->capture[level].len == -1) return level;
161 lua_error("invalid pattern capture");
162 return 0; /* to avoid warnings */
166 char *luaI_classend (char *p) {
167 switch (*p++) {
168 case ESC:
169 if (*p == '\0')
170 luaL_verror("incorrect pattern (ends with `%c')", ESC);
171 return p+1;
172 case '[':
173 if (*p == '^') p++;
174 if (*p == ']') p++;
175 p = strchr(p, ']');
176 if (!p) lua_error("incorrect pattern (missing `]')");
177 return p+1;
178 default:
179 return p;
/*
** Test character `c' against the class letter `cl' (the character that
** follows ESC in a pattern).  A lower-case letter selects the <ctype.h>
** test of the same meaning; its upper-case form selects the complement.
** Any `cl' that is not a class letter matches only itself.
*/
static int matchclass (int c, int cl) {
  int hit;
  switch (tolower(cl)) {
    case 'a': hit = isalpha(c);  break;
    case 'c': hit = iscntrl(c);  break;
    case 'd': hit = isdigit(c);  break;
    case 'l': hit = islower(c);  break;
    case 'p': hit = ispunct(c);  break;
    case 's': hit = isspace(c);  break;
    case 'u': hit = isupper(c);  break;
    case 'w': hit = isalnum(c);  break;
    case 'x': hit = isxdigit(c); break;
    case 'z': hit = (c == '\0'); break;
    default:
      return (cl == c);  /* not a class: literal comparison */
  }
  if (islower(cl))
    return hit;
  return !hit;  /* upper-case class letter: complement */
}
204 static int matchbracketclass (int c, char *p, char *end) {
205 int sig = 1;
206 if (*(p+1) == '^') {
207 sig = 0;
208 p++; /* skip the '^' */
210 while (++p < end) {
211 if (*p == ESC) {
212 p++;
213 if ((p < end) && matchclass(c, (unsigned char)*p))
214 return sig;
216 else if ((*(p+1) == '-') && (p+2 < end)) {
217 p+=2;
218 if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p)
219 return sig;
221 else if ((unsigned char)*p == c) return sig;
223 return !sig;
228 int luaI_singlematch (int c, char *p, char *ep) {
229 switch (*p) {
230 case '.': /* matches any char */
231 return 1;
232 case ESC:
233 return matchclass(c, (unsigned char)*(p+1));
234 case '[':
235 return matchbracketclass(c, p, ep-1);
236 default:
237 return ((unsigned char)*p == c);
242 static char *match (char *s, char *p, struct Capture *cap);
245 static char *matchbalance (char *s, char *p, struct Capture *cap) {
246 if (*p == 0 || *(p+1) == 0)
247 lua_error("unbalanced pattern");
248 if (*s != *p) return NULL;
249 else {
250 int b = *p;
251 int e = *(p+1);
252 int cont = 1;
253 while (++s < cap->src_end) {
254 if (*s == e) {
255 if (--cont == 0) return s+1;
257 else if (*s == b) cont++;
260 return NULL; /* string ends out of balance */
264 static char *max_expand (char *s, char *p, char *ep, struct Capture *cap) {
265 int i = 0; /* counts maximum expand for item */
266 while ((s+i)<cap->src_end && luaI_singlematch((unsigned char)*(s+i), p, ep))
267 i++;
268 /* keeps trying to match mith the maximum repetitions */
269 while (i>=0) {
270 char *res = match((s+i), ep+1, cap);
271 if (res) return res;
272 i--; /* else didn't match; reduce 1 repetition to try again */
274 return NULL;
278 static char *min_expand (char *s, char *p, char *ep, struct Capture *cap) {
279 for (;;) {
280 char *res = match(s, ep+1, cap);
281 if (res != NULL)
282 return res;
283 else if (s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep))
284 s++; /* try with one more repetition */
285 else return NULL;
290 static char *start_capt (char *s, char *p, struct Capture *cap) {
291 char *res;
292 int level = cap->level;
293 if (level >= MAX_CAPT) lua_error("too many captures");
294 cap->capture[level].init = s;
295 cap->capture[level].len = -1;
296 cap->level = level+1;
297 if ((res=match(s, p+1, cap)) == NULL) /* match failed? */
298 cap->level--; /* undo capture */
299 return res;
303 static char *end_capt (char *s, char *p, struct Capture *cap) {
304 int l = capture_to_close(cap);
305 char *res;
306 cap->capture[l].len = s - cap->capture[l].init; /* close capture */
307 if ((res = match(s, p+1, cap)) == NULL) /* match failed? */
308 cap->capture[l].len = -1; /* undo capture */
309 return res;
313 static char *match_capture (char *s, int level, struct Capture *cap) {
314 int l = check_cap(level, cap);
315 int len = cap->capture[l].len;
316 if (cap->src_end-s >= len &&
317 memcmp(cap->capture[l].init, s, len) == 0)
318 return s+len;
319 else return NULL;
323 static char *match (char *s, char *p, struct Capture *cap) {
324 init: /* using goto's to optimize tail recursion */
325 switch (*p) {
326 case '(': /* start capture */
327 return start_capt(s, p, cap);
328 case ')': /* end capture */
329 return end_capt(s, p, cap);
330 case ESC: /* may be %[0-9] or %b */
331 if (isdigit((unsigned char)(*(p+1)))) { /* capture? */
332 s = match_capture(s, *(p+1), cap);
333 if (s == NULL) return NULL;
334 p+=2; goto init; /* else return match(p+2, s, cap) */
336 else if (*(p+1) == 'b') { /* balanced string? */
337 s = matchbalance(s, p+2, cap);
338 if (s == NULL) return NULL;
339 p+=4; goto init; /* else return match(p+4, s, cap); */
341 else goto dflt; /* case default */
342 case '\0': /* end of pattern */
343 return s; /* match succeeded */
344 case '$':
345 if (*(p+1) == '\0') /* is the '$' the last char in pattern? */
346 return (s == cap->src_end) ? s : NULL; /* check end of string */
347 else goto dflt;
348 default: dflt: { /* it is a pattern item */
349 char *ep = luaI_classend(p); /* points to what is next */
350 int m = s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep);
351 switch (*ep) {
352 case '?': { /* optional */
353 char *res;
354 if (m && ((res=match(s+1, ep+1, cap)) != NULL))
355 return res;
356 p=ep+1; goto init; /* else return match(s, ep+1, cap); */
358 case '*': /* 0 or more repetitions */
359 return max_expand(s, p, ep, cap);
360 case '+': /* 1 or more repetitions */
361 return (m ? max_expand(s+1, p, ep, cap) : NULL);
362 case '-': /* 0 or more repetitions (minimum) */
363 return min_expand(s, p, ep, cap);
364 default:
365 if (!m) return NULL;
366 s++; p=ep; goto init; /* else return match(s+1, ep, cap); */
373 static void str_find (void) {
374 long l;
375 char *s = luaL_check_lstr(1, &l);
376 char *p = luaL_check_string(2);
377 long init = posrelat(luaL_opt_long(3, 1), l) - 1;
378 struct Capture cap;
379 luaL_arg_check(0 <= init && init <= l, 3, "out of range");
380 if (lua_getparam(4) != LUA_NOOBJECT ||
381 strpbrk(p, SPECIALS) == NULL) { /* no special characters? */
382 char *s2 = strstr(s+init, p);
383 if (s2) {
384 lua_pushnumber(s2-s+1);
385 lua_pushnumber(s2-s+strlen(p));
386 return;
389 else {
390 int anchor = (*p == '^') ? (p++, 1) : 0;
391 char *s1=s+init;
392 cap.src_end = s+l;
393 do {
394 char *res;
395 cap.level = 0;
396 if ((res=match(s1, p, &cap)) != NULL) {
397 lua_pushnumber(s1-s+1); /* start */
398 lua_pushnumber(res-s); /* end */
399 push_captures(&cap);
400 return;
402 } while (s1++<cap.src_end && !anchor);
404 lua_pushnil(); /* if arrives here, it didn't find */
408 static void add_s (lua_Object newp, struct Capture *cap) {
409 if (lua_isstring(newp)) {
410 char *news = lua_getstring(newp);
411 int l = lua_strlen(newp);
412 int i;
413 for (i=0; i<l; i++) {
414 if (news[i] != ESC)
415 luaL_addchar(news[i]);
416 else {
417 i++; /* skip ESC */
418 if (!isdigit((unsigned char)news[i]))
419 luaL_addchar(news[i]);
420 else {
421 int level = check_cap(news[i], cap);
422 addnchar(cap->capture[level].init, cap->capture[level].len);
427 else { /* is a function */
428 lua_Object res;
429 int status;
430 int oldbuff;
431 lua_beginblock();
432 push_captures(cap);
433 /* function may use buffer, so save it and create a new one */
434 oldbuff = luaL_newbuffer(0);
435 status = lua_callfunction(newp);
436 /* restore old buffer */
437 luaL_oldbuffer(oldbuff);
438 if (status != 0) {
439 lua_endblock();
440 lua_error(NULL);
442 res = lua_getresult(1);
443 if (lua_isstring(res))
444 addnchar(lua_getstring(res), lua_strlen(res));
445 lua_endblock();
450 static void str_gsub (void) {
451 long srcl;
452 char *src = luaL_check_lstr(1, &srcl);
453 char *p = luaL_check_string(2);
454 lua_Object newp = lua_getparam(3);
455 int max_s = luaL_opt_int(4, srcl+1);
456 int anchor = (*p == '^') ? (p++, 1) : 0;
457 int n = 0;
458 struct Capture cap;
459 luaL_arg_check(lua_isstring(newp) || lua_isfunction(newp), 3,
460 "string or function expected");
461 luaL_resetbuffer();
462 cap.src_end = src+srcl;
463 while (n < max_s) {
464 char *e;
465 cap.level = 0;
466 e = match(src, p, &cap);
467 if (e) {
468 n++;
469 add_s(newp, &cap);
471 if (e && e>src) /* non empty match? */
472 src = e; /* skip it */
473 else if (src < cap.src_end)
474 luaL_addchar(*src++);
475 else break;
476 if (anchor) break;
478 addnchar(src, cap.src_end-src);
479 closeandpush();
480 lua_pushnumber(n); /* number of substitutions */
483 /* }====================================================== */
/*
** Append string argument `arg' to the buffer enclosed in double
** quotes, with `"', `\' and newline preceded by a backslash and each
** embedded zero written as \000.
*/
static void luaI_addquoted (int arg) {
  long l;
  char *s = luaL_check_lstr(arg, &l);
  luaL_addchar('"');
  for (; l != 0; l--, s++) {
    char ch = *s;
    if (ch == '\0')
      addnchar("\\000", 4);
    else {
      if (ch == '"' || ch == '\\' || ch == '\n')
        luaL_addchar('\\');  /* escape it */
      luaL_addchar(ch);
    }
  }
  luaL_addchar('"');
}
504 /* maximum size of each format specification (such as '%-099.99d') */
505 #define MAX_FORMAT 20 /* arbitrary limit */
507 static void str_format (void) {
508 int arg = 1;
509 char *strfrmt = luaL_check_string(arg);
510 luaL_resetbuffer();
511 while (*strfrmt) {
512 if (*strfrmt != '%')
513 luaL_addchar(*strfrmt++);
514 else if (*++strfrmt == '%')
515 luaL_addchar(*strfrmt++); /* %% */
516 else { /* format item */
517 struct Capture cap;
518 char form[MAX_FORMAT]; /* to store the format ('%...') */
519 char *buff; /* to store the formatted item */
520 char *initf = strfrmt;
521 form[0] = '%';
522 if (isdigit((unsigned char)*initf) && *(initf+1) == '$') {
523 arg = *initf - '0';
524 initf += 2; /* skip the 'n$' */
526 arg++;
527 cap.src_end = strfrmt+strlen(strfrmt)+1;
528 cap.level = 0;
529 strfrmt = match(initf, "[-+ #0]*(%d*)%.?(%d*)", &cap);
530 if (cap.capture[0].len > 2 || cap.capture[1].len > 2 || /* < 100? */
531 strfrmt-initf > MAX_FORMAT-2)
532 lua_error("invalid format (width or precision too long)");
533 strncpy(form+1, initf, strfrmt-initf+1); /* +1 to include conversion */
534 form[strfrmt-initf+2] = 0;
535 buff = luaL_openspace(512); /* 512 > size of format('%99.99f', -1e308) */
536 switch (*strfrmt++) {
537 case 'c': case 'd': case 'i':
538 sprintf(buff, form, luaL_check_int(arg));
539 break;
540 case 'o': case 'u': case 'x': case 'X':
541 sprintf(buff, form, (unsigned int)luaL_check_number(arg));
542 break;
543 case 'e': case 'E': case 'f': case 'g': case 'G':
544 sprintf(buff, form, luaL_check_number(arg));
545 break;
546 case 'q':
547 luaI_addquoted(arg);
548 continue; /* skip the "addsize" at the end */
549 case 's': {
550 long l;
551 char *s = luaL_check_lstr(arg, &l);
552 if (cap.capture[1].len == 0 && l >= 100) {
553 /* no precision and string is too big to be formatted;
554 keep original string */
555 addnchar(s, l);
556 continue; /* skip the "addsize" at the end */
558 else {
559 sprintf(buff, form, s);
560 break;
563 default: /* also treat cases 'pnLlh' */
564 lua_error("invalid option in `format'");
566 luaL_addsize(strlen(buff));
569 closeandpush(); /* push the result */
573 static struct luaL_reg strlib[] = {
574 {"strlen", str_len},
575 {"strsub", str_sub},
576 {"strlower", str_lower},
577 {"strupper", str_upper},
578 {"strchar", str_char},
579 {"strrep", str_rep},
580 {"ascii", str_byte}, /* for compatibility with 3.0 and earlier */
581 {"strbyte", str_byte},
582 {"format", str_format},
583 {"strfind", str_find},
584 {"gsub", str_gsub}
589 ** Open string library
591 void strlib_open (void)
593 luaL_openlib(strlib, (sizeof(strlib)/sizeof(strlib[0])));