beta-0.89.2
[luatex.git] / source / texk / web2c / luatexdir / slnunicode / slnunico.c
blob60ef95baca46908d1825d8404c8608110709f509
1 /*
2 * Selene Unicode/UTF-8
3 * This additions
4 * Copyright (c) 2005 Malete Partner, Berlin, partner@malete.org
5 * Available under "Lua 5.0 license", see http://www.lua.org/license.html#5
6 * $Id: slnunico.c,v 1.5 2006/07/26 17:20:04 paul Exp $
8 * contains code from
9 ** lstrlib.c,v 1.109 2004/12/01 15:46:06 roberto Exp
10 ** Standard library for string operations and pattern-matching
11 ** See Copyright Notice in lua.h
13 * uses the udata table and a couple of expressions from Tcl 8.4.x UTF-8
14 * which comes with the following license.terms:
16 This software is copyrighted by the Regents of the University of
17 California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState
18 Corporation and other parties. The following terms apply to all files
19 associated with the software unless explicitly disclaimed in
20 individual files.
22 The authors hereby grant permission to use, copy, modify, distribute,
23 and license this software and its documentation for any purpose, provided
24 that existing copyright notices are retained in all copies and that this
25 notice is included verbatim in any distributions. No written agreement,
26 license, or royalty fee is required for any of the authorized uses.
27 Modifications to this software may be copyrighted by their authors
28 and need not follow the licensing terms described here, provided that
29 the new terms are clearly indicated on the first page of each file where
30 they apply.
32 IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
33 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
34 ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
35 DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
38 THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
39 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
40 FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
41 IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
42 NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
43 MODIFICATIONS.
45 GOVERNMENT USE: If you are acquiring this software on behalf of the
46 U.S. government, the Government shall have only "Restricted Rights"
47 in the software and related documentation as defined in the Federal
48 Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
49 are acquiring the software on behalf of the Department of Defense, the
50 software shall be classified as "Commercial Computer Software" and the
51 Government shall have only "Restricted Rights" as defined in Clause
52 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
53 authors grant the U.S. Government and others acting in its behalf
54 permission to use and distribute the software in accordance with the
55 terms specified in this license.
57 (end of Tcl license terms)
61 According to http://ietf.org/rfc/rfc3629.txt we support up to 4-byte
62 (21 bit) sequences encoding the UTF-16 reachable 0-0x10FFFF.
63 Any byte not part of a 2-4 byte sequence in that range decodes to itself.
64 Ill formed (non-shortest) "C0 80" will be decoded as two code points C0 and 80,
65 not code point 0; see security considerations in the RFC.
66 However, UTF-16 surrogates (D800-DFFF) are accepted.
68 See http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
69 for default grapheme clusters.
70 Lazy westerners we are (and lacking the Hangul_Syllable_Type data),
71 we care for base char + Grapheme_Extend, but not for Hangul syllable sequences.
73 For http://unicode.org/Public/UNIDATA/UCD.html#Grapheme_Extend
74 we use Mn (NON_SPACING_MARK) + Me (ENCLOSING_MARK),
75 ignoring the 18 mostly south asian Other_Grapheme_Extend (16 Mc, 2 Cf) from
76 http://www.unicode.org/Public/UNIDATA/PropList.txt
79 #include <ctype.h>
80 #include <stddef.h>
81 #include <stdio.h>
82 #include <stdlib.h>
83 #include <string.h>
84 #include <stdint.h>
86 #define lstrlib_c
87 #define LUA_LIB
89 #include "lua.h"
91 #ifdef LuajitTeX
92 #include "lua/lauxlib_bridge.h"
93 #else
94 #include "lauxlib.h"
95 #endif
96 #include "lualib.h"
/* Library name; may be predefined (e.g. in luaconf) to override. */
#ifndef SLN_UNICODENAME
# define SLN_UNICODENAME "unicode"
#endif

/* Capacity of MatchState.capture[]: most captures a pattern may open. */
#define LUA_MAXCAPTURES 32

/* printf length modifier for integer conversions and the matching C type. */
#if defined(LUA_USELONGLONG)
# define LUA_INTFRMLEN "ll"
# define LUA_INTFRM_T long long
#else
# define LUA_INTFRMLEN "l"
# define LUA_INTFRM_T long
#endif
/*
 * UTF-8 bit distribution (Unicode 5.0, page 103):
 *
 *   first byte   length
 *   00..7f       1 byte
 *   c0..df       2 bytes
 *   e0..ef       3 bytes
 *   f0..f7       4 bytes
 *
 * U8_LENGTH(c) only compares against the upper bounds listed above, so
 * bytes 80..bf (continuation bytes) also yield 2, and bytes f8..ff yield -1.
 * Check for -1 before adding the result to a pointer.
 */
#define U8_LENGTH(c) \
  ((unsigned char)(c)<=0x7f ? 1 : \
   ((unsigned char)(c)<=0xdf ? 2 : \
    ((unsigned char)(c)<=0xef ? 3 : \
     ((unsigned char)(c)<=0xf7 ? 4 : -1))))
125 #include "slnudata.c"
/* Table entry for code point c; anything outside the BMP yields 0. */
#define charinfo(c) (~0xFFFF&(c) ? 0 : GetUniCharInfo(c)) /* BMP only */
/* Category field of the table entry for c. */
#define charcat(c) (UNICODE_CATEGORY_MASK & charinfo(c))
/* 1 if code has category NON_SPACING_MARK or ENCLOSING_MARK, else 0. */
#define Grapheme_Extend(code) \
  (1 & (((1<<NON_SPACING_MARK)|(1<<ENCLOSING_MARK)) >> charcat(code)))
enum { /* operation modes */
  MODE_ASCII, /* single byte 7bit */
  MODE_LATIN, /* single byte 8859-1 */
  MODE_UTF8,  /* UTF-8 by code points */
  MODE_GRAPH  /* UTF-8 by grapheme clusters */
};

/* Nonzero for the two UTF-8 modes: clears bit 0 of the mode value. */
#define MODE_MBYTE(mode) (~1&(mode))
/* macro to `unsign' a character */
#define uchar(c) ((unsigned char)(c))

/* short name for the byte type used when scanning UTF-8 */
typedef const unsigned char cuc;
146 static void utf8_enco (luaL_Buffer *b, unsigned c)
148 if (0x80 > c) {
149 luaL_addchar(b, c);
150 return;
152 if (0x800 > c)
153 luaL_addchar(b, 0xC0|(c>>6));
154 else {
155 if (0x10000 > c)
156 luaL_addchar(b, 0xE0|(c>>12));
157 else {
158 luaL_addchar(b, 0xF0|(c>>18));
159 luaL_addchar(b, 0x80|(0x3F&(c>>12)));
161 luaL_addchar(b, 0x80|(0x3F&(c>>6)));
163 luaL_addchar(b, 0x80|(0x3F&c));
164 } /* utf8_enco */
/*
 * Decode one code point starting at *pp and advance *pp past it.
 * Precondition: end > *pp.
 *
 * A well-formed 2..4 byte sequence is consumed as a whole and its code
 * point returned. Anything else consumes exactly one byte and returns that
 * byte's value. That covers ASCII, a dangling continuation byte, the
 * non-shortest starters C0/C1, a sequence cut off by `end`, a non-shortest
 * 3 or 4 byte form, and a 4 byte value of 0x110100 or more.
 * Surrogates (D800..DFFF) are let through. The 4 byte limit is 0x110100
 * rather than 0x110000 to admit the 256 extra "plane 18" values.
 */
static unsigned utf8_deco (const char **pp, const char *end)
{
  const unsigned char *p = (const unsigned char *)*pp;
  const unsigned char *const e = (const unsigned char *)end;
  const unsigned first = *p++;
  unsigned code;

  *pp = (const char *)p;          /* by default eat just one byte */
  if (first < 0xC2 || p == e || (*p & 0xC0) != 0x80)
    return first;
  code = *p++ & 0x3F;             /* 1st continuation byte */

  if (first < 0xE0) {
    /* 2 byte sequence: any starter >= C2 is valid */
    code |= (first & 0x1F) << 6;
  } else {
    if (p == e || (*p & 0xC0) != 0x80)
      return first;
    code = code << 6 | (*p++ & 0x3F);   /* 2nd continuation byte */
    if (first < 0xF0) {
      /* 3 byte sequence: must exceed 0x7FF to be shortest form */
      code |= (first & 0x0F) << 12;
      if (!(code & 0xF800))
        return first;
    } else {
      /* 4 byte sequence: needs a 3rd continuation byte */
      if (p == e || (*p & 0xC0) != 0x80)
        return first;
      code = (first & 0x0F) << 18 | code << 6 | (*p++ & 0x3F);
      /* rejects starters above F4 and anything that fits 16 bit */
      if (code >= 0x110100 || code <= 0xFFFF)
        return first;
    }
  }
  *pp = (const char *)p;
  return code;
} /* utf8_deco */
/*
 * Reverse decode: read the code point that ends just before *pp and move
 * *pp back to its first byte. Precondition: *pp > start.
 *
 * When the bytes before *pp do not end a well-formed 2..4 byte sequence,
 * exactly one byte is stepped back and its value returned.
 *
 * The 4 byte branch requires the lead byte to be F0 or above. Without that
 * check any byte followed by three continuation bytes (for example
 * "A\x80\x80\x80") was taken as one 4 byte code point, which disagrees with
 * utf8_deco. Lead bytes above F4 are still rejected by the range test.
 */
static unsigned utf8_oced (const char **pp, const char *start)
{
  const unsigned char *p = (const unsigned char *)*pp;
  const unsigned char *const s = (const unsigned char *)start;
  const unsigned last = *--p;
  unsigned code;

  *pp = (const char *)p;          /* by default step back one byte */
  /* not a continuation byte, or nothing in front of it */
  if ((last & 0xC0) != 0x80 || p == s)
    return last;
  code = last & 0x3F;             /* last continuation byte */

  --p;
  if ((*p & 0xE0) == 0xC0) {
    /* preceded by a 2 byte starter; C0/C1 would be non-shortest */
    if (*p < 0xC2)
      return last;
    code |= (*p & 0x1F) << 6;
  } else {
    if ((*p & 0xC0) != 0x80 || p == s)
      return last;
    code |= (*p & 0x3F) << 6;
    --p;
    if ((*p & 0xF0) == 0xE0) {
      /* 3 byte starter; value must exceed 0x7FF */
      code |= (*p & 0x0F) << 12;
      if (!(code & 0xF800))
        return last;
    } else {
      if ((*p & 0xC0) != 0x80 || p == s)
        return last;
      --p;
      if ((*p & 0xF0) != 0xF0)    /* must be a 4 byte starter */
        return last;
      code |= (*p & 0x0F) << 18 | (p[1] & 0x3F) << 12;
      /* same bounds as utf8_deco, including the "plane 18" extras */
      if (code >= 0x110100 || code <= 0xFFFF)
        return last;
    }
  }
  *pp = (const char *)p;
  return code;
} /* utf8_oced */
/*
 * Skip over Grapheme_Extend codes: advance *pp past every leading code
 * point in [*pp, end) for which Grapheme_Extend() holds. *pp is left on the
 * first code point that does not extend, or on end.
 */
static void utf8_graphext (const char **pp, const char *end)
{
  const char *cur = *pp;
  while (cur < end) {
    unsigned code = utf8_deco(&cur, end);
    if (!Grapheme_Extend(code)) break;
    *pp = cur;                    /* commit only after an extender */
  }
} /* utf8_graphext */
/*
 * Count units in the `bytes` bytes starting at *pp, stopping once `max`
 * units were seen (a negative max never stops early). *pp is advanced past
 * everything that was counted.
 *
 * graph == 0: one unit per code point.
 * graph != 0: a Grapheme_Extend code point is folded into the unit before
 * it, unless it is the very first one. When the count stops at max, the
 * extenders that trail the last unit are consumed as well.
 */
static int utf8_count (const char **pp, int bytes, int graph, int max)
{
  const char *const end = *pp + bytes;
  int count = 0;

  while (*pp < end && count != max) {
    unsigned code = utf8_deco(pp, end);
    count++;
    if (graph && Grapheme_Extend(code) && count > 1)
      count--;                    /* uncount: belongs to previous unit */
  }
  if (graph && count == max)      /* gather more extending */
    utf8_graphext(pp, end);
  return count;
} /* utf8_count */
262 static int unic_len (lua_State *L) {
263 size_t l;
264 const char *s = luaL_checklstring(L, 1, &l);
265 int mode = lua_tointeger(L, lua_upvalueindex(1));
266 if (MODE_MBYTE(mode)) l = (size_t)utf8_count(&s, l, mode-2, -1);
267 lua_pushinteger(L, l);
268 return 1;
/*
 * Translate a relative string position: a negative pos counts back from
 * the end (-1 is the last unit). A negative pos that reaches back beyond
 * the start is clamped to 0 instead of staying negative, so callers that
 * compare the result as an unsigned value are not fooled by it.
 */
static ptrdiff_t posrelat (ptrdiff_t pos, size_t len) {
  if (pos < 0)
    pos += (ptrdiff_t)len + 1;
  return (pos >= 0) ? pos : 0;
}
278 static int unic_sub (lua_State *L) {
279 size_t l;
280 const char *s = luaL_checklstring(L, 1, &l), *p, *e=s+l;
281 ptrdiff_t start = luaL_checkinteger(L, 2);
282 ptrdiff_t end = luaL_optinteger(L, 3, -1);
283 int mode = lua_tointeger(L, lua_upvalueindex(1));
285 if (MODE_MBYTE(mode)) { p=s; l = (size_t)utf8_count(&p, l, mode-2, -1); }
286 start = posrelat(start, l);
287 end = posrelat(end, l);
288 if (start < 1) start = 1;
289 if (end > (ptrdiff_t)l) end = (ptrdiff_t)l;
290 if (start > end)
291 lua_pushliteral(L, "");
292 else {
293 l = end - --start; /* #units */
294 if (!(MODE_MBYTE(mode))) /* single byte */
295 s += start;
296 else {
297 if (start) utf8_count(&s, e-s, mode-2, start); /* skip */
298 p = s;
299 utf8_count(&p, e-p, mode-2, l);
300 l = p-s;
302 lua_pushlstring(L, s, l);
304 return 1;
308 static int str_reverse (lua_State *L) { /* TODO? whatfor? */
309 size_t l;
310 luaL_Buffer b;
311 const char *s = luaL_checklstring(L, 1, &l), *p = s+l, *q;
312 int mode = lua_tointeger(L, lua_upvalueindex(1)), mb = MODE_MBYTE(mode);
314 luaL_buffinit(L, &b);
315 if (!mb)
316 while (s < p--) luaL_addchar(&b, *p);
317 else {
318 unsigned code;
319 while (s < p) {
320 q = p;
321 code = utf8_oced(&p, s);
322 if (MODE_GRAPH == mode)
323 while (Grapheme_Extend(code) && p>s) code = utf8_oced(&p, s);
324 luaL_addlstring(&b, p, q-p);
327 luaL_pushresult(&b);
328 return 1;
333 static int unic_lower (lua_State *L) {
334 size_t l;
335 luaL_Buffer b;
336 const char *s = luaL_checklstring(L, 1, &l), * const e=s+l;
337 int mode = lua_tointeger(L, lua_upvalueindex(1)), mb = MODE_MBYTE(mode);
338 luaL_buffinit(L, &b);
339 while (s < e) {
340 unsigned c = mb ? utf8_deco(&s, e) : uchar(*s++);
341 int info = charinfo(c);
342 if (GetCaseType(info)&0x02 && (mode || !(0x80&c))) c += GetDelta(info);
343 if (mb) utf8_enco(&b, c); else luaL_addchar(&b, c);
345 luaL_pushresult(&b);
346 return 1;
350 static int unic_upper (lua_State *L) {
351 size_t l;
352 luaL_Buffer b;
353 const char *s = luaL_checklstring(L, 1, &l), * const e=s+l;
354 int mode = lua_tointeger(L, lua_upvalueindex(1)), mb = MODE_MBYTE(mode);
355 luaL_buffinit(L, &b);
356 while (s < e) {
357 unsigned c = mb ? utf8_deco(&s, e) : uchar(*s++);
358 int info = charinfo(c);
359 if (GetCaseType(info)&0x04 && (mode || !(0x80&c))) c -= GetDelta(info);
360 if (mb) utf8_enco(&b, c); else luaL_addchar(&b, c);
362 luaL_pushresult(&b);
363 return 1;
367 static int str_rep (lua_State *L) {
368 size_t l;
369 luaL_Buffer b;
370 const char *s = luaL_checklstring(L, 1, &l);
371 int n = luaL_checkint(L, 2);
372 luaL_buffinit(L, &b);
373 while (n-- > 0)
374 luaL_addlstring(&b, s, l);
375 luaL_pushresult(&b);
376 return 1;
380 static int unic_byte (lua_State *L) {
381 size_t l;
382 ptrdiff_t posi, pose;
383 const char *s = luaL_checklstring(L, 1, &l), *p, *e=s+l;
384 int n, mode = lua_tointeger(L, lua_upvalueindex(1)), mb = MODE_MBYTE(mode);
386 if (mb) { p=s; l = (size_t)utf8_count(&p, l, mode-2, -1); }
387 posi = posrelat(luaL_optinteger(L, 2, 1), l);
388 pose = posrelat(luaL_optinteger(L, 3, posi), l);
389 if (posi <= 0) posi = 1;
390 if ((size_t)pose > l) pose = l;
391 if (0 >= (n = pose - --posi)) return 0; /* empty interval */
392 if (!mb)
393 e = (s += posi) + n;
394 else {
395 if (posi) utf8_count(&s, e-s, mode-2, posi); /* skip */
396 p=s;
397 utf8_count(&p, e-s, mode-2, n);
398 e=p;
400 /* byte count is upper bound on #elements */
401 luaL_checkstack(L, e-s, "string slice too long");
402 for (n=0; s<e; n++)
403 lua_pushinteger(L, mb ? utf8_deco(&s, e) : uchar(*s++));
404 return n;
408 static int unic_char (lua_State *L) {
409 int i, n = lua_gettop(L); /* number of arguments */
410 int mode = lua_tointeger(L, lua_upvalueindex(1)), mb = MODE_MBYTE(mode);
411 /* TH: add the 256 out-of-range glyphs in 'plane 18' */
412 unsigned lim = mb ? 0x110100 : 0x100;
414 luaL_Buffer b;
415 luaL_buffinit(L, &b);
416 for (i=1; i<=n; i++) {
417 unsigned c = luaL_checkint(L, i);
418 luaL_argcheck(L, lim > c, i, "invalid value");
419 if (mb) utf8_enco(&b, c); else luaL_addchar(&b, c);
421 luaL_pushresult(&b);
422 return 1;
426 static int writer (lua_State *L, const void* b, size_t size, void* B) {
427 (void)L;
428 luaL_addlstring((luaL_Buffer*) B, (const char *)b, size);
429 return 0;
433 static int str_dump (lua_State *L) {
434 luaL_Buffer b;
435 luaL_checktype(L, 1, LUA_TFUNCTION);
436 lua_settop(L, 1);
437 luaL_buffinit(L,&b);
438 if (lua_dump(L, writer, &b) != 0)
439 luaL_error(L, "unable to dump given function");
440 luaL_pushresult(&b);
441 return 1;
/*
** {======================================================
** PATTERN MATCHING
** =======================================================
* find/gfind(_aux) -> match, push_captures
* gsub -> match, add_s (-> push_captures)
* push_captures, add_s -> push_onecapture
* match ->
*   start/end_capture -> match,
*   match_capture, matchbalance, classend -> -,
*   min/max_expand -> match, singlematch
*   singlematch -> matchbracketclass, match_class,
*   matchbracketclass -> match_class -> -,
*/

/* special values of MatchState.capture[].len */
#define CAP_UNFINISHED (-1)   /* capture opened but not yet closed */
#define CAP_POSITION (-2)     /* position capture "()" */
465 typedef struct MatchState {
466 const char *src_init; /* init of source string */
467 const char *src_end; /* end (`\0') of source string */
468 lua_State *L;
469 int level; /* total number of captures (finished or unfinished) */
470 int mode;
471 int mb;
472 struct {
473 const char *init;
474 ptrdiff_t len;
475 } capture[LUA_MAXCAPTURES];
476 } MatchState;
/* escape character of the pattern language */
#define L_ESC '%'
/* characters whose presence makes find() treat its argument as a pattern */
#define SPECIALS "^$*+?.([%-"
483 static int check_capture (MatchState *ms, int l) {
484 l -= '1';
485 if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
486 return luaL_error(ms->L, "invalid capture index");
487 return l;
491 static int capture_to_close (MatchState *ms)
493 int level = ms->level;
494 for (level--; level>=0; level--)
495 if (ms->capture[level].len == CAP_UNFINISHED) return level;
496 return luaL_error(ms->L, "invalid pattern capture");
500 static const char *classend (MatchState *ms, const char *p)
502 switch (*p) {
503 case L_ESC:
504 if (!*++p) luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")");
505 break;
506 case '[':
507 /* if (*p == '^') p++; -- no effect */
508 do { /* look for a `]' */
509 if (!*p) luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")");
510 if (L_ESC == *(p++) && *p) p++; /* skip escapes (e.g. `%]') */
511 } while (']' != *p);
512 break;
513 default:
514 if (!ms->mb) break;
515 utf8_deco(&p, p+4);
516 return p;
518 return p+1;
519 } /* classend */
/*
 * The following macros are used for fast character category tests. The
 * x_BITS values are shifted right by the category value to determine whether
 * the given category is included in the set.
 *
 * Every mask is fully parenthesized so that it can be combined with any
 * operator. NUMBER_BITS used to lack the outer parentheses.
 */

#define LETTER_BITS ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) \
  | (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1 << OTHER_LETTER))

#define DIGIT_BITS (1 << DECIMAL_DIGIT_NUMBER)

#define NUMBER_BITS ((1 << DECIMAL_DIGIT_NUMBER) \
  | (1 << LETTER_NUMBER) | (1 << OTHER_NUMBER))

#define SPACE_BITS ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) \
  | (1 << PARAGRAPH_SEPARATOR))

#define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION)

#define PUNCT_BITS ((1 << CONNECTOR_PUNCTUATION) | \
  (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \
  (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \
  (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION))
547 /* character c matches class cl. undefined for cl not ascii */
548 static int match_class (int c, int cl, int mode)
550 int msk, res;
551 switch (0x20|cl /*tolower*/) {
552 case 'a' : msk = LETTER_BITS; break;
553 case 'c' : msk = 1<<CONTROL; break;
554 case 'x' : /* hexdigits */
555 if (0x40==(~0x3f&c)/*64-127*/ && 1&(0x7e/*a-f*/>>(0x1f&c))) goto matched;
556 case 'd' : msk = 1<<DECIMAL_DIGIT_NUMBER; mode=0;/* ASCII only */ break;
557 case 'l' : msk = 1<<LOWERCASE_LETTER; break;
558 case 'n' : msk = NUMBER_BITS; break; /* new */
559 case 'p' : msk = PUNCT_BITS; break;
560 case 's' :
561 #define STDSPACE /* standard "space" controls 9-13 */ \
562 (1<<9/*TAB*/|1<<10/*LF*/|1<<11/*VT*/|1<<12/*FF*/|1<<13/*CR*/)
563 if (!(~0x1f & c) && 1&(STDSPACE >> c)) goto matched;
564 msk = SPACE_BITS;
565 break;
566 case 'u' : msk = 1<<UPPERCASE_LETTER; break;
568 this is not compatible to lua 5.1, where %w is just a letter or a digit
569 case 'w' : msk = LETTER_BITS|NUMBER_BITS|CONNECTOR_BITS; break;
571 case 'w' : msk = LETTER_BITS|NUMBER_BITS; break;
572 case 'z' : if (!c) goto matched; msk = 0; break;
573 default: return cl == c;
575 res = 1 & (msk >> charcat(c));
576 if (!mode && 0x80&c) res = 0;
577 if (0) {
578 matched:
579 res = 1;
581 return 0x20&cl /*islower*/ ? res : !res;
582 } /* match_class */
585 /* decode single byte or UTF-8 seq; advance *s */
586 static unsigned deco (const MatchState *ms, const char **s, const char *e)
588 return ms->mb ? utf8_deco(s, e) : *(unsigned char*)(*s)++;
591 /* s must be < ms->src_end, p < ep */
592 static const char *singlematch (const MatchState *ms,
593 const char *s, const char *p, const char *ep)
595 int neg = 0;
596 unsigned c1, c2;
597 unsigned c;
598 #ifdef OPTIMIZE_SIZE
599 c = deco(ms, &s, ms->src_end);
600 #else
601 if (!ms->mb || !(0x80&*s))
602 c = *(unsigned char*)s++;
603 else
604 c = utf8_deco(&s, ms->src_end);
605 #endif
607 switch (*p) {
608 case L_ESC:
609 if (match_class(c, uchar(p[1]), ms->mode)) {
610 case '.': /* the all class */
611 #ifndef OPTIMIZE_SIZE
612 if (MODE_GRAPH != ms->mode) return s; /* common fast path */
613 #endif
614 goto matched_class;
616 s = 0;
617 break;
618 default:
619 #ifdef OPTIMIZE_SIZE
620 c1 = deco(ms, &p, ep);
621 #else
622 if (!ms->mb || !(0x80&*p))
623 c1 = *(unsigned char*)p++;
624 else
625 c1 = utf8_deco(&p, ep);
626 #endif
627 if (c != c1) s = 0;
628 break;
629 case '[': /* matchbracketclass */
630 ep--; /* now on the ']' */
631 if ((neg = '^' == *++p)) p++; /* skip the `^' */
632 while (p < ep) {
633 if (*p == L_ESC) {
634 if (match_class(c, uchar(*++p), ms->mode)) goto matched_class_in_brack;
635 p++;
636 continue;
638 c1 = deco(ms, &p, ep);
639 /* in lua-5.1 and 5.1.1 a trailing '-' is allowed
640 dynasm.lua relies on this
642 if ( ep <= p + 1 || '-' != *p ) {
643 const char *op = p, *es;
644 if (MODE_GRAPH == ms->mode) utf8_graphext(&p, ep);
645 if (c != c1) continue;
646 if (MODE_GRAPH != ms->mode) goto matched;
647 /* comp grapheme extension */
648 es = s;
649 utf8_graphext(&es, ms->src_end);
650 if (es-s == p-op && (es==s || !memcmp(s, op, es-s))) goto matched;
651 continue;
654 ++p;
655 /* range c1-c2 -- no extend support in range bounds... */
656 /* if (ep == ++p) break; see above */ /* bugger - trailing dash */
657 c2 = deco(ms, &p, ep);
658 if (c2 < c1) { unsigned swap=c1; c1=c2; c2=swap; }
659 if (c1 <= c && c <= c2) goto matched_class_in_brack; /* ...but extend match */
661 /* not matched */
662 neg = !neg;
663 matched:
664 if (neg) s = 0;
665 /* matchbracketclass */
667 return s;
668 matched_class_in_brack: /* matched %something or range in [] */
669 if (neg)
670 s = 0;
671 else {
672 matched_class: /* matched %something or . */
673 if (MODE_GRAPH == ms->mode) utf8_graphext(&s, ms->src_end);
675 return s;
679 static const char *match (MatchState *ms, const char *s, const char *p);
682 static const char *matchbalance (MatchState *ms, const char *s,
683 const char *p) {
684 if (*p == 0 || *(p+1) == 0)
685 luaL_error(ms->L, "unbalanced pattern");
686 if (*s != *p) return NULL;
687 else {
688 int b = *p;
689 int e = *(p+1);
690 int cont = 1;
691 while (++s < ms->src_end) {
692 if (*s == e) {
693 if (--cont == 0) return s+1;
695 else if (*s == b) cont++;
698 return NULL; /* string ends out of balance */
702 static const char *max_expand (MatchState *ms,
703 const char *s, const char *p, const char *ep)
705 const char *sp = s, *es;
706 while (sp<ms->src_end && (es = singlematch(ms, sp, p, ep)))
707 sp = es;
708 /* keeps trying to match with the maximum repetitions */
709 while (sp>=s) {
710 const char *res = match(ms, sp, ep+1);
711 if (res || sp==s) return res;
712 if (!ms->mb)
713 sp--; /* else didn't match; reduce 1 repetition to try again */
714 else {
715 unsigned code = utf8_oced(&sp, s);
716 if (MODE_GRAPH == ms->mode)
717 while (Grapheme_Extend(code) && sp>s) code = utf8_oced(&sp, s);
720 return NULL;
724 static const char *min_expand (MatchState *ms,
725 const char *s, const char *p, const char *ep)
727 do {
728 const char *res = match(ms, s, ep+1);
729 if (res) return res;
730 if (s >= ms->src_end) break;
731 } while ((s = singlematch(ms, s, p, ep))); /* try with one more repetition */
732 return NULL;
736 static const char *start_capture (MatchState *ms, const char *s,
737 const char *p, int what) {
738 const char *res;
739 int level = ms->level;
740 if (level >= LUA_MAXCAPTURES) luaL_error(ms->L, "too many captures");
741 ms->capture[level].init = s;
742 ms->capture[level].len = what;
743 ms->level = level+1;
744 if ((res=match(ms, s, p)) == NULL) /* match failed? */
745 ms->level--; /* undo capture */
746 return res;
750 static const char *end_capture (MatchState *ms, const char *s,
751 const char *p) {
752 int l = capture_to_close(ms);
753 const char *res;
754 ms->capture[l].len = s - ms->capture[l].init; /* close capture */
755 if ((res = match(ms, s, p)) == NULL) /* match failed? */
756 ms->capture[l].len = CAP_UNFINISHED; /* undo capture */
757 return res;
761 static const char *match_capture (MatchState *ms, const char *s, int l) {
762 size_t len;
763 l = check_capture(ms, l);
764 len = ms->capture[l].len;
765 if ((size_t)(ms->src_end-s) >= len &&
766 memcmp(ms->capture[l].init, s, len) == 0)
767 return s+len;
768 else return NULL;
772 static const char *match (MatchState *ms, const char *s, const char *p) {
773 init: /* using goto's to optimize tail recursion */
774 switch (*p) {
775 case '(': { /* start capture */
776 if (*(p+1) == ')') /* position capture? */
777 return start_capture(ms, s, p+2, CAP_POSITION);
778 else
779 return start_capture(ms, s, p+1, CAP_UNFINISHED);
781 case ')': { /* end capture */
782 return end_capture(ms, s, p+1);
784 case L_ESC: {
785 switch (*(p+1)) {
786 case 'b': { /* balanced string? */
787 s = matchbalance(ms, s, p+2);
788 if (s == NULL) return NULL;
789 p+=4; goto init; /* else return match(ms, s, p+4); */
791 #if 0 /* TODO */
792 case 'f': { /* frontier? */
793 const char *ep; char previous;
794 p += 2;
795 if (*p != '[')
796 luaL_error(ms->L, "missing " LUA_QL("[") " after "
797 LUA_QL("%%f") " in pattern" );
798 luaL_error(ms->L, "missing `[' after `%%f' in pattern");
799 ep = classend(ms, p); /* points to what is next */
800 /* with UTF-8, getting the previous is more complicated */
801 previous = (s == ms->src_init) ? '\0' : *(s-1);
802 /* use singlematch to apply all necessary magic */
803 if (singlematch(uchar(previous), p, ep-1) ||
804 !singlematch(uchar(*s), p, ep-1)) return NULL;
805 p=ep; goto init; /* else return match(ms, s, ep); */
807 #endif
808 default: {
809 if (isdigit(uchar(*(p+1)))) { /* capture results (%0-%9)? */
810 s = match_capture(ms, s, uchar(*(p+1)));
811 if (s == NULL) return NULL;
812 p+=2; goto init; /* else return match(ms, s, p+2) */
814 goto dflt; /* case default */
818 case '\0': { /* end of pattern */
819 return s; /* match succeeded */
821 case '$': {
822 if (*(p+1) == '\0') /* is the `$' the last char in pattern? */
823 return (s == ms->src_end) ? s : NULL; /* check end of string */
824 else goto dflt; /* ??? */
826 default: dflt: { /* it is a pattern item */
827 const char *ep = classend(ms, p); /* points to what is next */
828 const char *es = 0;
829 if (s < ms->src_end) es = singlematch(ms, s, p, ep);
830 switch (*ep) {
831 case '?': { /* optional */
832 const char *res;
833 if (es && (res=match(ms, es, ep+1))) return res;
834 p=ep+1; goto init; /* else return match(ms, s, ep+1); */
836 case '*': { /* 0 or more repetitions */
837 return max_expand(ms, s, p, ep);
839 case '+': { /* 1 or more repetitions */
840 return (es ? max_expand(ms, es, p, ep) : NULL);
842 case '-': { /* 0 or more repetitions (minimum) */
843 return min_expand(ms, s, p, ep);
845 default: {
846 if (!es) return NULL;
847 s=es; p=ep; goto init; /* else return match(ms, s+1, ep); */
/*
 * Find the first occurrence of the l2 bytes at s2 within the l1 bytes at
 * s1. Both may contain zero bytes. Returns a pointer to the occurrence,
 * s1 itself for an empty needle, or NULL when there is none.
 */
static const char *lmemfind (const char *s1, size_t l1,
                             const char *s2, size_t l2) {
  const char *hit;
  size_t tail;    /* needle length without its first byte */
  size_t span;    /* bytes in which an occurrence may still start */

  if (l2 == 0) return s1;    /* empty strings are everywhere */
  if (l2 > l1) return NULL;  /* avoids a negative span */

  tail = l2 - 1;             /* 1st char will be checked by `memchr' */
  span = l1 - tail;          /* `s2' cannot be found after that */
  while (span > 0 && (hit = (const char *)memchr(s1, *s2, span)) != NULL) {
    hit++;                   /* 1st char is already checked */
    if (memcmp(hit, s2+1, tail) == 0)
      return hit - 1;
    /* correct span and `s1' to try again */
    span -= (size_t)(hit - s1);
    s1 = hit;
  }
  return NULL;  /* not found */
}
878 static void push_onecapture (MatchState *ms, int i, const char *s,
879 const char *e )
881 if (i >= ms->level) {
882 if (i == 0) /* ms->level == 0, too */
883 lua_pushlstring(ms->L, s, e - s); /* add whole match */
884 else
885 luaL_error(ms->L, "invalid capture index");
887 else {
888 ptrdiff_t l = ms->capture[i].len;
889 if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture");
890 if (l == CAP_POSITION)
891 lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1);
892 else
893 lua_pushlstring(ms->L, ms->capture[i].init, l);
898 static int push_captures (MatchState *ms, const char *s, const char *e) {
899 int i;
900 int nlevels = (ms->level == 0 && s) ? 1 : ms->level;
901 luaL_checkstack(ms->L, nlevels, "too many captures");
902 for ( i = 0; i < nlevels; i++ )
903 push_onecapture( ms, i, s, e );
904 return nlevels; /* number of strings pushed */
908 static int unic_find_aux (lua_State *L, int find) {
909 size_t l1, l2;
910 const char *s = luaL_checklstring(L, 1, &l1);
911 const char *p = luaL_checklstring(L, 2, &l2);
912 ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1;
913 if (init < 0) init = 0;
914 else if ((size_t)(init) > l1) init = (ptrdiff_t)l1;
915 if (find && (lua_toboolean(L, 4) || /* explicit request? */
916 strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */
917 /* do a plain search */
918 const char *s2 = lmemfind(s+init, l1-init, p, l2);
919 if (s2) {
920 lua_pushinteger(L, s2-s+1);
921 lua_pushinteger(L, s2-s+l2);
922 return 2;
925 else {
926 MatchState ms;
927 int anchor = (*p == '^') ? (p++, 1) : 0;
928 const char *s1=s+init;
929 unsigned char u8_lenght = U8_LENGTH( (unsigned char)s[0] );
930 ms.L = L;
931 ms.src_init = s;
932 ms.src_end = s+l1;
933 ms.mode = lua_tointeger(L, lua_upvalueindex(1));
934 ms.mb = MODE_MBYTE(ms.mode);
937 /* LS/HH : patch for tracker issue 869, concerning "%s" match of à; the old code */
938 /* increments by 1 on a failure and can end up in the middle of an utf sequence */
939 /* so this was a major bug. */
941 do {
942 const char *res;
943 ms.level = 0;
944 if ((res=match(&ms, s1, p)) != NULL) {
945 if (find) {
946 lua_pushinteger(L, s1-s+1); /* start */
947 lua_pushinteger(L, res-s); /* end */
948 return push_captures(&ms, NULL, 0) + 2;
949 } else
950 return push_captures(&ms, s1, res);
952 s1 = s1 + (ms.mode > MODE_LATIN ? U8_LENGTH( uchar(s1[0])) : 1) ;
953 } while (s1 < ms.src_end && !anchor);
955 lua_pushnil(L); /* not found */
956 return 1;
959 static int unic_find (lua_State *L) {
960 return unic_find_aux(L, 1);
964 static int unic_match (lua_State *L) {
965 return unic_find_aux(L, 0);
970 static int gmatch_aux (lua_State *L) {
971 MatchState ms;
972 size_t ls;
973 const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls);
974 const char *p = lua_tostring(L, lua_upvalueindex(2));
975 const char *src;
976 ms.L = L;
977 ms.src_init = s;
978 ms.src_end = s+ls;
979 ms.mode = lua_tointeger(L, lua_upvalueindex(4));
980 ms.mb = MODE_MBYTE(ms.mode);
981 for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3));
982 src <= ms.src_end;
983 src++)
985 const char *e;
986 ms.level = 0;
987 if ((e = match(&ms, src, p)) != NULL) {
988 lua_Integer newstart = e-s;
989 if (e == src) newstart++; /* empty match? go at least one position */
990 lua_pushinteger(L, newstart);
991 lua_replace(L, lua_upvalueindex(3));
992 return push_captures(&ms, src, e);
995 return 0; /* not found */
1000 static int gmatch (lua_State *L) {
1001 luaL_checkstring(L, 1);
1002 luaL_checkstring(L, 2);
1003 lua_settop(L, 2);
1004 lua_pushinteger(L, 0);
1005 lua_pushinteger(L, lua_upvalueindex(1));
1006 lua_pushcclosure(L, gmatch_aux, 4);
1007 return 1;
1010 static int gfind_nodef (lua_State *L) {
1011 return luaL_error(L, LUA_QL("string.gfind") " was renamed to "
1012 LUA_QL("string.gmatch"));
1016 static void add_s (MatchState *ms, luaL_Buffer *b,
1017 const char *s, const char *e)
1019 size_t l, i;
1020 const char *news = lua_tolstring(ms->L, 3, &l);
1021 for (i = 0; i < l; i++) {
1022 if (news[i] != L_ESC)
1023 luaL_addchar(b, news[i]);
1024 else {
1025 i++; /* skip ESC */
1026 if (!isdigit(uchar(news[i])))
1027 luaL_addchar(b, news[i]);
1028 else if (news[i] == '0')
1029 luaL_addlstring(b, s, e - s);
1030 else {
1031 push_onecapture(ms, news[i] - '1', s, e);
1032 luaL_addvalue(b); /* add capture to accumulated result */
1038 static void add_value (MatchState *ms, luaL_Buffer *b, const char *s,
1039 const char *e)
1041 lua_State *L = ms->L;
1042 switch (lua_type(L, 3)) {
1043 case LUA_TNUMBER:
1044 case LUA_TSTRING: {
1045 add_s(ms, b, s, e);
1046 return;
1048 case LUA_TFUNCTION: {
1049 int n;
1050 lua_pushvalue(L, 3);
1051 n = push_captures(ms, s, e);
1052 lua_call(L, n, 1);
1053 break;
1055 case LUA_TTABLE: {
1056 push_onecapture(ms, 0, s, e);
1057 lua_gettable(L, 3);
1058 break;
1060 default: {
1061 luaL_argerror(L, 3, "string/function/table expected");
1062 return;
1065 if (!lua_toboolean(L, -1)) { /* nil or false? */
1066 lua_pop(L, 1);
1067 lua_pushlstring(L, s, e - s); /* keep original text */
1069 else if (!lua_isstring(L, -1))
1070 luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1));
1071 luaL_addvalue(b); /* add result to accumulator */
1074 static int unic_gsub (lua_State *L) {
1075 size_t srcl;
1076 const char *src = luaL_checklstring(L, 1, &srcl);
1077 const char *p = luaL_checkstring(L, 2);
1078 int max_s = luaL_optint(L, 4, srcl+1);
1079 int anchor = (*p == '^') ? (p++, 1) : 0;
1080 int n = 0;
1081 MatchState ms;
1082 luaL_Buffer b;
1083 luaL_buffinit(L, &b);
1084 ms.L = L;
1085 ms.src_init = src;
1086 ms.src_end = src+srcl;
1087 ms.mode = lua_tointeger(L, lua_upvalueindex(1));
1088 ms.mb = MODE_MBYTE(ms.mode);
1089 while (n < max_s) {
1090 const char *e;
1091 ms.level = 0;
1092 e = match(&ms, src, p);
1093 if (e) {
1094 n++;
1095 add_value(&ms, &b, src, e);
1097 if (e && e>src) /* non empty match? */
1098 src = e; /* skip it */
1099 else if (src < ms.src_end)
1100 luaL_addchar(&b, *src++);
1101 else break;
1102 if (anchor) break;
1104 luaL_addlstring(&b, src, ms.src_end-src);
1105 luaL_pushresult(&b);
1106 lua_pushinteger(L, n); /* number of substitutions */
1107 return 2;
1110 /* }====================================================== */
/* upper bound for one formatted item (> len(format('%99.99f', -1e308))) */
#define MAX_ITEM	512

/* characters accepted as flags in a format specification */
#define FLAGS	"-+ #0"

/*
** upper bound for one format specification (such as '%-099.99d')
** (+10 accounts for %99.99x plus margin of error)
*/
#define MAX_FORMAT	(sizeof(FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
1124 static void addquoted (lua_State *L, luaL_Buffer *b, int arg) {
1125 size_t l;
1126 const char *s = luaL_checklstring(L, arg, &l);
1127 luaL_addchar(b, '"');
1128 while (l--) {
1129 switch (*s) {
1130 case '"': case '\\': case '\n': {
1131 luaL_addchar(b, '\\');
1132 luaL_addchar(b, *s);
1133 break;
1135 case '\r': {
1136 luaL_addlstring(b, "\\r", 2);
1137 break;
1139 case '\0': {
1140 luaL_addlstring(b, "\\000", 4);
1141 break;
1143 default: {
1144 luaL_addchar(b, *s);
1145 break;
1148 s++;
1150 luaL_addchar(b, '"');
1154 static const char *scanformat (lua_State *L, const char *strfrmt, char *form,
1155 int *hasprecision)
1157 const char *p = strfrmt;
1158 while (strchr(FLAGS, *p)) p++; /* skip flags */
1159 if ((size_t)(p - strfrmt) >= sizeof(FLAGS))
1160 luaL_error(L, "invalid format (repeated flags)");
1161 if (isdigit(uchar(*p))) p++; /* skip width */
1162 if (isdigit(uchar(*p))) p++; /* (2 digits at most) */
1163 if (*p == '.') {
1164 p++;
1165 *hasprecision = 1;
1166 if (isdigit(uchar(*p))) p++; /* skip precision */
1167 if (isdigit(uchar(*p))) p++; /* (2 digits at most) */
1169 if (isdigit(uchar(*p)))
1170 luaL_error(L, "invalid format (width or precision too long)");
1171 form[0] = L_ESC;
1172 strncpy(form+1, strfrmt, p-strfrmt+1);
1173 form[p-strfrmt+2] = 0;
1174 return p;
1177 static void addintlen (char *form) {
1178 size_t l = strlen(form);
1179 char spec = form[l - 1];
1180 strcpy(form + l - 1, LUA_INTFRMLEN);
1181 form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
1182 form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
1185 static int str_format (lua_State *L) {
1186 int arg = 1;
1187 size_t sfl;
1188 const char *strfrmt = luaL_checklstring(L, arg, &sfl);
1189 const char *strfrmt_end = strfrmt+sfl;
1190 luaL_Buffer b;
1191 luaL_buffinit(L, &b);
1192 while (strfrmt < strfrmt_end) {
1193 if (*strfrmt != L_ESC)
1194 luaL_addchar(&b, *strfrmt++);
1195 else if (*++strfrmt == L_ESC)
1196 luaL_addchar(&b, *strfrmt++); /* %% */
1197 else { /* format item */
1198 char form[MAX_FORMAT]; /* to store the format (`%...') */
1199 char buff[MAX_ITEM]; /* to store the formatted item */
1200 int hasprecision = 0;
1201 arg++;
1202 strfrmt = scanformat(L, strfrmt, form, &hasprecision);
1203 switch (*strfrmt++) {
1204 case 'c': {
1205 #ifdef LUA_USE_SNPRINTF
1206 snprintf( buff, MAX_ITEM, form,
1207 (int) luaL_checknumber( L, arg ) );
1208 #else
1209 sprintf(buff, form, (int) luaL_checknumber( L, arg ) );
1210 #endif
1211 break;
1213 case 'd': case 'i': {
1214 addintlen( form );
1215 #ifdef LUA_USE_SNPRINTF
1216 snprintf( buff, MAX_ITEM, form,
1217 (LUA_INTFRM_T) luaL_checknumber(L, arg) );
1218 #else
1219 sprintf(buff, form,
1220 (LUA_INTFRM_T) luaL_checknumber(L, arg) );
1221 #endif
1222 break;
1224 case 'o': case 'u': case 'x': case 'X': {
1225 addintlen( form );
1226 #ifdef LUA_USE_SNPRINTF
1227 snprintf(buff, MAX_ITEM, form,
1228 (unsigned LUA_INTFRM_T) luaL_checknumber(L, arg) );
1229 #else
1230 sprintf(buff, form,
1231 (unsigned LUA_INTFRM_T) luaL_checknumber(L, arg) );
1232 #endif
1233 break;
1235 case 'e': case 'E': case 'f':
1236 case 'g': case 'G': {
1237 #ifndef LUA_NUMBER_DOUBLE
1238 luaL_argerror( L, 1, "double formatting not supported" );
1239 #else
1240 # ifdef __dietlibc__
1241 # warning "double formatting is broken in dietlibc"
1242 # endif
1243 # ifdef LUA_USE_SNPRINTF
1244 snprintf(buff, MAX_ITEM, form,
1245 (double) luaL_checknumber(L, arg) );
1246 # else
1247 sprintf(buff, form, (double) luaL_checknumber(L, arg) );
1248 # endif
1249 #endif
1250 break;
1252 case 'q': {
1253 addquoted(L, &b, arg);
1254 continue; /* skip the `addsize' at the end */
1256 case 's': {
1257 size_t l;
1258 const char *s = luaL_checklstring(L, arg, &l);
1259 if (!hasprecision && l >= 100) {
1260 /* no precision and string is too long to be formatted;
1261 keep original string */
1262 lua_pushvalue(L, arg);
1263 luaL_addvalue(&b);
1264 continue; /* skip the `addsize' at the end */
1266 else {
1267 #ifdef LUA_USE_SNPRINTF
1268 snprintf(buff, MAX_ITEM, form, s);
1269 #else
1270 sprintf(buff, form, s);
1271 #endif
1272 break;
1275 default: { /* also treat cases `pnLlh' */
1276 return luaL_error(L, "invalid option " LUA_QL("%%%c") " to "
1277 LUA_QL("format"), *(strfrmt - 1));
1280 luaL_addlstring(&b, buff, strlen(buff));
1283 luaL_pushresult(&b);
1284 return 1;
#ifdef WANT_EXT_MATCH
/* mode constants paired with the field names luaopen_unicode stores them under */
static struct { const char *k; int v; } unicflags[] = {
  { "ASCII", MODE_ASCII },
  { "LATIN", MODE_LATIN },
  { "UTF8",  MODE_UTF8  },
  { "GRAPH", MODE_GRAPH }
};
#define unicflags_sz ( sizeof( unicflags ) / sizeof( unicflags[0] ) )

/*
** allow direct match calls from C
**   state  the lua_State to run the matcher with
**   s, n   subject string and its length in bytes
**   p      pattern; a leading '^' anchors it at the start position
**   init   start position, passed through posrelat and clamped to [0, n]
**   mode   one of the MODE_* constants
** Returns 1 when the pattern matches, 0 otherwise.
*/
int ext_uni_match ( void *state, const char *s, size_t n,
                    const char *p, int init, int mode )
{
  lua_State *L = state;
  MatchState ms;
  const char *cur;
  int anchored = 0;
  int start = posrelat( init, n ) - 1;
  if (*p == '^') {
    anchored = 1;
    p++;
  }
  if (start < 0)
    start = 0;
  else if ((size_t)(start) > n)
    start = (ptrdiff_t)n;
  cur = s + start;
  ms.L = L;
  ms.src_init = s;
  ms.src_end = s + n;
  ms.mode = mode;
  ms.mb = MODE_MBYTE(mode);
  for (;;) {
    ms.level = 0;
    if ( match(&ms, cur, p) != NULL )
      return 1;
    /* give up after trying the end of the subject, or at once when anchored */
    if ( cur >= ms.src_end || anchored )
      break;
    cur++;
  }
  return 0;
}
#endif
1325 static const luaL_Reg uniclib[] = {
1326 {"byte", unic_byte}, /* no cluster ! */
1327 {"char", unic_char},
1328 {"dump", str_dump},
1329 {"find", unic_find}, /* cluster */
1330 {"format", str_format},
1331 {"gfind", gfind_nodef},
1332 {"gmatch", gmatch}, /* cluster */
1333 {"gsub", unic_gsub}, /* cluster */
1334 {"len", unic_len}, /* cluster/byte opt. */
1335 {"lower", unic_lower},
1336 {"match", unic_match}, /* cluster */
1337 {"rep", str_rep},
1338 {"reverse", str_reverse},
1339 {"sub", unic_sub}, /* cluster/byte opt. */
1340 {"upper", unic_upper},
1341 {NULL, NULL}
#if defined( SLNUNICODE_AS_STRING ) && defined( STRING_WITH_METAT )
/*
** Gives strings a metatable whose __index is the table found on top of the
** stack when this function is called.  The stack is left as it was found.
*/
static void createmetatable (lua_State *L)
{
  lua_newtable(L);                 /* the metatable */
  lua_pushvalue(L, -2);            /* the library table... */
  lua_setfield(L, -2, "__index");  /* ...becomes metatable.__index */
  lua_pushliteral(L, "");          /* any string will do as carrier */
  lua_insert(L, -2);               /* order is now: string, metatable */
  lua_setmetatable(L, -2);         /* attach it (pops the metatable) */
  lua_pop(L, 1);                   /* drop the carrier string */
}
#endif
1358 ** Open string library
1360 LUALIB_API int luaopen_unicode (lua_State *L) {
1361 /* register unicode itself so require("unicode") works */
1362 luaL_register(L, SLN_UNICODENAME,
1363 uniclib + (sizeof uniclib/sizeof uniclib[0] - 1)); /* empty func list */
1364 lua_pop(L, 1);
1365 lua_getglobal(L,SLN_UNICODENAME);
1366 lua_newtable(L);
1367 lua_pushinteger(L, MODE_ASCII);
1368 luaL_setfuncs(L, uniclib, 1);
1369 lua_setfield(L, -2, "ascii");
1371 lua_newtable(L);
1372 lua_pushinteger(L, MODE_LATIN);
1373 luaL_setfuncs(L, uniclib, 1);
1374 lua_setfield(L, -2, "latin1");
1376 lua_newtable(L);
1377 lua_pushinteger(L, MODE_GRAPH);
1378 luaL_setfuncs(L, uniclib, 1);
1379 lua_setfield(L, -2, "grapheme");
1381 lua_newtable(L);
1382 lua_pushinteger(L, MODE_UTF8);
1383 luaL_setfuncs(L, uniclib, 1);
1384 lua_setfield(L, -2, "utf8");
1386 #ifdef WANT_EXT_MATCH
1388 unsigned i;
1389 const char *ln = SLN_UNICODENAME ".mode";
1390 luaL_findtable( L, LUA_REGISTRYINDEX, "_LOADED", 1 );
1391 lua_getfield( L, -1, ln );
1392 if ( !lua_istable(L, -1 ) ) {
1393 lua_pop( L, 1 );
1394 if ( luaL_findtable( L, LUA_GLOBALSINDEX, ln, unicflags_sz ) )
1395 luaL_error( L, "name conflict for module " LUA_QS, ln );
1396 lua_pushvalue( L, -1 );
1397 lua_setfield( L, -3, ln );
1399 lua_remove( L, -2 );
1400 for( i = 0; unicflags_sz > i; ++i ) {
1401 lua_pushnumber( L, unicflags[i].v );
1402 lua_setfield( L, -2, unicflags[i].k );
1405 #endif
1406 return 1;