1 /* lstrlibext.c for luajittex
3 Copyright 2013 Luigi Scarso
5 Code from lstrlibext.c for LuaTeX
6 Original version copyright 2012 Taco Hoekwater <taco@luatex.org>
8 This file is part of LuajitTeX.
10 LuajitTeX is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2 of the License, or (at your
13 option) any later version.
15 LuajitTeX is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
18 License for more details.
20 You should have received a copy of the GNU General Public License along
21 with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
36 /*#include "lua51/lj_obj.h"*/
37 /*#include "lua51/lj_gc.h"*/
38 /*#include "lua51/lj_err.h"*/
39 /*#include "lua51/lj_str.h"*/
40 /*#include "lua51/lj_tab.h"*/
41 /*#include "lua51/lj_meta.h"*/
42 /*#include "lua51/lj_state.h"*/
43 /*#include "lua51/lj_ff.h" */
44 /*#include "lua51/lj_bcdump.h"*/
45 /*#include "lua51/lj_char.h"*/
46 /*#include "lua51/lj_lib.h"*/
/*
 * Iterator step behind str_bytepairs.
 *
 * State lives in the closure's upvalues: upvalue 1 is the subject string
 * (its length is fetched into `ls`), upvalue 2 is the current byte index.
 * A step stores an advanced index back into upvalue 2 and pushes the byte
 * at `ind` ("byte one") followed by either the byte at `ind+1`
 * ("byte two") or nil ("odd string length").
 *
 * NOTE(review): the declarations, the conditions that choose between the
 * alternative pushes below, and the success `return` are not visible in
 * this excerpt; only the `return 0` taken when the string has ended is.
 */
50 static int bytepairs_aux (lua_State
*L
) {
53 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
54 int ind
= lua_tointeger(L
, lua_upvalueindex(2));
/* Next index: two bytes further, or one further (alternative push below). */
57 lua_pushinteger(L
, (ind
+2)); /* iterator */
59 lua_pushinteger(L
, (ind
+1)); /* iterator */
/* lua_replace pops the value just pushed and stores it as upvalue 2. */
61 lua_replace(L
, lua_upvalueindex(2));
/* The cast to unsigned char makes the pushed byte value non-negative. */
62 i
= (unsigned char)*(s
+ind
);
63 lua_pushinteger(L
, i
); /* byte one */
65 i
= (unsigned char)*(s
+ind
+1);
66 lua_pushinteger(L
, i
); /* byte two */
68 lua_pushnil(L
); /* odd string length */
72 return 0; /* string ended */
/*
 * Builds the byte-pair iterator; listed in strlibext under the name
 * "bytepairs".
 *
 * Argument 1 must be a string (luaL_checkstring raises a Lua error
 * otherwise). A start index of 0 is pushed and bytepairs_aux is wrapped in
 * a C closure that captures the two topmost stack values as upvalues; the
 * 0 is the second one, which bytepairs_aux reads via lua_upvalueindex(2).
 *
 * NOTE(review): the first upvalue is presumably the string argument; the
 * statement between the check and the push, and the return, are not
 * visible in this excerpt.
 */
76 static int str_bytepairs (lua_State
*L
) {
77 luaL_checkstring(L
, 1);
79 lua_pushinteger(L
, 0);
80 lua_pushcclosure(L
, bytepairs_aux
, 2);
/*
 * Iterator step behind str_bytes.
 *
 * Reads the subject string (and its length, into `ls`) from upvalue 1 and
 * the current index from upvalue 2, stores `ind+1` back into upvalue 2 and
 * pushes the byte at `ind` as an integer.
 *
 * NOTE(review): the bounds check guarding these statements and the success
 * `return` are not visible in this excerpt; only the `return 0` taken when
 * the string has ended is.
 */
85 static int bytes_aux (lua_State
*L
) {
88 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
89 int ind
= lua_tointeger(L
, lua_upvalueindex(2));
/* Advance the stored index by one byte. */
91 lua_pushinteger(L
, (ind
+1)); /* iterator */
92 lua_replace(L
, lua_upvalueindex(2));
/* The cast to unsigned char makes the pushed byte value non-negative. */
93 i
= (unsigned char)*(s
+ind
);
94 lua_pushinteger(L
, i
); /* byte */
97 return 0; /* string ended */
/*
 * Builds the single-byte iterator; listed in strlibext under the name
 * "bytes".
 *
 * Checks that argument 1 is a string, pushes the start index 0 and wraps
 * bytes_aux in a C closure with two upvalues (the two topmost stack
 * values; the 0 is the second).
 *
 * NOTE(review): the first upvalue is presumably the string argument; the
 * remaining statements of this function are not visible in this excerpt.
 */
101 static int str_bytes (lua_State
*L
) {
102 luaL_checkstring(L
, 1);
104 lua_pushinteger(L
, 0);
105 lua_pushcclosure(L
, bytes_aux
, 2);
/*
 * Shared failure path of utfcharacters_aux.
 *
 * Stores `new_ind` as the iterator's next index (upvalue 2) and pushes the
 * three bytes EF BF BD, i.e. the UTF-8 encoding of U+FFFD, as a Lua string
 * in place of the malformed sequence.
 *
 * NOTE(review): `fffd` is a plain, non-const `char` array initialised with
 * values above 0x7F; where `char` is signed these constants do not fit the
 * element type. The return statement is not visible in this excerpt.
 */
110 static int utf_failed(lua_State
*L
, int new_ind
) {
111 static char fffd
[3] = {0xEF,0xBF,0xBD};
112 lua_pushinteger(L
, new_ind
); /* iterator */
113 lua_replace(L
, lua_upvalueindex(2));
114 lua_pushlstring(L
, fffd
, 3);
/*
 * Iterator step behind str_utfcharacters: yields the next UTF-8 sequence
 * of the subject string as a string.
 *
 * Upvalue 1 is the subject string, upvalue 2 the current byte index.
 * The lead byte is matched with (c & mask[j]) == mequ[j]:
 *   j = 0  0xxxxxxx   j = 1  110xxxxx   j = 2  1110xxxx   j = 3  11110xxx
 * so `j` is the number of follow bytes and the sequence is 1+j bytes long.
 * Failures are delegated to utf_failed with the index to resume from.
 *
 * NOTE(review): the declarations and the loop that steps `j` through the
 * tables are not visible in this excerpt.
 */
119 static int utfcharacters_aux (lua_State
*L
) {
120 static const unsigned char mask
[4] = {0x80,0xE0,0xF0,0xF8};
121 static const unsigned char mequ
[4] = {0x00,0xC0,0xE0,0xF0};
125 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
126 int ind
= lua_tointeger(L
, lua_upvalueindex(2));
127 if (ind
>=(int)ls
) return 0; /* end of string */
128 c
= (unsigned) s
[ind
];
130 if ((c
&mask
[j
])==mequ
[j
]) {
/* Truncated sequence: resume at the end of the string. */
132 if (ind
+1+j
>(int)ls
) return utf_failed(L
,ls
); /* will not fit */
/* Each of the j follow bytes must have the form 10xxxxxx. */
133 for (k
=1; k
<=j
; k
++) {
134 c
= (unsigned) s
[ind
+k
];
135 if ((c
&0xC0)!=0x80) return utf_failed(L
,ind
+k
); /* bad follow */
/* Well-formed: step past the sequence and push its 1+j bytes. */
137 lua_pushinteger(L
, ind
+1+j
); /* iterator */
138 lua_replace(L
, lua_upvalueindex(2));
139 lua_pushlstring(L
, s
+ind
, 1+j
);
/* No lead-byte pattern matched: skip this single byte. */
143 return utf_failed(L
,ind
+1); /* we found a follow byte! */
/*
 * Builds the UTF-8 character iterator; listed in strlibext under the name
 * "utfcharacters".
 *
 * Checks that argument 1 is a string, pushes the start index 0 and wraps
 * utfcharacters_aux in a C closure with two upvalues (the two topmost
 * stack values; the 0 is the second).
 *
 * NOTE(review): the first upvalue is presumably the string argument; the
 * remaining statements of this function are not visible in this excerpt.
 */
147 static int str_utfcharacters (lua_State
*L
) {
148 luaL_checkstring(L
, 1);
150 lua_pushinteger(L
, 0);
151 lua_pushcclosure(L
, utfcharacters_aux
, 2);
/*
 * Iterator step behind str_utfvalues: decodes the next UTF-8 sequence of
 * the subject string into an integer value.
 *
 * Upvalue 1 is the subject string, upvalue 2 the current byte index.
 * `v` starts as 0xFFFD and is only overwritten by a branch whose length
 * and follow-byte checks succeed. The visible branches handle lead bytes
 * >= 0xF0 (three follow bytes), >= 0xE0 (two) and >= 0xC0 (one); each
 * follow byte contributes its value minus 128, weighted by powers of 64.
 * Afterwards `ind + numbytes` is stored in upvalue 2 and `v` is pushed.
 *
 * NOTE(review): follow bytes are only tested with `>= 0x80`, so values of
 * 0xC0 and above are accepted too. The declarations of i/j/k/l/numbytes,
 * the single-byte branch, the updates of `numbytes` and the success
 * `return` are not visible in this excerpt.
 */
157 static int utfvalues_aux (lua_State
*L
) {
163 unsigned int v
= 0xFFFD;
165 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
166 int ind
= lua_tointeger(L
, lua_upvalueindex(2));
/* Four-byte form: needs three more bytes inside the string. */
172 } else if (i
>=0xF0) {
173 if ((ind
+3)<(int)ls
&& ((unsigned)*(s
+ind
+1))>=0x80
174 && ((unsigned)*(s
+ind
+2))>=0x80 && ((unsigned)*(s
+ind
+3))>=0x80) {
176 j
= ((unsigned)*(s
+ind
+1))-128;
177 k
= ((unsigned)*(s
+ind
+2))-128;
178 l
= ((unsigned)*(s
+ind
+3))-128;
179 v
= (((((i
-0xF0)*64) + j
)*64) + k
)*64 + l
;
/* Three-byte form. */
181 } else if (i
>=0xE0) {
182 if ((ind
+2)<(int)ls
&& ((unsigned)*(s
+ind
+1))>=0x80 && ((unsigned)*(s
+ind
+2))>=0x80) {
184 j
= ((unsigned)*(s
+ind
+1))-128;
185 k
= ((unsigned)*(s
+ind
+2))-128;
186 v
= (((i
-0xE0)*64) + j
)*64 + k
;
/* Two-byte form. */
189 } else if (i
>=0xC0) {
190 if ((ind
+1)<(int)ls
&& ((unsigned)*(s
+ind
+1))>=0x80) {
192 j
= ((unsigned)*(s
+ind
+1))-128;
193 v
= ((i
-0xC0)*64) + j
;
/* Advance the stored index by the bytes consumed, then yield the value. */
196 lua_pushinteger(L
, (ind
+numbytes
)); /* iterator */
197 lua_replace(L
, lua_upvalueindex(2));
198 lua_pushinteger(L
, v
);
201 return 0; /* string ended */
/*
 * Builds the UTF-8 value iterator; listed in strlibext under the name
 * "utfvalues".
 *
 * Checks that argument 1 is a string, pushes the start index 0 and wraps
 * utfvalues_aux in a C closure with two upvalues (the two topmost stack
 * values; the 0 is the second).
 *
 * NOTE(review): the first upvalue is presumably the string argument; the
 * remaining statements of this function are not visible in this excerpt.
 */
205 static int str_utfvalues (lua_State
*L
) {
206 luaL_checkstring(L
, 1);
208 lua_pushinteger(L
, 0);
209 lua_pushcclosure(L
, utfvalues_aux
, 2);
/*
 * Iterator step behind str_characterpairs: yields bytes of the subject
 * string two at a time, each as a one-character string.
 *
 * Upvalue 1 is the subject string, upvalue 2 the current byte index.
 * The byte at `ind` is copied into the scratch buffer `b` and pushed as a
 * string of length 1. A second string is pushed either from `b` again
 * (length 1) or as the zero-length string at `b+1`.
 *
 * NOTE(review): the declaration of `b`, the statement that presumably
 * refills it before the second push, the conditions choosing between the
 * alternatives and the success `return` are not visible in this excerpt.
 */
214 static int characterpairs_aux (lua_State
*L
) {
217 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
218 int ind
= lua_tointeger(L
, lua_upvalueindex(2));
/* Next index: two bytes further, or one further (alternative push below). */
221 lua_pushinteger(L
, (ind
+2)); /* iterator */
223 lua_pushinteger(L
, (ind
+1)); /* iterator */
225 lua_replace(L
, lua_upvalueindex(2));
226 b
[0] = *(s
+ind
); b
[1] = 0;
227 lua_pushlstring(L
, b
, 1);
230 lua_pushlstring(L
, b
, 1);
/* Length 0: pushes an empty string. */
232 lua_pushlstring(L
, b
+1, 0);
236 return 0; /* string ended */
/*
 * Builds the character-pair iterator; listed in strlibext under the name
 * "characterpairs".
 *
 * Checks that argument 1 is a string, pushes the start index 0 and wraps
 * characterpairs_aux in a C closure with two upvalues (the two topmost
 * stack values; the 0 is the second).
 *
 * NOTE(review): the first upvalue is presumably the string argument; the
 * remaining statements of this function are not visible in this excerpt.
 */
240 static int str_characterpairs (lua_State
*L
) {
241 luaL_checkstring(L
, 1);
243 lua_pushinteger(L
, 0);
244 lua_pushcclosure(L
, characterpairs_aux
, 2);
/*
 * Iterator step behind str_characters: yields the next byte of the subject
 * string as a one-character string.
 *
 * Upvalue 1 is the subject string, upvalue 2 the current byte index.
 * `ind+1` is stored back into upvalue 2, then the byte at `ind` is copied
 * into the scratch buffer `b` and pushed as a string of length 1.
 *
 * NOTE(review): the declaration of `b`, the bounds check guarding these
 * statements and the success `return` are not visible in this excerpt.
 */
249 static int characters_aux (lua_State
*L
) {
252 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
253 int ind
= lua_tointeger(L
, lua_upvalueindex(2));
255 lua_pushinteger(L
, (ind
+1)); /* iterator */
256 lua_replace(L
, lua_upvalueindex(2));
257 b
[0] = *(s
+ind
); b
[1] = 0;
258 lua_pushlstring(L
, b
, 1);
261 return 0; /* string ended */
/*
 * Builds the single-character iterator; listed in strlibext under the name
 * "characters".
 *
 * Checks that argument 1 is a string, pushes the start index 0 and wraps
 * characters_aux in a C closure with two upvalues (the two topmost stack
 * values; the 0 is the second).
 *
 * NOTE(review): the first upvalue is presumably the string argument; the
 * remaining statements of this function are not visible in this excerpt.
 */
265 static int str_characters (lua_State
*L
) {
266 luaL_checkstring(L
, 1);
268 lua_pushinteger(L
, 0);
269 lua_pushcclosure(L
, characters_aux
, 2);
/*
 * Splits a string into a table of pieces; listed in strlibext under the
 * name "explode".
 *
 * Argument 1 is the subject string (length in `l`), argument 2 an optional
 * separator specification defaulting to " +". Only the first character of
 * `joiner` is compared against the data; a second character of '+' selects
 * a separate branch, and the later `while` loop compares the byte after a
 * separator with the separator again. Pieces are pushed as strings and
 * stored with lua_rawseti into the table at stack index -2 under the
 * running index `n`.
 *
 * NOTE(review): large parts of this function (declarations, table
 * creation, the copy into the malloc'ed buffer, loop headers, frees and
 * returns) are not visible in this excerpt, so the description above is
 * limited to the statements shown.
 */
273 static int str_split (lua_State
*L
) {
279 const char *s
= luaL_checklstring(L
, 1, &l
);
280 const char *joiner
= luaL_optstring(L
, 2, " +");
/* Working buffer of l+1 bytes; `orig` keeps the start of the allocation. */
287 orig
= p
= malloc(l
+1);
/* NOTE(review): "%u" is paired with an (int) argument here. */
289 fprintf(stderr
, "fatal: memory exhausted (malloc of %u bytes).\n",(int)(l
+1));
/* Stores the single byte at q as its own table entry, then advances. */
298 lua_pushlstring(L
,q
,1); q
++;
299 lua_rawseti(L
,-2,n
); n
++;
304 if (*(joiner
+1) == '+') {
/* Separator found at p[i]: emit the piece that runs from q up to it. */
313 if (*(p
+i
)==*joiner
) {
315 lua_pushlstring(L
,q
,((p
+i
)-q
));
316 lua_rawseti(L
,-2,n
); n
++;
318 while(*(p
+i
+1)==*joiner
) {
/* In `mult` mode, q sitting exactly at the end of the data is special-cased. */
325 if (mult
&& q
==(p
+l
)) {
/* Pushes what remains at q, measured with strlen. */
330 lua_pushlstring(L
,q
,strlen(q
));
/*
 * Registration table: maps the Lua-visible function names to the C entry
 * points defined in this file. open_strlibext walks it and installs each
 * entry as a field of the global "string" table. Note that str_split is
 * exposed under the name "explode".
 *
 * NOTE(review): the terminating entry that open_strlibext's loop relies on
 * (a null `name`) is not visible in this excerpt.
 */
338 static const luaL_Reg strlibext
[] = {
339 {"utfvalues", str_utfvalues
},
340 {"utfcharacters", str_utfcharacters
},
341 {"characters", str_characters
},
342 {"characterpairs", str_characterpairs
},
343 {"bytes", str_bytes
},
344 {"bytepairs", str_bytepairs
},
345 {"explode", str_split
},
346 /* {"dump", str_dump} already in luajit */
353 /* ------------------------------------------------------------------------ */
355 /* lj_libdef.h is generated by buildvm, it's not available on source */
356 /* #include "lua51/lj_libdef.h" */
358 /* LUALIB_API int luaopen_string(lua_State *L) */
361 /* global_State *g; */
362 /* LJ_LIB_REG(L, LUA_STRLIBNAME, string); */
363 /* luaL_register(L, LUA_STRLIBNAME, strlib); */
364 /* //LJ_LIB_REG(L, LUA_STRLIBNAME, strlib); */
365 /* #if defined(LUA_COMPAT_GFIND) && !LJ_52 */
366 /* lua_getfield(L, -1, "gmatch"); */
367 /* lua_setfield(L, -2, "gfind"); */
369 /* mt = lj_tab_new(L, 0, 1); */
370 /* /\* NOBARRIER: basemt is a GC root. *\/ */
372 /* setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); */
373 /* settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1)); */
374 /* mt->nomm = (uint8_t)(~(1u<<MM_index)); */
378 /* void open_strlibext(lua_State *L) */
381 /* v = luaopen_string(L); */
/*
 * Installs the functions listed in strlibext into the global "string"
 * table of the given Lua state.
 *
 * The table is fetched with lua_getglobal; for every entry, up to the
 * first one whose `name` is null, the C function is pushed and assigned to
 * the field of that name (the table sits at stack index -2 while the
 * function is on top).
 *
 * NOTE(review): the declaration of `lib` and whatever follows the loop
 * (e.g. removing the table from the stack) are not visible in this
 * excerpt.
 */
385 void open_strlibext(lua_State
* L
)
388 lua_getglobal(L
, "string");
389 for (lib
=strlibext
;lib
->name
;lib
++) {
390 lua_pushcfunction(L
, lib
->func
);
391 lua_setfield(L
, -2, lib
->name
);