3 Copyright 2012 Taco Hoekwater <taco@luatex.org>
5 This file is part of LuaTeX.
7 LuaTeX is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2 of the License, or (at your
10 option) any later version.
12 LuaTeX is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License along
18 with LuaTeX; if not, see <http://www.gnu.org/licenses/>. */
20 /* The relative ordering of the header files is important here,
21 otherwise some of the defines that are needed for lua_sdump
/*
 * str_split: split the string given as argument 1 at a separator byte and
 * store the pieces in the value at stack index -2 under consecutive integer
 * keys (lua_rawseti with an incrementing n).  The function is entered in the
 * strlibext table under the name "explode".
 *
 * Argument 2 is optional and defaults to " +".  Only its first byte is
 * compared against the subject; a '+' in the second position is tested
 * separately (presumably it sets `mult` so that a run of separators counts
 * as one -- the assignment is not visible here).
 *
 * The subject is handled through a malloc'ed buffer of l+1 bytes; if that
 * allocation fails a fatal message is written to stderr.
 *
 * NOTE(review): this listing omits lines of the function (declarations,
 * table creation, the copy into the buffer, loop headers, free/return), so
 * the comments below describe only the statements that are shown.
 */
33 static int str_split (lua_State
*L
) {
/* Subject string; its length is returned through l. */
39 const char *s
= luaL_checklstring(L
, 1, &l
);
/* Optional separator specification, default " +". */
40 const char *joiner
= luaL_optstring(L
, 2, " +");
/* Working buffer: room for the l bytes of the subject plus one more. */
47 orig
= p
= malloc(l
+1);
/* l is filled in by luaL_checklstring (a size_t in the Lua API), so it is
   printed as unsigned long with a matching %lu conversion. */
49 fprintf(stderr
, "fatal: memory exhausted (malloc of %lu bytes).\n",(unsigned long)(l
+1));
/* Store the single byte at q as the next piece and advance q and n. */
58 lua_pushlstring(L
,q
,1); q
++;
59 lua_rawseti(L
,-2,n
); n
++;
/* A '+' as second byte of the separator specification is checked here. */
64 if (*(joiner
+1) == '+') {
/* Separator found at offset i: the piece runs from q up to (not including)
   p+i. */
73 if (*(p
+i
)==*joiner
) {
75 lua_pushlstring(L
,q
,((p
+i
)-q
));
76 lua_rawseti(L
,-2,n
); n
++;
/* Look ahead for further separator bytes directly following this one. */
78 while(*(p
+i
+1)==*joiner
) {
/* With mult set, q having reached the end of the buffer is treated
   specially (the action taken is not visible here). */
85 if (mult
&& q
==(p
+l
)) {
/* Push what remains at q, up to its terminating NUL. */
90 lua_pushlstring(L
,q
,strlen(q
));
/*
 * characters_aux: iterator step used by the closure built in
 * str_characters.  Upvalue 1 holds the string (length read into ls),
 * upvalue 2 the current byte index.  The visible statements advance the
 * stored index by one and push a one-byte string holding the byte at the
 * old index; the final `return 0` signals the end of the string.
 *
 * NOTE(review): the bounds test that selects between the two paths, the
 * declarations of ls and b, and the successful return are not visible in
 * this listing.
 */
97 static int characters_aux (lua_State
*L
) {
100 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
101 int ind
= lua_tointeger(L
, lua_upvalueindex(2));
/* Write ind+1 back into upvalue 2 for the next call. */
103 lua_pushinteger(L
, (ind
+1)); /* iterator */
104 lua_replace(L
, lua_upvalueindex(2));
/* Copy the current byte into b, followed by a 0 byte; only the first byte
   is pushed. */
105 b
[0] = *(s
+ind
); b
[1] = 0;
106 lua_pushlstring(L
, b
, 1);
109 return 0; /* string ended */
/*
 * str_characters: entered in strlibext as "characters".  Checks that
 * argument 1 is a string, pushes the start index 0 and wraps the two
 * topmost stack values as upvalues of a characters_aux closure.
 *
 * NOTE(review): original lines 115 and 118 onward (including the return)
 * are not visible in this listing.
 */
113 static int str_characters (lua_State
*L
) {
114 luaL_checkstring(L
, 1);
116 lua_pushinteger(L
, 0);
117 lua_pushcclosure(L
, characters_aux
, 2);
/*
 * utf_failed: failure path shared by the calls in utfcharacters_aux.
 * Stores new_ind in upvalue 2 (the iterator position) and pushes the three
 * bytes EF BF BD, which are the UTF-8 encoding of U+FFFD.
 *
 * NOTE(review): the return statement is not visible in this listing.
 * NOTE(review): fffd is a plain `char` array initialised with values above
 * 0x7F; where char is signed these are converted on initialisation.
 */
122 static int utf_failed(lua_State
*L
, int new_ind
) {
123 static char fffd
[3] = {0xEF,0xBF,0xBD};
124 lua_pushinteger(L
, new_ind
); /* iterator */
125 lua_replace(L
, lua_upvalueindex(2));
126 lua_pushlstring(L
, fffd
, 3);
/*
 * utfcharacters_aux: iterator step used by the closure built in
 * str_utfcharacters.  Upvalue 1 holds the string, upvalue 2 the current
 * byte index.
 *
 * The lead byte at the current index is matched against mask[j]/mequ[j]:
 * j = 0 matches 0xxxxxxx, 1 matches 110xxxxx, 2 matches 1110xxxx and
 * 3 matches 11110xxx, so j is the number of following bytes expected.
 * On a match, the sequence must fit inside the string and every following
 * byte must have the form 10xxxxxx; then the index is advanced by 1+j and
 * the 1+j bytes are pushed as one string.  Every failure goes through
 * utf_failed with the index at which iteration should resume.
 *
 * NOTE(review): the declarations of ls, c, j and k, the loop over j and
 * the successful return are not visible in this listing.
 */
130 static int utfcharacters_aux (lua_State
*L
) {
131 static const unsigned char mask
[4] = {0x80,0xE0,0xF0,0xF8};
132 static const unsigned char mequ
[4] = {0x00,0xC0,0xE0,0xF0};
136 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
137 int ind
= lua_tointeger(L
, lua_upvalueindex(2));
138 if (ind
>=(int)ls
) return 0; /* end of string */
139 c
= (unsigned) s
[ind
];
/* Does the lead byte belong to class j? */
141 if ((c
&mask
[j
])==mequ
[j
]) {
/* Sequence would run past the end: resume at ls, which ends the iteration
   on the next call. */
143 if (ind
+1+j
>(int)ls
) return utf_failed(L
,ls
); /* will not fit */
/* Each of the j following bytes must be 10xxxxxx; otherwise resume at the
   offending byte. */
144 for (k
=1; k
<=j
; k
++) {
145 c
= (unsigned) s
[ind
+k
];
146 if ((c
&0xC0)!=0x80) return utf_failed(L
,ind
+k
); /* bad follow */
/* Well-formed sequence: step over it and push its bytes. */
148 lua_pushinteger(L
, ind
+1+j
); /* iterator */
149 lua_replace(L
, lua_upvalueindex(2));
150 lua_pushlstring(L
, s
+ind
, 1+j
);
/* No class matched: skip this single byte. */
154 return utf_failed(L
,ind
+1); /* we found a follow byte! */
/*
 * str_utfcharacters: entered in strlibext as "utfcharacters".  Checks that
 * argument 1 is a string, pushes the start index 0 and wraps the two
 * topmost stack values as upvalues of a utfcharacters_aux closure.
 *
 * NOTE(review): original lines 160 and 163 onward (including the return)
 * are not visible in this listing.
 */
158 static int str_utfcharacters (lua_State
*L
) {
159 luaL_checkstring(L
, 1);
161 lua_pushinteger(L
, 0);
162 lua_pushcclosure(L
, utfcharacters_aux
, 2);
/*
 * utfvalues_aux: iterator step used by the closure built in str_utfvalues.
 * Upvalue 1 holds the string, upvalue 2 the current byte index.  Pushes an
 * integer v decoded from the bytes at the current index and advances the
 * index by numbytes.
 *
 * v starts out as 0xFFFD and is overwritten in the branches shown only
 * when the required following bytes exist and pass the test.  The lead
 * byte i selects the branch:
 *   i >= 0xF0 : three following bytes, v = (((i-0xF0)*64 + j)*64 + k)*64 + l
 *   i >= 0xE0 : two following bytes,   v = ((i-0xE0)*64 + j)*64 + k
 *   i >= 0xC0 : one following byte,    v = (i-0xC0)*64 + j
 * where j, k, l are the following bytes minus 128.
 *
 * NOTE(review): following bytes are only tested for >= 0x80, which is a
 * weaker check than the (c & 0xC0) == 0x80 test in utfcharacters_aux.
 * NOTE(review): the declarations of ls, i, j, k, l and numbytes, the
 * first branch of the if-chain, the assignments to numbytes and the
 * successful return are not visible in this listing.
 */
167 static int utfvalues_aux (lua_State
*L
) {
173 unsigned int v
= 0xFFFD;
175 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
176 int ind
= lua_tointeger(L
, lua_upvalueindex(2));
/* Lead byte of a four-byte sequence. */
182 } else if (i
>=0xF0) {
183 if ((ind
+3)<(int)ls
&& ((unsigned)*(s
+ind
+1))>=0x80
184 && ((unsigned)*(s
+ind
+2))>=0x80 && ((unsigned)*(s
+ind
+3))>=0x80) {
186 j
= ((unsigned)*(s
+ind
+1))-128;
187 k
= ((unsigned)*(s
+ind
+2))-128;
188 l
= ((unsigned)*(s
+ind
+3))-128;
189 v
= (((((i
-0xF0)*64) + j
)*64) + k
)*64 + l
;
/* Lead byte of a three-byte sequence. */
191 } else if (i
>=0xE0) {
192 if ((ind
+2)<(int)ls
&& ((unsigned)*(s
+ind
+1))>=0x80 && ((unsigned)*(s
+ind
+2))>=0x80) {
194 j
= ((unsigned)*(s
+ind
+1))-128;
195 k
= ((unsigned)*(s
+ind
+2))-128;
196 v
= (((i
-0xE0)*64) + j
)*64 + k
;
/* Lead byte of a two-byte sequence. */
199 } else if (i
>=0xC0) {
200 if ((ind
+1)<(int)ls
&& ((unsigned)*(s
+ind
+1))>=0x80) {
202 j
= ((unsigned)*(s
+ind
+1))-128;
203 v
= ((i
-0xC0)*64) + j
;
/* Advance the stored index by the number of bytes consumed and push the
   value. */
206 lua_pushinteger(L
, (ind
+numbytes
)); /* iterator */
207 lua_replace(L
, lua_upvalueindex(2));
208 lua_pushinteger(L
, v
);
211 return 0; /* string ended */
/*
 * str_utfvalues: entered in strlibext as "utfvalues".  Checks that
 * argument 1 is a string, pushes the start index 0 and wraps the two
 * topmost stack values as upvalues of a utfvalues_aux closure.
 *
 * NOTE(review): original lines 217 and 220 onward (including the return)
 * are not visible in this listing.
 */
215 static int str_utfvalues (lua_State
*L
) {
216 luaL_checkstring(L
, 1);
218 lua_pushinteger(L
, 0);
219 lua_pushcclosure(L
, utfvalues_aux
, 2);
/*
 * characterpairs_aux: iterator step used by the closure built in
 * str_characterpairs.  Upvalue 1 holds the string, upvalue 2 the current
 * byte index.  The visible statements advance the stored index by two or
 * by one, push the byte at the old index as a one-byte string, and then
 * push either a second one-byte string from b or an empty string
 * (length 0).
 *
 * NOTE(review): the conditions choosing between ind+2 and ind+1 and
 * between the second string and the empty string, the statement that
 * refills b before the second push, the declarations of ls and b, and the
 * successful return are not visible in this listing.
 */
225 static int characterpairs_aux (lua_State
*L
) {
228 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
229 int ind
= lua_tointeger(L
, lua_upvalueindex(2));
/* New iterator position: two bytes further, or one (alternative paths). */
232 lua_pushinteger(L
, (ind
+2)); /* iterator */
234 lua_pushinteger(L
, (ind
+1)); /* iterator */
236 lua_replace(L
, lua_upvalueindex(2));
/* First result: the byte at the old index. */
237 b
[0] = *(s
+ind
); b
[1] = 0;
238 lua_pushlstring(L
, b
, 1);
/* Second result: another one-byte string from b ... */
241 lua_pushlstring(L
, b
, 1);
/* ... or a zero-length string. */
243 lua_pushlstring(L
, b
+1, 0);
247 return 0; /* string ended */
/*
 * str_characterpairs: entered in strlibext as "characterpairs".  Checks
 * that argument 1 is a string, pushes the start index 0 and wraps the two
 * topmost stack values as upvalues of a characterpairs_aux closure.
 *
 * NOTE(review): original lines 253 and 256 onward (including the return)
 * are not visible in this listing.
 */
251 static int str_characterpairs (lua_State
*L
) {
252 luaL_checkstring(L
, 1);
254 lua_pushinteger(L
, 0);
255 lua_pushcclosure(L
, characterpairs_aux
, 2);
/*
 * bytes_aux: iterator step used by the closure built in str_bytes.
 * Upvalue 1 holds the string, upvalue 2 the current byte index.  The
 * visible statements advance the stored index by one and push the byte at
 * the old index as an integer, read through an unsigned char cast; the
 * final `return 0` signals the end of the string.
 *
 * NOTE(review): the bounds test, the declarations of ls and i, and the
 * successful return are not visible in this listing.
 */
259 static int bytes_aux (lua_State
*L
) {
262 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
263 int ind
= lua_tointeger(L
, lua_upvalueindex(2));
265 lua_pushinteger(L
, (ind
+1)); /* iterator */
266 lua_replace(L
, lua_upvalueindex(2));
267 i
= (unsigned char)*(s
+ind
);
268 lua_pushinteger(L
, i
); /* byte */
271 return 0; /* string ended */
/*
 * str_bytes: entered in strlibext as "bytes".  Checks that argument 1 is a
 * string, pushes the start index 0 and wraps the two topmost stack values
 * as upvalues of a bytes_aux closure.
 *
 * NOTE(review): original lines 276 and 279 onward (including the return)
 * are not visible in this listing.
 */
274 static int str_bytes (lua_State
*L
) {
275 luaL_checkstring(L
, 1);
277 lua_pushinteger(L
, 0);
278 lua_pushcclosure(L
, bytes_aux
, 2);
/*
 * bytepairs_aux: iterator step used by the closure built in str_bytepairs.
 * Upvalue 1 holds the string, upvalue 2 the current byte index.  The
 * visible statements advance the stored index by two or by one, push the
 * byte at the old index as an integer, and then push either the next byte
 * as an integer or nil (the nil case is marked "odd string length").
 *
 * NOTE(review): the conditions choosing between the alternative paths,
 * the declarations of ls and i, and the successful return are not visible
 * in this listing.
 */
282 static int bytepairs_aux (lua_State
*L
) {
285 const char *s
= lua_tolstring(L
, lua_upvalueindex(1), &ls
);
286 int ind
= lua_tointeger(L
, lua_upvalueindex(2));
/* New iterator position: two bytes further, or one (alternative paths). */
289 lua_pushinteger(L
, (ind
+2)); /* iterator */
291 lua_pushinteger(L
, (ind
+1)); /* iterator */
293 lua_replace(L
, lua_upvalueindex(2));
294 i
= (unsigned char)*(s
+ind
);
295 lua_pushinteger(L
, i
); /* byte one */
297 i
= (unsigned char)*(s
+ind
+1);
298 lua_pushinteger(L
, i
); /* byte two */
300 lua_pushnil(L
); /* odd string length */
304 return 0; /* string ended */
/*
 * str_bytepairs: entered in strlibext as "bytepairs".  Checks that
 * argument 1 is a string, pushes the start index 0 and wraps the two
 * topmost stack values as upvalues of a bytepairs_aux closure.
 *
 * NOTE(review): original lines 310 and 313 onward (including the return)
 * are not visible in this listing.
 */
308 static int str_bytepairs (lua_State
*L
) {
309 luaL_checkstring(L
, 1);
311 lua_pushinteger(L
, 0);
312 lua_pushcclosure(L
, bytepairs_aux
, 2);
/*
 * writer: chunk callback handed to lua_sdump by str_dump.  B is treated as
 * a luaL_Buffer and the `size` bytes at b are appended to it.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
318 static int writer (lua_State
*L
, const void* b
, size_t size
, void* B
) {
320 luaL_addlstring((luaL_Buffer
*) B
, (const char *)b
, size
);
/*
 * lua_sdump: dump helper that passes a `stripping` flag through to
 * luaU_dump.  It checks that at least one element is on the stack, then
 * calls luaU_dump on the prototype obtained from `o` with the given
 * writer, data pointer and stripping flag, keeping the result in `status`.
 *
 * NOTE(review): the declarations of status and o, the statement that
 * assigns o, any locking, and the return are not visible in this listing.
 */
324 static int lua_sdump (lua_State
*L
, lua_Writer writer
, void *data
, int stripping
) {
328 api_checknelems(L
, 1);
331 status
= luaU_dump(L
, getproto(o
), writer
, data
, stripping
);
/*
 * str_dump: requires argument 1 to be a function and dumps it through
 * lua_sdump, with `writer` appending into the buffer b.  Raises the Lua
 * error "unable to dump given function" when lua_sdump returns non-zero.
 *
 * NOTE(review): the strip flag is read from argument 2 only when exactly
 * two arguments were passed (lua_gettop(L)==2); with any other argument
 * count it keeps whatever value it was initialised with, on a line not
 * visible here.
 * NOTE(review): the declarations of b and stripping, the buffer
 * initialisation, the result push and the return are not visible in this
 * listing.
 */
338 static int str_dump (lua_State
*L
) {
341 luaL_checktype(L
, 1, LUA_TFUNCTION
);
342 if (lua_gettop(L
)==2) {
343 stripping
= lua_toboolean(L
,2);
347 if (lua_sdump(L
, writer
, &b
, stripping
) != 0)
348 return luaL_error(L
, "unable to dump given function");
/*
 * strlibext: name/function pairs that open_strlibext installs as fields of
 * the global "string" table.
 *
 * NOTE(review): the entries after "explode" and the terminating entry
 * (open_strlibext stops at the first entry whose name is null) are not
 * visible in this listing.
 */
355 static const luaL_Reg strlibext
[] = {
356 {"utfvalues", str_utfvalues
},
357 {"utfcharacters", str_utfcharacters
},
358 {"characters", str_characters
},
359 {"characterpairs", str_characterpairs
},
360 {"bytes", str_bytes
},
361 {"bytepairs", str_bytepairs
},
362 {"explode", str_split
},
/*
 * open_strlibext: fetches the global "string" table and, for every entry
 * of strlibext up to the first one with a null name, stores the entry's C
 * function in that table under the entry's name.
 *
 * NOTE(review): the declaration of lib, the braces, and whatever follows
 * the loop (for example popping the table) are not visible in this
 * listing.
 */
367 void open_strlibext(lua_State
* L
)
370 lua_getglobal(L
, "string");
371 for (lib
=strlibext
;lib
->name
;lib
++) {
/* The "string" table is at index -2 while the function is on top. */
372 lua_pushcfunction(L
, lib
->func
);
373 lua_setfield(L
, -2, lib
->name
);