Update manual
[jpcrr.git] / mnj / lua / Loader.java
blob093e8d39886191937916e07a36e99a5b06e8164a
1 /* $Header: //info.ravenbrook.com/project/jili/version/1.1/code/mnj/lua/Loader.java#1 $
2 * Copyright (c) 2006 Nokia Corporation and/or its subsidiary(-ies).
3 * All rights reserved.
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject
11 * to the following conditions:
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 package mnj.lua;
27 import java.io.EOFException;
28 import java.io.InputStream;
29 import java.io.IOException;
31 /**
32 * Loads Lua 5.1 binary chunks.
33 * This loader is restricted to loading Lua 5.1 binary chunks where:
34 * <ul>
35 * <li><code>LUAC_VERSION</code> is <code>0x51</code>.</li>
36 * <li><code>int</code> is 32 bits.</li>
37 * <li><code>size_t</code> is 32 bits.</li>
38 * <li><code>Instruction</code> is 32 bits (this is a type defined in
39 * the PUC-Rio Lua).</li>
40 * <li><code>lua_Number</code> is an IEEE 754 64-bit double. Suitable
41 * for passing to {@link java.lang.Double#longBitsToDouble}.</li>
42 * <li>endianness does not matter (the loader swabs as appropriate).</li>
43 * </ul>
44 * Any Lua chunk compiled by a stock Lua 5.1 running on a 32-bit Windows
45 * PC or a 32-bit OS X machine should be fine.
47 final class Loader
49 /**
50 * Whether integers in the binary chunk are stored big-endian or
51 * little-endian. Recall that the number 0x12345678 is stored: 0x12
52 * 0x34 0x56 0x78 in big-endian format; and, 0x78 0x56 0x34 0x12 in
53 * little-endian format.
55 private boolean bigendian;
56 private InputStream in;
58 // auxiliary for reading ints/numbers
59 private byte [] intbuf = new byte [4] ;
60 private byte [] longbuf = new byte [8] ;
/**
 * Creates a chunk loader that reads from the given stream.  The stream
 * must be positioned at the start of the <code>LUA_SIGNATURE</code>
 * that begins a Lua binary chunk.
 * @param in  The binary stream from which the chunk is read.
 * @param name  The name of the chunk (not used by this constructor).
 * @throws NullPointerException if <code>in</code> is <code>null</code>.
 */
Loader(InputStream in, String name)
{
  if (in == null)
  {
    throw new NullPointerException();
  }
  this.in = in;
}
78 /**
79 * Loads (undumps) a dumped binary chunk.
80 * @throws IOException if chunk is malformed or unacceptable.
82 Proto undump() throws IOException
84 this.header();
85 return this.function(null);
89 /**
90 * Primitive reader for undumping.
91 * Reads exactly enough bytes from <code>this.in</code> to fill the
92 * array <code>b</code>. If there aren't enough to fill
93 * <code>b</code> then an exception is thrown. Similar to
94 * <code>LoadBlock</code> from PUC-Rio's <code>lundump.c</code>.
95 * @param b byte array to fill.
96 * @throws EOFException when the stream is exhausted too early.
97 * @throws IOException when the underlying stream does.
99 private void block(byte []b) throws IOException
101 int n = in.read(b);
102 if (n != b.length)
103 throw new EOFException();
107 * Undumps a byte as an 8 bit unsigned number. Returns
108 * an int to accommodate the range.
110 private int byteLoad() throws IOException
112 int c = in.read() ;
113 if (c == -1)
114 throw new EOFException();
115 else
116 return c & 0xFF ; // paranoia
120 * Undumps the code for a <code>Proto</code>. The code is an array of
121 * VM instructions.
123 private int[] code() throws IOException
125 int n = intLoad();
126 int[] code = new int[n];
128 for (int i=0; i<n; ++i)
130 // :Instruction:size Here we assume that a dumped Instruction is
131 // the same size as a dumped int.
132 code[i] = intLoad();
135 return code;
139 * Undumps the constant array contained inside a <code>Proto</code>
140 * object. First half of <code>LoadConstants</code>, see
141 * <code>proto</code> for the second half of
142 * <code>LoadConstants</code>.
144 private Slot[] constant() throws IOException
146 int n = intLoad();
147 Slot[] k = new Slot[n];
149 // Load each constant one by one. We use the following values for
150 // the Lua tagtypes (taken from <code>lua.h</code> from the PUC-Rio
151 // Lua 5.1 distribution):
152 // LUA_TNIL 0
153 // LUA_TBOOLEAN 1
154 // LUA_TNUMBER 3
155 // LUA_TSTRING 4
156 // All other tagtypes are invalid
158 // :todo: Currently a new Slot is created for each constant.
159 // Consider a space optimisation whereby identical constants have
160 // the same Slot. Constants are pooled per function anyway (so a
161 // function never has 2 identical constants), so would have to work
162 // across functions. The easy cases of nil, true, false, might be
163 // worth doing since that doesn't require a global table.
165 for (int i=0; i<n; ++i)
167 int t = byteLoad();
168 switch (t)
170 case 0: // LUA_TNIL
171 k[i] = new Slot(Lua.NIL);
172 break;
174 case 1: // LUA_TBOOLEAN
175 int b = byteLoad();
176 // assert b >= 0;
177 if (b > 1)
178 throw new IOException();
180 k[i] = new Slot(Lua.valueOfBoolean(b != 0));
181 break;
183 case 3: // LUA_TNUMBER
184 k[i] = new Slot(number());
185 break;
187 case 4: // LUA_TSTRING
188 k[i] = new Slot(string());
189 break;
191 default:
192 throw new IOException();
196 return k;
200 * Undumps the debug info for a <code>Proto</code>.
201 * @param proto The Proto instance to which debug info will be added.
203 private void debug(Proto proto) throws IOException
205 // lineinfo
206 int n = intLoad();
207 int[] lineinfo = new int[n];
209 for (int i=0; i<n; ++i)
211 lineinfo[i] = intLoad();
214 // locvars
215 n = intLoad();
216 LocVar[] locvar = new LocVar[n];
217 for (int i=0; i<n; ++i)
219 String s = string();
220 int start = intLoad();
221 int end = intLoad();
222 locvar[i] = new LocVar(s, start, end);
225 // upvalue (names)
226 n = intLoad();
227 String[] upvalue = new String[n];
228 for (int i=0; i<n; ++i)
230 upvalue[i] = string();
233 proto.debug(lineinfo, locvar, upvalue);
235 return;
239 * Undumps a Proto object. This is named 'function' after
240 * <code>LoadFunction</code> in PUC-Rio's <code>lundump.c</code>.
241 * @param parentSource Name of parent source "file".
242 * @throws IOException when binary is malformed.
244 private Proto function(String parentSource) throws IOException
246 String source;
247 int linedefined;
248 int lastlinedefined;
249 int nups;
250 int numparams;
251 int varargByte;
252 boolean vararg;
253 int maxstacksize;
254 int[] code;
255 Slot[] constant;
256 Proto[] proto;
258 source = this.string();
259 if (null == source)
261 source = parentSource;
263 linedefined = this.intLoad();
264 lastlinedefined = this.intLoad();
265 nups = this.byteLoad();
266 numparams = this.byteLoad();
267 varargByte = this.byteLoad();
268 // "is_vararg" is a 3-bit field, with the following bit meanings
269 // (see "lobject.h"):
270 // 1 - VARARG_HASARG
271 // 2 - VARARG_ISVARARG
272 // 4 - VARARG_NEEDSARG
273 // Values 1 and 4 (bits 0 and 2) are only used for 5.0
274 // compatibility.
275 // HASARG indicates that a function was compiled in 5.0
276 // compatibility mode and is declared to have ... in its parameter
277 // list.
278 // NEEDSARG indicates that a function was compiled in 5.0
279 // compatibility mode and is declared to have ... in its parameter
280 // list and does _not_ use the 5.1 style of vararg access (using ...
281 // as an expression). It is assumed to use 5.0 style vararg access
282 // (the local 'arg' variable). This is not supported in Jill.
283 // ISVARARG indicates that a function has ... in its parameter list
284 // (whether compiled in 5.0 compatibility mode or not).
286 // At runtime NEEDSARG changes the protocol for calling a vararg
287 // function. We don't support this, so we check that it is absent
288 // here in the loader.
290 // That means that the legal values for this field ar 0,1,2,3.
291 if (varargByte < 0 || varargByte > 3)
293 throw new IOException();
295 vararg = (0 != varargByte);
296 maxstacksize = this.byteLoad();
297 code = this.code();
298 constant = this.constant();
299 proto = this.proto(source);
300 Proto newProto = new Proto(constant, code, proto, nups,
301 numparams, vararg, maxstacksize);
302 newProto.setSource(source);
303 newProto.setLinedefined(linedefined);
304 newProto.setLastlinedefined(lastlinedefined);
306 this.debug(newProto);
307 // :todo: call code verifier
308 return newProto;
311 private static final int HEADERSIZE = 12;
313 /** A chunk header that is correct. Except for the endian byte, at
314 * index 6, which is always overwritten with the one from the file,
315 * before comparison. We cope with either endianness.
316 * Default access so that {@link Lua#load} can read the first entry.
317 * On no account should anyone except {@link #header} modify
318 * this array.
320 static final byte[] HEADER = new byte[]
322 033, (byte)'L', (byte)'u', (byte)'a',
323 0x51, 0, 99, 4,
324 4, 4, 8, 0};
327 * Loads and checks the binary chunk header. Sets
328 * <code>this.bigendian</code> accordingly.
330 * A Lua 5.1 header looks like this:
331 * <pre>
332 * b[0] 0x33
333 * b[1..3] "Lua";
334 * b[4] 0x51 (LUAC_VERSION)
335 * b[5] 0 (LUAC_FORMAT)
336 * b[6] 0 big-endian, 1 little-endian
337 * b[7] 4 (sizeof(int))
338 * b[8] 4 (sizeof(size_t))
339 * b[9] 4 (sizeof(Instruction))
340 * b[10] 8 (sizeof(lua_Number))
341 * b[11] 0 (floating point)
342 * </pre>
344 * To conserve JVM bytecodes the sizes of the types <code>int</code>,
345 * <code>size_t</code>, <code>Instruction</code>,
346 * <code>lua_Number</code> are assumed by the code to be 4, 4, 4, and
347 * 8, respectively. Where this assumption is made the tags :int:size,
348 * :size_t:size :Instruction:size :lua_Number:size will appear so that
349 * you can grep for them, should you wish to modify this loader to
350 * load binary chunks from different architectures.
352 * @throws IOException when header is malformed or not suitable.
354 private void header() throws IOException
356 byte[] buf = new byte[HEADERSIZE];
358 block(buf);
360 // poke the HEADER's endianness byte and compare.
361 HEADER[6] = buf[6];
363 if (buf[6] < 0 || buf[6] > 1 || !arrayEquals(HEADER, buf))
365 throw new IOException();
367 bigendian = (buf[6] == 0);
371 * Undumps an int. This method swabs accordingly.
372 * size_t and Instruction need swabbing too, but the code
373 * simply uses this method to load size_t and Instruction.
375 private int intLoad() throws IOException
377 // :int:size Here we assume an int is 4 bytes.
378 block(intbuf);
380 int i;
381 // Caution: byte is signed so "&0xff" converts to unsigned value.
382 if (bigendian)
384 i = ((intbuf[0]&0xff) << 24) | ((intbuf[1]&0xff) << 16) |
385 ((intbuf[2]&0xff) << 8) | (intbuf[3]&0xff);
387 else
389 i = ((intbuf[3]&0xff) << 24) | ((intbuf[2]&0xff) << 16) |
390 ((intbuf[1]&0xff) << 8) | (intbuf[0]&0xff);
392 return i;
394 /* minimum footprint version?
395 int result = 0 ;
396 for (int shift = 0 ; shift < 32 ; shift+=8)
398 int byt = byteLoad () ;
399 if (bigendian)
400 result = (result << 8) | byt ;
401 else
402 result |= byt << shift ;
404 return result ;
407 /* another version?
408 if (bigendian)
410 int result = byteLoad() << 24 ;
411 result |= byteLoad () << 16 ;
412 result |= byteLoad () << 8 ;
413 result |= byteLoad () ;
414 return result;
416 else
418 int result = byteLoad() ;
419 result |= byteLoad () << 8 ;
420 result |= byteLoad () << 16 ;
421 result |= byteLoad () << 24 ;
422 return result ;
428 * Undumps a Lua number. Which is assumed to be a 64-bit IEEE double.
430 private Object number() throws IOException
432 // :lua_Number:size Here we assume that the size is 8.
433 block(longbuf);
434 // Big-endian architectures store doubles with the sign bit first;
435 // little-endian is the other way around.
436 long l = 0;
437 for (int i=0; i<8; ++i)
439 if (bigendian)
440 l = (l << 8) | (longbuf[i]&0xff);
441 else
442 l = (l >>> 8) | (((long)(longbuf[i]&0xff)) << 56);
444 double d = Double.longBitsToDouble(l);
445 return Lua.valueOfNumber(d);
449 * Undumps the <code>Proto</code> array contained inside a
450 * <code>Proto</code> object. These are the <code>Proto</code>
451 * objects for all inner functions defined inside an existing
452 * function. Corresponds to the second half of PUC-Rio's
453 * <code>LoadConstants</code> function. See <code>constant</code> for
454 * the first half.
456 private Proto[] proto(String source) throws IOException
458 int n = intLoad();
459 Proto[] p = new Proto[n];
461 for (int i=0; i<n; ++i)
463 p[i] = function(source);
465 return p;
469 * Undumps a {@link String} or <code>null</code>. As per
470 * <code>LoadString</code> in
471 * PUC-Rio's lundump.c. Strings are converted from the binary
472 * using the UTF-8 encoding, using the {@link
473 * java.lang.String#String(byte[], String) String(byte[], String)}
474 * constructor.
476 private String string() throws IOException
478 // :size_t:size we assume that size_t is same size as int.
479 int size = intLoad();
480 if (size == 0)
482 return null;
485 byte[] buf = new byte[size-1];
486 block(buf);
487 // Discard trailing NUL byte
488 if (in.read() == -1)
489 throw new EOFException() ;
491 return (new String(buf, "UTF-8")).intern();
495 * CLDC 1.1 does not provide <code>java.util.Arrays</code> so we make
496 * do with this.
498 private static boolean arrayEquals(byte[] x, byte[] y)
500 if (x.length != y.length)
502 return false;
504 for (int i=0; i < x.length; ++i)
506 if (x[i] != y[i])
508 return false;
511 return true;