Merged revisions 78818 via svnmerge from
[python/dscho.git] / Objects / codeobject.c
blob832b4e90e3849591d27f1f0782aaf469fe7a5f46
1 #include "Python.h"
2 #include "code.h"
3 #include "structmember.h"
5 #define NAME_CHARS \
6 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
8 /* all_name_chars(s): true iff all chars in s are valid NAME_CHARS */
10 static int
11 all_name_chars(Py_UNICODE *s)
13 static char ok_name_char[256];
14 static unsigned char *name_chars = (unsigned char *)NAME_CHARS;
16 if (ok_name_char[*name_chars] == 0) {
17 unsigned char *p;
18 for (p = name_chars; *p; p++)
19 ok_name_char[*p] = 1;
21 while (*s) {
22 if (*s >= 128)
23 return 0;
24 if (ok_name_char[*s++] == 0)
25 return 0;
27 return 1;
30 static void
31 intern_strings(PyObject *tuple)
33 Py_ssize_t i;
35 for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) {
36 PyObject *v = PyTuple_GET_ITEM(tuple, i);
37 if (v == NULL || !PyUnicode_CheckExact(v)) {
38 Py_FatalError("non-string found in code slot");
40 PyUnicode_InternInPlace(&PyTuple_GET_ITEM(tuple, i));
45 PyCodeObject *
46 PyCode_New(int argcount, int kwonlyargcount,
47 int nlocals, int stacksize, int flags,
48 PyObject *code, PyObject *consts, PyObject *names,
49 PyObject *varnames, PyObject *freevars, PyObject *cellvars,
50 PyObject *filename, PyObject *name, int firstlineno,
51 PyObject *lnotab)
53 PyCodeObject *co;
54 Py_ssize_t i;
56 /* Check argument types */
57 if (argcount < 0 || nlocals < 0 ||
58 code == NULL ||
59 consts == NULL || !PyTuple_Check(consts) ||
60 names == NULL || !PyTuple_Check(names) ||
61 varnames == NULL || !PyTuple_Check(varnames) ||
62 freevars == NULL || !PyTuple_Check(freevars) ||
63 cellvars == NULL || !PyTuple_Check(cellvars) ||
64 name == NULL || !PyUnicode_Check(name) ||
65 filename == NULL || !PyUnicode_Check(filename) ||
66 lnotab == NULL || !PyBytes_Check(lnotab) ||
67 !PyObject_CheckReadBuffer(code)) {
68 PyErr_BadInternalCall();
69 return NULL;
71 intern_strings(names);
72 intern_strings(varnames);
73 intern_strings(freevars);
74 intern_strings(cellvars);
75 /* Intern selected string constants */
76 for (i = PyTuple_Size(consts); --i >= 0; ) {
77 PyObject *v = PyTuple_GetItem(consts, i);
78 if (!PyUnicode_Check(v))
79 continue;
80 if (!all_name_chars(PyUnicode_AS_UNICODE(v)))
81 continue;
82 PyUnicode_InternInPlace(&PyTuple_GET_ITEM(consts, i));
84 co = PyObject_NEW(PyCodeObject, &PyCode_Type);
85 if (co != NULL) {
86 co->co_argcount = argcount;
87 co->co_kwonlyargcount = kwonlyargcount;
88 co->co_nlocals = nlocals;
89 co->co_stacksize = stacksize;
90 co->co_flags = flags;
91 Py_INCREF(code);
92 co->co_code = code;
93 Py_INCREF(consts);
94 co->co_consts = consts;
95 Py_INCREF(names);
96 co->co_names = names;
97 Py_INCREF(varnames);
98 co->co_varnames = varnames;
99 Py_INCREF(freevars);
100 co->co_freevars = freevars;
101 Py_INCREF(cellvars);
102 co->co_cellvars = cellvars;
103 Py_INCREF(filename);
104 co->co_filename = filename;
105 Py_INCREF(name);
106 co->co_name = name;
107 co->co_firstlineno = firstlineno;
108 Py_INCREF(lnotab);
109 co->co_lnotab = lnotab;
110 co->co_zombieframe = NULL;
112 return co;
116 #define OFF(x) offsetof(PyCodeObject, x)
118 static PyMemberDef code_memberlist[] = {
119 {"co_argcount", T_INT, OFF(co_argcount), READONLY},
120 {"co_kwonlyargcount", T_INT, OFF(co_kwonlyargcount), READONLY},
121 {"co_nlocals", T_INT, OFF(co_nlocals), READONLY},
122 {"co_stacksize",T_INT, OFF(co_stacksize), READONLY},
123 {"co_flags", T_INT, OFF(co_flags), READONLY},
124 {"co_code", T_OBJECT, OFF(co_code), READONLY},
125 {"co_consts", T_OBJECT, OFF(co_consts), READONLY},
126 {"co_names", T_OBJECT, OFF(co_names), READONLY},
127 {"co_varnames", T_OBJECT, OFF(co_varnames), READONLY},
128 {"co_freevars", T_OBJECT, OFF(co_freevars), READONLY},
129 {"co_cellvars", T_OBJECT, OFF(co_cellvars), READONLY},
130 {"co_filename", T_OBJECT, OFF(co_filename), READONLY},
131 {"co_name", T_OBJECT, OFF(co_name), READONLY},
132 {"co_firstlineno", T_INT, OFF(co_firstlineno), READONLY},
133 {"co_lnotab", T_OBJECT, OFF(co_lnotab), READONLY},
134 {NULL} /* Sentinel */
137 /* Helper for code_new: return a shallow copy of a tuple that is
138 guaranteed to contain exact strings, by converting string subclasses
139 to exact strings and complaining if a non-string is found. */
140 static PyObject*
141 validate_and_copy_tuple(PyObject *tup)
143 PyObject *newtuple;
144 PyObject *item;
145 Py_ssize_t i, len;
147 len = PyTuple_GET_SIZE(tup);
148 newtuple = PyTuple_New(len);
149 if (newtuple == NULL)
150 return NULL;
152 for (i = 0; i < len; i++) {
153 item = PyTuple_GET_ITEM(tup, i);
154 if (PyUnicode_CheckExact(item)) {
155 Py_INCREF(item);
157 else if (!PyUnicode_Check(item)) {
158 PyErr_Format(
159 PyExc_TypeError,
160 "name tuples must contain only "
161 "strings, not '%.500s'",
162 item->ob_type->tp_name);
163 Py_DECREF(newtuple);
164 return NULL;
166 else {
167 item = PyUnicode_FromUnicode(
168 PyUnicode_AS_UNICODE(item),
169 PyUnicode_GET_SIZE(item));
170 if (item == NULL) {
171 Py_DECREF(newtuple);
172 return NULL;
175 PyTuple_SET_ITEM(newtuple, i, item);
178 return newtuple;
181 PyDoc_STRVAR(code_doc,
182 "code(argcount, kwonlyargcount, nlocals, stacksize, flags, codestring,\n\
183 constants, names, varnames, filename, name, firstlineno,\n\
184 lnotab[, freevars[, cellvars]])\n\
186 Create a code object. Not for the faint of heart.");
188 static PyObject *
189 code_new(PyTypeObject *type, PyObject *args, PyObject *kw)
191 int argcount;
192 int kwonlyargcount;
193 int nlocals;
194 int stacksize;
195 int flags;
196 PyObject *co = NULL;
197 PyObject *code;
198 PyObject *consts;
199 PyObject *names, *ournames = NULL;
200 PyObject *varnames, *ourvarnames = NULL;
201 PyObject *freevars = NULL, *ourfreevars = NULL;
202 PyObject *cellvars = NULL, *ourcellvars = NULL;
203 PyObject *filename;
204 PyObject *name;
205 int firstlineno;
206 PyObject *lnotab;
208 if (!PyArg_ParseTuple(args, "iiiiiSO!O!O!UUiS|O!O!:code",
209 &argcount, &kwonlyargcount,
210 &nlocals, &stacksize, &flags,
211 &code,
212 &PyTuple_Type, &consts,
213 &PyTuple_Type, &names,
214 &PyTuple_Type, &varnames,
215 &filename, &name,
216 &firstlineno, &lnotab,
217 &PyTuple_Type, &freevars,
218 &PyTuple_Type, &cellvars))
219 return NULL;
221 if (argcount < 0) {
222 PyErr_SetString(
223 PyExc_ValueError,
224 "code: argcount must not be negative");
225 goto cleanup;
228 if (kwonlyargcount < 0) {
229 PyErr_SetString(
230 PyExc_ValueError,
231 "code: kwonlyargcount must not be negative");
232 goto cleanup;
234 if (nlocals < 0) {
235 PyErr_SetString(
236 PyExc_ValueError,
237 "code: nlocals must not be negative");
238 goto cleanup;
241 ournames = validate_and_copy_tuple(names);
242 if (ournames == NULL)
243 goto cleanup;
244 ourvarnames = validate_and_copy_tuple(varnames);
245 if (ourvarnames == NULL)
246 goto cleanup;
247 if (freevars)
248 ourfreevars = validate_and_copy_tuple(freevars);
249 else
250 ourfreevars = PyTuple_New(0);
251 if (ourfreevars == NULL)
252 goto cleanup;
253 if (cellvars)
254 ourcellvars = validate_and_copy_tuple(cellvars);
255 else
256 ourcellvars = PyTuple_New(0);
257 if (ourcellvars == NULL)
258 goto cleanup;
260 co = (PyObject *)PyCode_New(argcount, kwonlyargcount,
261 nlocals, stacksize, flags,
262 code, consts, ournames, ourvarnames,
263 ourfreevars, ourcellvars, filename,
264 name, firstlineno, lnotab);
265 cleanup:
266 Py_XDECREF(ournames);
267 Py_XDECREF(ourvarnames);
268 Py_XDECREF(ourfreevars);
269 Py_XDECREF(ourcellvars);
270 return co;
273 static void
274 code_dealloc(PyCodeObject *co)
276 Py_XDECREF(co->co_code);
277 Py_XDECREF(co->co_consts);
278 Py_XDECREF(co->co_names);
279 Py_XDECREF(co->co_varnames);
280 Py_XDECREF(co->co_freevars);
281 Py_XDECREF(co->co_cellvars);
282 Py_XDECREF(co->co_filename);
283 Py_XDECREF(co->co_name);
284 Py_XDECREF(co->co_lnotab);
285 if (co->co_zombieframe != NULL)
286 PyObject_GC_Del(co->co_zombieframe);
287 PyObject_DEL(co);
290 static PyObject *
291 code_repr(PyCodeObject *co)
293 int lineno = -1;
294 char *filename = "???";
296 if (co->co_firstlineno != 0)
297 lineno = co->co_firstlineno;
298 if (co->co_filename && PyUnicode_Check(co->co_filename))
299 filename = _PyUnicode_AsString(co->co_filename);
300 return PyUnicode_FromFormat(
301 "<code object %.100U at %p, file \"%.300s\", line %d>",
302 co->co_name, co, filename, lineno);
305 static PyObject *
306 code_richcompare(PyObject *self, PyObject *other, int op)
308 PyCodeObject *co, *cp;
309 int eq;
310 PyObject *res;
312 if ((op != Py_EQ && op != Py_NE) ||
313 !PyCode_Check(self) ||
314 !PyCode_Check(other)) {
315 Py_INCREF(Py_NotImplemented);
316 return Py_NotImplemented;
319 co = (PyCodeObject *)self;
320 cp = (PyCodeObject *)other;
322 eq = PyObject_RichCompareBool(co->co_name, cp->co_name, Py_EQ);
323 if (eq <= 0) goto unequal;
324 eq = co->co_argcount == cp->co_argcount;
325 if (!eq) goto unequal;
326 eq = co->co_kwonlyargcount == cp->co_kwonlyargcount;
327 if (!eq) goto unequal;
328 eq = co->co_nlocals == cp->co_nlocals;
329 if (!eq) goto unequal;
330 eq = co->co_flags == cp->co_flags;
331 if (!eq) goto unequal;
332 eq = co->co_firstlineno == cp->co_firstlineno;
333 if (!eq) goto unequal;
334 eq = PyObject_RichCompareBool(co->co_code, cp->co_code, Py_EQ);
335 if (eq <= 0) goto unequal;
336 eq = PyObject_RichCompareBool(co->co_consts, cp->co_consts, Py_EQ);
337 if (eq <= 0) goto unequal;
338 eq = PyObject_RichCompareBool(co->co_names, cp->co_names, Py_EQ);
339 if (eq <= 0) goto unequal;
340 eq = PyObject_RichCompareBool(co->co_varnames, cp->co_varnames, Py_EQ);
341 if (eq <= 0) goto unequal;
342 eq = PyObject_RichCompareBool(co->co_freevars, cp->co_freevars, Py_EQ);
343 if (eq <= 0) goto unequal;
344 eq = PyObject_RichCompareBool(co->co_cellvars, cp->co_cellvars, Py_EQ);
345 if (eq <= 0) goto unequal;
347 if (op == Py_EQ)
348 res = Py_True;
349 else
350 res = Py_False;
351 goto done;
353 unequal:
354 if (eq < 0)
355 return NULL;
356 if (op == Py_NE)
357 res = Py_True;
358 else
359 res = Py_False;
361 done:
362 Py_INCREF(res);
363 return res;
366 static long
367 code_hash(PyCodeObject *co)
369 long h, h0, h1, h2, h3, h4, h5, h6;
370 h0 = PyObject_Hash(co->co_name);
371 if (h0 == -1) return -1;
372 h1 = PyObject_Hash(co->co_code);
373 if (h1 == -1) return -1;
374 h2 = PyObject_Hash(co->co_consts);
375 if (h2 == -1) return -1;
376 h3 = PyObject_Hash(co->co_names);
377 if (h3 == -1) return -1;
378 h4 = PyObject_Hash(co->co_varnames);
379 if (h4 == -1) return -1;
380 h5 = PyObject_Hash(co->co_freevars);
381 if (h5 == -1) return -1;
382 h6 = PyObject_Hash(co->co_cellvars);
383 if (h6 == -1) return -1;
384 h = h0 ^ h1 ^ h2 ^ h3 ^ h4 ^ h5 ^ h6 ^
385 co->co_argcount ^ co->co_kwonlyargcount ^
386 co->co_nlocals ^ co->co_flags;
387 if (h == -1) h = -2;
388 return h;
391 /* XXX code objects need to participate in GC? */
393 PyTypeObject PyCode_Type = {
394 PyVarObject_HEAD_INIT(&PyType_Type, 0)
395 "code",
396 sizeof(PyCodeObject),
398 (destructor)code_dealloc, /* tp_dealloc */
399 0, /* tp_print */
400 0, /* tp_getattr */
401 0, /* tp_setattr */
402 0, /* tp_reserved */
403 (reprfunc)code_repr, /* tp_repr */
404 0, /* tp_as_number */
405 0, /* tp_as_sequence */
406 0, /* tp_as_mapping */
407 (hashfunc)code_hash, /* tp_hash */
408 0, /* tp_call */
409 0, /* tp_str */
410 PyObject_GenericGetAttr, /* tp_getattro */
411 0, /* tp_setattro */
412 0, /* tp_as_buffer */
413 Py_TPFLAGS_DEFAULT, /* tp_flags */
414 code_doc, /* tp_doc */
415 0, /* tp_traverse */
416 0, /* tp_clear */
417 code_richcompare, /* tp_richcompare */
418 0, /* tp_weaklistoffset */
419 0, /* tp_iter */
420 0, /* tp_iternext */
421 0, /* tp_methods */
422 code_memberlist, /* tp_members */
423 0, /* tp_getset */
424 0, /* tp_base */
425 0, /* tp_dict */
426 0, /* tp_descr_get */
427 0, /* tp_descr_set */
428 0, /* tp_dictoffset */
429 0, /* tp_init */
430 0, /* tp_alloc */
431 code_new, /* tp_new */
/* All about c_lnotab.

c_lnotab is an array of unsigned bytes disguised as a Python string.  In -O
mode, SET_LINENO opcodes aren't generated, and bytecode offsets are mapped
to source code line #s (when needed for tracebacks) via c_lnotab instead.
The array is conceptually a list of
    (bytecode offset increment, line number increment)
pairs.  The details are important and delicate, best illustrated by example:

    byte code offset    source code line number
          0                     1
          6                     2
         50                     7
        350                   307
        361                   308

The first trick is that these numbers aren't stored, only the increments
from one row to the next (this doesn't really work, but it's a start):

    0, 1,  6, 1,  44, 5,  300, 300,  11, 1

The second trick is that an unsigned byte can't hold negative values, or
values larger than 255, so (a) there's a deep assumption that byte code
offsets and their corresponding line #s both increase monotonically, and (b)
if at least one column jumps by more than 255 from one row to the next, more
than one pair is written to the table.  In case #b, there's no way to know
from looking at the table later how many were written.  That's the delicate
part.  A user of c_lnotab desiring to find the source line number
corresponding to a bytecode address A should do something like this:

    lineno = addr = 0
    for addr_incr, line_incr in c_lnotab:
        addr += addr_incr
        if addr > A:
            return lineno
        lineno += line_incr

In order for this to work, when the addr field increments by more than 255,
the line # increment in each pair generated must be 0 until the remaining addr
increment is < 256.  So, in the example above, com_set_lineno should not (as
was actually done until 2.2) expand 300, 300 to 255, 255,  45, 45, but to
255, 0,  45, 255,  0, 45.
*/
479 PyCode_Addr2Line(PyCodeObject *co, int addrq)
481 int size = PyBytes_Size(co->co_lnotab) / 2;
482 unsigned char *p = (unsigned char*)PyBytes_AsString(co->co_lnotab);
483 int line = co->co_firstlineno;
484 int addr = 0;
485 while (--size >= 0) {
486 addr += *p++;
487 if (addr > addrq)
488 break;
489 line += *p++;
491 return line;
/*
   Check whether the current instruction is at the start of a line.
 */
/* The theory of SET_LINENO-less tracing.

   In a nutshell, we use the co_lnotab field of the code object
   to tell when execution has moved onto a different line.

   As mentioned above, the basic idea is to set things up so
   that

         *instr_lb <= frame->f_lasti < *instr_ub

   is true so long as execution does not change lines.

   This is all fairly simple.  Digging the information out of
   co_lnotab takes some work, but is conceptually clear.

   Somewhat harder to explain is why we don't *always* call the
   line trace function when the above test fails.

   Consider this code:

   1: def f(a):
   2:     if a:
   3:        print 1
   4:     else:
   5:        print 2

   which compiles to this:

   2           0 LOAD_FAST                0 (a)
               3 JUMP_IF_FALSE            9 (to 15)
               6 POP_TOP

   3           7 LOAD_CONST               1 (1)
              10 PRINT_ITEM
              11 PRINT_NEWLINE
              12 JUMP_FORWARD             6 (to 21)
         >>   15 POP_TOP

   5          16 LOAD_CONST               2 (2)
              19 PRINT_ITEM
              20 PRINT_NEWLINE
         >>   21 LOAD_CONST               0 (None)
              24 RETURN_VALUE

   If 'a' is false, execution will jump to instruction at offset
   15 and the co_lnotab will claim that execution has moved to
   line 3.  This is at best misleading.  In this case we could
   associate the POP_TOP with line 4, but that doesn't make
   sense in all cases (I think).

   What we do is only call the line trace function if the co_lnotab
   indicates we have jumped to the *start* of a line, i.e. if the
   current instruction offset matches the offset given for the
   start of a line by the co_lnotab.

   This also takes care of the situation where 'a' is true.
   Execution will jump from instruction offset 12 to offset 21.
   Then the co_lnotab would imply that execution has moved to line
   5, which is again misleading.

   Why do we set f_lineno when tracing?  Well, consider the code
   above when 'a' is true.  If stepping through this with 'n' in
   pdb, you would stop at line 1 with a "call" type event, then
   line events on lines 2 and 3, then a "return" type event -- but
   you would be shown line 5 during this event.  This is a change
   from the behaviour in 2.2 and before, and I've found it
   confusing in practice.  By setting and using f_lineno when
   tracing, one can report a line number different from that
   suggested by f_lasti on this one occasion where it's desirable.
*/
571 int
572 PyCode_CheckLineNumber(PyCodeObject* co, int lasti, PyAddrPair *bounds)
574 int size, addr, line;
575 unsigned char* p;
577 p = (unsigned char*)PyBytes_AS_STRING(co->co_lnotab);
578 size = PyBytes_GET_SIZE(co->co_lnotab) / 2;
580 addr = 0;
581 line = co->co_firstlineno;
582 assert(line > 0);
584 /* possible optimization: if f->f_lasti == instr_ub
585 (likely to be a common case) then we already know
586 instr_lb -- if we stored the matching value of p
587 somwhere we could skip the first while loop. */
589 /* see comments in compile.c for the description of
590 co_lnotab. A point to remember: increments to p
591 should come in pairs -- although we don't care about
592 the line increments here, treating them as byte
593 increments gets confusing, to say the least. */
595 bounds->ap_lower = 0;
596 while (size > 0) {
597 if (addr + *p > lasti)
598 break;
599 addr += *p++;
600 if (*p)
601 bounds->ap_lower = addr;
602 line += *p++;
603 --size;
606 /* If lasti and addr don't match exactly, we don't want to
607 change the lineno slot on the frame or execute a trace
608 function. Return -1 instead.
610 if (addr != lasti)
611 line = -1;
613 if (size > 0) {
614 while (--size >= 0) {
615 addr += *p++;
616 if (*p++)
617 break;
619 bounds->ap_upper = addr;
621 else {
622 bounds->ap_upper = INT_MAX;
625 return line;