1 /* Peephole optimizations for bytecode compiler. */
5 #include "Python-ast.h"
14 #define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1]))
15 #define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
16 #define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
17 #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
18 #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
19 #define CODESIZE(op) (HAS_ARG(op) ? 3 : 1)
20 #define ISBASICBLOCK(blocks, start, bytes) \
21 (blocks[start]==blocks[start+bytes-1])
23 /* Replace LOAD_CONST c1. LOAD_CONST c2 ... LOAD_CONST cn BUILD_TUPLE n
24 with LOAD_CONST (c1, c2, ... cn).
25 The consts table must still be in list form so that the
26 new constant (c1, c2, ... cn) can be appended.
27 Called with codestr pointing to the first LOAD_CONST.
28 Bails out with no change if one or more of the LOAD_CONSTs is missing.
29 Also works for BUILD_LIST when followed by an "in" or "not in" test.
32 tuple_of_constants(unsigned char *codestr
, int n
, PyObject
*consts
)
34 PyObject
*newconst
, *constant
;
35 Py_ssize_t i
, arg
, len_consts
;
38 assert(PyList_CheckExact(consts
));
39 assert(codestr
[n
*3] == BUILD_TUPLE
|| codestr
[n
*3] == BUILD_LIST
);
40 assert(GETARG(codestr
, (n
*3)) == n
);
42 assert(codestr
[i
*3] == LOAD_CONST
);
44 /* Buildup new tuple of constants */
45 newconst
= PyTuple_New(n
);
48 len_consts
= PyList_GET_SIZE(consts
);
49 for (i
=0 ; i
<n
; i
++) {
50 arg
= GETARG(codestr
, (i
*3));
51 assert(arg
< len_consts
);
52 constant
= PyList_GET_ITEM(consts
, arg
);
54 PyTuple_SET_ITEM(newconst
, i
, constant
);
57 /* Append folded constant onto consts */
58 if (PyList_Append(consts
, newconst
)) {
64 /* Write NOPs over old LOAD_CONSTS and
65 add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
66 memset(codestr
, NOP
, n
*3);
67 codestr
[n
*3] = LOAD_CONST
;
68 SETARG(codestr
, (n
*3), len_consts
);
72 /* Replace LOAD_CONST c1. LOAD_CONST c2 BINOP
73 with LOAD_CONST binop(c1,c2)
74 The consts table must still be in list form so that the
75 new constant can be appended.
76 Called with codestr pointing to the first LOAD_CONST.
77 Abandons the transformation if the folding fails (i.e. 1+'a').
78 If the new constant is a sequence, only folds when the size
79 is below a threshold value. That keeps pyc files from
80 becoming large in the presence of code like: (None,)*1000.
83 fold_binops_on_constants(unsigned char *codestr
, PyObject
*consts
)
85 PyObject
*newconst
, *v
, *w
;
86 Py_ssize_t len_consts
, size
;
90 assert(PyList_CheckExact(consts
));
91 assert(codestr
[0] == LOAD_CONST
);
92 assert(codestr
[3] == LOAD_CONST
);
94 /* Create new constant */
95 v
= PyList_GET_ITEM(consts
, GETARG(codestr
, 0));
96 w
= PyList_GET_ITEM(consts
, GETARG(codestr
, 3));
100 newconst
= PyNumber_Power(v
, w
, Py_None
);
102 case BINARY_MULTIPLY
:
103 newconst
= PyNumber_Multiply(v
, w
);
106 /* Cannot fold this operation statically since
107 the result can depend on the run-time presence
110 case BINARY_TRUE_DIVIDE
:
111 newconst
= PyNumber_TrueDivide(v
, w
);
113 case BINARY_FLOOR_DIVIDE
:
114 newconst
= PyNumber_FloorDivide(v
, w
);
117 newconst
= PyNumber_Remainder(v
, w
);
120 newconst
= PyNumber_Add(v
, w
);
122 case BINARY_SUBTRACT
:
123 newconst
= PyNumber_Subtract(v
, w
);
126 newconst
= PyObject_GetItem(v
, w
);
129 newconst
= PyNumber_Lshift(v
, w
);
132 newconst
= PyNumber_Rshift(v
, w
);
135 newconst
= PyNumber_And(v
, w
);
138 newconst
= PyNumber_Xor(v
, w
);
141 newconst
= PyNumber_Or(v
, w
);
144 /* Called with an unknown opcode */
145 PyErr_Format(PyExc_SystemError
,
146 "unexpected binary operation %d on a constant",
150 if (newconst
== NULL
) {
154 size
= PyObject_Size(newconst
);
157 else if (size
> 20) {
162 /* Append folded constant into consts table */
163 len_consts
= PyList_GET_SIZE(consts
);
164 if (PyList_Append(consts
, newconst
)) {
170 /* Write NOP NOP NOP NOP LOAD_CONST newconst */
171 memset(codestr
, NOP
, 4);
172 codestr
[4] = LOAD_CONST
;
173 SETARG(codestr
, 4, len_consts
);
178 fold_unaryops_on_constants(unsigned char *codestr
, PyObject
*consts
)
180 PyObject
*newconst
=NULL
, *v
;
181 Py_ssize_t len_consts
;
185 assert(PyList_CheckExact(consts
));
186 assert(codestr
[0] == LOAD_CONST
);
188 /* Create new constant */
189 v
= PyList_GET_ITEM(consts
, GETARG(codestr
, 0));
193 /* Preserve the sign of -0.0 */
194 if (PyObject_IsTrue(v
) == 1)
195 newconst
= PyNumber_Negative(v
);
198 newconst
= PyObject_Repr(v
);
201 newconst
= PyNumber_Invert(v
);
204 /* Called with an unknown opcode */
205 PyErr_Format(PyExc_SystemError
,
206 "unexpected unary operation %d on a constant",
210 if (newconst
== NULL
) {
215 /* Append folded constant into consts table */
216 len_consts
= PyList_GET_SIZE(consts
);
217 if (PyList_Append(consts
, newconst
)) {
223 /* Write NOP LOAD_CONST newconst */
225 codestr
[1] = LOAD_CONST
;
226 SETARG(codestr
, 1, len_consts
);
230 static unsigned int *
231 markblocks(unsigned char *code
, int len
)
233 unsigned int *blocks
= (unsigned int *)PyMem_Malloc(len
*sizeof(int));
234 int i
,j
, opcode
, blockcnt
= 0;
236 if (blocks
== NULL
) {
240 memset(blocks
, 0, len
*sizeof(int));
242 /* Mark labels in the first pass */
243 for (i
=0 ; i
<len
; i
+=CODESIZE(opcode
)) {
255 j
= GETJUMPTGT(code
, i
);
260 /* Build block numbers in the second pass */
261 for (i
=0 ; i
<len
; i
++) {
262 blockcnt
+= blocks
[i
]; /* increment blockcnt over labels */
263 blocks
[i
] = blockcnt
;
268 /* Perform basic peephole optimizations to components of a code object.
269 The consts object should still be in list form to allow new constants
272 To keep the optimizer simple, it bails out (does nothing) for code
273 containing extended arguments or that has a length over 32,700. That
274 allows us to avoid overflow and sign issues. Likewise, it bails when
275 the lineno table has complex encoding for gaps >= 255.
277 Optimizations are restricted to simple transformations occuring within a
278 single basic block. All transformations keep the code size the same or
279 smaller. For those that reduce size, the gaps are initially filled with
280 NOPs. Later those NOPs are removed and the jump addresses retargeted in
281 a single pass. Line numbering is adjusted accordingly. */
284 PyCode_Optimize(PyObject
*code
, PyObject
* consts
, PyObject
*names
,
285 PyObject
*lineno_obj
)
287 Py_ssize_t i
, j
, codelen
;
289 int tgt
, tgttgt
, opcode
;
290 unsigned char *codestr
= NULL
;
291 unsigned char *lineno
;
293 int new_line
, cum_orig_line
, last_line
, tabsiz
;
294 int cumlc
=0, lastlc
=0; /* Count runs of consecutive LOAD_CONSTs */
295 unsigned int *blocks
= NULL
;
298 /* Bail out if an exception is set */
299 if (PyErr_Occurred())
302 /* Bypass optimization when the lineno table is too complex */
303 assert(PyString_Check(lineno_obj
));
304 lineno
= (unsigned char*)PyString_AS_STRING(lineno_obj
);
305 tabsiz
= PyString_GET_SIZE(lineno_obj
);
306 if (memchr(lineno
, 255, tabsiz
) != NULL
)
309 /* Avoid situations where jump retargeting could overflow */
310 assert(PyString_Check(code
));
311 codelen
= PyString_GET_SIZE(code
);
315 /* Make a modifiable copy of the code string */
316 codestr
= (unsigned char *)PyMem_Malloc(codelen
);
319 codestr
= (unsigned char *)memcpy(codestr
,
320 PyString_AS_STRING(code
), codelen
);
322 /* Verify that RETURN_VALUE terminates the codestring. This allows
323 the various transformation patterns to look ahead several
324 instructions without additional checks to make sure they are not
325 looking beyond the end of the code string.
327 if (codestr
[codelen
-1] != RETURN_VALUE
)
330 /* Mapping to new jump targets after NOPs are removed */
331 addrmap
= (int *)PyMem_Malloc(codelen
* sizeof(int));
335 blocks
= markblocks(codestr
, codelen
);
338 assert(PyList_Check(consts
));
340 for (i
=0 ; i
<codelen
; i
+= CODESIZE(codestr
[i
])) {
348 /* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP with
349 with JUMP_IF_TRUE POP_TOP */
351 if (codestr
[i
+1] != JUMP_IF_FALSE
||
352 codestr
[i
+4] != POP_TOP
||
353 !ISBASICBLOCK(blocks
,i
,5))
355 tgt
= GETJUMPTGT(codestr
, (i
+1));
356 if (codestr
[tgt
] != POP_TOP
)
358 j
= GETARG(codestr
, i
+1) + 1;
359 codestr
[i
] = JUMP_IF_TRUE
;
360 SETARG(codestr
, i
, j
);
361 codestr
[i
+3] = POP_TOP
;
365 /* not a is b --> a is not b
366 not a in b --> a not in b
367 not a is not b --> a is b
368 not a not in b --> a in b
371 j
= GETARG(codestr
, i
);
372 if (j
< 6 || j
> 9 ||
373 codestr
[i
+3] != UNARY_NOT
||
374 !ISBASICBLOCK(blocks
,i
,4))
376 SETARG(codestr
, i
, (j
^1));
380 /* Replace LOAD_GLOBAL/LOAD_NAME None
381 with LOAD_CONST None */
384 j
= GETARG(codestr
, i
);
385 name
= PyString_AsString(PyTuple_GET_ITEM(names
, j
));
386 if (name
== NULL
|| strcmp(name
, "None") != 0)
388 for (j
=0 ; j
< PyList_GET_SIZE(consts
) ; j
++) {
389 if (PyList_GET_ITEM(consts
, j
) == Py_None
)
392 if (j
== PyList_GET_SIZE(consts
)) {
393 if (PyList_Append(consts
, Py_None
) == -1)
396 assert(PyList_GET_ITEM(consts
, j
) == Py_None
);
397 codestr
[i
] = LOAD_CONST
;
398 SETARG(codestr
, i
, j
);
402 /* Skip over LOAD_CONST trueconst
403 JUMP_IF_FALSE xx POP_TOP */
406 j
= GETARG(codestr
, i
);
407 if (codestr
[i
+3] != JUMP_IF_FALSE
||
408 codestr
[i
+6] != POP_TOP
||
409 !ISBASICBLOCK(blocks
,i
,7) ||
410 !PyObject_IsTrue(PyList_GET_ITEM(consts
, j
)))
412 memset(codestr
+i
, NOP
, 7);
416 /* Try to fold tuples of constants (includes a case for lists
417 which are only used for "in" and "not in" tests).
418 Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
419 Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
420 Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
423 j
= GETARG(codestr
, i
);
427 ((opcode
== BUILD_TUPLE
&&
428 ISBASICBLOCK(blocks
, h
, 3*(j
+1))) ||
429 (opcode
== BUILD_LIST
&&
430 codestr
[i
+3]==COMPARE_OP
&&
431 ISBASICBLOCK(blocks
, h
, 3*(j
+2)) &&
432 (GETARG(codestr
,i
+3)==6 ||
433 GETARG(codestr
,i
+3)==7))) &&
434 tuple_of_constants(&codestr
[h
], j
, consts
)) {
435 assert(codestr
[i
] == LOAD_CONST
);
439 if (codestr
[i
+3] != UNPACK_SEQUENCE
||
440 !ISBASICBLOCK(blocks
,i
,6) ||
441 j
!= GETARG(codestr
, i
+3))
444 memset(codestr
+i
, NOP
, 6);
446 codestr
[i
] = ROT_TWO
;
447 memset(codestr
+i
+1, NOP
, 5);
449 codestr
[i
] = ROT_THREE
;
450 codestr
[i
+1] = ROT_TWO
;
451 memset(codestr
+i
+2, NOP
, 4);
455 /* Fold binary ops on constants.
456 LOAD_CONST c1 LOAD_CONST c2 BINOP --> LOAD_CONST binop(c1,c2) */
458 case BINARY_MULTIPLY
:
459 case BINARY_TRUE_DIVIDE
:
460 case BINARY_FLOOR_DIVIDE
:
463 case BINARY_SUBTRACT
:
471 ISBASICBLOCK(blocks
, i
-6, 7) &&
472 fold_binops_on_constants(&codestr
[i
-6], consts
)) {
474 assert(codestr
[i
] == LOAD_CONST
);
479 /* Fold unary ops on constants.
480 LOAD_CONST c1 UNARY_OP --> LOAD_CONST unary_op(c) */
485 ISBASICBLOCK(blocks
, i
-3, 4) &&
486 fold_unaryops_on_constants(&codestr
[i
-3], consts
)) {
488 assert(codestr
[i
] == LOAD_CONST
);
493 /* Simplify conditional jump to conditional jump where the
494 result of the first test implies the success of a similar
495 test or the failure of the opposite test.
501 x:JUMP_IF_FALSE y y:JUMP_IF_FALSE z --> x:JUMP_IF_FALSE z
502 x:JUMP_IF_FALSE y y:JUMP_IF_TRUE z --> x:JUMP_IF_FALSE y+3
503 where y+3 is the instruction following the second test.
507 tgt
= GETJUMPTGT(codestr
, i
);
509 if (j
== JUMP_IF_FALSE
|| j
== JUMP_IF_TRUE
) {
511 tgttgt
= GETJUMPTGT(codestr
, tgt
) - i
- 3;
512 SETARG(codestr
, i
, tgttgt
);
515 SETARG(codestr
, i
, tgt
);
519 /* Intentional fallthrough */
521 /* Replace jumps to unconditional jumps */
529 tgt
= GETJUMPTGT(codestr
, i
);
530 /* Replace JUMP_* to a RETURN into just a RETURN */
531 if (UNCONDITIONAL_JUMP(opcode
) &&
532 codestr
[tgt
] == RETURN_VALUE
) {
533 codestr
[i
] = RETURN_VALUE
;
534 memset(codestr
+i
+1, NOP
, 2);
537 if (!UNCONDITIONAL_JUMP(codestr
[tgt
]))
539 tgttgt
= GETJUMPTGT(codestr
, tgt
);
540 if (opcode
== JUMP_FORWARD
) /* JMP_ABS can go backwards */
541 opcode
= JUMP_ABSOLUTE
;
542 if (!ABSOLUTE_JUMP(opcode
))
543 tgttgt
-= i
+ 3; /* Calc relative jump addr */
544 if (tgttgt
< 0) /* No backward relative jumps */
547 SETARG(codestr
, i
, tgttgt
);
553 /* Replace RETURN LOAD_CONST None RETURN with just RETURN */
554 /* Remove unreachable JUMPs after RETURN */
558 if (codestr
[i
+4] == RETURN_VALUE
&&
559 ISBASICBLOCK(blocks
,i
,5))
560 memset(codestr
+i
+1, NOP
, 4);
561 else if (UNCONDITIONAL_JUMP(codestr
[i
+1]) &&
562 ISBASICBLOCK(blocks
,i
,4))
563 memset(codestr
+i
+1, NOP
, 3);
568 /* Fixup linenotab */
569 for (i
=0, nops
=0 ; i
<codelen
; i
+= CODESIZE(codestr
[i
])) {
570 addrmap
[i
] = i
- nops
;
571 if (codestr
[i
] == NOP
)
576 for (i
=0 ; i
< tabsiz
; i
+=2) {
577 cum_orig_line
+= lineno
[i
];
578 new_line
= addrmap
[cum_orig_line
];
579 assert (new_line
- last_line
< 255);
580 lineno
[i
] =((unsigned char)(new_line
- last_line
));
581 last_line
= new_line
;
584 /* Remove NOPs and fixup jump targets */
585 for (i
=0, h
=0 ; i
<codelen
; ) {
594 j
= addrmap
[GETARG(codestr
, i
)];
595 SETARG(codestr
, i
, j
);
605 j
= addrmap
[GETARG(codestr
, i
) + i
+ 3] - addrmap
[i
] - 3;
606 SETARG(codestr
, i
, j
);
609 adj
= CODESIZE(opcode
);
611 codestr
[h
++] = codestr
[i
++];
613 assert(h
+ nops
== codelen
);
615 code
= PyString_FromStringAndSize((char *)codestr
, h
);