/* Peephole optimizations for bytecode compiler. */

#include "Python.h"

#include "Python-ast.h"
#include "code.h"
#include "compile.h"
#include "opcode.h"
#define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1]))
#define UNCONDITIONAL_JUMP(op)  (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \
    || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP)
#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
#define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
#define CODESIZE(op)  (HAS_ARG(op) ? 3 : 1)
#define ISBASICBLOCK(blocks, start, bytes) \
    (blocks[start]==blocks[start+bytes-1])
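
/* Worked example (illustrative, not part of the original source): a 3-byte
   instruction at offset i keeps its 16-bit argument little-endian in the two
   bytes after the opcode.  For an argument of 0x0102, SETARG writes
   arr[i+1] = 0x02 and arr[i+2] = 0x01, and GETARG recovers
   (0x01<<8) + 0x02 == 258.  GETJUMPTGT returns that value directly for
   absolute jumps, or adds i+3 (the offset of the following instruction) for
   relative jumps. */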

/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
   with    LOAD_CONST (c1, c2, ... cn).
   The consts table must still be in list form so that the
   new constant (c1, c2, ... cn) can be appended.
   Called with codestr pointing to the first LOAD_CONST.
   Bails out with no change if one or more of the LOAD_CONSTs is missing.
   Also works for BUILD_LIST when followed by an "in" or "not in" test.
*/
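
/* Illustrative example (not from the original source): a constant tuple such
   as (1, 2, 3) compiles to
       LOAD_CONST 1; LOAD_CONST 2; LOAD_CONST 3; BUILD_TUPLE 3
   and this helper rewrites it in place to nine NOPs followed by a single
   LOAD_CONST of the folded tuple (1, 2, 3), which is appended to the end of
   the consts list. */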
static int
tuple_of_constants(unsigned char *codestr, Py_ssize_t n, PyObject *consts)
{
    PyObject *newconst, *constant;
    Py_ssize_t i, arg, len_consts;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST);
    assert(GETARG(codestr, (n*3)) == n);
    for (i=0 ; i<n ; i++)
        assert(codestr[i*3] == LOAD_CONST);

    /* Buildup new tuple of constants */
    newconst = PyTuple_New(n);
    if (newconst == NULL)
        return 0;
    len_consts = PyList_GET_SIZE(consts);
    for (i=0 ; i<n ; i++) {
        arg = GETARG(codestr, (i*3));
        assert(arg < len_consts);
        constant = PyList_GET_ITEM(consts, arg);
        Py_INCREF(constant);
        PyTuple_SET_ITEM(newconst, i, constant);
    }

    /* Append folded constant onto consts */
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        return 0;
    }
    Py_DECREF(newconst);

    /* Write NOPs over old LOAD_CONSTS and
       add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
    memset(codestr, NOP, n*3);
    codestr[n*3] = LOAD_CONST;
    SETARG(codestr, (n*3), len_consts);
    return 1;
}

/* Replace LOAD_CONST c1, LOAD_CONST c2, BINOP
   with    LOAD_CONST binop(c1,c2)
   The consts table must still be in list form so that the
   new constant can be appended.
   Called with codestr pointing to the first LOAD_CONST.
   Abandons the transformation if the folding fails (e.g. 1+'a').
   If the new constant is a sequence, only folds when the size
   is below a threshold value.  That keeps pyc files from
   becoming large in the presence of code like:  (None,)*1000.
*/
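
/* Illustrative example (not from the original source): "x = 2 * 3" compiles
   to LOAD_CONST 2; LOAD_CONST 3; BINARY_MULTIPLY, which this helper rewrites
   in place to NOP NOP NOP NOP LOAD_CONST 6, with the folded result appended
   to the consts list. */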
static int
fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
{
    PyObject *newconst, *v, *w;
    Py_ssize_t len_consts, size;
    int opcode;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    assert(codestr[0] == LOAD_CONST);
    assert(codestr[3] == LOAD_CONST);

    /* Create new constant */
    v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
    w = PyList_GET_ITEM(consts, GETARG(codestr, 3));
    opcode = codestr[6];
    switch (opcode) {
        case BINARY_POWER:
            newconst = PyNumber_Power(v, w, Py_None);
            break;
        case BINARY_MULTIPLY:
            newconst = PyNumber_Multiply(v, w);
            break;
        case BINARY_DIVIDE:
            /* Cannot fold this operation statically since
               the result can depend on the run-time presence
               of the -Qnew flag */
            return 0;
        case BINARY_TRUE_DIVIDE:
            newconst = PyNumber_TrueDivide(v, w);
            break;
        case BINARY_FLOOR_DIVIDE:
            newconst = PyNumber_FloorDivide(v, w);
            break;
        case BINARY_MODULO:
            newconst = PyNumber_Remainder(v, w);
            break;
        case BINARY_ADD:
            newconst = PyNumber_Add(v, w);
            break;
        case BINARY_SUBTRACT:
            newconst = PyNumber_Subtract(v, w);
            break;
        case BINARY_SUBSCR:
            newconst = PyObject_GetItem(v, w);
            break;
        case BINARY_LSHIFT:
            newconst = PyNumber_Lshift(v, w);
            break;
        case BINARY_RSHIFT:
            newconst = PyNumber_Rshift(v, w);
            break;
        case BINARY_AND:
            newconst = PyNumber_And(v, w);
            break;
        case BINARY_XOR:
            newconst = PyNumber_Xor(v, w);
            break;
        case BINARY_OR:
            newconst = PyNumber_Or(v, w);
            break;
        default:
            /* Called with an unknown opcode */
            PyErr_Format(PyExc_SystemError,
                         "unexpected binary operation %d on a constant",
                         opcode);
            return 0;
    }
    if (newconst == NULL) {
        PyErr_Clear();
        return 0;
    }
    size = PyObject_Size(newconst);
    if (size == -1)
        PyErr_Clear();
    else if (size > 20) {
        Py_DECREF(newconst);
        return 0;
    }

    /* Append folded constant into consts table */
    len_consts = PyList_GET_SIZE(consts);
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        return 0;
    }
    Py_DECREF(newconst);

    /* Write NOP NOP NOP NOP LOAD_CONST newconst */
    memset(codestr, NOP, 4);
    codestr[4] = LOAD_CONST;
    SETARG(codestr, 4, len_consts);
    return 1;
}
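
/* Fold a unary op on a constant:
       LOAD_CONST c  UNARY_OP  -->  LOAD_CONST unary_op(c)
   Illustrative example (not from the original source): "x = -5" compiles to
   LOAD_CONST 5; UNARY_NEGATIVE, which this helper rewrites to
   NOP; LOAD_CONST -5, appending the folded value to the consts list. */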
static int
fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts)
{
    PyObject *newconst=NULL, *v;
    Py_ssize_t len_consts;
    int opcode;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    assert(codestr[0] == LOAD_CONST);

    /* Create new constant */
    v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
    opcode = codestr[3];
    switch (opcode) {
        case UNARY_NEGATIVE:
            /* Preserve the sign of -0.0 */
            if (PyObject_IsTrue(v) == 1)
                newconst = PyNumber_Negative(v);
            break;
        case UNARY_CONVERT:
            newconst = PyObject_Repr(v);
            break;
        case UNARY_INVERT:
            newconst = PyNumber_Invert(v);
            break;
        default:
            /* Called with an unknown opcode */
            PyErr_Format(PyExc_SystemError,
                         "unexpected unary operation %d on a constant",
                         opcode);
            return 0;
    }
    if (newconst == NULL) {
        PyErr_Clear();
        return 0;
    }

    /* Append folded constant into consts table */
    len_consts = PyList_GET_SIZE(consts);
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        return 0;
    }
    Py_DECREF(newconst);

    /* Write NOP LOAD_CONST newconst */
    codestr[0] = NOP;
    codestr[1] = LOAD_CONST;
    SETARG(codestr, 1, len_consts);
    return 1;
}
static unsigned int *
markblocks(unsigned char *code, Py_ssize_t len)
{
    unsigned int *blocks = (unsigned int *)PyMem_Malloc(len*sizeof(int));
    int i, j, opcode, blockcnt = 0;

    if (blocks == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memset(blocks, 0, len*sizeof(int));

    /* Mark labels in the first pass */
    for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
        opcode = code[i];
        switch (opcode) {
            case FOR_ITER:
            case JUMP_FORWARD:
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
            case SETUP_WITH:
                j = GETJUMPTGT(code, i);
                blocks[j] = 1;
                break;
        }
    }
    /* Build block numbers in the second pass */
    for (i=0 ; i<len ; i++) {
        blockcnt += blocks[i];      /* increment blockcnt over labels */
        blocks[i] = blockcnt;
    }
    return blocks;
}
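
/* Illustrative example (not from the original source): if the only jump
   target in a code string is offset 6, the first pass sets blocks[6] = 1 and
   the second pass leaves blocks[0..5] == 0 and blocks[6..] == 1.
   ISBASICBLOCK(blocks, start, bytes) is then true exactly when no jump
   target falls after the first byte of the given range. */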

/* Perform basic peephole optimizations to components of a code object.
   The consts object should still be in list form to allow new constants
   to be appended.

   To keep the optimizer simple, it bails out (does nothing) for code
   containing extended arguments or that has a length over 32,700.  That
   allows us to avoid overflow and sign issues.  Likewise, it bails when
   the lineno table has complex encoding for gaps >= 255.

   Optimizations are restricted to simple transformations occurring within a
   single basic block.  All transformations keep the code size the same or
   smaller.  For those that reduce size, the gaps are initially filled with
   NOPs.  Later those NOPs are removed and the jump addresses retargeted in
   a single pass.  Line numbering is adjusted accordingly. */
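
/* Illustrative example (not from the original source): when an unconditional
   jump to a RETURN_VALUE is rewritten as RETURN_VALUE, its two leftover
   argument bytes become NOPs.  The final passes build addrmap[] from old
   offsets to NOP-free offsets, push every jump argument through that map,
   and then compact the code string by copying surviving bytes forward. */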
PyObject *
PyCode_Optimize(PyObject *code, PyObject *consts, PyObject *names,
                PyObject *lineno_obj)
{
    Py_ssize_t i, j, codelen;
    int nops, h, adj;
    int tgt, tgttgt, opcode;
    unsigned char *codestr = NULL;
    unsigned char *lineno;
    int *addrmap = NULL;
    int new_line, cum_orig_line, last_line, tabsiz;
    int cumlc=0, lastlc=0;      /* Count runs of consecutive LOAD_CONSTs */
    unsigned int *blocks = NULL;
    char *name;

    /* Bail out if an exception is set */
    if (PyErr_Occurred())
        goto exitError;

    /* Bypass optimization when the lineno table is too complex */
    assert(PyString_Check(lineno_obj));
    lineno = (unsigned char*)PyString_AS_STRING(lineno_obj);
    tabsiz = PyString_GET_SIZE(lineno_obj);
    if (memchr(lineno, 255, tabsiz) != NULL)
        goto exitUnchanged;

    /* Avoid situations where jump retargeting could overflow */
    assert(PyString_Check(code));
    codelen = PyString_GET_SIZE(code);
    if (codelen > 32700)
        goto exitUnchanged;

    /* Make a modifiable copy of the code string */
    codestr = (unsigned char *)PyMem_Malloc(codelen);
    if (codestr == NULL)
        goto exitError;
    codestr = (unsigned char *)memcpy(codestr,
                                      PyString_AS_STRING(code), codelen);

    /* Verify that RETURN_VALUE terminates the codestring. This allows
       the various transformation patterns to look ahead several
       instructions without additional checks to make sure they are not
       looking beyond the end of the code string.
    */
    if (codestr[codelen-1] != RETURN_VALUE)
        goto exitUnchanged;

    /* Mapping to new jump targets after NOPs are removed */
    addrmap = (int *)PyMem_Malloc(codelen * sizeof(int));
    if (addrmap == NULL)
        goto exitError;

    blocks = markblocks(codestr, codelen);
    if (blocks == NULL)
        goto exitError;
    assert(PyList_Check(consts));

    for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
      reoptimize_current:
        opcode = codestr[i];

        lastlc = cumlc;
        cumlc = 0;

        switch (opcode) {
            /* Replace UNARY_NOT POP_JUMP_IF_FALSE
               with    POP_JUMP_IF_TRUE */
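            /* Illustrative example: "if not x:" compiles to
               UNARY_NOT; POP_JUMP_IF_FALSE target, which becomes
               POP_JUMP_IF_TRUE target followed by a NOP. */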
            case UNARY_NOT:
                if (codestr[i+1] != POP_JUMP_IF_FALSE
                    || !ISBASICBLOCK(blocks,i,4))
                    continue;
                j = GETARG(codestr, i+1);
                codestr[i] = POP_JUMP_IF_TRUE;
                SETARG(codestr, i, j);
                codestr[i+3] = NOP;
                goto reoptimize_current;

            /* not a is b     -->  a is not b
               not a in b     -->  a not in b
               not a is not b -->  a is b
               not a not in b -->  a in b
            */
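            /* Illustrative note: comparison arguments 6 through 9 are the
               cmp_op indices for "in", "not in", "is" and "is not"; each pair
               differs only in the low bit, so XOR-ing the argument with 1
               flips the test and the trailing UNARY_NOT can be dropped. */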
            case COMPARE_OP:
                j = GETARG(codestr, i);
                if (j < 6 || j > 9 ||
                    codestr[i+3] != UNARY_NOT ||
                    !ISBASICBLOCK(blocks,i,4))
                    continue;
                SETARG(codestr, i, (j^1));
                codestr[i+3] = NOP;
                break;

            /* Replace LOAD_GLOBAL/LOAD_NAME None
               with    LOAD_CONST None */
            case LOAD_NAME:
            case LOAD_GLOBAL:
                j = GETARG(codestr, i);
                name = PyString_AsString(PyTuple_GET_ITEM(names, j));
                if (name == NULL || strcmp(name, "None") != 0)
                    continue;
                for (j=0 ; j < PyList_GET_SIZE(consts) ; j++) {
                    if (PyList_GET_ITEM(consts, j) == Py_None)
                        break;
                }
                if (j == PyList_GET_SIZE(consts)) {
                    if (PyList_Append(consts, Py_None) == -1)
                        goto exitError;
                }
                assert(PyList_GET_ITEM(consts, j) == Py_None);
                codestr[i] = LOAD_CONST;
                SETARG(codestr, i, j);
                cumlc = lastlc + 1;
                break;

            /* Skip over LOAD_CONST trueconst
               POP_JUMP_IF_FALSE xx. This improves
               "while 1" performance. */
            case LOAD_CONST:
                cumlc = lastlc + 1;
                j = GETARG(codestr, i);
                if (codestr[i+3] != POP_JUMP_IF_FALSE ||
                    !ISBASICBLOCK(blocks,i,6) ||
                    !PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
                    continue;
                memset(codestr+i, NOP, 6);
                cumlc = 0;
                break;

            /* Try to fold tuples of constants (includes a case for lists
               which are only used for "in" and "not in" tests).
               Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
               Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
               Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
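            /* Illustrative example: a swap such as "a, b = b, a" produces the
               pattern ... BUILD_TUPLE 2; UNPACK_SEQUENCE 2 ..., whose net
               stack effect is a single ROT_TWO, so the pair is rewritten to
               ROT_TWO plus NOP padding and the temporary tuple is never
               built. */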
            case BUILD_TUPLE:
            case BUILD_LIST:
                j = GETARG(codestr, i);
                h = i - 3 * j;
                if (h >= 0 &&
                    j <= lastlc &&
                    ((opcode == BUILD_TUPLE &&
                      ISBASICBLOCK(blocks, h, 3*(j+1))) ||
                     (opcode == BUILD_LIST &&
                      codestr[i+3]==COMPARE_OP &&
                      ISBASICBLOCK(blocks, h, 3*(j+2)) &&
                      (GETARG(codestr,i+3)==6 ||
                       GETARG(codestr,i+3)==7))) &&
                    tuple_of_constants(&codestr[h], j, consts)) {
                    assert(codestr[i] == LOAD_CONST);
                    cumlc = 1;
                    break;
                }
                if (codestr[i+3] != UNPACK_SEQUENCE ||
                    !ISBASICBLOCK(blocks,i,6) ||
                    j != GETARG(codestr, i+3))
                    continue;
                if (j == 1) {
                    memset(codestr+i, NOP, 6);
                } else if (j == 2) {
                    codestr[i] = ROT_TWO;
                    memset(codestr+i+1, NOP, 5);
                } else if (j == 3) {
                    codestr[i] = ROT_THREE;
                    codestr[i+1] = ROT_TWO;
                    memset(codestr+i+2, NOP, 4);
                }
                break;

            /* Fold binary ops on constants.
               LOAD_CONST c1 LOAD_CONST c2 BINOP --> LOAD_CONST binop(c1,c2) */
            case BINARY_POWER:
            case BINARY_MULTIPLY:
            case BINARY_TRUE_DIVIDE:
            case BINARY_FLOOR_DIVIDE:
            case BINARY_MODULO:
            case BINARY_ADD:
            case BINARY_SUBTRACT:
            case BINARY_SUBSCR:
            case BINARY_LSHIFT:
            case BINARY_RSHIFT:
            case BINARY_AND:
            case BINARY_XOR:
            case BINARY_OR:
                if (lastlc >= 2 &&
                    ISBASICBLOCK(blocks, i-6, 7) &&
                    fold_binops_on_constants(&codestr[i-6], consts)) {
                    i -= 2;
                    assert(codestr[i] == LOAD_CONST);
                    cumlc = 1;
                }
                break;

            /* Fold unary ops on constants.
               LOAD_CONST c  UNARY_OP --> LOAD_CONST unary_op(c) */
            case UNARY_NEGATIVE:
            case UNARY_CONVERT:
            case UNARY_INVERT:
                if (lastlc >= 1 &&
                    ISBASICBLOCK(blocks, i-3, 4) &&
                    fold_unaryops_on_constants(&codestr[i-3], consts)) {
                    i -= 2;
                    assert(codestr[i] == LOAD_CONST);
                    cumlc = 1;
                }
                break;

            /* Simplify conditional jump to conditional jump where the
               result of the first test implies the success of a similar
               test or the failure of the opposite test.
               x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_FALSE_OR_POP z
                  -->  x:JUMP_IF_FALSE_OR_POP z
               x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_TRUE_OR_POP z
                  -->  x:POP_JUMP_IF_FALSE y+3
               where y+3 is the instruction following the second test.
            */
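            /* Illustrative example: in "if a and b:", the 'and' emits
               JUMP_IF_FALSE_OR_POP whose target is the if-statement's own
               POP_JUMP_IF_FALSE; both jump on a false value, so the first
               jump is rewritten as POP_JUMP_IF_FALSE straight to the
               else-branch target. */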
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
                tgt = GETJUMPTGT(codestr, i);
                j = codestr[tgt];
                if (CONDITIONAL_JUMP(j)) {
                    /* NOTE: all possible jumps here are
                       absolute! */
                    if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) {
                        /* The second jump will be
                           taken iff the first is. */
                        tgttgt = GETJUMPTGT(codestr, tgt);
                        /* The current opcode inherits
                           its target's stack behaviour */
                        codestr[i] = j;
                        SETARG(codestr, i, tgttgt);
                        goto reoptimize_current;
                    } else {
                        /* The second jump is not taken
                           if the first is (so jump past
                           it), and all conditional
                           jumps pop their argument when
                           they're not taken (so change
                           the first jump to pop its
                           argument when it's taken). */
                        if (JUMPS_ON_TRUE(opcode))
                            codestr[i] = POP_JUMP_IF_TRUE;
                        else
                            codestr[i] = POP_JUMP_IF_FALSE;
                        SETARG(codestr, i, (tgt + 3));
                        goto reoptimize_current;
                    }
                }
                /* Intentional fallthrough */

            /* Replace jumps to unconditional jumps */
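            /* Illustrative example: a jump whose target is itself an
               unconditional jump, e.g.
                   JUMP_ABSOLUTE x; ...; x: JUMP_ABSOLUTE y
               is retargeted directly to y, and an unconditional jump to a
               RETURN_VALUE is replaced by the RETURN_VALUE itself. */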
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case FOR_ITER:
            case JUMP_FORWARD:
            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
            case SETUP_WITH:
                tgt = GETJUMPTGT(codestr, i);
                /* Replace JUMP_* to a RETURN into just a RETURN */
                if (UNCONDITIONAL_JUMP(opcode) &&
                    codestr[tgt] == RETURN_VALUE) {
                    codestr[i] = RETURN_VALUE;
                    memset(codestr+i+1, NOP, 2);
                    continue;
                }
                if (!UNCONDITIONAL_JUMP(codestr[tgt]))
                    continue;
                tgttgt = GETJUMPTGT(codestr, tgt);
                if (opcode == JUMP_FORWARD)  /* JMP_ABS can go backwards */
                    opcode = JUMP_ABSOLUTE;
                if (!ABSOLUTE_JUMP(opcode))
                    tgttgt -= i + 3;         /* Calc relative jump addr */
                if (tgttgt < 0)              /* No backward relative jumps */
                    continue;
                codestr[i] = opcode;
                SETARG(codestr, i, tgttgt);
                break;

            case EXTENDED_ARG:
                goto exitUnchanged;

            /* Replace RETURN LOAD_CONST None RETURN with just RETURN */
            /* Remove unreachable JUMPs after RETURN */
            case RETURN_VALUE:
                if (i+4 >= codelen)
                    continue;
                if (codestr[i+4] == RETURN_VALUE &&
                    ISBASICBLOCK(blocks,i,5))
                    memset(codestr+i+1, NOP, 4);
                else if (UNCONDITIONAL_JUMP(codestr[i+1]) &&
                         ISBASICBLOCK(blocks,i,4))
                    memset(codestr+i+1, NOP, 3);
                break;
        }
    }

    /* Fixup linenotab */
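    /* Illustrative note: entries at even indices of the lineno table are
       byte-offset deltas between line-number events.  After NOP removal,
       each cumulative old offset is pushed through addrmap and the deltas
       are re-derived from the new offsets. */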
    for (i=0, nops=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
        addrmap[i] = i - nops;
        if (codestr[i] == NOP)
            nops++;
    }
    cum_orig_line = 0;
    last_line = 0;
    for (i=0 ; i < tabsiz ; i+=2) {
        cum_orig_line += lineno[i];
        new_line = addrmap[cum_orig_line];
        assert (new_line - last_line < 255);
        lineno[i] = (unsigned char)(new_line - last_line);
        last_line = new_line;
    }

    /* Remove NOPs and fixup jump targets */
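    /* Illustrative example: if three NOP bytes precede offset 10, then
       addrmap[10] == 7; an absolute jump argument of 10 becomes 7, while a
       relative jump argument is re-derived from the remapped positions of
       both the jump and its destination. */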
    for (i=0, h=0 ; i<codelen ; ) {
        opcode = codestr[i];
        switch (opcode) {
            case NOP:
                i++;
                continue;

            /* Absolute jump targets are remapped directly */
            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
                j = addrmap[GETARG(codestr, i)];
                SETARG(codestr, i, j);
                break;

            /* Relative jump arguments are recomputed from the remapped
               source and destination offsets */
            case FOR_ITER:
            case JUMP_FORWARD:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
            case SETUP_WITH:
                j = addrmap[GETARG(codestr, i) + i + 3] - addrmap[i] - 3;
                SETARG(codestr, i, j);
                break;
        }
        adj = CODESIZE(opcode);
        while (adj--)
            codestr[h++] = codestr[i++];
    }
    assert(h + nops == codelen);

    code = PyString_FromStringAndSize((char *)codestr, h);