/* Peephole optimizations for bytecode compiler. */

#include "Python-ast.h"

#define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1]))
#define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \
    || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP)
#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
#define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1)
#define ISBASICBLOCK(blocks, start, bytes) \
    (blocks[start]==blocks[start+bytes-1])
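
/* Sketch of the instruction layout assumed by the macros above: an
   instruction is a one-byte opcode followed, when HAS_ARG(op) holds, by a
   little-endian 16-bit argument (hence CODESIZE of 3 vs. 1).  For a
   LOAD_CONST with argument 0x0102 starting at offset i:
       codestr[i]   == LOAD_CONST
       codestr[i+1] == 0x02   (low byte, as read/written by GETARG/SETARG)
       codestr[i+2] == 0x01   (high byte)
   GETJUMPTGT always yields an absolute offset: absolute jumps store the
   target directly, relative jumps store target - (i+3). */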
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
   with    LOAD_CONST (c1, c2, ... cn).
   The consts table must still be in list form so that the
   new constant (c1, c2, ... cn) can be appended.
   Called with codestr pointing to the first LOAD_CONST.
   Bails out with no change if one or more of the LOAD_CONSTs is missing.
   Also works for BUILD_LIST when followed by an "in" or "not in" test.
*/
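/* Illustrative sketch (assumed bytecode, not taken from a real listing):
   for n == 3 the incoming sequence
       LOAD_CONST c1   LOAD_CONST c2   LOAD_CONST c3   BUILD_TUPLE 3
   spans 4*3 bytes; on success the first 9 bytes become NOPs and the last
   3 bytes become LOAD_CONST k, where k is the index of the freshly
   appended tuple (c1, c2, c3) at the end of the consts list. */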
static int
tuple_of_constants(unsigned char *codestr, Py_ssize_t n, PyObject *consts)
{
    PyObject *newconst, *constant;
    Py_ssize_t i, arg, len_consts;
    assert(PyList_CheckExact(consts));
    assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST);
    assert(GETARG(codestr, (n*3)) == n);
        assert(codestr[i*3] == LOAD_CONST);
    /* Buildup new tuple of constants */
    newconst = PyTuple_New(n);
    len_consts = PyList_GET_SIZE(consts);
    for (i=0 ; i<n ; i++) {
        arg = GETARG(codestr, (i*3));
        assert(arg < len_consts);
        constant = PyList_GET_ITEM(consts, arg);
        PyTuple_SET_ITEM(newconst, i, constant);
    /* Append folded constant onto consts */
    if (PyList_Append(consts, newconst)) {
    /* Write NOPs over old LOAD_CONSTS and
       add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
    memset(codestr, NOP, n*3);
    codestr[n*3] = LOAD_CONST;
    SETARG(codestr, (n*3), len_consts);
/* Replace LOAD_CONST c1, LOAD_CONST c2, BINOP
   with    LOAD_CONST binop(c1,c2)
   The consts table must still be in list form so that the
   new constant can be appended.
   Called with codestr pointing to the first LOAD_CONST.
   Abandons the transformation if the folding fails (e.g. 1+'a').
   If the new constant is a sequence, only folds when the size
   is below a threshold value. That keeps pyc files from
   becoming large in the presence of code like: (None,)*1000.
*/
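/* Illustrative sketch: "3 * 7" compiles to
       LOAD_CONST 3   LOAD_CONST 7   BINARY_MULTIPLY
   (3+3+1 bytes) and is rewritten to four NOPs followed by LOAD_CONST 21.
   The size threshold is what leaves "(None,)*1000" alone: folding it
   would park a 1000-element tuple in the consts table. */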
static int
fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
{
    PyObject *newconst, *v, *w;
    Py_ssize_t len_consts, size;
    assert(PyList_CheckExact(consts));
    assert(codestr[0] == LOAD_CONST);
    assert(codestr[3] == LOAD_CONST);
    /* Create new constant */
    v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
    w = PyList_GET_ITEM(consts, GETARG(codestr, 3));
            newconst = PyNumber_Power(v, w, Py_None);
        case BINARY_MULTIPLY:
            newconst = PyNumber_Multiply(v, w);
        case BINARY_TRUE_DIVIDE:
            newconst = PyNumber_TrueDivide(v, w);
        case BINARY_FLOOR_DIVIDE:
            newconst = PyNumber_FloorDivide(v, w);
            newconst = PyNumber_Remainder(v, w);
            newconst = PyNumber_Add(v, w);
        case BINARY_SUBTRACT:
            newconst = PyNumber_Subtract(v, w);
            newconst = PyObject_GetItem(v, w);
            newconst = PyNumber_Lshift(v, w);
            newconst = PyNumber_Rshift(v, w);
            newconst = PyNumber_And(v, w);
            newconst = PyNumber_Xor(v, w);
            newconst = PyNumber_Or(v, w);
            /* Called with an unknown opcode */
            PyErr_Format(PyExc_SystemError,
                         "unexpected binary operation %d on a constant",
    if (newconst == NULL) {
    size = PyObject_Size(newconst);
    else if (size > 20) {
    /* Append folded constant into consts table */
    len_consts = PyList_GET_SIZE(consts);
    if (PyList_Append(consts, newconst)) {
    /* Write NOP NOP NOP NOP LOAD_CONST newconst */
    memset(codestr, NOP, 4);
    codestr[4] = LOAD_CONST;
    SETARG(codestr, 4, len_consts);
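
/* Fold unary ops on constants: LOAD_CONST c UNARY_OP becomes
   LOAD_CONST unary_op(c).  Sketch: "-5" compiles to LOAD_CONST 5
   UNARY_NEGATIVE (4 bytes) and is rewritten to NOP LOAD_CONST -5. */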
static int
fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts)
{
    PyObject *newconst=NULL, *v;
    Py_ssize_t len_consts;
    assert(PyList_CheckExact(consts));
    assert(codestr[0] == LOAD_CONST);
    /* Create new constant */
    v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
            /* Preserve the sign of -0.0 */
            if (PyObject_IsTrue(v) == 1)
                newconst = PyNumber_Negative(v);
            newconst = PyNumber_Invert(v);
            /* Called with an unknown opcode */
            PyErr_Format(PyExc_SystemError,
                         "unexpected unary operation %d on a constant",
    if (newconst == NULL) {
    /* Append folded constant into consts table */
    len_consts = PyList_GET_SIZE(consts);
    if (PyList_Append(consts, newconst)) {
    /* Write NOP LOAD_CONST newconst */
    codestr[1] = LOAD_CONST;
    SETARG(codestr, 1, len_consts);
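
/* Sketch of what markblocks() computes: one unsigned int per code byte.
   The first pass marks every jump target with a 1; the second turns the
   marks into a running block number, so blocks[a] == blocks[b] exactly
   when no jump target falls in (a, b].  ISBASICBLOCK uses that to verify
   that a span of bytes never crosses into another basic block. */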
static unsigned int *
markblocks(unsigned char *code, Py_ssize_t len)
{
    unsigned int *blocks = (unsigned int *)PyMem_Malloc(len*sizeof(int));
    int i, j, opcode, blockcnt = 0;
    if (blocks == NULL) {
    memset(blocks, 0, len*sizeof(int));
    /* Mark labels in the first pass */
    for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
                j = GETJUMPTGT(code, i);
    /* Build block numbers in the second pass */
    for (i=0 ; i<len ; i++) {
        blockcnt += blocks[i];          /* increment blockcnt over labels */
        blocks[i] = blockcnt;
/* Helper to replace LOAD_NAME None/True/False with LOAD_CONST
   Returns: 0 if no change, 1 if change, -1 if error */
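/* Sketch: for "x = None", LOAD_NAME 'None' is rewritten to LOAD_CONST k,
   where k is the index of Py_None in consts (appended first if absent);
   True and False are handled the same way. */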
static int
load_global(unsigned char *codestr, Py_ssize_t i, char *name, PyObject *consts)
{
    if (strcmp(name, "None") == 0)
    else if (strcmp(name, "True") == 0)
    else if (strcmp(name, "False") == 0)
    for (j = 0; j < PyList_GET_SIZE(consts); j++) {
        if (PyList_GET_ITEM(consts, j) == obj)
    if (j == PyList_GET_SIZE(consts)) {
        if (PyList_Append(consts, obj) < 0)
    assert(PyList_GET_ITEM(consts, j) == obj);
    codestr[i] = LOAD_CONST;
    SETARG(codestr, i, j);
/* Perform basic peephole optimizations to components of a code object.
   The consts object should still be in list form to allow new constants
   to be appended.

   To keep the optimizer simple, it bails out (does nothing) for code that
   has a length over 32,700, and does not calculate extended arguments.
   That allows us to avoid overflow and sign issues. Likewise, it bails when
   the lineno table has complex encoding for gaps >= 255. EXTENDED_ARG can
   appear before MAKE_FUNCTION; in this case both opcodes are skipped.
   EXTENDED_ARG preceding any other opcode causes the optimizer to bail.

   Optimizations are restricted to simple transformations occurring within a
   single basic block. All transformations keep the code size the same or
   smaller. For those that reduce size, the gaps are initially filled with
   NOPs. Later those NOPs are removed and the jump addresses retargeted in
   a single pass. Line numbering is adjusted accordingly. */
PyObject *
PyCode_Optimize(PyObject *code, PyObject *consts, PyObject *names,
                PyObject *lineno_obj)
{
    Py_ssize_t i, j, codelen;
    int tgt, tgttgt, opcode;
    unsigned char *codestr = NULL;
    unsigned char *lineno;
    int new_line, cum_orig_line, last_line, tabsiz;
    int cumlc=0, lastlc=0;      /* Count runs of consecutive LOAD_CONSTs */
    unsigned int *blocks = NULL;
    /* Bail out if an exception is set */
    if (PyErr_Occurred())

    /* Bypass optimization when the lineno table is too complex */
    assert(PyBytes_Check(lineno_obj));
    lineno = (unsigned char*)PyBytes_AS_STRING(lineno_obj);
    tabsiz = PyBytes_GET_SIZE(lineno_obj);
    if (memchr(lineno, 255, tabsiz) != NULL)
    /* Avoid situations where jump retargeting could overflow */
    assert(PyBytes_Check(code));
    codelen = PyBytes_GET_SIZE(code);
    /* Make a modifiable copy of the code string */
    codestr = (unsigned char *)PyMem_Malloc(codelen);
    codestr = (unsigned char *)memcpy(codestr,
                                      PyBytes_AS_STRING(code), codelen);
    /* Verify that RETURN_VALUE terminates the codestring. This allows
       the various transformation patterns to look ahead several
       instructions without additional checks to make sure they are not
       looking beyond the end of the code string.
    */
    if (codestr[codelen-1] != RETURN_VALUE)
    /* Mapping to new jump targets after NOPs are removed */
    addrmap = (int *)PyMem_Malloc(codelen * sizeof(int));
    blocks = markblocks(codestr, codelen);

    assert(PyList_Check(consts));
    for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
            /* Replace UNARY_NOT POP_JUMP_IF_FALSE
               with    POP_JUMP_IF_TRUE */
                if (codestr[i+1] != POP_JUMP_IF_FALSE
                    || !ISBASICBLOCK(blocks,i,4))
                j = GETARG(codestr, i+1);
                codestr[i] = POP_JUMP_IF_TRUE;
                SETARG(codestr, i, j);
                goto reoptimize_current;
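
            /* Sketch: "if not x:" compiles to
                   LOAD_NAME x   UNARY_NOT   POP_JUMP_IF_FALSE tgt
               and becomes LOAD_NAME x   POP_JUMP_IF_TRUE tgt, with the
               leftover byte of the old jump turned into a NOP. */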
            /* not a is b -->  a is not b
               not a in b -->  a not in b
               not a is not b -->  a is b
               not a not in b -->  a in b
            */
                j = GETARG(codestr, i);
                if (j < 6 || j > 9 ||
                    codestr[i+3] != UNARY_NOT ||
                    !ISBASICBLOCK(blocks,i,4))
                SETARG(codestr, i, (j^1));
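
            /* Sketch: COMPARE_OP arguments 6..9 are PyCmp_IN, PyCmp_NOT_IN,
               PyCmp_IS and PyCmp_IS_NOT, paired so that XOR with 1 flips a
               test to its negation; "not (a in b)" therefore collapses to
               "a not in b" and the trailing UNARY_NOT becomes a NOP. */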
            /* Replace LOAD_GLOBAL/LOAD_NAME None/True/False
               with LOAD_CONST None/True/False */
                j = GETARG(codestr, i);
                name = _PyUnicode_AsString(PyTuple_GET_ITEM(names, j));
                h = load_global(codestr, i, name, consts);
            /* Skip over LOAD_CONST trueconst
               POP_JUMP_IF_FALSE xx. This improves
               "while 1" performance. */
                j = GETARG(codestr, i);
                if (codestr[i+3] != POP_JUMP_IF_FALSE ||
                    !ISBASICBLOCK(blocks,i,6) ||
                    !PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
                memset(codestr+i, NOP, 6);
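
            /* Sketch: the loop header of "while 1:" compiles to
                   LOAD_CONST 1   POP_JUMP_IF_FALSE exit
               (6 bytes); since the constant is always true, all 6 bytes
               are overwritten with NOPs and the body falls through. */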
            /* Try to fold tuples of constants (includes a case for lists
               which are only used for "in" and "not in" tests).
               Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
               Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
               Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
                j = GETARG(codestr, i);
                    ((opcode == BUILD_TUPLE &&
                      ISBASICBLOCK(blocks, h, 3*(j+1))) ||
                     (opcode == BUILD_LIST &&
                      codestr[i+3]==COMPARE_OP &&
                      ISBASICBLOCK(blocks, h, 3*(j+2)) &&
                      (GETARG(codestr,i+3)==6 ||
                       GETARG(codestr,i+3)==7))) &&
                    tuple_of_constants(&codestr[h], j, consts)) {
                    assert(codestr[i] == LOAD_CONST);
                if (codestr[i+3] != UNPACK_SEQUENCE ||
                    !ISBASICBLOCK(blocks,i,6) ||
                    j != GETARG(codestr, i+3))
                    memset(codestr+i, NOP, 6);
                    codestr[i] = ROT_TWO;
                    memset(codestr+i+1, NOP, 5);
                    codestr[i] = ROT_THREE;
                    codestr[i+1] = ROT_TWO;
                    memset(codestr+i+2, NOP, 4);
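
            /* Sketch of the swap idiom: "a, b = b, a" compiles to
                   LOAD b  LOAD a  BUILD_TUPLE 2  UNPACK_SEQUENCE 2  STORE a  STORE b
               and the 6-byte BUILD/UNPACK pair becomes ROT_TWO plus five
               NOPs; the three-element form becomes ROT_THREE ROT_TWO. */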
            /* Fold binary ops on constants.
               LOAD_CONST c1 LOAD_CONST c2 BINOP -->  LOAD_CONST binop(c1,c2) */
            case BINARY_MULTIPLY:
            case BINARY_TRUE_DIVIDE:
            case BINARY_FLOOR_DIVIDE:
            case BINARY_SUBTRACT:
                    ISBASICBLOCK(blocks, i-6, 7) &&
                    fold_binops_on_constants(&codestr[i-6], consts)) {
                    assert(codestr[i] == LOAD_CONST);
            /* Fold unary ops on constants.
               LOAD_CONST c1 UNARY_OP -->  LOAD_CONST unary_op(c) */
                    ISBASICBLOCK(blocks, i-3, 4) &&
                    fold_unaryops_on_constants(&codestr[i-3], consts)) {
                    assert(codestr[i] == LOAD_CONST);
            /* Simplify conditional jump to conditional jump where the
               result of the first test implies the success of a similar
               test or the failure of the opposite test.

               x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_FALSE_OR_POP z
                  -->  x:JUMP_IF_FALSE_OR_POP z
               x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_TRUE_OR_POP z
                  -->  x:POP_JUMP_IF_FALSE y+3
               where y+3 is the instruction following the second test.
            */
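            /* Sketch: "if a and b: ..." first emits JUMP_IF_FALSE_OR_POP
               for the short-circuit of "a", and its target is the if
               statement's own POP_JUMP_IF_FALSE; since both test the same
               way, the first jump is retargeted (and its popping behaviour
               adjusted) exactly as described above. */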
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
                tgt = GETJUMPTGT(codestr, i);
                if (CONDITIONAL_JUMP(j)) {
                    /* NOTE: all possible jumps here are
                       absolute! */
                    if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) {
                        /* The second jump will be
                           taken iff the first is. */
                        tgttgt = GETJUMPTGT(codestr, tgt);
                        /* The current opcode inherits
                           its target's stack behaviour */
                        SETARG(codestr, i, tgttgt);
                        goto reoptimize_current;
                        /* The second jump is not taken
                           if the first is (so jump past
                           it), and all conditional
                           jumps pop their argument when
                           they're not taken (so change
                           the first jump to pop its
                           argument when it's taken). */
                        if (JUMPS_ON_TRUE(opcode))
                            codestr[i] = POP_JUMP_IF_TRUE;
                        else
                            codestr[i] = POP_JUMP_IF_FALSE;
                        SETARG(codestr, i, (tgt + 3));
                        goto reoptimize_current;
                /* Intentional fallthrough */

            /* Replace jumps to unconditional jumps */
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
                tgt = GETJUMPTGT(codestr, i);
                /* Replace JUMP_* to a RETURN into just a RETURN */
                if (UNCONDITIONAL_JUMP(opcode) &&
                    codestr[tgt] == RETURN_VALUE) {
                    codestr[i] = RETURN_VALUE;
                    memset(codestr+i+1, NOP, 2);
                if (!UNCONDITIONAL_JUMP(codestr[tgt]))
                tgttgt = GETJUMPTGT(codestr, tgt);
                if (opcode == JUMP_FORWARD)     /* JMP_ABS can go backwards */
                    opcode = JUMP_ABSOLUTE;
                if (!ABSOLUTE_JUMP(opcode))
                    tgttgt -= i + 3;            /* Calc relative jump addr */
                if (tgttgt < 0)                 /* No backward relative jumps */
                SETARG(codestr, i, tgttgt);
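
                /* Sketch: a JUMP_FORWARD whose target holds JUMP_ABSOLUTE z is
                   retargeted straight to z (and itself becomes an absolute
                   jump, since the shortcut may point backwards); a jump whose
                   target is RETURN_VALUE is replaced by RETURN_VALUE itself
                   plus two NOPs. */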
                if (codestr[i+3] != MAKE_FUNCTION)
                /* don't visit MAKE_FUNCTION as GETARG will be wrong */
            /* Replace RETURN LOAD_CONST None RETURN with just RETURN */
            /* Remove unreachable JUMPs after RETURN */
                if (codestr[i+4] == RETURN_VALUE &&
                    ISBASICBLOCK(blocks,i,5))
                    memset(codestr+i+1, NOP, 4);
                else if (UNCONDITIONAL_JUMP(codestr[i+1]) &&
                         ISBASICBLOCK(blocks,i,4))
                    memset(codestr+i+1, NOP, 3);
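
                /* Sketch: an explicit "return" at the end of a function is
                   followed by the compiler's implicit LOAD_CONST None /
                   RETURN_VALUE epilogue; that dead 4-byte tail is NOP'd out.
                   Likewise a jump sitting right after RETURN_VALUE in the
                   same basic block can never run and is overwritten. */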
    /* Fixup linenotab */
    for (i=0, nops=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
        addrmap[i] = i - nops;
        if (codestr[i] == NOP)
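
    /* The lnotab is a sequence of (bytecode delta, line delta) byte pairs;
       removing NOPs only changes the bytecode deltas, so each pair's first
       byte is rewritten from addrmap.  The assert below holds because the
       memchr() bail-out above guarantees no original delta byte was 255,
       and dropping NOPs can only shrink a delta. */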
    for (i=0 ; i < tabsiz ; i+=2) {
        cum_orig_line += lineno[i];
        new_line = addrmap[cum_orig_line];
        assert (new_line - last_line < 255);
        lineno[i] = (unsigned char)(new_line - last_line);
        last_line = new_line;
    /* Remove NOPs and fixup jump targets */
    for (i=0, h=0 ; i<codelen ; ) {
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
                j = addrmap[GETARG(codestr, i)];
                SETARG(codestr, i, j);
                j = addrmap[GETARG(codestr, i) + i + 3] - addrmap[i] - 3;
                SETARG(codestr, i, j);
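
                /* Sketch: absolute jump arguments are remapped directly via
                   addrmap, while relative jumps become the distance between
                   the remapped target and the remapped end of the current
                   instruction: addrmap[old_target] - addrmap[i] - 3. */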
        adj = CODESIZE(opcode);
            codestr[h++] = codestr[i++];
    assert(h + nops == codelen);

    code = PyBytes_FromStringAndSize((char *)codestr, h);