1 """A flow graph representation for Python bytecode"""
7 from compiler
import misc
9 import CO_OPTIMIZED
, CO_NEWLOCALS
, CO_VARARGS
, CO_VARKEYWORDS
13 self
.current
= self
.entry
= Block()
14 self
.exit
= Block("exit")
15 self
.blocks
= misc
.Set()
16 self
.blocks
.add(self
.entry
)
17 self
.blocks
.add(self
.exit
)
19 def startBlock(self
, block
):
22 print "end", repr(self
.current
)
23 print " next", self
.current
.next
24 print " prev", self
.current
.prev
25 print " ", self
.current
.get_children()
29 def nextBlock(self
, block
=None):
30 # XXX think we need to specify when there is implicit transfer
31 # from one block to the next. might be better to represent this
32 # with explicit JUMP_ABSOLUTE instructions that are optimized
33 # out when they are unnecessary.
35 # I think this strategy works: each block has a child
36 # designated as "next" which is returned as the last of the
37 # children. because the nodes in a graph are emitted in
38 # reverse post order, the "next" block will always be emitted
39 # immediately after its parent.
40 # Worry: maintaining this invariant could be tricky
42 block
= self
.newBlock()
44 # Note: If the current block ends with an unconditional control
45 # transfer, then it is techically incorrect to add an implicit
46 # transfer to the block graph. Doing so results in code generation
47 # for unreachable blocks. That doesn't appear to be very common
48 # with Python code and since the built-in compiler doesn't optimize
49 # it out we don't either.
50 self
.current
.addNext(block
)
51 self
.startBlock(block
)
58 def startExitBlock(self
):
59 self
.startBlock(self
.exit
)
63 def _enable_debug(self
):
66 def _disable_debug(self
):
69 def emit(self
, *inst
):
72 if len(inst
) == 2 and isinstance(inst
[1], Block
):
73 self
.current
.addOutEdge(inst
[1])
74 self
.current
.emit(inst
)
76 def getBlocksInOrder(self
):
77 """Return the blocks in reverse postorder
79 i.e. each node appears before all of its successors
81 order
= order_blocks(self
.entry
, self
.exit
)
85 return self
.blocks
.elements()
88 """Return nodes appropriate for use with dominator"""
91 def getContainedGraphs(self
):
93 for b
in self
.getBlocks():
94 l
.extend(b
.getContainedGraphs())
98 def order_blocks(start_block
, exit_block
):
99 """Order blocks so that they are emitted in the right order"""
101 # - when a block has a next block, the next block must be emitted just after
102 # - when a block has followers (relative jumps), it must be emitted before
104 # - all reachable blocks must be emitted
107 # Find all the blocks to be emitted.
115 for c
in b
.get_children():
116 if c
not in remaining
:
119 # A block is dominated by another block if that block must be emitted
123 if __debug__
and b
.next
:
124 assert b
is b
.next
[0].prev
[0], (b
, b
.next
)
125 # Make sure every block appears in dominators, even if no
126 # other block must precede it.
127 dominators
.setdefault(b
, set())
128 # preceeding blocks dominate following blocks
129 for c
in b
.get_followers():
131 dominators
.setdefault(c
, set()).add(b
)
132 # Any block that has a next pointer leading to c is also
133 # dominated because the whole chain will be emitted at once.
134 # Walk backwards and add them all.
135 if c
.prev
and c
.prev
[0] is not b
:
141 # Find a block that can be emitted next.
143 for c
in dominators
[b
]:
145 break # can't emit yet, dominated by a remaining block
148 assert 0, 'circular dependency, cannot find next block'
157 elif b
is not exit_block
and not b
.has_unconditional_transfer():
158 order
.append(exit_block
)
168 def __init__(self
, label
=''):
170 self
.outEdges
= set()
172 self
.bid
= Block
._count
175 Block
._count
= Block
._count
+ 1
179 return "<block %s id=%d>" % (self
.label
, self
.bid
)
181 return "<block id=%d>" % (self
.bid
)
184 insts
= map(str, self
.insts
)
185 return "<block %s %d:\n%s>" % (self
.label
, self
.bid
,
188 def emit(self
, inst
):
190 self
.insts
.append(inst
)
192 def getInstructions(self
):
195 def addOutEdge(self
, block
):
196 self
.outEdges
.add(block
)
198 def addNext(self
, block
):
199 self
.next
.append(block
)
200 assert len(self
.next
) == 1, map(str, self
.next
)
201 block
.prev
.append(self
)
202 assert len(block
.prev
) == 1, map(str, block
.prev
)
204 _uncond_transfer
= ('RETURN_VALUE', 'RAISE_VARARGS',
205 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP',
208 def has_unconditional_transfer(self
):
209 """Returns True if there is an unconditional transfer to an other block
210 at the end of this block. This means there is no risk for the bytecode
211 executer to go past this block's bytecode."""
213 op
, arg
= self
.insts
[-1]
214 except (IndexError, ValueError):
216 return op
in self
._uncond
_transfer
218 def get_children(self
):
219 return list(self
.outEdges
) + self
.next
221 def get_followers(self
):
222 """Get the whole list of followers, including the next block."""
223 followers
= set(self
.next
)
224 # Blocks that must be emitted *after* this one, because of
225 # bytecode offsets (e.g. relative jumps) pointing to them.
226 for inst
in self
.insts
:
227 if inst
[0] in PyFlowGraph
.hasjrel
:
228 followers
.add(inst
[1])
231 def getContainedGraphs(self
):
232 """Return all graphs contained within this block.
234 For example, a MAKE_FUNCTION block will contain a reference to
235 the graph for the function body.
238 for inst
in self
.insts
:
242 if hasattr(op
, 'graph'):
243 contained
.append(op
.graph
)
246 # flags for code objects
248 # the FlowGraph is transformed in place; it exists in one of these states
254 class PyFlowGraph(FlowGraph
):
255 super_init
= FlowGraph
.__init
__
257 def __init__(self
, name
, filename
, args
=(), optimized
=0, klass
=None):
260 self
.filename
= filename
261 self
.docstring
= None
262 self
.args
= args
# XXX
263 self
.argcount
= getArgCount(args
)
266 self
.flags
= CO_OPTIMIZED | CO_NEWLOCALS
271 # Free variables found by the symbol table scan, including
272 # variables used only in nested scopes, are included here.
275 # The closure list is used to track the order of cell
276 # variables and free variables in the resulting code object.
277 # The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both
278 # kinds of variables.
280 self
.varnames
= list(args
) or []
281 for i
in range(len(self
.varnames
)):
282 var
= self
.varnames
[i
]
283 if isinstance(var
, TupleArg
):
284 self
.varnames
[i
] = var
.getName()
287 def setDocstring(self
, doc
):
290 def setFlag(self
, flag
):
291 self
.flags
= self
.flags | flag
292 if flag
== CO_VARARGS
:
293 self
.argcount
= self
.argcount
- 1
295 def checkFlag(self
, flag
):
296 if self
.flags
& flag
:
299 def setFreeVars(self
, names
):
300 self
.freevars
= list(names
)
302 def setCellVars(self
, names
):
303 self
.cellvars
= names
306 """Get a Python code object"""
307 assert self
.stage
== RAW
308 self
.computeStackDepth()
310 assert self
.stage
== FLAT
312 assert self
.stage
== CONV
314 assert self
.stage
== DONE
315 return self
.newCodeObject()
317 def dump(self
, io
=None):
324 if opname
== "SET_LINENO":
327 print "\t", "%3d" % pc
, opname
330 print "\t", "%3d" % pc
, opname
, t
[1]
335 def computeStackDepth(self
):
336 """Compute the max stack depth.
338 Approach is to compute the stack effect of each basic block.
339 Then find the path through the code with the largest total
344 for b
in self
.getBlocks():
345 depth
[b
] = findDepth(b
.getInstructions())
354 children
= b
.get_children()
356 return max([max_depth(c
, d
) for c
in children
])
358 if not b
.label
== "exit":
359 return max_depth(self
.exit
, d
)
363 self
.stacksize
= max_depth(self
.entry
, 0)
365 def flattenGraph(self
):
366 """Arrange the blocks in order and resolve jumps"""
367 assert self
.stage
== RAW
368 self
.insts
= insts
= []
372 for b
in self
.getBlocksInOrder():
374 for inst
in b
.getInstructions():
378 elif inst
[0] != "SET_LINENO":
383 for i
in range(len(insts
)):
387 elif inst
[0] != "SET_LINENO":
390 if opname
in self
.hasjrel
:
392 offset
= begin
[oparg
] - pc
393 insts
[i
] = opname
, offset
394 elif opname
in self
.hasjabs
:
395 insts
[i
] = opname
, begin
[inst
[1]]
399 for i
in dis
.hasjrel
:
400 hasjrel
.add(dis
.opname
[i
])
402 for i
in dis
.hasjabs
:
403 hasjabs
.add(dis
.opname
[i
])
405 def convertArgs(self
):
406 """Convert arguments from symbolic to concrete form"""
407 assert self
.stage
== FLAT
408 self
.consts
.insert(0, self
.docstring
)
410 for i
in range(len(self
.insts
)):
414 conv
= self
._converters
.get(opname
, None)
416 self
.insts
[i
] = opname
, conv(self
, oparg
)
419 def sort_cellvars(self
):
420 """Sort cellvars in the order of varnames and prune from freevars.
423 for name
in self
.cellvars
:
425 self
.cellvars
= [name
for name
in self
.varnames
427 for name
in self
.cellvars
:
429 self
.cellvars
= self
.cellvars
+ cells
.keys()
430 self
.closure
= self
.cellvars
+ self
.freevars
432 def _lookupName(self
, name
, list):
433 """Return index of name in list, appending if necessary
435 This routine uses a list instead of a dictionary, because a
436 dictionary can't store two different keys if the keys have the
437 same value but different types, e.g. 2 and 2L. The compiler
438 must treat these two separately, so it does an explicit type
439 comparison before comparing the values.
442 for i
in range(len(list)):
443 if t
== type(list[i
]) and list[i
] == name
:
450 def _convert_LOAD_CONST(self
, arg
):
451 if hasattr(arg
, 'getCode'):
453 return self
._lookupName
(arg
, self
.consts
)
455 def _convert_LOAD_FAST(self
, arg
):
456 self
._lookupName
(arg
, self
.names
)
457 return self
._lookupName
(arg
, self
.varnames
)
458 _convert_STORE_FAST
= _convert_LOAD_FAST
459 _convert_DELETE_FAST
= _convert_LOAD_FAST
461 def _convert_LOAD_NAME(self
, arg
):
462 if self
.klass
is None:
463 self
._lookupName
(arg
, self
.varnames
)
464 return self
._lookupName
(arg
, self
.names
)
466 def _convert_NAME(self
, arg
):
467 if self
.klass
is None:
468 self
._lookupName
(arg
, self
.varnames
)
469 return self
._lookupName
(arg
, self
.names
)
470 _convert_STORE_NAME
= _convert_NAME
471 _convert_DELETE_NAME
= _convert_NAME
472 _convert_IMPORT_NAME
= _convert_NAME
473 _convert_IMPORT_FROM
= _convert_NAME
474 _convert_STORE_ATTR
= _convert_NAME
475 _convert_LOAD_ATTR
= _convert_NAME
476 _convert_DELETE_ATTR
= _convert_NAME
477 _convert_LOAD_GLOBAL
= _convert_NAME
478 _convert_STORE_GLOBAL
= _convert_NAME
479 _convert_DELETE_GLOBAL
= _convert_NAME
481 def _convert_DEREF(self
, arg
):
482 self
._lookupName
(arg
, self
.names
)
483 self
._lookupName
(arg
, self
.varnames
)
484 return self
._lookupName
(arg
, self
.closure
)
485 _convert_LOAD_DEREF
= _convert_DEREF
486 _convert_STORE_DEREF
= _convert_DEREF
488 def _convert_LOAD_CLOSURE(self
, arg
):
489 self
._lookupName
(arg
, self
.varnames
)
490 return self
._lookupName
(arg
, self
.closure
)
492 _cmp
= list(dis
.cmp_op
)
493 def _convert_COMPARE_OP(self
, arg
):
494 return self
._cmp
.index(arg
)
496 # similarly for other opcodes...
498 for name
, obj
in locals().items():
499 if name
[:9] == "_convert_":
501 _converters
[opname
] = obj
502 del name
, obj
, opname
504 def makeByteCode(self
):
505 assert self
.stage
== CONV
506 self
.lnotab
= lnotab
= LineAddrTable()
510 lnotab
.addCode(self
.opnum
[opname
])
513 if opname
== "SET_LINENO":
514 lnotab
.nextLine(oparg
)
516 hi
, lo
= twobyte(oparg
)
518 lnotab
.addCode(self
.opnum
[opname
], lo
, hi
)
521 print self
.opnum
[opname
], lo
, hi
526 for num
in range(len(dis
.opname
)):
527 opnum
[dis
.opname
[num
]] = num
530 def newCodeObject(self
):
531 assert self
.stage
== DONE
532 if (self
.flags
& CO_NEWLOCALS
) == 0:
535 nlocals
= len(self
.varnames
)
536 argcount
= self
.argcount
537 if self
.flags
& CO_VARKEYWORDS
:
538 argcount
= argcount
- 1
539 return types
.CodeType(argcount
, nlocals
, self
.stacksize
, self
.flags
,
540 self
.lnotab
.getCode(), self
.getConsts(),
541 tuple(self
.names
), tuple(self
.varnames
),
542 self
.filename
, self
.name
, self
.lnotab
.firstline
,
543 self
.lnotab
.getTable(), tuple(self
.freevars
),
544 tuple(self
.cellvars
))
547 """Return a tuple for the const slot of the code object
549 Must convert references to code (MAKE_FUNCTION) to code
553 for elt
in self
.consts
:
554 if isinstance(elt
, PyFlowGraph
):
560 if opname
[:4] == 'JUMP':
564 """Helper for marking func defs with nested tuples in arglist"""
565 def __init__(self
, count
, names
):
569 return "TupleArg(%s, %s)" % (self
.count
, self
.names
)
571 return ".%d" % self
.count
573 def getArgCount(args
):
577 if isinstance(arg
, TupleArg
):
578 numNames
= len(misc
.flatten(arg
.names
))
579 argcount
= argcount
- numNames
583 """Convert an int argument into high and low bytes"""
584 assert isinstance(val
, int)
585 return divmod(val
, 256)
590 This class builds the lnotab, which is documented in compile.c.
591 Here's a brief recap:
593 For each SET_LINENO instruction after the first one, two bytes are
594 added to lnotab. (In some cases, multiple two-byte entries are
595 added.) The first byte is the distance in bytes between the
596 instruction for the last SET_LINENO and the current SET_LINENO.
597 The second byte is offset in line numbers. If either offset is
598 greater than 255, multiple two-byte entries are added -- see
599 compile.c for the delicate details.
610 def addCode(self
, *args
):
612 self
.code
.append(chr(arg
))
613 self
.codeOffset
= self
.codeOffset
+ len(args
)
615 def nextLine(self
, lineno
):
616 if self
.firstline
== 0:
617 self
.firstline
= lineno
618 self
.lastline
= lineno
621 addr
= self
.codeOffset
- self
.lastoff
622 line
= lineno
- self
.lastline
623 # Python assumes that lineno always increases with
624 # increasing bytecode address (lnotab is unsigned char).
625 # Depending on when SET_LINENO instructions are emitted
626 # this is not always true. Consider the code:
629 # In the bytecode stream, the assignment to "a" occurs
630 # after the loading of "b". This works with the C Python
631 # compiler because it only generates a SET_LINENO instruction
632 # for the assignment.
634 push
= self
.lnotab
.append
639 push(addr
); push(255)
642 if addr
> 0 or line
> 0:
643 push(addr
); push(line
)
644 self
.lastline
= lineno
645 self
.lastoff
= self
.codeOffset
648 return ''.join(self
.code
)
651 return ''.join(map(chr, self
.lnotab
))
653 class StackDepthTracker
:
654 # XXX 1. need to keep track of stack depth on jumps
655 # XXX 2. at least partly as a result, this code is broken
657 def findDepth(self
, insts
, debug
=0):
664 delta
= self
.effect
.get(opname
, None)
665 if delta
is not None:
666 depth
= depth
+ delta
669 for pat
, pat_delta
in self
.patterns
:
670 if opname
[:len(pat
)] == pat
:
672 depth
= depth
+ delta
674 # if we still haven't found a match
676 meth
= getattr(self
, opname
, None)
678 depth
= depth
+ meth(i
[1])
682 print depth
, maxDepth
696 'DELETE_SLICE+0': -1,
697 'DELETE_SLICE+1': -2,
698 'DELETE_SLICE+2': -2,
699 'DELETE_SLICE+3': -3,
718 'LOAD_ATTR': 0, # unlike other loads
731 def UNPACK_SEQUENCE(self
, count
):
733 def BUILD_TUPLE(self
, count
):
735 def BUILD_LIST(self
, count
):
737 def CALL_FUNCTION(self
, argc
):
738 hi
, lo
= divmod(argc
, 256)
739 return -(lo
+ hi
* 2)
740 def CALL_FUNCTION_VAR(self
, argc
):
741 return self
.CALL_FUNCTION(argc
)-1
742 def CALL_FUNCTION_KW(self
, argc
):
743 return self
.CALL_FUNCTION(argc
)-1
744 def CALL_FUNCTION_VAR_KW(self
, argc
):
745 return self
.CALL_FUNCTION(argc
)-2
746 def MAKE_FUNCTION(self
, argc
):
748 def MAKE_CLOSURE(self
, argc
):
749 # XXX need to account for free variables too!
751 def BUILD_SLICE(self
, argc
):
756 def DUP_TOPX(self
, argc
):
759 findDepth
= StackDepthTracker().findDepth