1 """A flow graph representation for Python bytecode"""
7 from compiler
import misc
9 import CO_OPTIMIZED
, CO_NEWLOCALS
, CO_VARARGS
, CO_VARKEYWORDS
13 self
.current
= self
.entry
= Block()
14 self
.exit
= Block("exit")
15 self
.blocks
= misc
.Set()
16 self
.blocks
.add(self
.entry
)
17 self
.blocks
.add(self
.exit
)
19 def startBlock(self
, block
):
22 print "end", repr(self
.current
)
23 print " next", self
.current
.next
24 print " ", self
.current
.get_children()
28 def nextBlock(self
, block
=None):
29 # XXX think we need to specify when there is implicit transfer
30 # from one block to the next. might be better to represent this
31 # with explicit JUMP_ABSOLUTE instructions that are optimized
32 # out when they are unnecessary.
34 # I think this strategy works: each block has a child
35 # designated as "next" which is returned as the last of the
36 # children. because the nodes in a graph are emitted in
37 # reverse post order, the "next" block will always be emitted
38 # immediately after its parent.
39 # Worry: maintaining this invariant could be tricky
41 block
= self
.newBlock()
43 # Note: If the current block ends with an unconditional
44 # control transfer, then it is incorrect to add an implicit
45 # transfer to the block graph. The current code requires
46 # these edges to get the blocks emitted in the right order,
47 # however. :-( If a client needs to remove these edges, call
50 self
.current
.addNext(block
)
51 self
.startBlock(block
)
58 def startExitBlock(self
):
59 self
.startBlock(self
.exit
)
63 def _enable_debug(self
):
66 def _disable_debug(self
):
69 def emit(self
, *inst
):
72 if inst
[0] in ['RETURN_VALUE', 'YIELD_VALUE']:
73 self
.current
.addOutEdge(self
.exit
)
74 if len(inst
) == 2 and isinstance(inst
[1], Block
):
75 self
.current
.addOutEdge(inst
[1])
76 self
.current
.emit(inst
)
78 def getBlocksInOrder(self
):
79 """Return the blocks in reverse postorder
81 i.e. each node appears before all of its successors
83 # XXX make sure every node that doesn't have an explicit next
84 # is set so that next points to exit
85 for b
in self
.blocks
.elements():
90 order
= dfs_postorder(self
.entry
, {})
92 self
.fixupOrder(order
, self
.exit
)
94 if not self
.exit
in order
:
95 order
.append(self
.exit
)
99 def fixupOrder(self
, blocks
, default_next
):
100 """Fixup bad order introduced by DFS."""
102 # XXX This is a total mess. There must be a better way to get
103 # the code blocks in the right order.
105 self
.fixupOrderHonorNext(blocks
, default_next
)
106 self
.fixupOrderForward(blocks
, default_next
)
108 def fixupOrderHonorNext(self
, blocks
, default_next
):
109 """Fix one problem with DFS.
111 The DFS uses child block, but doesn't know about the special
112 "next" block. As a result, the DFS can order blocks so that a
113 block isn't next to the right block for implicit control
117 for i
in range(len(blocks
)):
120 for i
in range(0, len(blocks
) - 1):
123 if not b
.next
or b
.next
[0] == default_next
or b
.next
[0] == n
:
125 # The blocks are in the wrong order. Find the chain of
126 # blocks to insert where they belong.
130 while elt
.next
and elt
.next
[0] != default_next
:
131 chain
.append(elt
.next
[0])
133 # Now remove the blocks in the chain from the current
134 # block list, so that they can be re-inserted.
138 l
.append((index
[b
], b
))
143 # Insert the chain in the proper location
144 blocks
[i
:i
+ 1] = [cur
] + chain
145 # Finally, re-compute the block indexes
146 for i
in range(len(blocks
)):
149 def fixupOrderForward(self
, blocks
, default_next
):
150 """Make sure all JUMP_FORWARDs jump forward"""
155 index
[b
] = len(chains
)
157 if b
.next
and b
.next
[0] == default_next
:
165 for i
in range(len(chains
)):
168 for c
in b
.get_children():
172 if inst
[0] == 'JUMP_FORWARD':
177 constraints
.append((index
[c
], i
))
182 # XXX just do one for now
183 # do swaps to get things in the right order
184 goes_before
, a_chain
= constraints
[0]
185 assert a_chain
> goes_before
188 chains
.insert(goes_before
, c
)
196 return self
.blocks
.elements()
199 """Return nodes appropriate for use with dominator"""
202 def getContainedGraphs(self
):
204 for b
in self
.getBlocks():
205 l
.extend(b
.getContainedGraphs())
208 def dfs_postorder(b
, seen
):
209 """Depth-first search of tree rooted at b, return in postorder"""
212 for c
in b
.get_children():
215 order
= order
+ dfs_postorder(c
, seen
)
222 def __init__(self
, label
=''):
224 self
.inEdges
= misc
.Set()
225 self
.outEdges
= misc
.Set()
227 self
.bid
= Block
._count
229 Block
._count
= Block
._count
+ 1
233 return "<block %s id=%d>" % (self
.label
, self
.bid
)
235 return "<block id=%d>" % (self
.bid
)
238 insts
= map(str, self
.insts
)
239 return "<block %s %d:\n%s>" % (self
.label
, self
.bid
,
242 def emit(self
, inst
):
245 self
.outEdges
.add(inst
[1])
246 self
.insts
.append(inst
)
248 def getInstructions(self
):
251 def addInEdge(self
, block
):
252 self
.inEdges
.add(block
)
254 def addOutEdge(self
, block
):
255 self
.outEdges
.add(block
)
257 def addNext(self
, block
):
258 self
.next
.append(block
)
259 assert len(self
.next
) == 1, map(str, self
.next
)
261 _uncond_transfer
= ('RETURN_VALUE', 'RAISE_VARARGS', 'YIELD_VALUE',
262 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP')
265 """Remove bogus edge for unconditional transfers
267 Each block has a next edge that accounts for implicit control
268 transfers, e.g. from a JUMP_IF_FALSE to the block that will be
269 executed if the test is true.
271 These edges must remain for the current assembler code to
272 work. If they are removed, the dfs_postorder gets things in
273 weird orders. However, they shouldn't be there for other
274 purposes, e.g. conversion to SSA form. This method will
275 remove the next edge when it follows an unconditional control
279 op
, arg
= self
.insts
[-1]
280 except (IndexError, ValueError):
282 if op
in self
._uncond
_transfer
:
285 def get_children(self
):
286 if self
.next
and self
.next
[0] in self
.outEdges
:
287 self
.outEdges
.remove(self
.next
[0])
288 return self
.outEdges
.elements() + self
.next
290 def getContainedGraphs(self
):
291 """Return all graphs contained within this block.
293 For example, a MAKE_FUNCTION block will contain a reference to
294 the graph for the function body.
297 for inst
in self
.insts
:
301 if hasattr(op
, 'graph'):
302 contained
.append(op
.graph
)
305 # flags for code objects
307 # the FlowGraph is transformed in place; it exists in one of these states
313 class PyFlowGraph(FlowGraph
):
314 super_init
= FlowGraph
.__init
__
316 def __init__(self
, name
, filename
, args
=(), optimized
=0, klass
=None):
319 self
.filename
= filename
320 self
.docstring
= None
321 self
.args
= args
# XXX
322 self
.argcount
= getArgCount(args
)
325 self
.flags
= CO_OPTIMIZED | CO_NEWLOCALS
330 # Free variables found by the symbol table scan, including
331 # variables used only in nested scopes, are included here.
334 # The closure list is used to track the order of cell
335 # variables and free variables in the resulting code object.
336 # The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both
337 # kinds of variables.
339 self
.varnames
= list(args
) or []
340 for i
in range(len(self
.varnames
)):
341 var
= self
.varnames
[i
]
342 if isinstance(var
, TupleArg
):
343 self
.varnames
[i
] = var
.getName()
346 def setDocstring(self
, doc
):
349 def setFlag(self
, flag
):
350 self
.flags
= self
.flags | flag
351 if flag
== CO_VARARGS
:
352 self
.argcount
= self
.argcount
- 1
354 def checkFlag(self
, flag
):
355 if self
.flags
& flag
:
358 def setFreeVars(self
, names
):
359 self
.freevars
= list(names
)
361 def setCellVars(self
, names
):
362 self
.cellvars
= names
365 """Get a Python code object"""
366 assert self
.stage
== RAW
367 self
.computeStackDepth()
369 assert self
.stage
== FLAT
371 assert self
.stage
== CONV
373 assert self
.stage
== DONE
374 return self
.newCodeObject()
376 def dump(self
, io
=None):
383 if opname
== "SET_LINENO":
386 print "\t", "%3d" % pc
, opname
389 print "\t", "%3d" % pc
, opname
, t
[1]
394 def computeStackDepth(self
):
395 """Compute the max stack depth.
397 Approach is to compute the stack effect of each basic block.
398 Then find the path through the code with the largest total
403 for b
in self
.getBlocks():
404 depth
[b
] = findDepth(b
.getInstructions())
413 children
= b
.get_children()
415 return max([max_depth(c
, d
) for c
in children
])
417 if not b
.label
== "exit":
418 return max_depth(self
.exit
, d
)
422 self
.stacksize
= max_depth(self
.entry
, 0)
424 def flattenGraph(self
):
425 """Arrange the blocks in order and resolve jumps"""
426 assert self
.stage
== RAW
427 self
.insts
= insts
= []
431 for b
in self
.getBlocksInOrder():
433 for inst
in b
.getInstructions():
437 elif inst
[0] != "SET_LINENO":
442 for i
in range(len(insts
)):
446 elif inst
[0] != "SET_LINENO":
449 if self
.hasjrel
.has_elt(opname
):
451 offset
= begin
[oparg
] - pc
452 insts
[i
] = opname
, offset
453 elif self
.hasjabs
.has_elt(opname
):
454 insts
[i
] = opname
, begin
[inst
[1]]
458 for i
in dis
.hasjrel
:
459 hasjrel
.add(dis
.opname
[i
])
461 for i
in dis
.hasjabs
:
462 hasjabs
.add(dis
.opname
[i
])
464 def convertArgs(self
):
465 """Convert arguments from symbolic to concrete form"""
466 assert self
.stage
== FLAT
467 self
.consts
.insert(0, self
.docstring
)
469 for i
in range(len(self
.insts
)):
473 conv
= self
._converters
.get(opname
, None)
475 self
.insts
[i
] = opname
, conv(self
, oparg
)
478 def sort_cellvars(self
):
479 """Sort cellvars in the order of varnames and prune from freevars.
482 for name
in self
.cellvars
:
484 self
.cellvars
= [name
for name
in self
.varnames
485 if cells
.has_key(name
)]
486 for name
in self
.cellvars
:
488 self
.cellvars
= self
.cellvars
+ cells
.keys()
489 self
.closure
= self
.cellvars
+ self
.freevars
491 def _lookupName(self
, name
, list):
492 """Return index of name in list, appending if necessary
494 This routine uses a list instead of a dictionary, because a
495 dictionary can't store two different keys if the keys have the
496 same value but different types, e.g. 2 and 2L. The compiler
497 must treat these two separately, so it does an explicit type
498 comparison before comparing the values.
501 for i
in range(len(list)):
502 if t
== type(list[i
]) and list[i
] == name
:
509 def _convert_LOAD_CONST(self
, arg
):
510 if hasattr(arg
, 'getCode'):
512 return self
._lookupName
(arg
, self
.consts
)
514 def _convert_LOAD_FAST(self
, arg
):
515 self
._lookupName
(arg
, self
.names
)
516 return self
._lookupName
(arg
, self
.varnames
)
517 _convert_STORE_FAST
= _convert_LOAD_FAST
518 _convert_DELETE_FAST
= _convert_LOAD_FAST
520 def _convert_LOAD_NAME(self
, arg
):
521 if self
.klass
is None:
522 self
._lookupName
(arg
, self
.varnames
)
523 return self
._lookupName
(arg
, self
.names
)
525 def _convert_NAME(self
, arg
):
526 if self
.klass
is None:
527 self
._lookupName
(arg
, self
.varnames
)
528 return self
._lookupName
(arg
, self
.names
)
529 _convert_STORE_NAME
= _convert_NAME
530 _convert_DELETE_NAME
= _convert_NAME
531 _convert_IMPORT_NAME
= _convert_NAME
532 _convert_IMPORT_FROM
= _convert_NAME
533 _convert_STORE_ATTR
= _convert_NAME
534 _convert_LOAD_ATTR
= _convert_NAME
535 _convert_DELETE_ATTR
= _convert_NAME
536 _convert_LOAD_GLOBAL
= _convert_NAME
537 _convert_STORE_GLOBAL
= _convert_NAME
538 _convert_DELETE_GLOBAL
= _convert_NAME
540 def _convert_DEREF(self
, arg
):
541 self
._lookupName
(arg
, self
.names
)
542 self
._lookupName
(arg
, self
.varnames
)
543 return self
._lookupName
(arg
, self
.closure
)
544 _convert_LOAD_DEREF
= _convert_DEREF
545 _convert_STORE_DEREF
= _convert_DEREF
547 def _convert_LOAD_CLOSURE(self
, arg
):
548 self
._lookupName
(arg
, self
.varnames
)
549 return self
._lookupName
(arg
, self
.closure
)
551 _cmp
= list(dis
.cmp_op
)
552 def _convert_COMPARE_OP(self
, arg
):
553 return self
._cmp
.index(arg
)
555 # similarly for other opcodes...
557 for name
, obj
in locals().items():
558 if name
[:9] == "_convert_":
560 _converters
[opname
] = obj
561 del name
, obj
, opname
563 def makeByteCode(self
):
564 assert self
.stage
== CONV
565 self
.lnotab
= lnotab
= LineAddrTable()
569 lnotab
.addCode(self
.opnum
[opname
])
572 if opname
== "SET_LINENO":
573 lnotab
.nextLine(oparg
)
575 hi
, lo
= twobyte(oparg
)
577 lnotab
.addCode(self
.opnum
[opname
], lo
, hi
)
580 print self
.opnum
[opname
], lo
, hi
585 for num
in range(len(dis
.opname
)):
586 opnum
[dis
.opname
[num
]] = num
589 def newCodeObject(self
):
590 assert self
.stage
== DONE
591 if (self
.flags
& CO_NEWLOCALS
) == 0:
594 nlocals
= len(self
.varnames
)
595 argcount
= self
.argcount
596 if self
.flags
& CO_VARKEYWORDS
:
597 argcount
= argcount
- 1
598 return new
.code(argcount
, nlocals
, self
.stacksize
, self
.flags
,
599 self
.lnotab
.getCode(), self
.getConsts(),
600 tuple(self
.names
), tuple(self
.varnames
),
601 self
.filename
, self
.name
, self
.lnotab
.firstline
,
602 self
.lnotab
.getTable(), tuple(self
.freevars
),
603 tuple(self
.cellvars
))
606 """Return a tuple for the const slot of the code object
608 Must convert references to code (MAKE_FUNCTION) to code
612 for elt
in self
.consts
:
613 if isinstance(elt
, PyFlowGraph
):
619 if opname
[:4] == 'JUMP':
623 """Helper for marking func defs with nested tuples in arglist"""
624 def __init__(self
, count
, names
):
628 return "TupleArg(%s, %s)" % (self
.count
, self
.names
)
630 return ".%d" % self
.count
632 def getArgCount(args
):
636 if isinstance(arg
, TupleArg
):
637 numNames
= len(misc
.flatten(arg
.names
))
638 argcount
= argcount
- numNames
642 """Convert an int argument into high and low bytes"""
643 assert isinstance(val
, int)
644 return divmod(val
, 256)
649 This class builds the lnotab, which is documented in compile.c.
650 Here's a brief recap:
652 For each SET_LINENO instruction after the first one, two bytes are
653 added to lnotab. (In some cases, multiple two-byte entries are
654 added.) The first byte is the distance in bytes between the
655 instruction for the last SET_LINENO and the current SET_LINENO.
656 The second byte is offset in line numbers. If either offset is
657 greater than 255, multiple two-byte entries are added -- see
658 compile.c for the delicate details.
669 def addCode(self
, *args
):
671 self
.code
.append(chr(arg
))
672 self
.codeOffset
= self
.codeOffset
+ len(args
)
674 def nextLine(self
, lineno
):
675 if self
.firstline
== 0:
676 self
.firstline
= lineno
677 self
.lastline
= lineno
680 addr
= self
.codeOffset
- self
.lastoff
681 line
= lineno
- self
.lastline
682 # Python assumes that lineno always increases with
683 # increasing bytecode address (lnotab is unsigned char).
684 # Depending on when SET_LINENO instructions are emitted
685 # this is not always true. Consider the code:
688 # In the bytecode stream, the assignment to "a" occurs
689 # after the loading of "b". This works with the C Python
690 # compiler because it only generates a SET_LINENO instruction
691 # for the assignment.
693 push
= self
.lnotab
.append
698 push(addr
); push(255)
701 if addr
> 0 or line
> 0:
702 push(addr
); push(line
)
703 self
.lastline
= lineno
704 self
.lastoff
= self
.codeOffset
707 return ''.join(self
.code
)
710 return ''.join(map(chr, self
.lnotab
))
712 class StackDepthTracker
:
713 # XXX 1. need to keep track of stack depth on jumps
714 # XXX 2. at least partly as a result, this code is broken
716 def findDepth(self
, insts
, debug
=0):
723 delta
= self
.effect
.get(opname
, None)
724 if delta
is not None:
725 depth
= depth
+ delta
728 for pat
, pat_delta
in self
.patterns
:
729 if opname
[:len(pat
)] == pat
:
731 depth
= depth
+ delta
733 # if we still haven't found a match
735 meth
= getattr(self
, opname
, None)
737 depth
= depth
+ meth(i
[1])
741 print depth
, maxDepth
755 'DELETE_SLICE+0': -1,
756 'DELETE_SLICE+1': -2,
757 'DELETE_SLICE+2': -2,
758 'DELETE_SLICE+3': -3,
777 'LOAD_ATTR': 0, # unlike other loads
790 def UNPACK_SEQUENCE(self
, count
):
792 def BUILD_TUPLE(self
, count
):
794 def BUILD_LIST(self
, count
):
796 def CALL_FUNCTION(self
, argc
):
797 hi
, lo
= divmod(argc
, 256)
798 return -(lo
+ hi
* 2)
799 def CALL_FUNCTION_VAR(self
, argc
):
800 return self
.CALL_FUNCTION(argc
)-1
801 def CALL_FUNCTION_KW(self
, argc
):
802 return self
.CALL_FUNCTION(argc
)-1
803 def CALL_FUNCTION_VAR_KW(self
, argc
):
804 return self
.CALL_FUNCTION(argc
)-2
805 def MAKE_FUNCTION(self
, argc
):
807 def MAKE_CLOSURE(self
, argc
):
808 # XXX need to account for free variables too!
810 def BUILD_SLICE(self
, argc
):
815 def DUP_TOPX(self
, argc
):
818 findDepth
= StackDepthTracker().findDepth