github/workflows/pycopy-test: Upgrade Pycopy to 3.6.1.
[ScratchABlock.git] / dataflow.py
blobb6fdf4f76f9cacc644011da0a2e4fca31b7a8281
1 import logging
3 import core
4 from core import is_expr, is_mem
5 from utils import set_union, set_intersection
6 from cfgutils import foreach_bblock
9 log = logging.getLogger(__name__)
12 class AnalysisBase:
14 # Set to False for backward analysis
15 forward = True
16 # property prefix to use
17 prop_prefix = None
18 # Set to name of node "in" state
19 node_prop_in = None
20 # Set to name of node "out" state
21 node_prop_out = None
22 node_prop_gen = None
23 node_prop_kill = None
25 def __init__(self, graph):
26 self.g = graph
27 if self.prop_prefix:
28 self.node_prop_in = self.prop_prefix + "_in"
29 self.node_prop_out = self.prop_prefix + "_out"
30 self.node_prop_gen = self.prop_prefix + "_gen"
31 self.node_prop_kill = self.prop_prefix + "_kill"
33 if self.forward:
34 self.node_prop_src = self.node_prop_in
35 self.node_prop_dst = self.node_prop_out
36 else:
37 self.node_prop_src = self.node_prop_out
38 self.node_prop_dst = self.node_prop_in
41 def solve(self):
42 "Solve dataflow analysis."
43 self.init()
45 changed = True
46 while changed:
47 changed = False
49 for node, info in self.g.iter_sorted_nodes():
50 if self.forward:
51 sources = self.g.pred(node)
52 else:
53 sources = self.g.succ(node)
55 if self.node_prop_dst:
56 new = self.transfer(node, info[self.node_prop_src])
57 if new != info[self.node_prop_dst]:
58 info[self.node_prop_dst] = new
59 changed = True
61 if sources:
62 # If there're no "sources" for this node, it's an initial node,
63 # and should keep it's "in" set (which may be non-empty).
64 new = self.join(node, sources)
65 if new != info[self.node_prop_src]:
66 info[self.node_prop_src] = new
67 changed = True
69 def transfer(self, node, src_state):
70 """Transfer function. Computes next state of a node, based on
71 source state. For forward analisys, source state is 'in' state,
72 next state is 'out' state. For backward analysis, vice-versa.
73 Note that this function should not be concerned with direction
74 of analysis, it's just fed with 'source' state by the solver.
76 Default implementation does nothing, and is suitable for analyses
77 which don't depend on node "content", only on overall graph
78 structure (control flow analyses vs data flow analyses).
79 """
80 return src_state
82 def join(self, node, source_nodes):
83 raise NotImplementedError
86 class DominatorAnalysis(AnalysisBase):
87 "Encapsulation of dataflow analysis to discover graph node dominators."
89 forward = True
90 node_prop_in = "dom"
92 def init(self):
93 "Entry node is set to itself, the rest - to graph's all nodes."
94 entry = self.g.entry()
95 all_nodes = set(self.g.nodes())
96 for node, info in self.g.nodes_props():
97 if node == entry:
98 info[self.node_prop_in] = {node}
99 else:
100 info[self.node_prop_in] = all_nodes
102 def join(self, node, source_nodes):
103 state = set_intersection(*(self.g.get_node_attr(x, self.node_prop_in) for x in source_nodes))
104 return state | {node}
107 class GenKillAnalysis(AnalysisBase):
109 # Should be staticmethod(set_union) or staticmethod(set_intersection)
110 # staticmethod() is required to work around Python's magic handling
111 # of functions references within classes.
112 join_op = None
114 def transfer(self, node, src_state):
115 return (src_state - self.g.get_node_attr(node, self.node_prop_kill)) | self.g.get_node_attr(node, self.node_prop_gen)
117 def join(self, node, source_nodes):
118 if node == "_DEADEND_":
119 log.debug("%s: joining from %s to _DEADEND_" % (self.__class__.__name__, source_nodes))
120 return set()
121 # node_prop_dst is named from the point of view of intra-node transfer function.
122 # inter-node join function takes source nodes dst set to compute current node
123 # src set
124 return self.join_op(*(self.g.get_node_attr(x, self.node_prop_dst) for x in source_nodes))
127 class ReachDefAnalysis(GenKillAnalysis):
128 "Encapsulation of dataflow analysis for reaching definitions."
129 forward = True
130 join_op = staticmethod(set_union)
131 prop_prefix = "reachdef"
134 def __init__(self, cfg, regs_only=True, inst_level=False):
135 """If inst_level is True, perform instruction-level analysis, i.e.
136 result will be as a set of (var, inst_addr) pairs. Otherwise, it
137 will be (var, bblock_addr). inst_level=True is useful mostly for
138 unittests/adhoc cases."""
139 super().__init__(cfg)
140 self.inst_level = inst_level
141 self.regs_only = regs_only
143 def init(self):
144 """In and out sets of all nodes are initialized to empty sets, but
145 entry's in set is initialized to a set of all defined locations with
146 None address, representing non-initialized location."""
147 entry = self.g.entry()
148 if self.inst_level:
149 all_defs = foreach_bblock(self.g, lambda b: b.def_addrs(self.regs_only), set.union)
150 else:
151 all_defs = foreach_bblock(self.g, lambda b: set((v, b.addr) for v in b.defs(self.regs_only)), set.union)
153 for node, info in self.g.nodes_props():
154 if node == entry:
155 # Entry's in set to all vars, with "undefined" definition location (None).
156 info[self.node_prop_in] = set(((v[0], None) for v in all_defs))
157 else:
158 info[self.node_prop_in] = set()
159 info[self.node_prop_out] = set()
161 bblock = info["val"]
162 kill = set()
163 gen = set()
164 for inst in bblock.items:
165 addr = inst.addr if self.inst_level else bblock.addr
166 defs = inst.defs(self.regs_only)
167 # Kill any real matching defs in function
168 kill |= {x for x in all_defs if x[0] in defs}
169 # and also any "undefined" defs from function reach-in
170 kill |= {(r, None) for r in defs}
171 for d in defs:
172 gen.add((d, addr))
173 info[self.node_prop_kill] = kill
174 info[self.node_prop_gen] = gen
177 class LiveVarAnalysis(GenKillAnalysis):
178 forward = False
179 join_op = staticmethod(set_union)
180 prop_prefix = "live"
182 def __init__(self, cfg, skip_calls=False, underestimate=False):
183 """If skip_calls is True, skip call instructions. This is useful
184 to estimate current function's argument registers (using unspecific
185 call-conventions driven .uses() for a call instruction may/will make
186 all call-conventions arg registers live for function entry)."""
187 super().__init__(cfg)
188 self.skip_calls = skip_calls
189 self.underestimate = underestimate
191 def init(self):
192 "In and out sets of all nodes is set to empty."
193 exits = self.g.exits()
194 assert len(exits) == 1
195 exit = exits[0]
197 for node, info in self.g.nodes_props():
198 info[self.node_prop_in] = set()
199 if node == exit:
200 if self.underestimate:
201 info[self.node_prop_out] = set()
202 elif self.g.props.get("noreturn") or self.g.props.get("name") == "main":
203 info[self.node_prop_out] = set()
204 else:
205 import progdb
206 rets = progdb.FUNC_DB.get(self.g.props.get("name"), {}).get("returns")
207 if rets is not None:
208 assert isinstance(rets, set)
209 info[self.node_prop_out] = rets
210 elif "modifieds" in self.g.props:
211 info[self.node_prop_out] = self.g.props["modifieds"]
212 log.warning("Conservatively using modifieds as function live-out")
213 elif "reach_exit" in self.g.props:
214 info[self.node_prop_out] = self.g.props["reach_exit"]
215 log.warning("Conservatively using reach_exit as function live-out")
216 else:
217 info[self.node_prop_out] = set()
218 else:
219 info[self.node_prop_out] = set()
221 bblock = info["val"]
223 kill = set()
224 gen = set()
225 for inst in bblock.items:
226 if inst.op == "call" and self.skip_calls:
227 continue
229 # If we underestimate liveness, assume function
230 # calls don't use anything, only kill liveness below.
231 if inst.op != "call" or not self.underestimate:
232 for r in inst.uses(self.g):
233 if r not in kill:
234 gen.add(r)
235 else:
236 # We still need to account for reg uses in indirect call expression
237 for r in inst.args[0].regs():
238 if r not in kill:
239 gen.add(r)
241 for dest in inst.defs(regs_only=False):
242 kill.add(dest)
244 info[self.node_prop_kill] = kill
245 info[self.node_prop_gen] = gen
248 def make_du_chains(cfg):
249 log = logging.getLogger("make_du_chains")
251 du_map = {}
252 bblock_last_def = {}
254 for bblock_addr, node_props in cfg.iter_sorted_nodes():
255 bblock = node_props["val"]
256 last_def = {}
258 for inst in bblock.items:
259 defs = inst.defs(regs_only=False)
260 if defs:
261 inst.comments["uses"] = []
262 du_map[inst.addr] = inst.comments["uses"]
263 for dest in defs:
264 last_def[dest] = inst.addr
266 log.debug("last_def for %s: %s", bblock_addr, last_def)
267 bblock_last_def[bblock_addr] = last_def
269 log.debug("empty du_map: %s", du_map)
270 log.debug("bblock_last_def: %s", bblock_last_def)
272 for bblock_addr, node_props in cfg.iter_sorted_nodes():
273 bblock = node_props["val"]
274 last_def = {}
276 reachdef_in = node_props["reachdef_in"]
277 log.debug("reachdef_in %s: %s", bblock_addr, reachdef_in)
278 for var, defined_in_bblock in reachdef_in:
279 if defined_in_bblock is not None:
280 last_def.setdefault(var, set()).add(bblock_last_def[defined_in_bblock][var])
281 log.debug("last_def on entry: %s", last_def)
283 for inst in bblock.items:
284 log.debug("%s: %s", inst, inst.uses(cfg))
285 for r in inst.uses(cfg):
286 if r in last_def:
287 log.debug("%s", last_def[r])
288 for inst_addr in last_def[r]:
289 du_map[inst_addr].append(inst.addr)
290 else:
291 log.debug("%r not in last_def", r)
293 defs = inst.defs(regs_only=False)
294 for dest in defs:
295 last_def[dest] = {inst.addr}
297 log.debug("du_map:", du_map)