github/workflows/pycopy-test: Upgrade Pycopy to 3.6.1.
[ScratchABlock.git] / xform_ssa.py
blobae46a2fd4b15cb2caabf6ed757aeb722e9f8c587
1 # ScratchABlock - Program analysis and decompilation framework
3 # Copyright (c) 2015-2018 Paul Sokolovsky
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 """SSA related transformations"""
20 from collections import defaultdict
21 from core import *
22 from cfgutils import foreach_inst
23 import xform_expr
24 import xform_cfg
27 def is_phi(inst):
28 return inst.op == "=" and is_sfunc(inst.args[0], "phi")
31 def insert_phi_maximal(cfg, fully_maximal=False):
32 """Insert phi functions to produce maximal SSA form.
34 Described e.g. in Appel 1998 "SSA is Functional Programming" (not named
35 "maximal" there, but the name is quite obvious, especially if contrasted
36 with well-known "minimal SSA form"):
38 "A really crude approach is to split every variable at every basic-block
39 boundary, and put φ-functions for every variable in every block"
41 The algorithm below already contains an obvious optimization - phi
42 functions are inserted only to blocks with multiple predecessors. Note
43 that this requires rename function to process basic block from successors
44 to predecessors, to correctly propagate renamings thru such phi-less
45 blocks. (If on the other hand we inserted phi's to each block, we could
46 process their renaming in arbitrary order, because each basic block would
47 have its local definition for every variable).
48 """
50 if fully_maximal:
51 min_preds = 1
52 else:
53 min_preds = 2
55 all_vars = sorted(xform_cfg.local_defines(cfg))
57 for n, nprops in cfg.nodes_props():
58 if cfg.degree_in(n) >= min_preds:
59 bb = nprops["val"]
60 preds = cfg.pred(n)
61 phi_no = 0
62 for v in all_vars:
63 inst = Inst(v, "=", [EXPR("SFUNC", [SFUNC("phi")] + [v] * len(preds))], addr=bb.addr + ".phi_%s" % v.name)
64 bb.items.insert(phi_no, inst)
65 phi_no += 1
68 def insert_phi_fully_maximal(cfg):
69 insert_phi_maximal(cfg, fully_maximal=True)
72 def rename_ssa_vars(cfg, use_addrs=False):
73 """Generic routine to rename registers to follow SSA form.
75 This should be called after phi-insertion pass, and this routine will
76 work with insertion algorithm (whether constructing maximal, minimal,
77 pruned or whatever form). Can rename registers to use either sequential
78 subscripts or instruction addresses where register is defined.
79 """
81 def rename_reg(e):
82 if is_reg(e):
83 return REG("%s_%s" % (e.name, var_map[e.name]))
85 if use_addrs:
86 var_map = defaultdict(lambda: "0")
87 else:
88 var_map = defaultdict(int)
90 # Process basic blocks in such a way that we have renamings (if any!) for
91 # current block, before we proceed to its successors.
92 for n in cfg.iter_rev_postorder():
93 bb = cfg[n]["val"]
95 # Rename variables within basic block.
96 for inst in bb:
97 # Don't rename args of phi funcs, those names come from
98 # predecessor blocks.
99 if not is_phi(inst):
100 for i, arg in enumerate(inst.args):
101 inst.args[i] = xform_expr.expr_xform(arg, rename_reg)
103 # Rename destination
104 if is_reg(inst.dest):
105 # If it's a register, this defines a new register version
106 if use_addrs:
107 if is_phi(inst):
108 suffix = bb.addr + "_phi"
109 else:
110 suffix = inst.addr
111 var_map[inst.dest.name] = suffix
112 else:
113 var_map[inst.dest.name] += 1
114 inst.dest = rename_reg(inst.dest)
115 else:
116 # Otherwise, it's more complex expression which may contain
117 # reference to reg as a subexpression
118 inst.dest = xform_expr.expr_xform(inst.dest, rename_reg)
120 # Now rename arguments of phi functions in successor blocks, in the
121 # position which correspond to the current block's predecessor edge
122 for next_n in cfg.succ(n):
123 my_pos = cfg.pred(next_n).index(n)
124 next_bb = cfg[next_n]["val"]
125 for inst in next_bb:
126 if not is_phi(inst):
127 break
128 # +1 because first arg is SFUNC("phi")
129 new_reg = rename_reg(inst.args[0].args[my_pos + 1])
130 inst.args[0].args[my_pos + 1] = new_reg
133 def rename_ssa_vars_fully_maximal(cfg, use_addrs=False):
135 def rename_reg(e):
136 if is_reg(e):
137 return REG("%s_%s" % (e.name, var_map[e.name]))
139 if use_addrs:
140 var_map = defaultdict(lambda: "0")
141 else:
142 var_map = defaultdict(int)
144 # This algorithm could process basic blocks in any order. But for human
145 # consumption, it's better when variable are numbered from the start of
146 # a function.
147 for n in cfg.iter_rev_postorder():
148 bb = cfg[n]["val"]
150 # Rename variables within basic block.
151 for inst in bb:
152 # Don't rename args of phi funcs, those names come from
153 # predecessor blocks.
154 if not is_phi(inst):
155 for i, arg in enumerate(inst.args):
156 inst.args[i] = xform_expr.expr_xform(arg, rename_reg)
158 # Rename destination
159 if is_reg(inst.dest):
160 # If it's a register, this defines a new register version
161 if use_addrs:
162 if is_phi(inst):
163 suffix = bb.addr + "_phi"
164 else:
165 suffix = inst.addr
166 var_map[inst.dest.name] = suffix
167 else:
168 var_map[inst.dest.name] += 1
169 inst.dest = rename_reg(inst.dest)
170 else:
171 # Otherwise, it's more complex expression which may contain
172 # reference to reg as a subexpression
173 inst.dest = xform_expr.expr_xform(inst.dest, rename_reg)
175 # Now rename arguments of phi functions in successor blocks, in the
176 # position which correspond to the current block's predecessor edge
177 for next_n in cfg.succ(n):
178 my_pos = cfg.pred(next_n).index(n)
179 next_bb = cfg[next_n]["val"]
180 for inst in next_bb:
181 if not is_phi(inst):
182 break
183 # +1 because first arg is SFUNC("phi")
184 new_reg = rename_reg(inst.args[0].args[my_pos + 1])
185 inst.args[0].args[my_pos + 1] = new_reg
188 def rename_ssa_vars_subscripts(cfg):
189 rename_ssa_vars(cfg, False)
192 def rename_ssa_vars_addrs(cfg):
193 rename_ssa_vars(cfg, True)
196 def verify_ssa(cfg):
197 "Verify that structural properties of SSA hold."
199 # Step 1: Verify that each var is assigned only once.
201 all_defs = set()
203 def check_single_assign(inst):
204 for d in inst.defs():
205 assert d not in all_defs
206 all_defs.add(d)
208 foreach_inst(cfg, check_single_assign)
210 # Step 2: Verify that each use is dominated by definition.
212 def check_doms(inst_addr, node, reg):
213 # All "_0" regs are assumed to be implicitly defined on entry
214 # (e.g. func params).
215 if reg.name.endswith("_0"):
216 return
218 dom_list = []
220 while True:
221 # We start with testing current passed node to handle the
222 # case of this func being called for phi statement, where
223 # its predecessor node is passed here. For normal statement,
224 # this will check its own node in vain, but that's fine.
225 defs = cfg[node]["val"].defs()
226 if reg in defs:
227 return
229 dom_list.append(node)
230 node = cfg[node]["idom"]
231 if not node:
232 assert False, "@%s: Use of %s is not dominated by definition, " \
233 "expected to be in one of bblocks: %s, instead in: %s" % (
234 inst_addr, reg, dom_list, xform_cfg.find_1st_def(cfg, reg, in_bblock=True)
237 for n, info in cfg.nodes_props():
238 bb = info["val"]
240 inst_map = {}
241 for i, inst in enumerate(bb.items):
242 for d in inst.defs():
243 inst_map[d] = i
245 for i in range(len(bb.items) - 1, -1, -1):
246 inst = bb.items[i]
248 if is_phi(inst):
249 preds = cfg.pred(n)
250 for i, u in enumerate(inst.args[0].args[1:]):
251 check_doms(inst.addr, preds[i], u)
252 continue
254 for u in inst.uses():
255 if u in inst_map:
256 assert inst_map[u] < i
257 else:
258 check_doms(inst.addr, n, u)