1 # ScratchABlock - Program analysis and decompilation framework
3 # Copyright (c) 2015-2018 Paul Sokolovsky
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 """SSA related transformations"""
20 from collections
import defaultdict
22 from cfgutils
import foreach_inst
28 return inst
.op
== "=" and is_sfunc(inst
.args
[0], "phi")
31 def insert_phi_maximal(cfg
, fully_maximal
=False):
32 """Insert phi functions to produce maximal SSA form.
34 Described e.g. in Appel 1998 "SSA is Functional Programming" (not named
35 "maximal" there, but the name is quite obvious, especially if contrasted
36 with well-known "minimal SSA form"):
38 "A really crude approach is to split every variable at every basic-block
39 boundary, and put φ-functions for every variable in every block"
41 The algorithm below already contains an obvious optimization - phi
42 functions are inserted only to blocks with multiple predecessors. Note
43 that this requires rename function to process basic block from successors
44 to predecessors, to correctly propagate renamings thru such phi-less
45 blocks. (If on the other hand we inserted phi's to each block, we could
46 process their renaming in arbitrary order, because each basic block would
47 have its local definition for every variable).
55 all_vars
= sorted(xform_cfg
.local_defines(cfg
))
57 for n
, nprops
in cfg
.nodes_props():
58 if cfg
.degree_in(n
) >= min_preds
:
63 inst
= Inst(v
, "=", [EXPR("SFUNC", [SFUNC("phi")] + [v
] * len(preds
))], addr
=bb
.addr
+ ".phi_%s" % v
.name
)
64 bb
.items
.insert(phi_no
, inst
)
68 def insert_phi_fully_maximal(cfg
):
69 insert_phi_maximal(cfg
, fully_maximal
=True)
72 def rename_ssa_vars(cfg
, use_addrs
=False):
73 """Generic routine to rename registers to follow SSA form.
75 This should be called after phi-insertion pass, and this routine will
76 work with insertion algorithm (whether constructing maximal, minimal,
77 pruned or whatever form). Can rename registers to use either sequential
78 subscripts or instruction addresses where register is defined.
83 return REG("%s_%s" % (e
.name
, var_map
[e
.name
]))
86 var_map
= defaultdict(lambda: "0")
88 var_map
= defaultdict(int)
90 # Process basic blocks in such a way that we have renamings (if any!) for
91 # current block, before we proceed to its successors.
92 for n
in cfg
.iter_rev_postorder():
95 # Rename variables within basic block.
97 # Don't rename args of phi funcs, those names come from
100 for i
, arg
in enumerate(inst
.args
):
101 inst
.args
[i
] = xform_expr
.expr_xform(arg
, rename_reg
)
104 if is_reg(inst
.dest
):
105 # If it's a register, this defines a new register version
108 suffix
= bb
.addr
+ "_phi"
111 var_map
[inst
.dest
.name
] = suffix
113 var_map
[inst
.dest
.name
] += 1
114 inst
.dest
= rename_reg(inst
.dest
)
116 # Otherwise, it's more complex expression which may contain
117 # reference to reg as a subexpression
118 inst
.dest
= xform_expr
.expr_xform(inst
.dest
, rename_reg
)
120 # Now rename arguments of phi functions in successor blocks, in the
121 # position which correspond to the current block's predecessor edge
122 for next_n
in cfg
.succ(n
):
123 my_pos
= cfg
.pred(next_n
).index(n
)
124 next_bb
= cfg
[next_n
]["val"]
128 # +1 because first arg is SFUNC("phi")
129 new_reg
= rename_reg(inst
.args
[0].args
[my_pos
+ 1])
130 inst
.args
[0].args
[my_pos
+ 1] = new_reg
133 def rename_ssa_vars_fully_maximal(cfg
, use_addrs
=False):
137 return REG("%s_%s" % (e
.name
, var_map
[e
.name
]))
140 var_map
= defaultdict(lambda: "0")
142 var_map
= defaultdict(int)
144 # This algorithm could process basic blocks in any order. But for human
145 # consumption, it's better when variable are numbered from the start of
147 for n
in cfg
.iter_rev_postorder():
150 # Rename variables within basic block.
152 # Don't rename args of phi funcs, those names come from
153 # predecessor blocks.
155 for i
, arg
in enumerate(inst
.args
):
156 inst
.args
[i
] = xform_expr
.expr_xform(arg
, rename_reg
)
159 if is_reg(inst
.dest
):
160 # If it's a register, this defines a new register version
163 suffix
= bb
.addr
+ "_phi"
166 var_map
[inst
.dest
.name
] = suffix
168 var_map
[inst
.dest
.name
] += 1
169 inst
.dest
= rename_reg(inst
.dest
)
171 # Otherwise, it's more complex expression which may contain
172 # reference to reg as a subexpression
173 inst
.dest
= xform_expr
.expr_xform(inst
.dest
, rename_reg
)
175 # Now rename arguments of phi functions in successor blocks, in the
176 # position which correspond to the current block's predecessor edge
177 for next_n
in cfg
.succ(n
):
178 my_pos
= cfg
.pred(next_n
).index(n
)
179 next_bb
= cfg
[next_n
]["val"]
183 # +1 because first arg is SFUNC("phi")
184 new_reg
= rename_reg(inst
.args
[0].args
[my_pos
+ 1])
185 inst
.args
[0].args
[my_pos
+ 1] = new_reg
188 def rename_ssa_vars_subscripts(cfg
):
189 rename_ssa_vars(cfg
, False)
192 def rename_ssa_vars_addrs(cfg
):
193 rename_ssa_vars(cfg
, True)
197 "Verify that structural properties of SSA hold."
199 # Step 1: Verify that each var is assigned only once.
203 def check_single_assign(inst
):
204 for d
in inst
.defs():
205 assert d
not in all_defs
208 foreach_inst(cfg
, check_single_assign
)
210 # Step 2: Verify that each use is dominated by definition.
212 def check_doms(inst_addr
, node
, reg
):
213 # All "_0" regs are assumed to be implicitly defined on entry
214 # (e.g. func params).
215 if reg
.name
.endswith("_0"):
221 # We start with testing current passed node to handle the
222 # case of this func being called for phi statement, where
223 # its predecessor node is passed here. For normal statement,
224 # this will check its own node in vain, but that's fine.
225 defs
= cfg
[node
]["val"].defs()
229 dom_list
.append(node
)
230 node
= cfg
[node
]["idom"]
232 assert False, "@%s: Use of %s is not dominated by definition, " \
233 "expected to be in one of bblocks: %s, instead in: %s" % (
234 inst_addr
, reg
, dom_list
, xform_cfg
.find_1st_def(cfg
, reg
, in_bblock
=True)
237 for n
, info
in cfg
.nodes_props():
241 for i
, inst
in enumerate(bb
.items
):
242 for d
in inst
.defs():
245 for i
in range(len(bb
.items
) - 1, -1, -1):
250 for i
, u
in enumerate(inst
.args
[0].args
[1:]):
251 check_doms(inst
.addr
, preds
[i
], u
)
254 for u
in inst
.uses():
256 assert inst_map
[u
] < i
258 check_doms(inst
.addr
, n
, u
)