github/workflows/pycopy-test: Upgrade Pycopy to 3.6.1.
[ScratchABlock.git] / xform_expr.py
blobb91c40c733fc9be94dd1fe6b1b00607a7ebba5ba
1 # ScratchABlock - Program analysis and decompilation framework
3 # Copyright (c) 2015-2018 Paul Sokolovsky
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 """Transformation passes on expressions"""
20 import logging
22 from core import *
23 from xform_expr_basic import *
24 import xform_expr_infer
25 import arch
28 _log = logging.getLogger(__name__)
31 def is_expr_2args(e):
32 return is_expr(e) and len(e.args) == 2
35 def mod_add(a, b, bits=arch.BITNESS):
36 "Addition modulo power of 2, but preserve sign."
37 s = a + b
38 if s < 0:
39 return s % 2**bits - 2**bits
40 else:
41 return s % 2**bits
44 def expr_xform(e, func):
45 """Transform expression using a function. A function is called with
46 each recursive subexpression of expression (in depth-first order),
47 then with expression itself. A function can either return new expression
48 to replace original one with, or None to keep an original (sub)expression.
49 """
50 if isinstance(e, MEM):
51 new = expr_xform(e.expr, func)
52 if new:
53 e = MEM(e.type, new)
54 e = func(e) or e
55 return e
57 if isinstance(e, EXPR):
58 new = [expr_xform(a, func) or a for a in e.args]
59 e = EXPR(e.op, new)
60 return func(e) or e
62 if isinstance(e, COND):
63 new = expr_xform(e.expr, func)
64 if new:
65 e.expr = new
66 return e
68 return func(e) or e
71 def add_vals(a, b):
72 val = a.val + b.val
73 base = max(a.base, b.base)
74 return VALUE(val, base)
77 def expr_sub_const_to_add(e):
78 if is_expr_2args(e):
79 if e.op == "-" and is_value(e.args[1]):
80 return EXPR("+", [e.args[0], expr_neg(e.args[1])])
83 def expr_sub_to_add(e):
84 if is_expr(e) and e.op == "-":
85 new_args = [expr_neg(x) for x in e.args[1:]]
86 return EXPR("+", [e.args[0]] + new_args)
89 def expr_commutative_normalize(e):
90 if not is_expr(e):
91 return
92 if e.op not in ("+", "&", "|", "^"):
93 return
94 e.args.sort()
97 def expr_associative_add(e):
98 "Turn (a + b) + c into a + b + c."
99 if is_expr(e) and e.op == "+":
100 if is_expr(e.args[0]) and e.args[0].op == "+":
101 new_args = e.args[0].args.copy()
102 new_args.extend(e.args[1:].copy())
103 return EXPR("+", new_args)
106 def expr_simplify_add(e):
107 if is_expr(e) and e.op == "+":
108 new_args = []
109 val = 0
110 base = 0
111 for a in e.args:
112 if is_value(a):
113 val = mod_add(val, a.val)
114 base = max(base, a.base)
115 else:
116 new_args.append(a)
117 if new_args:
118 if val != 0:
119 new_args.append(VALUE(val, base))
120 if len(new_args) == 1:
121 return new_args[0]
122 new_args.sort()
123 return EXPR("+", new_args)
124 else:
125 return VALUE(val, base)
128 def expr_simplify_lshift(e):
129 if is_expr(e) and e.op == "<<":
130 assert is_expr_2args(e)
132 if is_value(e.args[1]):
133 val = e.args[1].val
134 if val == 0:
135 return e.args[0]
136 # Try to convert usages which are realistic address calculations,
137 # otherwise we may affect bitfield, etc. expressions.
138 if val < 5:
139 return EXPR("*", [e.args[0], VALUE(1 << val, 10)])
142 def expr_simplify_neg(e):
143 if is_expr(e) and e.op == "NEG":
144 new = expr_neg_if_possible(e.args[0])
145 return new
148 def expr_simplify_bitfield(e):
149 "Simplify bitfield() to an integer cast if possible."
150 if is_expr(e) and e.op == "SFUNC" and e.args[0] == SFUNC("bitfield"):
151 assert is_value(e.args[2]) and is_value(e.args[3])
152 if e.args[2].val == 0:
153 sz = e.args[3].val
154 type = {8: "u8", 16: "u16", 32: "u32"}.get(sz)
155 if type:
156 return EXPR("CAST", [TYPE(type), e.args[1]])
157 return EXPR("&", e.args[1], VALUE((1 << sz) - 1))
159 def expr_simplify_cast(e):
160 if is_expr(e) and e.op == "CAST":
161 assert is_expr_2args(e)
163 if is_value(e.args[1]):
164 val = e.args[1].val
166 tname = e.args[0].name
167 is_signed = tname[0] == "i"
168 bits = int(tname[1:])
169 mask = (1 << bits) - 1
170 val &= mask
171 if is_signed:
172 if val & (1 << (bits - 1)):
173 val -= mask + 1
175 return VALUE(val, e.args[1].base)
178 def unsignize_logical_ops(e):
179 "Operands of &, |, ^ should be unsigned."
180 if is_expr(e) and e.op in ("&", "|", "^"):
181 assert is_expr_2args(e)
183 if is_value(e.args[1]):
184 val = e.args[1].val
185 if val >= 0:
186 return
187 val = val & (2**arch.BITNESS - 1)
188 # Force hex
189 return EXPR(e.op, e.args[0], VALUE(val, 16))
192 def simplify_expr(expr):
193 new_expr = expr_xform(expr, expr_sub_to_add)
194 expr_commutative_normalize(new_expr)
195 new_expr = expr_xform(new_expr, expr_associative_add)
196 new_expr = expr_xform(new_expr, expr_simplify_add)
197 new_expr = expr_xform(new_expr, expr_simplify_lshift)
198 new_expr = expr_xform(new_expr, expr_simplify_bitfield)
199 new_expr = expr_xform(new_expr, expr_simplify_cast)
200 new_expr = expr_xform(new_expr, expr_simplify_neg)
201 new_expr = expr_xform(new_expr, unsignize_logical_ops)
203 expr_commutative_normalize(new_expr)
204 new_expr = expr_xform(new_expr, xform_expr_infer.simplify)
205 return new_expr
208 def expr_struct_access(m):
209 import progdb
210 structs = progdb.get_struct_types()
211 struct_addrs = progdb.get_struct_instances()
213 if is_mem(m) and is_value(m.expr):
214 addr = m.expr.val
215 for (start, end), struct_name in struct_addrs.items():
216 if start <= addr < end:
217 offset = addr - start
218 field = structs[struct_name][offset]
219 return SFIELD(struct_name, hex(start), field)
222 def struct_access_expr(expr):
223 new_expr = expr_xform(expr, expr_struct_access)
224 return new_expr
227 # Should transform inplace
228 def simplify_cond(e):
229 # TODO: Replaced with inference rule, remove
230 if e.is_relation():
231 arg1 = e.expr.args[0]
232 arg2 = e.expr.args[1]
233 if is_expr_2args(arg1) and arg1.op == "+" and is_value(arg1.args[1]) and is_value(arg2):
234 assert 0, "Should be replaced by fb730b7de49"
235 e.expr = EXPR(e.expr.op, arg1.args[0], add_vals(expr_neg(arg1.args[1]), arg2))
238 def expr_subst(expr, subst_dict):
240 if isinstance(expr, (VALUE, STR, ADDR, SFUNC, TYPE)):
241 return None
243 if isinstance(expr, REG):
244 new = subst_dict.get(expr)
245 if new and expr in new:
246 _log.debug("Trying to replace %s with recursively referring %s, not doing" % (expr, new))
247 return None
248 if new and len(new) > 10:
249 _log.debug("Trying to replace %s with complex [len=%d] %s, not doing" % (expr, len(new), new))
250 return None
251 return new
253 if isinstance(expr, MEM):
254 new = expr_subst(expr.expr, subst_dict)
255 if new:
256 return MEM(expr.type, new)
257 return
259 if isinstance(expr, COND):
260 # This performs substituations in-place, because the same
261 # COND instance is referenced in inst (to be later removed
262 # by remove_trailing_jumps) and in out edges in CFG.
263 new = expr_subst(expr.expr, subst_dict)
264 if new:
265 expr.expr = new
266 simplify_cond(expr)
267 return
269 if isinstance(expr, EXPR):
270 new_args = []
271 was_new = False
272 for a in expr.args:
273 new = expr_subst(a, subst_dict)
274 if new is None:
275 new = a
276 else:
277 was_new = True
278 new_args.append(new)
279 if not was_new:
280 return None
282 new_expr = EXPR(expr.op, new_args)
283 new_expr = simplify_expr(new_expr)
284 return new_expr
286 assert 0, repr((expr, type(expr)))