1 # ScratchABlock - Program analysis and decompilation framework
3 # Copyright (c) 2015-2018 Paul Sokolovsky
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 """Transformation passes on expressions"""
23 from xform_expr_basic
import *
24 import xform_expr_infer
28 _log
= logging
.getLogger(__name__
)
32 return is_expr(e
) and len(e
.args
) == 2
35 def mod_add(a
, b
, bits
=arch
.BITNESS
):
36 "Addition modulo power of 2, but preserve sign."
39 return s
% 2**bits
- 2**bits
44 def expr_xform(e
, func
):
45 """Transform expression using a function. A function is called with
46 each recursive subexpression of expression (in depth-first order),
47 then with expression itself. A function can either return new expression
48 to replace original one with, or None to keep an original (sub)expression.
50 if isinstance(e
, MEM
):
51 new
= expr_xform(e
.expr
, func
)
57 if isinstance(e
, EXPR
):
58 new
= [expr_xform(a
, func
) or a
for a
in e
.args
]
62 if isinstance(e
, COND
):
63 new
= expr_xform(e
.expr
, func
)
73 base
= max(a
.base
, b
.base
)
74 return VALUE(val
, base
)
77 def expr_sub_const_to_add(e
):
79 if e
.op
== "-" and is_value(e
.args
[1]):
80 return EXPR("+", [e
.args
[0], expr_neg(e
.args
[1])])
83 def expr_sub_to_add(e
):
84 if is_expr(e
) and e
.op
== "-":
85 new_args
= [expr_neg(x
) for x
in e
.args
[1:]]
86 return EXPR("+", [e
.args
[0]] + new_args
)
89 def expr_commutative_normalize(e
):
92 if e
.op
not in ("+", "&", "|", "^"):
97 def expr_associative_add(e
):
98 "Turn (a + b) + c into a + b + c."
99 if is_expr(e
) and e
.op
== "+":
100 if is_expr(e
.args
[0]) and e
.args
[0].op
== "+":
101 new_args
= e
.args
[0].args
.copy()
102 new_args
.extend(e
.args
[1:].copy())
103 return EXPR("+", new_args
)
106 def expr_simplify_add(e
):
107 if is_expr(e
) and e
.op
== "+":
113 val
= mod_add(val
, a
.val
)
114 base
= max(base
, a
.base
)
119 new_args
.append(VALUE(val
, base
))
120 if len(new_args
) == 1:
123 return EXPR("+", new_args
)
125 return VALUE(val
, base
)
128 def expr_simplify_lshift(e
):
129 if is_expr(e
) and e
.op
== "<<":
130 assert is_expr_2args(e
)
132 if is_value(e
.args
[1]):
136 # Try to convert usages which are realistic address calculations,
137 # otherwise we may affect bitfield, etc. expressions.
139 return EXPR("*", [e
.args
[0], VALUE(1 << val
, 10)])
142 def expr_simplify_neg(e
):
143 if is_expr(e
) and e
.op
== "NEG":
144 new
= expr_neg_if_possible(e
.args
[0])
148 def expr_simplify_bitfield(e
):
149 "Simplify bitfield() to an integer cast if possible."
150 if is_expr(e
) and e
.op
== "SFUNC" and e
.args
[0] == SFUNC("bitfield"):
151 assert is_value(e
.args
[2]) and is_value(e
.args
[3])
152 if e
.args
[2].val
== 0:
154 type = {8: "u8", 16: "u16", 32: "u32"}.get(sz
)
156 return EXPR("CAST", [TYPE(type), e
.args
[1]])
157 return EXPR("&", e
.args
[1], VALUE((1 << sz
) - 1))
159 def expr_simplify_cast(e
):
160 if is_expr(e
) and e
.op
== "CAST":
161 assert is_expr_2args(e
)
163 if is_value(e
.args
[1]):
166 tname
= e
.args
[0].name
167 is_signed
= tname
[0] == "i"
168 bits
= int(tname
[1:])
169 mask
= (1 << bits
) - 1
172 if val
& (1 << (bits
- 1)):
175 return VALUE(val
, e
.args
[1].base
)
178 def unsignize_logical_ops(e
):
179 "Operands of &, |, ^ should be unsigned."
180 if is_expr(e
) and e
.op
in ("&", "|", "^"):
181 assert is_expr_2args(e
)
183 if is_value(e
.args
[1]):
187 val
= val
& (2**arch
.BITNESS
- 1)
189 return EXPR(e
.op
, e
.args
[0], VALUE(val
, 16))
192 def simplify_expr(expr
):
193 new_expr
= expr_xform(expr
, expr_sub_to_add
)
194 expr_commutative_normalize(new_expr
)
195 new_expr
= expr_xform(new_expr
, expr_associative_add
)
196 new_expr
= expr_xform(new_expr
, expr_simplify_add
)
197 new_expr
= expr_xform(new_expr
, expr_simplify_lshift
)
198 new_expr
= expr_xform(new_expr
, expr_simplify_bitfield
)
199 new_expr
= expr_xform(new_expr
, expr_simplify_cast
)
200 new_expr
= expr_xform(new_expr
, expr_simplify_neg
)
201 new_expr
= expr_xform(new_expr
, unsignize_logical_ops
)
203 expr_commutative_normalize(new_expr
)
204 new_expr
= expr_xform(new_expr
, xform_expr_infer
.simplify
)
208 def expr_struct_access(m
):
210 structs
= progdb
.get_struct_types()
211 struct_addrs
= progdb
.get_struct_instances()
213 if is_mem(m
) and is_value(m
.expr
):
215 for (start
, end
), struct_name
in struct_addrs
.items():
216 if start
<= addr
< end
:
217 offset
= addr
- start
218 field
= structs
[struct_name
][offset
]
219 return SFIELD(struct_name
, hex(start
), field
)
222 def struct_access_expr(expr
):
223 new_expr
= expr_xform(expr
, expr_struct_access
)
227 # Should transform inplace
228 def simplify_cond(e
):
229 # TODO: Replaced with inference rule, remove
231 arg1
= e
.expr
.args
[0]
232 arg2
= e
.expr
.args
[1]
233 if is_expr_2args(arg1
) and arg1
.op
== "+" and is_value(arg1
.args
[1]) and is_value(arg2
):
234 assert 0, "Should be replaced by fb730b7de49"
235 e
.expr
= EXPR(e
.expr
.op
, arg1
.args
[0], add_vals(expr_neg(arg1
.args
[1]), arg2
))
238 def expr_subst(expr
, subst_dict
):
240 if isinstance(expr
, (VALUE
, STR
, ADDR
, SFUNC
, TYPE
)):
243 if isinstance(expr
, REG
):
244 new
= subst_dict
.get(expr
)
245 if new
and expr
in new
:
246 _log
.debug("Trying to replace %s with recursively referring %s, not doing" % (expr
, new
))
248 if new
and len(new
) > 10:
249 _log
.debug("Trying to replace %s with complex [len=%d] %s, not doing" % (expr
, len(new
), new
))
253 if isinstance(expr
, MEM
):
254 new
= expr_subst(expr
.expr
, subst_dict
)
256 return MEM(expr
.type, new
)
259 if isinstance(expr
, COND
):
260 # This performs substituations in-place, because the same
261 # COND instance is referenced in inst (to be later removed
262 # by remove_trailing_jumps) and in out edges in CFG.
263 new
= expr_subst(expr
.expr
, subst_dict
)
269 if isinstance(expr
, EXPR
):
273 new
= expr_subst(a
, subst_dict
)
282 new_expr
= EXPR(expr
.op
, new_args
)
283 new_expr
= simplify_expr(new_expr
)
286 assert 0, repr((expr
, type(expr
)))