github/workflows/pycopy-test: Upgrade Pycopy to 3.6.1.
[ScratchABlock.git] / decomp.py
blob0e2752632f2abb8a92d3e1938fcdb313dcfe0a98
1 import copy
2 import logging
4 from graph import Graph
5 from core import *
6 from xform import *
7 import cfgutils
10 _log = logging.getLogger(__name__)
13 def split_node(cfg, n):
14 """Split node "horizontally", by duplicating its content and splitting
15 incoming and outgoing edges.
16 """
17 assert cfg.degree_in(n) == 2
18 assert cfg.degree_out(n) == 1
19 preds = cfg.pred(n)
20 node_props = cfg[n]
21 node2_props = copy.deepcopy(node_props)
22 n2 = n + ".split1"
23 cfg.add_node(n2, **node2_props)
24 cfg.remove_edge(preds[1], n)
25 cfg.add_edge(preds[1], n2)
26 cfg.add_edge(n2, cfg.succ(n)[0])
29 class RecursiveBlock(BBlock):
30 """A structured block, consisting recursively of BBlock's and
31 RecursiveBlock's."""
33 def recursive_union(self, func):
34 res = set()
35 for subb in self.subblocks():
36 res |= func(subb)
37 return res
39 def uses(self):
40 return self.recursive_union(lambda b: b.uses())
42 def defs(self, regs_only=True):
43 return self.recursive_union(lambda b: b.defs(regs_only))
46 class Seq(RecursiveBlock):
47 def __init__(self, b1, b2):
48 super().__init__(b1.addr)
49 if isinstance(b1, Seq):
50 b1 = b1.items
51 else:
52 b1 = [b1]
53 if isinstance(b2, Seq):
54 b2 = b2.items
55 else:
56 b2 = [b2]
57 self.items = b1 + b2
59 def subblocks(self):
60 return self.items
62 def __repr__(self):
63 return "%s(%r)" % (self.__class__.__name__, self.items)
65 def dump(self, stream, indent=0, printer=str):
66 for b in self.items:
67 b.dump(stream, indent, printer)
70 def match_seq(cfg):
71 for v in cfg.nodes():
72 if cfg.degree_out(v) == 1:
73 succ = cfg.succ(v)[0]
74 if cfg.degree_in(succ) == 1:
75 _log.info("seq: %s %s", v, succ)
76 newb = Seq(cfg.node(v)["val"], cfg.node(succ)["val"])
77 cfg.add_node(v, val=newb)
78 cfg.move_succ(succ, v)
79 cfg.remove_node(succ)
80 return True
83 def match_if(cfg):
84 for v in cfg.nodes():
85 if cfg.degree_out(v) == 2:
86 a, b = cfg.sorted_succ(v)
88 if cfg.degree_in(a) >= 2 and cfg.degree_in(b) == 1 and cfg.degree_out(b) == 1:
89 truth = False
90 cond = cfg.edge(v, a).get("cond")
91 elif cfg.degree_in(b) >= 2 and cfg.degree_in(a) == 1 and cfg.degree_out(a) == 1:
92 truth = True
93 cond = cfg.edge(v, a).get("cond")
94 a, b = b, a
95 else:
96 continue
98 c = cfg.succ(b)[0]
99 if c == a:
100 _log.info("if: %s, %s, %s", v, b, c)
101 v = split_bblock(cfg, v, ".if", only_non_empty=True)
102 if_header = cfg.node(v)["val"]
103 t_block = cfg.node(b)["val"]
104 if truth == False:
105 cond = cond.neg()
106 newb = IfElse(if_header, t_block, None, cond)
107 cfg.add_node(v, val=newb)
108 cfg.remove_node(b)
109 cfg.set_edge(v, a, cond=None)
110 return True
113 IFELSE_COND = 0
114 IFELSE_BRANCH = 1
116 class IfElse(RecursiveBlock):
117 def __init__(self, header, t_block, f_block, true_cond):
118 super().__init__(header.addr)
119 self.header = header
120 self.branches = [(true_cond, t_block), (None, f_block)]
122 def subblocks(self):
123 return [x[1] for x in self.branches if x[1]]
125 def recursive_union(self, func):
126 res = set()
127 for cond, subb in self.branches:
128 if cond:
129 res |= func(cond)
130 if subb:
131 res |= func(subb)
132 return res
134 def swap_branches(self):
135 # Swap If/Else branches, negating condition
136 assert len(self.branches) == 2
137 assert self.branches[1][1] is not None
138 [(true_cond, t_block), (dummy, f_block)] = self.branches
139 self.branches = [(true_cond.neg(), f_block), (None, t_block)]
141 def __repr__(self):
142 return "%s(%r)" % (self.__class__.__name__, self.branches)
144 def dump(self, stream, indent=0, printer=str):
145 self.write(stream, indent, "if %s {" % self.branches[0][0])
146 self.branches[0][1].dump(stream, indent + 1, printer)
148 for cond, block in self.branches[1:-1]:
149 self.write(stream, indent, "} else if %s {" % cond)
150 block.dump(stream, indent + 1, printer)
152 assert self.branches[-1][0] is None
153 if self.branches[-1][1] is not None:
154 self.write(stream, indent, "} else {")
155 self.branches[-1][1].dump(stream, indent + 1, printer)
156 self.write(stream, indent, "}")
159 # if (!(a > b)) goto false
160 # {true}
161 # goto out
162 # false:
163 # {false}
164 # out:
166 def match_ifelse(cfg):
167 for v in cfg.nodes():
168 if cfg.degree_out(v) == 2:
169 succ = cfg.sorted_succ(v)
170 cond = cfg.edge(v, succ[0]).get("cond")
171 if cond:
172 f_v = succ[0]
173 t_v = succ[1]
174 f_v_s = cfg.succ(f_v)
175 t_v_s = cfg.succ(t_v)
177 if len(t_v_s) != 1: continue
178 if len(f_v_s) != 1: continue
180 common = list(set(t_v_s) & set(f_v_s))
181 if not common:
182 continue
184 f_v_preds = cfg.pred(f_v)
185 t_v_preds = cfg.pred(t_v)
186 if len(f_v_preds) != 1 or len(t_v_preds) != 1:
187 _log.warn("ifelse: %s, %s, %s, %s is part of abnormal selection", v, t_v, f_v, f_v_s[0])
188 continue
190 f_v_s = common
192 _log.info("ifelse: %s, %s, %s, %s", v, t_v, f_v, f_v_s[0])
193 v = split_bblock(cfg, v, ".if", only_non_empty=True)
194 if_header = cfg.node(v)["val"]
195 t_block = cfg.node(t_v)["val"]
196 f_block = cfg.node(f_v)["val"]
197 newb = IfElse(if_header, t_block, f_block, cond.neg())
198 cfg.add_node(v, val=newb)
199 cfg.remove_node(t_v)
200 cfg.remove_node(f_v)
201 cfg.add_edge(v, f_v_s[0])
202 return True
206 # If we have:
208 # if (cond) {
209 # if ...
210 # } else {
211 # // not if
214 # It's better to transform it to:
216 # if (!cond) {
217 # // not if
218 # } else {
219 # if ...
222 # , then to be recognized by match_if_else_ladder
224 def match_if_else_inv_ladder_recursive(block):
225 if isinstance(block, IfElse):
226 if len(block.branches) != 2:
227 _log.warn("match_if_else_inv_ladder: Must be applied before match_if_else_ladder")
228 return
229 if_block = block.branches[0][IFELSE_BRANCH]
230 else_block = block.branches[1][IFELSE_BRANCH]
231 if isinstance(if_block, IfElse) and not isinstance(else_block, IfElse):
232 block.swap_branches()
233 if_block = block.branches[0][IFELSE_BRANCH]
234 else_block = block.branches[1][IFELSE_BRANCH]
235 match_if_else_inv_ladder_recursive(if_block)
236 match_if_else_inv_ladder_recursive(else_block)
238 def match_if_else_inv_ladder(cfg):
239 for v, node_props in cfg.nodes_props():
240 block = node_props["val"]
241 match_if_else_inv_ladder_recursive(block)
245 # If we have:
247 # $a0 = val1
248 # if (...) {
249 # $a0 = val2
252 # it's equivalent to:
254 # if (...) {
255 # $a0 = val2
256 # } else {
257 # $a0 = val1
260 # And transforming it to such may enable match_if_else_ladder
261 def match_if_else_unjumped(cfg):
262 for v, node_props in cfg.nodes_props():
263 #print((v, node_props))
264 block = node_props["val"]
265 if type(block) is BBlock and cfg.degree_out(v) == 1:
266 succ = cfg.succ(v)[0]
267 #print(">", (succ, cfg.node(succ)))
268 succ_block = cfg.node(succ)["val"]
269 if isinstance(succ_block, IfElse) \
270 and succ_block.branches[-1][IFELSE_BRANCH] is None \
271 and type(succ_block.branches[0][IFELSE_BRANCH]) is BBlock:
272 first_defs = block.defs(regs_only=False)
273 second_defs = succ_block.defs(regs_only=False)
274 second_uses = succ_block.uses()
275 _log.info("ifelse_unjumped: first: defs: %s | second: defs: %s, uses: %s", first_defs, second_defs, second_uses)
276 if not first_defs:
277 # Everything was apparently DCEed
278 return
279 if not first_defs.issubset(second_defs):
280 _log.info("ifelse_unjumped: can't apply, because first defines more other vars than 2nd: %s vs %s",
281 first_defs, second_defs)
282 return
283 if first_defs & second_uses:
284 _log.info("ifelse_unjumped: can't apply, because if uses (%s) vals defined in preceding block (%s)",
285 second_uses, first_defs)
286 return
287 # TODO: Are the checks above enough?
288 _log.info("ifelse_unjumped: %s, %s", v, succ)
289 cfgutils.detach_node(cfg, v)
290 succ_block.branches[-1] = (None, block)
291 return True
294 def match_if_else_ladder(cfg):
295 for v, node_props in cfg.nodes_props():
296 block = node_props["val"]
297 if isinstance(block, IfElse):
298 else_block = block.branches[-1][1]
299 if isinstance(else_block, IfElse):
300 block.branches = block.branches[:-1] + else_block.branches
301 return True
304 class Loop(RecursiveBlock):
305 def __init__(self, b):
306 super().__init__(b.addr)
307 self.items = [b]
309 def subblocks(self):
310 return self.items
312 def __repr__(self):
313 return "%s(%s)" % (self.__class__.__name__, self.items[0])
315 def dump(self, stream, indent=0, printer=str):
316 self.write(stream, indent, "while (1) {")
317 for b in self.items:
318 b.dump(stream, indent + 1, printer)
319 self.write(stream, indent, "}")
322 def match_infloop(cfg):
323 for v in cfg.nodes():
324 if cfg.degree_out(v) == 1:
325 for s in cfg.succ(v):
326 if s == v:
327 _log.info("infloop: %s", v)
328 b = cfg.node(v)["val"]
329 newb = Loop(b)
330 cfg.add_node(v, val=newb)
331 cfg.remove_edge(v, v)
332 return True
335 class DoWhile(RecursiveBlock):
336 def __init__(self, b, cond):
337 super().__init__(b.addr)
338 self.cond = cond
339 self.items = [b]
341 def subblocks(self):
342 return self.items
344 def __repr__(self):
345 return "%s(%s)" % (self.__class__.__name__, self.items[0])
347 def dump(self, stream, indent=0, printer=str):
348 self.write(stream, indent, "do {")
349 for b in self.items:
350 b.dump(stream, indent + 1, printer)
351 self.write(stream, indent, "} while %s;" % self.cond)
354 def match_dowhile(cfg):
355 for v in cfg.nodes():
356 if cfg.degree_out(v) == 2:
357 for s in cfg.succ(v):
358 if s == v:
359 _log.info("dowhile: %s", v)
360 b = cfg.node(v)["val"]
361 newb = DoWhile(b, cfg.edge(v, v).get("cond"))
362 cfg.add_node(v, val=newb)
363 cfg.remove_edge(v, v)
364 return True
367 class While(RecursiveBlock):
368 def __init__(self, addr, b, cond):
369 super().__init__(addr)
370 self.cond = cond
371 self.items = [b]
373 def subblocks(self):
374 return self.items
376 def __repr__(self):
377 return "%s(%s)" % (self.__class__.__name__, self.items[0])
379 def dump(self, stream, indent=0, printer=str):
380 self.write(stream, indent, "while %s {" % self.cond)
381 for b in self.items:
382 b.dump(stream, indent + 1, printer)
383 self.write(stream, indent, "}")
386 def match_while(cfg):
387 for v in cfg.nodes():
388 if cfg.degree_out(v) == 2:
389 succ = cfg.sorted_succ(v)
390 back_cand = cfg.succ(succ[0])
391 if len(back_cand) == 1 and back_cand[0] == v and cfg.degree_in(succ[0]) == 1:
392 _log.info("while: %s, %s", v, succ[0])
393 b = cfg.node(succ[0])["val"]
394 newb = While(v, b, cfg.edge(v, succ[0]).get("cond"))
395 cfg.add_node(v, val=newb)
396 cfg.remove_node(succ[0])
397 return True
400 # if (cond) {
401 # do {...} while (cond);
404 # =>
406 # while (cond) {...}
408 def match_if_dowhile(cfg):
409 for addr, info in cfg.nodes_props():
410 bblock = info["val"]
411 if type(bblock) is IfElse:
412 subs = bblock.subblocks()
413 if len(subs) == 1 and type(subs[0]) is DoWhile:
414 if_cond = bblock.branches[0][0]
415 dowhile_cond = subs[0].cond
416 #print(if_cond, if_cond == dowhile_cond)
417 if if_cond != dowhile_cond:
418 continue
419 while_bb = While(bblock.branches[0][1], subs[0].items[0], if_cond)
420 info["val"] = while_bb
421 return True
424 def match_control_and(cfg):
425 for v in cfg.nodes():
426 if cfg.degree_out(v) == 2:
427 succ1 = cfg.sorted_succ(v)
428 v2 = succ1[1]
429 if cfg.degree_out(v2) == 2:
430 succ2 = cfg.sorted_succ(v2)
431 assert len(succ2) == 2
432 if succ1[0] == succ2[0]:
433 _log.info("and %s, %s", v, v2)
434 new_cond = EXPR(
435 "||",
436 cfg.edge(v, succ1[0]).get("cond").expr,
437 cfg.edge(v2, succ1[0]).get("cond").expr
439 cfg.edge(v, succ1[0]).get("cond").expr = new_cond
440 cfg.remove_node(v2)
441 cfg.add_edge(v, succ2[1])
442 return True
445 def match_abnormal_sel(cfg):
446 """Should be run only if match_if, match_ifelse don't match anything
447 in acyclic graph. Then any join node belong to abnormal selection
448 pattern. We try to find the top-most and split it, after which normal
449 structured matches should be tried again.
451 for v in cfg.iter_rev_postorder():
452 if cfg.degree_in(v) == 2 and cfg.degree_out(v) == 1:
453 _log.info("abnormal_sel: %s", v)
454 split_node(cfg, v)
455 return True