github/workflows/pycopy-test: Upgrade Pycopy to 3.6.1.
[ScratchABlock.git] / apply_xform.py
blob8f17db1270340977bf9a9deb00936f6dba33c0ca
1 #!/usr/bin/env python3
2 import sys
3 import argparse
4 import os.path
5 import glob
6 import logging
8 import yaml
9 import yamlutils
11 import core
12 from parser import *
13 import dot
14 from dataflow import *
15 from xform import *
16 from xform_utils import *
17 from decomp import *
18 from asmprinter import AsmPrinter
19 import cprinter
20 import progdb
21 import bindata
23 # TODO: something above shadows "copy" otherwise
24 import copy
# Logger named after this module.
_log = logging.getLogger(__name__)


# Function database and a snapshot of it. Both are rebound by one_iter():
# FUNC_DB to progdb.FUNC_DB_BY_ADDR, FUNC_DB_ORG to a deep copy taken before
# the input is processed, so the two can be compared afterwards.
FUNC_DB = {}
FUNC_DB_ORG = {}
def parse_args():
    """Parse the command line and apply the global display switches.

    Builds the argument parser, parses ``sys.argv`` and, as a side effect,
    flips a few class/module-level flags that depend on the options:

    * ``--repr`` sets ``core.SimpleExpr.simple_repr`` to False,
    * ``--inst-addr`` sets ``core.Inst.show_addr`` to True,
    * ``--dot-inst`` sets ``dot.show_insts`` to True.

    Returns the parsed ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(
        description="Parse PseudoC program, apply transformations, and dump result in various formats"
    )
    opt = parser.add_argument

    # Input/output selection.
    opt("file", help="input file in PseudoC format, or directory of such files")
    opt("-o", "--output", help="output file/dir (default stdout for single file, *.out for directory)")
    opt("--arch", default="xtensa", help="architecture to use")

    # What to apply and how.
    opt("--script", action="append", help="apply script from file")
    opt("--iter", action="store_true", help="apply transform iteratively until no changes to funcdb")
    opt("--funcdb", help="function database file (default: funcdb.yaml in input file's dir)")

    # Output shape.
    opt(
        "--format",
        choices=["none", "bblocks", "asm", "c"],
        default="bblocks",
        help="output format (default: %(default)s)",
    )
    opt(
        "--output-suffix",
        metavar="SUFFIX",
        default=".out",
        help="suffix for output files in same-dir mode (default: .out)",
    )
    opt("--no-dead", action="store_true", help="don't output DCE-eliminated instructions")
    opt("--no-comments", action="store_true", help="don't output decompilation comments (annotations)")
    opt("--no-graph-header", action="store_true", help="don't output graph properties")
    opt("--annotate-calls", action="store_true", help="annotate calls with uses/defs")
    opt("--inst-addr", action="store_true", help="output instruction addresses")
    opt("--dot-inst", action="store_true", help="output instructions in .dot files")
    opt("--repr", action="store_true", help="dump __repr__ format of instructions and other objects")

    # Diagnostics.
    opt("--debug", action="store_true", help="produce debug files")
    opt("--log-level", metavar="LEVEL", default="INFO", help="set logging level (default: %(default)s)")

    parsed = parser.parse_args()

    # Global switches driven directly by options.
    if parsed.repr:
        core.SimpleExpr.simple_repr = False
    if parsed.inst_addr:
        core.Inst.show_addr = True
    if parsed.dot_inst:
        import dot
        dot.show_insts = True

    return parsed
def handle_file(args):
    """Process ``args.file``, naming the file on stdout if processing fails.

    Delegates to handle_file_unprotected(). Any ``Exception`` it raises is
    re-raised after a line identifying the offending file has been printed.
    The result of handle_file_unprotected() is not returned.
    """
    try:
        handle_file_unprotected(args)
    except Exception as exc:
        # Report which input was being handled, then let the error propagate.
        message = "Error while processing file: " + args.file
        print(message)
        raise exc
def handle_file_unprotected(args):
    """Parse one input file, apply transformations and print the result.

    Steps, in order:

    1. Parse ``args.file`` with ``Parser`` into a CFG; the parser is attached
       to the CFG as ``cfg.parser``.
    2. With ``args.debug``, dump the initial CFG to ``<file>.0.bb`` and
       ``<file>.0.dot``.
    3. Apply transformations: either every module named in ``args.script``
       (via its ``apply(cfg)``), or, if no scripts were given and the parser
       has a ``script`` attribute, the operations listed there.
    4. With ``args.debug``, dump the resulting CFG to ``<file>.out.bb`` and
       ``<file>.out.dot``.
    5. Print the CFG in ``args.format`` to ``args.output`` (or stdout when no
       output is given or the format is "none").
    6. Call ``progdb.update_funcdb(cfg)``.

    Returns the processed CFG. Exceptions are not handled here; the output
    file, if one was opened, is closed even when printing raises.
    """
    p = Parser(args.file)
    cfg = p.parse()
    cfg.parser = p

    # If we want to get asm back, i.e. stay close to the input, don't remove
    # trailing jumps. This will work OK for data flow algos, but will produce
    # broken or confusing output for control flow algos (for which asm output
    # shouldn't be used of course).
    # Update: it's unsafe to use this during dataflow analysis
    #if args.format != "asm":
    #    foreach_bblock(cfg, remove_trailing_jumps)

    if args.debug:
        with open(args.file + ".0.bb", "w") as f:
            dump_bblocks(cfg, f, no_graph_header=args.no_graph_header)
        with open(args.file + ".0.dot", "w") as f:
            dot.dot(cfg, f)

    if args.script:
        # Scripts from the command line take precedence over in-file ones.
        for s in args.script:
            mod = __import__(s)
            mod.apply(cfg)
    elif hasattr(p, "script"):
        for op_type, op_name in p.script:
            if op_type == "xform:":
                # Transformation applied to the whole CFG; looked up by name
                # among this module's globals.
                func = globals()[op_name]
                func(cfg)
            elif op_type == "xform_bblock:":
                func = globals()[op_name]
                foreach_bblock(cfg, func)
            elif op_type == "xform_inst:":
                func = globals()[op_name]
                foreach_inst(cfg, func)
            elif op_type == "script:":
                mod = __import__(op_name)
                mod.apply(cfg)
            else:
                # Unknown operation type.
                assert 0

    if args.debug:
        with open(args.file + ".out.bb", "w") as f:
            dump_bblocks(cfg, f, no_graph_header=args.no_graph_header)
        with open(args.file + ".out.dot", "w") as f:
            dot.dot(cfg, f)

    if args.output and args.format != "none":
        out = open(args.output, "w")
    else:
        out = sys.stdout

    # try/finally so that the output file is closed even if a printer raises.
    try:
        if args.no_comments:
            Inst.show_comments = False

        if args.format == "bblocks":
            printer = CFGPrinter(cfg, out)
            if args.no_graph_header:
                printer.print_graph_header = lambda: None
            printer.inst_printer = repr if args.repr else str
            printer.no_dead = args.no_dead
            printer.print()
        elif args.format == "asm":
            printer = AsmPrinter(cfg, out)
            printer.no_dead = args.no_dead
            printer.print()
        elif args.format == "c":
            #foreach_bblock(cfg, remove_trailing_jumps)
            cfg.number_postorder()
            Inst.trail = ";"
            cprinter.no_dead = args.no_dead
            cprinter.dump_c(cfg, out)
    finally:
        # Never close stdout, only a file we opened ourselves.
        if out is not sys.stdout:
            out.close()

    progdb.update_funcdb(cfg)

    return cfg
def one_iter(input, output, iter_no):
    """Run one pass over the input and report whether the funcdb changed.

    ``input`` is a single file or a directory; for a directory every regular
    file ending in ".lst" is processed. ``output`` is the output file/dir as
    given on the command line (may be empty). ``iter_no`` is the zero-based
    iteration number; on iteration 0 a "<funcdb>.in" file, if present, is
    loaded in addition to the funcdb itself.

    Uses the module-level ``args`` and rebinds the globals ``FUNC_DB`` and
    ``FUNC_DB_ORG``. In directory mode ``args.file`` and ``args.output`` are
    overwritten for each processed file.

    Returns True if ``FUNC_DB`` differs from the snapshot taken before
    processing; in that case the funcdb is also saved, unless
    ``args.funcdb`` is "none".
    """
    global FUNC_DB, FUNC_DB_ORG

    use_funcdb = args.funcdb != "none"

    if use_funcdb:
        seed_db = args.funcdb + ".in"
        to_load = []
        if iter_no == 0 and os.path.exists(seed_db):
            to_load.append(seed_db)
        if os.path.exists(args.funcdb):
            to_load.append(args.funcdb)
        progdb.load_funcdb(*to_load)

    # Snapshot the database so a change can be detected after processing.
    FUNC_DB = progdb.FUNC_DB_BY_ADDR
    FUNC_DB_ORG = copy.deepcopy(FUNC_DB)

    # If script has init() function, call it at the beginning of each
    # iteration, this is useful to reset some state. E.g., if some
    # funcdb property is calculated as a union, but we want to find
    # its lower bound, we need to reset it to empty set at each
    # iteration.
    for script_name in args.script or ():
        script_mod = __import__(script_name)
        if hasattr(script_mod, "init"):
            script_mod.init()

    if not os.path.isdir(input):
        handle_file(args)
    else:
        if output and not os.path.isdir(output):
            os.makedirs(output)
        for path in glob.glob(input + "/*"):
            if not path.endswith(".lst") or not os.path.isfile(path):
                continue
            if args.debug:
                print(path)
            args.file = path
            if output:
                # Same file name, placed in the output directory.
                args.output = output + "/" + path.rpartition("/")[2]
            else:
                # No output dir: write next to the input, with the suffix.
                args.output = path + args.output_suffix
            handle_file(args)

    changed = FUNC_DB != FUNC_DB_ORG
    if changed and use_funcdb:
        progdb.save_funcdb(args.funcdb)

    return changed
def __main__():
    """Entry point: set up the project environment and run the iterations.

    Uses the module-level ``args``. With ``--annotate-calls`` sets
    ``core.Inst.annotate_calls``. When no ``--funcdb`` was given, the project
    directory is derived from the input (the directory itself, or the
    directory containing the input file), ``args.funcdb`` defaults to
    "<proj_dir>/funcdb.yaml", binary data is initialised from that directory
    and "<proj_dir>/symtab.txt" is loaded if it exists.

    Then one_iter() is called once, or, with ``--iter``, repeatedly until it
    reports no change.

    NOTE(review): the source this was recovered from had lost its
    indentation. The bindata/symtab loading is kept inside the
    "no --funcdb given" branch because ``proj_dir`` is only bound there;
    confirm against the upstream file.
    """
    if args.annotate_calls:
        core.Inst.annotate_calls = True

    if not args.funcdb:
        if os.path.isdir(args.file):
            # For an input as directory, use this *input* directory
            proj_dir = args.file
        else:
            # For a single file, use containing directory
            proj_dir = os.path.dirname(args.file) or "."

        args.funcdb = proj_dir + "/funcdb.yaml"
        _log.info("Using funcdb: %s", args.funcdb)
        # Load binary data
        bindata.init(proj_dir)
        # Load symtab
        symtab_path = proj_dir + "/symtab.txt"
        if os.path.exists(symtab_path):
            # The format string needs a %s placeholder for the path argument;
            # without it logging fails to format the record.
            _log.info("Using symtab: %s", symtab_path)
            progdb.load_symtab(symtab_path)

    input = args.file
    output = args.output

    iter_no = 0
    while True:
        changed = one_iter(input, output, iter_no)
        if not args.iter:
            # Single-pass mode.
            break
        if args.debug:
            print("=== Done iteration %d ===" % iter_no)
        if not changed:
            # Fixed point reached: funcdb no longer changes.
            break
        iter_no += 1
# Module-level code

# As arch.load_arch() performs dynamic import, do it outside of __main__(),
# i.e. at load-time, to work with Python "strict mode" semantics.
args = parse_args()

if args.log_level:
    # Level constants in the logging module are upper-case; normalise the
    # user-supplied name so that e.g. "debug" works like "DEBUG" instead of
    # resolving to the logging.debug() function.
    logging.basicConfig(level=getattr(logging, args.log_level.upper()))

import arch
arch.load_arch(args.arch)
def preparse_scripts(input):
    """Collect script names declared by "#script: " lines in the input.

    ``input`` is either a single file or a directory. For a directory, every
    regular file in it whose name ends in ".lst" is scanned; otherwise
    ``input`` itself is scanned.

    Returns the list of script names (text following the "#script: " prefix,
    with surrounding whitespace removed), in the order encountered. Lines
    whose prefix is followed by nothing but whitespace are ignored rather
    than raising IndexError.
    """
    prefix = "#script: "

    if os.path.isdir(input):
        files = [
            full_name
            for full_name in glob.glob(input + "/*")
            if full_name.endswith(".lst") and os.path.isfile(full_name)
        ]
    else:
        files = [input]

    scripts = []
    for fname in files:
        with open(fname) as f:
            for line in f:
                if not line.startswith(prefix):
                    continue
                name = line[len(prefix):].strip()
                if name:
                    scripts.append(name)
    return scripts
# Preload scripts: first those named on the command line, then those
# declared by "#script: " lines in the input file(s).
for script_name in args.script or ():
    __import__(script_name)
for script_name in preparse_scripts(args.file):
    __import__(script_name)


# Run only when executed as a script.
if __name__ == "__main__":
    __main__()