14 from dataflow
import *
16 from xform_utils
import *
18 from asmprinter
import AsmPrinter
23 # TODO: something above shadows "copy" otherwise
# Module-level logger for this script.
_log = logging.getLogger(__name__)

# Command-line interface definition. Options are declared one per statement;
# the parsed namespace exposes them with dashes turned into underscores
# (e.g. --no-dead -> args.no_dead).
argp = argparse.ArgumentParser(description="Parse PseudoC program, apply transformations, and dump result in various formats")
argp.add_argument("file", help="input file in PseudoC format, or directory of such files")
argp.add_argument("-o", "--output", help="output file/dir (default stdout for single file, *.out for directory)")
argp.add_argument("--arch", default="xtensa", help="architecture to use")
# action="append": the option may be repeated; stays None when never given.
argp.add_argument("--script", action="append", help="apply script from file")
argp.add_argument("--iter", action="store_true", help="apply transform iteratively until no changes to funcdb")
argp.add_argument("--funcdb", help="function database file (default: funcdb.yaml in input file's dir)")
argp.add_argument("--format", choices=["none", "bblocks", "asm", "c"], default="bblocks",
    help="output format (default: %(default)s)")
argp.add_argument("--output-suffix", metavar="SUFFIX", default=".out", help="suffix for output files in same-dir mode (default: .out)")
argp.add_argument("--no-dead", action="store_true", help="don't output DCE-eliminated instructions")
argp.add_argument("--no-comments", action="store_true", help="don't output decompilation comments (annotations)")
argp.add_argument("--no-graph-header", action="store_true", help="don't output graph properties")
argp.add_argument("--annotate-calls", action="store_true", help="annotate calls with uses/defs")
argp.add_argument("--inst-addr", action="store_true", help="output instruction addresses")
argp.add_argument("--dot-inst", action="store_true", help="output instructions in .dot files")
argp.add_argument("--repr", action="store_true", help="dump __repr__ format of instructions and other objects")
argp.add_argument("--debug", action="store_true", help="produce debug files")
argp.add_argument("--log-level", metavar="LEVEL", default="INFO", help="set logging level (default: %(default)s)")
54 args
= argp
.parse_args()
57 core
.SimpleExpr
.simple_repr
= False
59 core
.Inst
.show_addr
= True
67 def handle_file(args
):
69 handle_file_unprotected(args
)
70 except Exception as e
:
71 print("Error while processing file: " + args
.file)
75 def handle_file_unprotected(args
):
80 # If we want to get asm back, i.e. stay close to the input, don't remove
81 # trailing jumps. This will work OK for data flow algos, but will produce
82 # broken or confusing output for control flow algos (for which asm output
83 # shouldn't be used of course).
84 # Update: it's unsafe to use this during dataflow analysis
85 #if args.format != "asm":
86 # foreach_bblock(cfg, remove_trailing_jumps)
89 with
open(args
.file + ".0.bb", "w") as f
:
90 dump_bblocks(cfg
, f
, no_graph_header
=args
.no_graph_header
)
91 with
open(args
.file + ".0.dot", "w") as f
:
98 elif hasattr(p
, "script"):
99 for op_type
, op_name
in p
.script
:
100 if op_type
== "xform:":
101 func
= globals()[op_name
]
103 elif op_type
== "xform_bblock:":
104 func
= globals()[op_name
]
105 foreach_bblock(cfg
, func
)
106 elif op_type
== "xform_inst:":
107 func
= globals()[op_name
]
108 foreach_inst(cfg
, func
)
109 elif op_type
== "script:":
110 mod
= __import__(op_name
)
116 with
open(args
.file + ".out.bb", "w") as f
:
117 dump_bblocks(cfg
, f
, no_graph_header
=args
.no_graph_header
)
118 with
open(args
.file + ".out.dot", "w") as f
:
121 if args
.output
and args
.format
!= "none":
122 out
= open(args
.output
, "w")
127 Inst
.show_comments
= False
129 if args
.format
== "bblocks":
130 p
= CFGPrinter(cfg
, out
)
131 if args
.no_graph_header
:
132 p
.print_graph_header
= lambda: None
133 p
.inst_printer
= repr if args
.repr else str
134 p
.no_dead
= args
.no_dead
136 elif args
.format
== "asm":
137 p
= AsmPrinter(cfg
, out
)
138 p
.no_dead
= args
.no_dead
140 elif args
.format
== "c":
141 #foreach_bblock(cfg, remove_trailing_jumps)
142 cfg
.number_postorder()
144 cprinter
.no_dead
= args
.no_dead
145 cprinter
.dump_c(cfg
, out
)
147 if out
is not sys
.stdout
:
150 progdb
.update_funcdb(cfg
)
155 def one_iter(input, output
, iter_no
):
156 global FUNC_DB
, FUNC_DB_ORG
158 if args
.funcdb
!= "none":
160 if iter_no
== 0 and os
.path
.exists(args
.funcdb
+ ".in"):
161 dbs
.append(args
.funcdb
+ ".in")
162 if os
.path
.exists(args
.funcdb
):
163 dbs
.append(args
.funcdb
)
164 progdb
.load_funcdb(*dbs
)
166 FUNC_DB
= progdb
.FUNC_DB_BY_ADDR
167 FUNC_DB_ORG
= copy
.deepcopy(FUNC_DB
)
170 # If script has init() function, call it at the beginning of each
171 # iteration, this is useful to reset some state. E.g., if some
172 # funcdb property is calculated as a union, but we want to find
173 # its lower bound, we need to reset it to empty set at each
175 for s
in args
.script
:
177 if hasattr(mod
, "init"):
180 if os
.path
.isdir(input):
181 if output
and not os
.path
.isdir(output
):
183 for full_name
in glob
.glob(input + "/*"):
184 if full_name
.endswith(".lst") and os
.path
.isfile(full_name
):
187 args
.file = full_name
189 base_name
= full_name
.rsplit("/", 1)[-1]
190 args
.output
= output
+ "/" + base_name
192 args
.output
= full_name
+ args
.output_suffix
198 changed
= FUNC_DB
!= FUNC_DB_ORG
199 if changed
and args
.funcdb
!= "none":
200 progdb
.save_funcdb(args
.funcdb
)
206 if args
.annotate_calls
:
207 core
.Inst
.annotate_calls
= True
210 if os
.path
.isdir(args
.file):
211 # For an input as directory, use this *input* directory
214 # For a single file, use containing directory
215 proj_dir
= os
.path
.dirname(args
.file) or "."
217 args
.funcdb
= proj_dir
+ "/funcdb.yaml"
218 _log
.info("Using funcdb: %s", args
.funcdb
)
220 bindata
.init(proj_dir
)
# Load the symbol table if a symtab.txt exists in the project directory.
symtab_path = proj_dir + "/symtab.txt"
if os.path.exists(symtab_path):
    # logging applies "msg % args" when the record is emitted, so the path
    # needs a %s placeholder; without it the record fails to format and the
    # path is never logged.
    _log.info("Using symtab: %s", symtab_path)
    progdb.load_symtab(symtab_path)
231 changed
= one_iter(input, output
, iter_no
)
235 print("=== Done iteration %d ===" % iter_no
)
243 # As arch.load_arch() performs dynamic import, do it outside of __main__(),
244 # i.e. at load-time, to work with Python "strict mode" semantics.
248 logging
.basicConfig(level
=getattr(logging
, args
.log_level
))
251 arch
.load_arch(args
.arch
)
254 def preparse_scripts(input):
258 if os
.path
.isdir(input):
259 for full_name
in glob
.glob(input + "/*"):
260 if full_name
.endswith(".lst") and os
.path
.isfile(full_name
):
261 files
.append(full_name
)
266 with
open(fname
) as f
:
268 if l
.startswith("#script: "):
270 scripts
.append(l
.split(None, 1)[1])
276 for s
in args
.script
:
278 for s
in preparse_scripts(args
.file):
282 if __name__
== "__main__":