3 from graph
import Graph
8 class ParseError(Exception):
11 class UndefinedLabel(ParseError
):
17 def __init__(self
, l
):
29 def is_punct(self
, c
):
30 return c
in "&|^-+*/%=<>"
33 if not self
.l
.startswith(tok
):
36 is_word
= tok
[0] in string
.ascii_letters
37 rest
= self
.l
[len(tok
):]
39 if is_word
and rest
[0] not in string
.whitespace
:
41 elif self
.is_punct(tok
[0]) and self
.is_punct(rest
[0]):
47 def match_till(self
, tok
):
49 while self
.l
and not self
.l
.startswith(tok
):
54 def match_bracket(self
, open_b
, close_b
):
58 while self
.l
and level
:
59 if self
.match(open_b
):
61 elif self
.match(close_b
):
68 def expect(self
, tok
, msg
=None):
69 if not self
.match(tok
):
71 msg
= "Expected: '%s'" % tok
72 raise ParseError(msg
, self
.l
)
75 while self
.l
and self
.l
[0] == " ":
79 return self
.l
.rstrip()
83 while self
.l
and self
.l
[0] in (string
.ascii_letters
+ string
.digits
+ "_.$"):
89 return self
.l
[0] in string
.ascii_letters
+ "_."
92 return self
.l
[0] in string
.digits
95 assert self
.isident(), repr(self
.l
)
101 assert self
.isdigit(), repr(self
.l
)
103 if self
.l
.startswith("0x"):
107 return int(w
, 0), base
109 def skip_comment(self
):
111 self
.match_till("*/")
116 def __init__(self
, fname
):
118 self
.expect_line_addr
= None
126 def error(self
, msg
):
127 print("%s:%d: %s" % (self
.fname
, self
.curline
, msg
))
130 def parse_cond(self
, lex
):
133 e
= self
.parse_expr()
137 # Get line from a file, expanding any "macro-like" features,
138 # like "if (cond) $r0 = $r1"
139 def get_expand_line(self
, f
):
143 return [(None, None)]
150 if self
.pass_no
== 1:
152 if l
.startswith("xform: "):
153 self
.script
.append(l
.split(None, 1))
154 elif l
.startswith("xform_bblock: "):
155 self
.script
.append(l
.split(None, 1))
156 elif l
.startswith("xform_inst: "):
157 self
.script
.append(l
.split(None, 1))
158 elif l
.startswith("script: "):
159 self
.script
.append(l
.split(None, 1))
160 elif l
.startswith("struct: "):
162 data
= yaml
.safe_load(l
.split(None, 1)[1])
163 progdb
.set_struct_types(data
)
164 elif l
.startswith("struct_addr: "):
166 data
= yaml
.safe_load(l
.split(None, 1)[1])
167 data
= {(start
, start
+ max(progdb
.get_struct_types()[name
].keys()) + 4): name
for start
, name
in data
.items()}
168 progdb
.set_struct_instances(data
)
171 if self
.expect_line_addr
is None:
172 self
.expect_line_addr
= self
.detect_addr_field(l
)
174 if self
.expect_line_addr
:
175 addr
, l
= l
.split(" ", 1)
177 addr
= "%04d" % self
.curline
180 if l
.startswith("if "):
181 # May need to expand "if macro"
184 c
= self
.parse_cond(lex
)
186 if not lex
.match("goto"):
187 # Uncomment to treat as literal line for roundtrip testing
188 #return [(addr, "\x01" + l)]
190 (addr
, "if %s goto %s.1.cond" % (str(c
.neg()), addr
)),
192 (addr
+ ".1", addr
+ ".1.cond:"),
198 def parse_labels(self
):
201 with
open(self
.fname
) as f
:
204 for addr
, l
in self
.get_expand_line(f
):
208 self
.labels
[l
[:-1]] = addr
210 def parse_reg(self
, lex
):
212 return REG(lex
.ident())
214 def parse_arglist(self
):
217 while not self
.lex
.match(")"):
219 if self
.lex
.match("/*"):
220 comm
= "/*" + self
.lex
.match_till("*/") + "*/"
221 self
.lex
.expect("*/")
222 a
= self
.parse_expr()
223 assert a
, (repr(a
), repr(self
.lex
.l
))
226 if self
.lex
.match(","):
231 if self
.lex
.isdigit():
232 return VALUE(*self
.lex
.num())
234 def parse_primary(self
):
235 if self
.lex
.match("*"):
236 self
.lex
.expect("(", "Dereference requires explicit pointer cast")
237 type = self
.lex
.ident()
240 e
= self
.parse_primary()
242 elif self
.lex
.match("("):
243 e
= self
.parse_expr()
246 # If there was identifier, check it for being a type name
247 if e
.addr
in ("i8", "u8", "i16", "u16", "i32", "u32", "i64", "u64"):
249 e
= self
.parse_primary()
250 return EXPR("CAST", [tp
, e
])
252 elif self
.lex
.peek() == "$":
253 return self
.parse_reg(self
.lex
)
254 elif self
.lex
.isdigit():
255 return VALUE(*self
.lex
.num())
256 elif self
.lex
.match("-"):
257 e
= self
.parse_primary()
259 return VALUE(-e
.val
, e
.base
)
260 return EXPR("NEG", [e
])
261 elif self
.lex
.match("!"):
262 e
= self
.parse_primary()
263 return EXPR("!", [e
])
264 elif self
.lex
.isident():
265 id = self
.lex
.ident()
266 if self
.lex
.peek() == "(":
267 args
= self
.parse_arglist()
268 return EXPR("SFUNC", [SFUNC(id)] + args
)
270 return ADDR(self
.labels
.get(id, id))
272 self
.error("Cannot parse")
274 def parse_level(self
, operators
, next_level
):
280 if self
.lex
.match(op
):
282 e
= EXPR(op
, [e
, e2
])
288 return self
.parse_level(("*", "/", "%"), self
.parse_primary
)
291 return self
.parse_level(("+", "-"), self
.parse_mul
)
293 def parse_shift(self
):
294 return self
.parse_level(("<<", ">>"), self
.parse_add
)
296 def parse_lt_gt(self
):
297 return self
.parse_level(("<", "<=", ">=", ">"), self
.parse_shift
)
299 return self
.parse_level(("==", "!="), self
.parse_lt_gt
)
301 def parse_band(self
):
302 return self
.parse_level(("&",), self
.parse_eq
)
303 def parse_bxor(self
):
304 return self
.parse_level(("^",), self
.parse_band
)
306 return self
.parse_level(("|",), self
.parse_bxor
)
308 def parse_land(self
):
309 return self
.parse_level(("&&",), self
.parse_bor
)
311 return self
.parse_level(("||",), self
.parse_land
)
313 def parse_expr(self
):
314 return self
.parse_lor()
317 # If there's numeric value, treat it as address. Otherwise, parse expression.
318 def parse_local_addr_expr(self
):
319 if self
.lex
.isdigit():
321 return ADDR(self
.labels
.get(w
, w
))
322 return self
.parse_expr()
324 def get_label(self
, label
):
326 return self
.labels
[label
]
328 raise UndefinedLabel(label
)
330 def label_from_addr(self
, addr
):
331 labels
= list(filter(lambda x
: x
[1] == addr
, self
.labels
.items()))
336 def parse_inst(self
, l
):
337 lex
= self
.lex
= Lexer(l
)
338 if lex
.peek() == ".":
339 # Asm directive, ignore
341 if lex
.match("db") or lex
.match("unk"):
342 arg
= self
.parse_num()
345 return Inst(None, "SFUNC", [EXPR("SFUNC", [SFUNC("litbyte"), arg
])])
346 if lex
.match("goto"):
347 return Inst(None, "goto", [self
.parse_local_addr_expr()])
348 if lex
.match("call"):
349 return Inst(None, "call", [self
.parse_expr()])
353 if lex
.match("else"):
356 c
= self
.parse_cond(lex
)
358 label
= lex
.match_till(",").strip()
359 addr
= ADDR(self
.get_label(label
))
360 args
.extend([c
, addr
])
361 if not lex
.match(","):
363 return Inst(None, "if", args
)
364 if lex
.match("return"):
370 a
= self
.parse_expr()
374 return Inst(None, "return", args
)
376 dest
= self
.parse_expr()
378 return Inst(None, "LIT", [l
])
380 if is_expr(dest
) and isinstance(dest
.args
[0], SFUNC
) and lex
.eol():
381 return Inst(None, "SFUNC", [dest
])
383 def make_assign_inst(dest
, op
, args
):
384 #return Inst(dest, op, args)
385 return Inst(dest
, "=", [EXPR(op
, args
)])
390 src
= self
.parse_expr()
391 return make_assign_inst(dest
, "&", [dest
, src
])
392 elif lex
.match("|="):
394 src
= self
.parse_expr()
395 return make_assign_inst(dest
, "|", [dest
, src
])
396 elif lex
.match("^="):
398 src
= self
.parse_expr()
399 return make_assign_inst(dest
, "^", [dest
, src
])
400 elif lex
.match("+="):
402 src
= self
.parse_expr()
403 return make_assign_inst(dest
, "+", [dest
, src
])
404 elif lex
.match("-="):
406 src
= self
.parse_expr()
407 return make_assign_inst(dest
, "-", [dest
, src
])
408 elif lex
.match("*="):
410 src
= self
.parse_expr()
411 return make_assign_inst(dest
, "*", [dest
, src
])
412 elif lex
.match("/="):
414 src
= self
.parse_expr()
415 return make_assign_inst(dest
, "/", [dest
, src
])
416 elif lex
.match("%="):
418 src
= self
.parse_expr()
419 return make_assign_inst(dest
, "%", [dest
, src
])
420 elif lex
.match(">>="):
422 src
= self
.parse_expr()
423 return make_assign_inst(dest
, ">>", [dest
, src
])
424 elif lex
.match("<<="):
425 src
= self
.parse_expr()
426 return make_assign_inst(dest
, "<<", [dest
, src
])
429 src
= self
.parse_expr()
430 assert src
, repr(lex
.l
)
431 if isinstance(src
, SFUNC
):
432 args
= self
.parse_arglist()
433 return Inst(dest
, "SFUNC", [src
] + args
)
436 return Inst(dest
, "=", [src
])
438 assert 0, (lex
.l
, self
.curline
)
440 assert False, (repr(lex
.l
), self
.curline
)
443 def detect_addr_field(self
, l
):
444 # Autodetect whether there's address as first field
446 self
.expect_line_addr
= False
448 # Can be either label or address
457 def _parse_bblocks(self
, f
):
459 # TODO: not stored in props to avoid modifying tests
460 self
.cfg
.filename
= self
.fname
461 self
.cfg
.props
["name"] = None
462 self
.cfg
.props
["trailing_jumps"] = True
468 for addr
, l
in self
.get_expand_line(f
):
471 if self
.should_stop(addr
, l
):
475 l
= l
.split(";", 1)[0]
482 if self
.cfg
.props
["name"] is None:
483 # First label is function name
484 if block
is None and self
.cfg
.is_empty():
485 self
.cfg
.props
["name"] = l
[:-1]
486 self
.cfg
.props
["addr"] = addr
488 if block
is not None:
493 self
.cfg
.add_node(addr
, val
=block
)
495 # Link empty blocks consisting of just 1 label
496 if last_block
is not None:
497 self
.cfg
.add_edge(last_block
.addr
, block
.addr
)
504 self
.cfg
.add_node(addr
, val
=block
)
507 if last_block
is not None:
508 self
.cfg
.add_edge(last_block
.addr
, block
.addr
)
509 #last_block.end.append(addr)
513 inst
= Inst(None, "LIT", [l
[1:]], addr
=addr
)
517 inst
= self
.parse_inst(l
)
522 if inst
.op
in ("goto", "if", "call"):
524 pairs
= [None, inst
.args
[0]]
528 # Add out edge to each successor block, which can be
529 # many for generalized "if".
530 for i
in range(0, len(pairs
), 2):
533 # Skip adding bblocks for indirect jumps
534 if isinstance(addr
, ADDR
):
536 if addr
not in self
.cfg
:
537 self
.cfg
.add_node(addr
)
538 self
.cfg
.add_edge(block
.addr
, addr
, cond
=cond
)
540 if cond
or inst
.op
== "call":
546 elif inst
.op
== "return":
554 print("Warning: function %s was not properly terminated" % self
.cfg
.props
["name"])
555 # block may be None e.g. if last instruction was call,
556 # and function abruptly ended without return.
558 self
.cfg
.add_edge(last_block
.addr
, block
.addr
)
560 def parse_bblocks(self
, f
):
563 self
._parse
_bblocks
(f
)
564 except ParseError
as e
:
565 print("Error: %d: %r" % (self
.curline
+ 1, e
))
568 # External labels may produce empty CFG nodes during parsing.
569 # Make a pass over graph and remove such.
570 for node
, info
in list(self
.cfg
.nodes_props()):
572 self
.cfg
.remove_node(node
)
578 with
open(self
.fname
) as f
:
579 self
.parse_bblocks(f
)
582 def should_stop(self
, addr
, l
):