Fix the segfault when initializer has unknown symbol
[smatch.git] / linearize.c
blob13cdf71612fa392891a00c13af962a3ea6b529b4
/*
 * Linearize - walk the statement tree (but _not_ the expressions)
 * to generate a linear version of it and the basic blocks.
 *
 * NOTE! We're not interested in the actual sub-expressions yet,
 * even though they can generate conditional branches and
 * subroutine calls. That's all "local" behaviour.
 *
 * Copyright (C) 2004 Linus Torvalds
 * Copyright (C) 2004 Christopher Li
 */
13 #include <string.h>
14 #include <stdarg.h>
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <assert.h>
19 #include "parse.h"
20 #include "expression.h"
21 #include "linearize.h"
22 #include "flow.h"
23 #include "target.h"
25 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
26 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
28 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right);
29 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val);
30 static void linearize_one_symbol(struct entrypoint *ep, struct symbol *sym);
32 struct access_data;
33 static pseudo_t add_load(struct entrypoint *ep, struct access_data *);
34 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *);
36 struct pseudo void_pseudo = {};
38 static struct position current_pos;
40 ALLOCATOR(pseudo_user, "pseudo_user");
42 static struct instruction *alloc_instruction(int opcode, int size)
44 struct instruction * insn = __alloc_instruction(0);
45 insn->opcode = opcode;
46 insn->size = size;
47 insn->pos = current_pos;
48 return insn;
51 static inline int type_size(struct symbol *type)
53 return type ? type->bit_size > 0 ? type->bit_size : 0 : 0;
/* Allocate an instruction whose size is taken from type_size(type). */
static struct instruction *alloc_typed_instruction(int opcode, struct symbol *type)
{
	int bits = type_size(type);

	return alloc_instruction(opcode, bits);
}
/* Thin wrapper around the generated entrypoint allocator. */
static struct entrypoint *alloc_entrypoint(void)
{
	struct entrypoint *ep = __alloc_entrypoint(0);

	return ep;
}
66 static struct basic_block *alloc_basic_block(struct entrypoint *ep, struct position pos)
68 struct basic_block *bb = __alloc_basic_block(0);
69 bb->context = -1;
70 bb->pos = pos;
71 bb->ep = ep;
72 return bb;
75 static struct multijmp *alloc_multijmp(struct basic_block *target, int begin, int end)
77 struct multijmp *multijmp = __alloc_multijmp(0);
78 multijmp->target = target;
79 multijmp->begin = begin;
80 multijmp->end = end;
81 return multijmp;
84 static inline int regno(pseudo_t n)
86 int retval = -1;
87 if (n && n->type == PSEUDO_REG)
88 retval = n->nr;
89 return retval;
92 const char *show_pseudo(pseudo_t pseudo)
94 static int n;
95 static char buffer[4][64];
96 char *buf;
97 int i;
99 if (!pseudo)
100 return "no pseudo";
101 if (pseudo == VOID)
102 return "VOID";
103 buf = buffer[3 & ++n];
104 switch(pseudo->type) {
105 case PSEUDO_SYM: {
106 struct symbol *sym = pseudo->sym;
107 struct expression *expr;
109 if (sym->bb_target) {
110 snprintf(buf, 64, ".L%p", sym->bb_target);
111 break;
113 if (sym->ident) {
114 snprintf(buf, 64, "%s", show_ident(sym->ident));
115 break;
117 expr = sym->initializer;
118 snprintf(buf, 64, "<anon symbol:%p>", sym);
119 switch (expr->type) {
120 case EXPR_VALUE:
121 snprintf(buf, 64, "<symbol value: %lld>", expr->value);
122 break;
123 case EXPR_STRING:
124 return show_string(expr->string);
125 default:
126 break;
128 break;
130 case PSEUDO_REG:
131 i = snprintf(buf, 64, "%%r%d", pseudo->nr);
132 if (pseudo->ident)
133 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
134 break;
135 case PSEUDO_VAL: {
136 long long value = pseudo->value;
137 if (value > 1000 || value < -1000)
138 snprintf(buf, 64, "$%#llx", value);
139 else
140 snprintf(buf, 64, "$%lld", value);
141 break;
143 case PSEUDO_ARG:
144 snprintf(buf, 64, "%%arg%d", pseudo->nr);
145 break;
146 case PSEUDO_PHI:
147 i = snprintf(buf, 64, "%%phi%d", pseudo->nr);
148 if (pseudo->ident)
149 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
150 break;
151 default:
152 snprintf(buf, 64, "<bad pseudo type %d>", pseudo->type);
154 return buf;
157 static const char *opcodes[] = {
158 [OP_BADOP] = "bad_op",
160 /* Fn entrypoint */
161 [OP_ENTRY] = "<entry-point>",
163 /* Terminator */
164 [OP_RET] = "ret",
165 [OP_BR] = "br",
166 [OP_SWITCH] = "switch",
167 [OP_INVOKE] = "invoke",
168 [OP_COMPUTEDGOTO] = "jmp *",
169 [OP_UNWIND] = "unwind",
171 /* Binary */
172 [OP_ADD] = "add",
173 [OP_SUB] = "sub",
174 [OP_MULU] = "mulu",
175 [OP_MULS] = "muls",
176 [OP_DIVU] = "divu",
177 [OP_DIVS] = "divs",
178 [OP_MODU] = "modu",
179 [OP_MODS] = "mods",
180 [OP_SHL] = "shl",
181 [OP_LSR] = "lsr",
182 [OP_ASR] = "asr",
184 /* Logical */
185 [OP_AND] = "and",
186 [OP_OR] = "or",
187 [OP_XOR] = "xor",
188 [OP_AND_BOOL] = "and-bool",
189 [OP_OR_BOOL] = "or-bool",
191 /* Binary comparison */
192 [OP_SET_EQ] = "seteq",
193 [OP_SET_NE] = "setne",
194 [OP_SET_LE] = "setle",
195 [OP_SET_GE] = "setge",
196 [OP_SET_LT] = "setlt",
197 [OP_SET_GT] = "setgt",
198 [OP_SET_B] = "setb",
199 [OP_SET_A] = "seta",
200 [OP_SET_BE] = "setbe",
201 [OP_SET_AE] = "setae",
203 /* Uni */
204 [OP_NOT] = "not",
205 [OP_NEG] = "neg",
207 /* Special three-input */
208 [OP_SEL] = "select",
210 /* Memory */
211 [OP_MALLOC] = "malloc",
212 [OP_FREE] = "free",
213 [OP_ALLOCA] = "alloca",
214 [OP_LOAD] = "load",
215 [OP_STORE] = "store",
216 [OP_SETVAL] = "set",
217 [OP_SYMADDR] = "symaddr",
218 [OP_GET_ELEMENT_PTR] = "getelem",
220 /* Other */
221 [OP_PHI] = "phi",
222 [OP_PHISOURCE] = "phisrc",
223 [OP_CAST] = "cast",
224 [OP_SCAST] = "scast",
225 [OP_FPCAST] = "fpcast",
226 [OP_PTRCAST] = "ptrcast",
227 [OP_CALL] = "call",
228 [OP_VANEXT] = "va_next",
229 [OP_VAARG] = "va_arg",
230 [OP_SLICE] = "slice",
231 [OP_SNOP] = "snop",
232 [OP_LNOP] = "lnop",
233 [OP_NOP] = "nop",
234 [OP_DEATHNOTE] = "dead",
235 [OP_ASM] = "asm",
237 /* Sparse tagging (line numbers, context, whatever) */
238 [OP_CONTEXT] = "context",
239 [OP_RANGE] = "range-check",
241 [OP_COPY] = "copy",
244 static char *show_asm_constraints(char *buf, const char *sep, struct asm_constraint_list *list)
246 struct asm_constraint *entry;
248 FOR_EACH_PTR(list, entry) {
249 buf += sprintf(buf, "%s\"%s\"", sep, entry->constraint);
250 if (entry->pseudo)
251 buf += sprintf(buf, " (%s)", show_pseudo(entry->pseudo));
252 if (entry->ident)
253 buf += sprintf(buf, " [%s]", show_ident(entry->ident));
254 sep = ", ";
255 } END_FOR_EACH_PTR(entry);
256 return buf;
259 static char *show_asm(char *buf, struct instruction *insn)
261 struct asm_rules *rules = insn->asm_rules;
263 buf += sprintf(buf, "\"%s\"", insn->string);
264 buf = show_asm_constraints(buf, "\n\t\tout: ", rules->outputs);
265 buf = show_asm_constraints(buf, "\n\t\tin: ", rules->inputs);
266 buf = show_asm_constraints(buf, "\n\t\tclobber: ", rules->clobbers);
267 return buf;
270 const char *show_instruction(struct instruction *insn)
272 int opcode = insn->opcode;
273 static char buffer[1024];
274 char *buf;
276 buf = buffer;
277 if (!insn->bb)
278 buf += sprintf(buf, "# ");
280 if (opcode < sizeof(opcodes)/sizeof(char *)) {
281 const char *op = opcodes[opcode];
282 if (!op)
283 buf += sprintf(buf, "opcode:%d", opcode);
284 else
285 buf += sprintf(buf, "%s", op);
286 if (insn->size)
287 buf += sprintf(buf, ".%d", insn->size);
288 memset(buf, ' ', 20);
289 buf++;
292 if (buf < buffer + 12)
293 buf = buffer + 12;
294 switch (opcode) {
295 case OP_RET:
296 if (insn->src && insn->src != VOID)
297 buf += sprintf(buf, "%s", show_pseudo(insn->src));
298 break;
299 case OP_BR:
300 if (insn->bb_true && insn->bb_false) {
301 buf += sprintf(buf, "%s, .L%p, .L%p", show_pseudo(insn->cond), insn->bb_true, insn->bb_false);
302 break;
304 buf += sprintf(buf, ".L%p", insn->bb_true ? insn->bb_true : insn->bb_false);
305 break;
307 case OP_SYMADDR: {
308 struct symbol *sym = insn->symbol->sym;
309 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
311 if (sym->bb_target) {
312 buf += sprintf(buf, ".L%p", sym->bb_target);
313 break;
315 if (sym->ident) {
316 buf += sprintf(buf, "%s", show_ident(sym->ident));
317 break;
319 buf += sprintf(buf, "<anon symbol:%p>", sym);
320 break;
323 case OP_SETVAL: {
324 struct expression *expr = insn->val;
325 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
327 if (!expr) {
328 buf += sprintf(buf, "%s", "<none>");
329 break;
332 switch (expr->type) {
333 case EXPR_VALUE:
334 buf += sprintf(buf, "%lld", expr->value);
335 break;
336 case EXPR_FVALUE:
337 buf += sprintf(buf, "%Lf", expr->fvalue);
338 break;
339 case EXPR_STRING:
340 buf += sprintf(buf, "%.40s", show_string(expr->string));
341 break;
342 case EXPR_SYMBOL:
343 buf += sprintf(buf, "%s", show_ident(expr->symbol->ident));
344 break;
345 case EXPR_LABEL:
346 buf += sprintf(buf, ".L%p", expr->symbol->bb_target);
347 break;
348 default:
349 buf += sprintf(buf, "SETVAL EXPR TYPE %d", expr->type);
351 break;
353 case OP_SWITCH: {
354 struct multijmp *jmp;
355 buf += sprintf(buf, "%s", show_pseudo(insn->target));
356 FOR_EACH_PTR(insn->multijmp_list, jmp) {
357 if (jmp->begin == jmp->end)
358 buf += sprintf(buf, ", %d -> .L%p", jmp->begin, jmp->target);
359 else if (jmp->begin < jmp->end)
360 buf += sprintf(buf, ", %d ... %d -> .L%p", jmp->begin, jmp->end, jmp->target);
361 else
362 buf += sprintf(buf, ", default -> .L%p", jmp->target);
363 } END_FOR_EACH_PTR(jmp);
364 break;
366 case OP_COMPUTEDGOTO: {
367 struct multijmp *jmp;
368 buf += sprintf(buf, "%s", show_pseudo(insn->target));
369 FOR_EACH_PTR(insn->multijmp_list, jmp) {
370 buf += sprintf(buf, ", .L%p", jmp->target);
371 } END_FOR_EACH_PTR(jmp);
372 break;
375 case OP_PHISOURCE: {
376 struct instruction *phi;
377 buf += sprintf(buf, "%s <- %s ", show_pseudo(insn->target), show_pseudo(insn->phi_src));
378 FOR_EACH_PTR(insn->phi_users, phi) {
379 buf += sprintf(buf, " (%s)", show_pseudo(phi->target));
380 } END_FOR_EACH_PTR(phi);
381 break;
384 case OP_PHI: {
385 pseudo_t phi;
386 const char *s = " <-";
387 buf += sprintf(buf, "%s", show_pseudo(insn->target));
388 FOR_EACH_PTR(insn->phi_list, phi) {
389 buf += sprintf(buf, "%s %s", s, show_pseudo(phi));
390 s = ",";
391 } END_FOR_EACH_PTR(phi);
392 break;
394 case OP_LOAD: case OP_LNOP:
395 buf += sprintf(buf, "%s <- %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
396 break;
397 case OP_STORE: case OP_SNOP:
398 buf += sprintf(buf, "%s -> %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
399 break;
400 case OP_CALL: {
401 struct pseudo *arg;
402 if (insn->target && insn->target != VOID)
403 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
404 buf += sprintf(buf, "%s", show_pseudo(insn->func));
405 FOR_EACH_PTR(insn->arguments, arg) {
406 buf += sprintf(buf, ", %s", show_pseudo(arg));
407 } END_FOR_EACH_PTR(arg);
408 break;
410 case OP_CAST:
411 case OP_SCAST:
412 case OP_FPCAST:
413 case OP_PTRCAST:
414 buf += sprintf(buf, "%s <- (%d) %s",
415 show_pseudo(insn->target),
416 type_size(insn->orig_type),
417 show_pseudo(insn->src));
418 break;
419 case OP_BINARY ... OP_BINARY_END:
420 case OP_BINCMP ... OP_BINCMP_END:
421 buf += sprintf(buf, "%s <- %s, %s", show_pseudo(insn->target), show_pseudo(insn->src1), show_pseudo(insn->src2));
422 break;
424 case OP_SEL:
425 buf += sprintf(buf, "%s <- %s, %s, %s", show_pseudo(insn->target),
426 show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
427 break;
429 case OP_SLICE:
430 buf += sprintf(buf, "%s <- %s, %d, %d", show_pseudo(insn->target), show_pseudo(insn->base), insn->from, insn->len);
431 break;
433 case OP_NOT: case OP_NEG:
434 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
435 break;
437 case OP_CONTEXT:
438 buf += sprintf(buf, "%s%d", insn->check ? "check: " : "", insn->increment);
439 break;
440 case OP_RANGE:
441 buf += sprintf(buf, "%s between %s..%s", show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
442 break;
443 case OP_NOP:
444 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
445 break;
446 case OP_DEATHNOTE:
447 buf += sprintf(buf, "%s", show_pseudo(insn->target));
448 break;
449 case OP_ASM:
450 buf = show_asm(buf, insn);
451 break;
452 case OP_COPY:
453 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src));
454 break;
455 default:
456 break;
458 do { --buf; } while (*buf == ' ');
459 *++buf = 0;
460 return buffer;
463 void show_bb(struct basic_block *bb)
465 struct instruction *insn;
467 printf(".L%p:\n", bb);
468 if (verbose) {
469 pseudo_t needs, defines;
470 printf("%s:%d\n", stream_name(bb->pos.stream), bb->pos.line);
472 FOR_EACH_PTR(bb->needs, needs) {
473 struct instruction *def = needs->def;
474 if (def->opcode != OP_PHI) {
475 printf(" **uses %s (from .L%p)**\n", show_pseudo(needs), def->bb);
476 } else {
477 pseudo_t phi;
478 const char *sep = " ";
479 printf(" **uses %s (from", show_pseudo(needs));
480 FOR_EACH_PTR(def->phi_list, phi) {
481 if (phi == VOID)
482 continue;
483 printf("%s(%s:.L%p)", sep, show_pseudo(phi), phi->def->bb);
484 sep = ", ";
485 } END_FOR_EACH_PTR(phi);
486 printf(")**\n");
488 } END_FOR_EACH_PTR(needs);
490 FOR_EACH_PTR(bb->defines, defines) {
491 printf(" **defines %s **\n", show_pseudo(defines));
492 } END_FOR_EACH_PTR(defines);
494 if (bb->parents) {
495 struct basic_block *from;
496 FOR_EACH_PTR(bb->parents, from) {
497 printf(" **from %p (%s:%d:%d)**\n", from,
498 stream_name(from->pos.stream), from->pos.line, from->pos.pos);
499 } END_FOR_EACH_PTR(from);
502 if (bb->children) {
503 struct basic_block *to;
504 FOR_EACH_PTR(bb->children, to) {
505 printf(" **to %p (%s:%d:%d)**\n", to,
506 stream_name(to->pos.stream), to->pos.line, to->pos.pos);
507 } END_FOR_EACH_PTR(to);
511 FOR_EACH_PTR(bb->insns, insn) {
512 if (!insn->bb && verbose < 2)
513 continue;
514 printf("\t%s\n", show_instruction(insn));
515 } END_FOR_EACH_PTR(insn);
516 if (!bb_terminated(bb))
517 printf("\tEND\n");
520 static void show_symbol_usage(pseudo_t pseudo)
522 struct pseudo_user *pu;
524 if (pseudo) {
525 FOR_EACH_PTR(pseudo->users, pu) {
526 printf("\t%s\n", show_instruction(pu->insn));
527 } END_FOR_EACH_PTR(pu);
531 void show_entry(struct entrypoint *ep)
533 struct symbol *sym;
534 struct basic_block *bb;
536 printf("%s:\n", show_ident(ep->name->ident));
538 if (verbose) {
539 printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
541 FOR_EACH_PTR(ep->syms, sym) {
542 if (!sym->pseudo)
543 continue;
544 if (!sym->pseudo->users)
545 continue;
546 printf(" sym: %p %s\n", sym, show_ident(sym->ident));
547 if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE))
548 printf("\texternal visibility\n");
549 show_symbol_usage(sym->pseudo);
550 } END_FOR_EACH_PTR(sym);
552 printf("\n");
555 FOR_EACH_PTR(ep->bbs, bb) {
556 if (!bb)
557 continue;
558 if (!bb->parents && !bb->children && !bb->insns && verbose < 2)
559 continue;
560 show_bb(bb);
561 printf("\n");
562 } END_FOR_EACH_PTR(bb);
564 printf("\n");
567 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos)
569 if (label->bb_target)
570 warning(pos, "label '%s' already bound", show_ident(label->ident));
571 label->bb_target = bb;
574 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label)
576 struct basic_block *bb = label->bb_target;
578 if (!bb) {
579 bb = alloc_basic_block(ep, label->pos);
580 label->bb_target = bb;
582 return bb;
585 static void finish_block(struct entrypoint *ep)
587 struct basic_block *src = ep->active;
588 if (bb_reachable(src))
589 ep->active = NULL;
592 static void add_goto(struct entrypoint *ep, struct basic_block *dst)
594 struct basic_block *src = ep->active;
595 if (bb_reachable(src)) {
596 struct instruction *br = alloc_instruction(OP_BR, 0);
597 br->bb_true = dst;
598 add_bb(&dst->parents, src);
599 add_bb(&src->children, dst);
600 br->bb = src;
601 add_instruction(&src->insns, br);
602 ep->active = NULL;
606 static void add_one_insn(struct entrypoint *ep, struct instruction *insn)
608 struct basic_block *bb = ep->active;
610 if (bb_reachable(bb)) {
611 insn->bb = bb;
612 add_instruction(&bb->insns, insn);
616 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb)
618 if (!bb_terminated(ep->active))
619 add_goto(ep, bb);
621 ep->active = bb;
622 if (bb_reachable(bb))
623 add_bb(&ep->bbs, bb);
626 static void remove_parent(struct basic_block *child, struct basic_block *parent)
628 remove_bb_from_list(&child->parents, parent, 1);
629 if (!child->parents)
630 kill_bb(child);
633 /* Change a "switch" into a branch */
634 void insert_branch(struct basic_block *bb, struct instruction *jmp, struct basic_block *target)
636 struct instruction *br, *old;
637 struct basic_block *child;
639 /* Remove the switch */
640 old = delete_last_instruction(&bb->insns);
641 assert(old == jmp);
643 br = alloc_instruction(OP_BR, 0);
644 br->bb = bb;
645 br->bb_true = target;
646 add_instruction(&bb->insns, br);
648 FOR_EACH_PTR(bb->children, child) {
649 if (child == target) {
650 target = NULL; /* Trigger just once */
651 continue;
653 DELETE_CURRENT_PTR(child);
654 remove_parent(child, bb);
655 } END_FOR_EACH_PTR(child);
656 PACK_PTR_LIST(&bb->children);
660 void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi_node, pseudo_t true, pseudo_t false)
662 pseudo_t target;
663 struct instruction *select;
665 /* Remove the 'br' */
666 delete_last_instruction(&bb->insns);
668 select = alloc_instruction(OP_SEL, phi_node->size);
669 select->bb = bb;
671 assert(br->cond);
672 use_pseudo(select, br->cond, &select->src1);
674 target = phi_node->target;
675 assert(target->def == phi_node);
676 select->target = target;
677 target->def = select;
679 use_pseudo(select, true, &select->src2);
680 use_pseudo(select, false, &select->src3);
682 add_instruction(&bb->insns, select);
683 add_instruction(&bb->insns, br);
686 static inline int bb_empty(struct basic_block *bb)
688 return !bb->insns;
691 /* Add a label to the currently active block, return new active block */
692 static struct basic_block * add_label(struct entrypoint *ep, struct symbol *label)
694 struct basic_block *bb = label->bb_target;
696 if (bb) {
697 set_activeblock(ep, bb);
698 return bb;
700 bb = ep->active;
701 if (!bb_reachable(bb) || !bb_empty(bb)) {
702 bb = alloc_basic_block(ep, label->pos);
703 set_activeblock(ep, bb);
705 label->bb_target = bb;
706 return bb;
709 static void add_branch(struct entrypoint *ep, struct expression *expr, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false)
711 struct basic_block *bb = ep->active;
712 struct instruction *br;
714 if (bb_reachable(bb)) {
715 br = alloc_instruction(OP_BR, 0);
716 use_pseudo(br, cond, &br->cond);
717 br->bb_true = bb_true;
718 br->bb_false = bb_false;
719 add_bb(&bb_true->parents, bb);
720 add_bb(&bb_false->parents, bb);
721 add_bb(&bb->children, bb_true);
722 add_bb(&bb->children, bb_false);
723 add_one_insn(ep, br);
727 /* Dummy pseudo allocator */
728 pseudo_t alloc_pseudo(struct instruction *def)
730 static int nr = 0;
731 struct pseudo * pseudo = __alloc_pseudo(0);
732 pseudo->type = PSEUDO_REG;
733 pseudo->nr = ++nr;
734 pseudo->def = def;
735 return pseudo;
738 static void clear_symbol_pseudos(struct entrypoint *ep)
740 pseudo_t pseudo;
742 FOR_EACH_PTR(ep->accesses, pseudo) {
743 pseudo->sym->pseudo = NULL;
744 } END_FOR_EACH_PTR(pseudo);
747 static pseudo_t symbol_pseudo(struct entrypoint *ep, struct symbol *sym)
749 pseudo_t pseudo;
751 if (!sym)
752 return VOID;
754 pseudo = sym->pseudo;
755 if (!pseudo) {
756 pseudo = __alloc_pseudo(0);
757 pseudo->nr = -1;
758 pseudo->type = PSEUDO_SYM;
759 pseudo->sym = sym;
760 pseudo->ident = sym->ident;
761 sym->pseudo = pseudo;
762 add_pseudo(&ep->accesses, pseudo);
764 /* Symbol pseudos have neither nr, usage nor def */
765 return pseudo;
768 pseudo_t value_pseudo(long long val)
770 #define MAX_VAL_HASH 64
771 static struct pseudo_list *prev[MAX_VAL_HASH];
772 int hash = val & (MAX_VAL_HASH-1);
773 struct pseudo_list **list = prev + hash;
774 pseudo_t pseudo;
776 FOR_EACH_PTR(*list, pseudo) {
777 if (pseudo->value == val)
778 return pseudo;
779 } END_FOR_EACH_PTR(pseudo);
781 pseudo = __alloc_pseudo(0);
782 pseudo->type = PSEUDO_VAL;
783 pseudo->value = val;
784 add_pseudo(list, pseudo);
786 /* Value pseudos have neither nr, usage nor def */
787 return pseudo;
790 static pseudo_t argument_pseudo(struct entrypoint *ep, int nr)
792 pseudo_t pseudo = __alloc_pseudo(0);
793 struct instruction *entry = ep->entry;
795 pseudo->type = PSEUDO_ARG;
796 pseudo->nr = nr;
797 pseudo->def = entry;
798 add_pseudo(&entry->arg_list, pseudo);
800 /* Argument pseudos have neither usage nor def */
801 return pseudo;
804 pseudo_t alloc_phi(struct basic_block *source, pseudo_t pseudo, int size)
806 struct instruction *insn = alloc_instruction(OP_PHISOURCE, size);
807 pseudo_t phi = __alloc_pseudo(0);
808 static int nr = 0;
810 phi->type = PSEUDO_PHI;
811 phi->nr = ++nr;
812 phi->def = insn;
814 use_pseudo(insn, pseudo, &insn->phi_src);
815 insn->bb = source;
816 insn->target = phi;
817 add_instruction(&source->insns, insn);
818 return phi;
822 * We carry the "access_data" structure around for any accesses,
823 * which simplifies things a lot. It contains all the access
824 * information in one place.
826 struct access_data {
827 struct symbol *result_type; // result ctype
828 struct symbol *source_type; // source ctype
829 pseudo_t address; // pseudo containing address ..
830 pseudo_t origval; // pseudo for original value ..
831 unsigned int offset, alignment; // byte offset
832 unsigned int bit_size, bit_offset; // which bits
833 struct position pos;
/* Called when an access is complete; currently there is nothing to do. */
static void finish_address_gen(struct entrypoint *ep, struct access_data *ad)
{
	/* intentionally empty */
}
840 static int linearize_simple_address(struct entrypoint *ep,
841 struct expression *addr,
842 struct access_data *ad)
844 if (addr->type == EXPR_SYMBOL) {
845 linearize_one_symbol(ep, addr->symbol);
846 ad->address = symbol_pseudo(ep, addr->symbol);
847 return 1;
849 if (addr->type == EXPR_BINOP) {
850 if (addr->right->type == EXPR_VALUE) {
851 if (addr->op == '+') {
852 ad->offset += get_expression_value(addr->right);
853 return linearize_simple_address(ep, addr->left, ad);
857 ad->address = linearize_expression(ep, addr);
858 return 1;
861 static struct symbol *base_type(struct symbol *sym)
863 struct symbol *base = sym;
865 if (sym) {
866 if (sym->type == SYM_NODE)
867 base = base->ctype.base_type;
868 if (base->type == SYM_BITFIELD)
869 return base->ctype.base_type;
871 return sym;
874 static int linearize_address_gen(struct entrypoint *ep,
875 struct expression *expr,
876 struct access_data *ad)
878 struct symbol *ctype = expr->ctype;
880 if (!ctype)
881 return 0;
882 ad->pos = expr->pos;
883 ad->result_type = ctype;
884 ad->source_type = base_type(ctype);
885 ad->bit_size = ctype->bit_size;
886 ad->alignment = ctype->ctype.alignment;
887 ad->bit_offset = ctype->bit_offset;
888 if (expr->type == EXPR_PREOP && expr->op == '*')
889 return linearize_simple_address(ep, expr->unop, ad);
891 warning(expr->pos, "generating address of non-lvalue (%d)", expr->type);
892 return 0;
895 static pseudo_t add_load(struct entrypoint *ep, struct access_data *ad)
897 struct instruction *insn;
898 pseudo_t new;
900 new = ad->origval;
901 if (0 && new)
902 return new;
904 insn = alloc_typed_instruction(OP_LOAD, ad->source_type);
905 new = alloc_pseudo(insn);
906 ad->origval = new;
908 insn->target = new;
909 insn->offset = ad->offset;
910 use_pseudo(insn, ad->address, &insn->src);
911 add_one_insn(ep, insn);
912 return new;
915 static void add_store(struct entrypoint *ep, struct access_data *ad, pseudo_t value)
917 struct basic_block *bb = ep->active;
919 if (bb_reachable(bb)) {
920 struct instruction *store = alloc_typed_instruction(OP_STORE, ad->source_type);
921 store->offset = ad->offset;
922 use_pseudo(store, value, &store->target);
923 use_pseudo(store, ad->address, &store->src);
924 add_one_insn(ep, store);
928 static pseudo_t linearize_store_gen(struct entrypoint *ep,
929 pseudo_t value,
930 struct access_data *ad)
932 pseudo_t store = value;
934 if (type_size(ad->source_type) != type_size(ad->result_type)) {
935 pseudo_t orig = add_load(ep, ad);
936 int shift = ad->bit_offset;
937 unsigned long long mask = (1ULL << ad->bit_size)-1;
939 if (shift) {
940 store = add_binary_op(ep, ad->source_type, OP_SHL, value, value_pseudo(shift));
941 mask <<= shift;
943 orig = add_binary_op(ep, ad->source_type, OP_AND, orig, value_pseudo(~mask));
944 store = add_binary_op(ep, ad->source_type, OP_OR, orig, store);
946 add_store(ep, ad, store);
947 return value;
950 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right)
952 struct instruction *insn = alloc_typed_instruction(op, ctype);
953 pseudo_t target = alloc_pseudo(insn);
954 insn->target = target;
955 use_pseudo(insn, left, &insn->src1);
956 use_pseudo(insn, right, &insn->src2);
957 add_one_insn(ep, insn);
958 return target;
961 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val)
963 struct instruction *insn = alloc_typed_instruction(OP_SETVAL, ctype);
964 pseudo_t target = alloc_pseudo(insn);
965 insn->target = target;
966 insn->val = val;
967 add_one_insn(ep, insn);
968 return target;
971 static pseudo_t add_symbol_address(struct entrypoint *ep, struct symbol *sym)
973 struct instruction *insn = alloc_instruction(OP_SYMADDR, bits_in_pointer);
974 pseudo_t target = alloc_pseudo(insn);
976 insn->target = target;
977 use_pseudo(insn, symbol_pseudo(ep, sym), &insn->symbol);
978 add_one_insn(ep, insn);
979 return target;
982 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct access_data *ad)
984 pseudo_t new = add_load(ep, ad);
986 if (ad->bit_offset) {
987 pseudo_t shift = value_pseudo(ad->bit_offset);
988 pseudo_t newval = add_binary_op(ep, ad->source_type, OP_LSR, new, shift);
989 new = newval;
992 return new;
995 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
997 struct access_data ad = { NULL, };
998 pseudo_t value;
1000 if (!linearize_address_gen(ep, expr, &ad))
1001 return VOID;
1002 value = linearize_load_gen(ep, &ad);
1003 finish_address_gen(ep, &ad);
1004 return value;
1007 /* FIXME: FP */
1008 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
1010 struct access_data ad = { NULL, };
1011 pseudo_t old, new, one;
1012 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB;
1014 if (!linearize_address_gen(ep, expr->unop, &ad))
1015 return VOID;
1017 old = linearize_load_gen(ep, &ad);
1018 one = value_pseudo(expr->op_value);
1019 new = add_binary_op(ep, expr->ctype, op, old, one);
1020 linearize_store_gen(ep, new, &ad);
1021 finish_address_gen(ep, &ad);
1022 return postop ? old : new;
1025 static pseudo_t add_uniop(struct entrypoint *ep, struct expression *expr, int op, pseudo_t src)
1027 struct instruction *insn = alloc_typed_instruction(op, expr->ctype);
1028 pseudo_t new = alloc_pseudo(insn);
1030 insn->target = new;
1031 use_pseudo(insn, src, &insn->src1);
1032 add_one_insn(ep, insn);
1033 return new;
1036 static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr)
1038 pseudo_t pre = linearize_expression(ep, expr->base);
1039 struct instruction *insn = alloc_typed_instruction(OP_SLICE, expr->ctype);
1040 pseudo_t new = alloc_pseudo(insn);
1042 insn->target = new;
1043 insn->from = expr->r_bitpos;
1044 insn->len = expr->r_nrbits;
1045 use_pseudo(insn, pre, &insn->base);
1046 add_one_insn(ep, insn);
1047 return new;
1050 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
1052 pseudo_t pre = linearize_expression(ep, expr->unop);
1053 switch (expr->op) {
1054 case '+':
1055 return pre;
1056 case '!': {
1057 pseudo_t zero = value_pseudo(0);
1058 return add_binary_op(ep, expr->unop->ctype, OP_SET_EQ, pre, zero);
1060 case '~':
1061 return add_uniop(ep, expr, OP_NOT, pre);
1062 case '-':
1063 return add_uniop(ep, expr, OP_NEG, pre);
1065 return VOID;
1068 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
1071 * '*' is an lvalue access, and is fundamentally different
1072 * from an arithmetic operation. Maybe it should have an
1073 * expression type of its own..
1075 if (expr->op == '*')
1076 return linearize_access(ep, expr);
1077 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
1078 return linearize_inc_dec(ep, expr, 0);
1079 return linearize_regular_preop(ep, expr);
1082 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr)
1084 return linearize_inc_dec(ep, expr, 1);
1088 * Casts to pointers are "less safe" than other casts, since
1089 * they imply type-unsafe accesses. "void *" is a special
1090 * case, since you can't access through it anyway without another
1091 * cast.
1093 static struct instruction *alloc_cast_instruction(struct symbol *ctype)
1095 int opcode = OP_CAST;
1096 struct symbol *base = ctype;
1098 if (base->ctype.modifiers & MOD_SIGNED)
1099 opcode = OP_SCAST;
1100 if (base->type == SYM_NODE)
1101 base = base->ctype.base_type;
1102 if (base->type == SYM_PTR) {
1103 base = base->ctype.base_type;
1104 if (base != &void_ctype)
1105 opcode = OP_PTRCAST;
1107 if (base->ctype.base_type == &fp_type)
1108 opcode = OP_FPCAST;
1109 return alloc_typed_instruction(opcode, ctype);
1112 static pseudo_t cast_pseudo(struct entrypoint *ep, pseudo_t src, struct symbol *from, struct symbol *to)
1114 pseudo_t result;
1115 struct instruction *insn;
1117 if (src == VOID)
1118 return VOID;
1119 if (!from || !to)
1120 return VOID;
1121 if (from->bit_size < 0 || to->bit_size < 0)
1122 return VOID;
1123 insn = alloc_cast_instruction(to);
1124 result = alloc_pseudo(insn);
1125 insn->target = result;
1126 insn->orig_type = from;
1127 use_pseudo(insn, src, &insn->src);
1128 add_one_insn(ep, insn);
1129 return result;
1132 static int opcode_sign(int opcode, struct symbol *ctype)
1134 if (ctype && (ctype->ctype.modifiers & MOD_SIGNED)) {
1135 switch(opcode) {
1136 case OP_MULU: case OP_DIVU: case OP_MODU: case OP_LSR:
1137 opcode++;
1140 return opcode;
1143 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
1145 struct access_data ad = { NULL, };
1146 struct expression *target = expr->left;
1147 struct expression *src = expr->right;
1148 pseudo_t value;
1150 value = linearize_expression(ep, src);
1151 if (!target || !linearize_address_gen(ep, target, &ad))
1152 return value;
1153 if (expr->op != '=') {
1154 pseudo_t oldvalue = linearize_load_gen(ep, &ad);
1155 pseudo_t dst;
1156 static const int op_trans[] = {
1157 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD,
1158 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB,
1159 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MULU,
1160 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIVU,
1161 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MODU,
1162 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL,
1163 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_LSR,
1164 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND,
1165 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR,
1166 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR
1168 int opcode;
1170 if (!src)
1171 return VOID;
1173 oldvalue = cast_pseudo(ep, oldvalue, src->ctype, expr->ctype);
1174 opcode = opcode_sign(op_trans[expr->op - SPECIAL_BASE], src->ctype);
1175 dst = add_binary_op(ep, src->ctype, opcode, oldvalue, value);
1176 value = cast_pseudo(ep, dst, expr->ctype, src->ctype);
1178 value = linearize_store_gen(ep, value, &ad);
1179 finish_address_gen(ep, &ad);
1180 return value;
1183 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr)
1185 struct expression *arg, *fn;
1186 struct instruction *insn = alloc_typed_instruction(OP_CALL, expr->ctype);
1187 pseudo_t retval, call;
1188 struct ctype *ctype = NULL;
1189 struct context *context;
1191 if (!expr->ctype) {
1192 warning(expr->pos, "call with no type!");
1193 return VOID;
1196 FOR_EACH_PTR(expr->args, arg) {
1197 pseudo_t new = linearize_expression(ep, arg);
1198 use_pseudo(insn, new, add_pseudo(&insn->arguments, new));
1199 } END_FOR_EACH_PTR(arg);
1201 fn = expr->fn;
1203 if (fn->ctype)
1204 ctype = &fn->ctype->ctype;
1206 if (fn->type == EXPR_PREOP) {
1207 if (fn->unop->type == EXPR_SYMBOL) {
1208 struct symbol *sym = fn->unop->symbol;
1209 if (sym->ctype.base_type->type == SYM_FN)
1210 fn = fn->unop;
1213 if (fn->type == EXPR_SYMBOL) {
1214 call = symbol_pseudo(ep, fn->symbol);
1215 } else {
1216 call = linearize_expression(ep, fn);
1218 use_pseudo(insn, call, &insn->func);
1219 retval = VOID;
1220 if (expr->ctype != &void_ctype)
1221 retval = alloc_pseudo(insn);
1222 insn->target = retval;
1223 add_one_insn(ep, insn);
1225 if (ctype) {
1226 FOR_EACH_PTR(ctype->contexts, context) {
1227 int in = context->in;
1228 int out = context->out;
1229 int check = 0;
1230 int context_diff;
1231 if (in < 0) {
1232 check = 1;
1233 in = 0;
1235 if (out < 0) {
1236 check = 0;
1237 out = 0;
1239 context_diff = out - in;
1240 if (check || context_diff) {
1241 insn = alloc_instruction(OP_CONTEXT, 0);
1242 insn->increment = context_diff;
1243 insn->check = check;
1244 insn->context_expr = context->context;
1245 add_one_insn(ep, insn);
1247 } END_FOR_EACH_PTR(context);
1250 return retval;
1253 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr)
1255 pseudo_t src1, src2, dst;
1256 static const int opcode[] = {
1257 ['+'] = OP_ADD, ['-'] = OP_SUB,
1258 ['*'] = OP_MULU, ['/'] = OP_DIVU,
1259 ['%'] = OP_MODU, ['&'] = OP_AND,
1260 ['|'] = OP_OR, ['^'] = OP_XOR,
1261 [SPECIAL_LEFTSHIFT] = OP_SHL,
1262 [SPECIAL_RIGHTSHIFT] = OP_LSR,
1263 [SPECIAL_LOGICAL_AND] = OP_AND_BOOL,
1264 [SPECIAL_LOGICAL_OR] = OP_OR_BOOL,
1266 int op;
1268 src1 = linearize_expression(ep, expr->left);
1269 src2 = linearize_expression(ep, expr->right);
1270 op = opcode_sign(opcode[expr->op], expr->ctype);
1271 dst = add_binary_op(ep, expr->ctype, op, src1, src2);
1272 return dst;
1275 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1277 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1279 static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr)
1281 pseudo_t cond, true, false, res;
1282 struct instruction *insn;
1284 true = linearize_expression(ep, expr->cond_true);
1285 false = linearize_expression(ep, expr->cond_false);
1286 cond = linearize_expression(ep, expr->conditional);
1288 insn = alloc_typed_instruction(OP_SEL, expr->ctype);
1289 if (!expr->cond_true)
1290 true = cond;
1291 use_pseudo(insn, cond, &insn->src1);
1292 use_pseudo(insn, true, &insn->src2);
1293 use_pseudo(insn, false, &insn->src3);
1295 res = alloc_pseudo(insn);
1296 insn->target = res;
1297 add_one_insn(ep, insn);
1298 return res;
1301 static pseudo_t add_join_conditional(struct entrypoint *ep, struct expression *expr,
1302 pseudo_t phi1, pseudo_t phi2)
1304 pseudo_t target;
1305 struct instruction *phi_node;
1307 if (phi1 == VOID)
1308 return phi2;
1309 if (phi2 == VOID)
1310 return phi1;
1312 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1313 use_pseudo(phi_node, phi1, add_pseudo(&phi_node->phi_list, phi1));
1314 use_pseudo(phi_node, phi2, add_pseudo(&phi_node->phi_list, phi2));
1315 phi_node->target = target = alloc_pseudo(phi_node);
1316 add_one_insn(ep, phi_node);
1317 return target;
1320 static pseudo_t linearize_short_conditional(struct entrypoint *ep, struct expression *expr,
1321 struct expression *cond,
1322 struct expression *expr_false)
1324 pseudo_t src1, src2;
1325 struct basic_block *bb_false;
1326 struct basic_block *merge = alloc_basic_block(ep, expr->pos);
1327 pseudo_t phi1, phi2;
1328 int size = type_size(expr->ctype);
1330 if (!expr_false || !ep->active)
1331 return VOID;
1333 bb_false = alloc_basic_block(ep, expr_false->pos);
1334 src1 = linearize_expression(ep, cond);
1335 phi1 = alloc_phi(ep->active, src1, size);
1336 add_branch(ep, expr, src1, merge, bb_false);
1338 set_activeblock(ep, bb_false);
1339 src2 = linearize_expression(ep, expr_false);
1340 phi2 = alloc_phi(ep->active, src2, size);
1341 set_activeblock(ep, merge);
1343 return add_join_conditional(ep, expr, phi1, phi2);
1346 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr,
1347 struct expression *cond,
1348 struct expression *expr_true,
1349 struct expression *expr_false)
1351 pseudo_t src1, src2;
1352 pseudo_t phi1, phi2;
1353 struct basic_block *bb_true, *bb_false, *merge;
1354 int size = type_size(expr->ctype);
1356 if (!cond || !expr_true || !expr_false || !ep->active)
1357 return VOID;
1358 bb_true = alloc_basic_block(ep, expr_true->pos);
1359 bb_false = alloc_basic_block(ep, expr_false->pos);
1360 merge = alloc_basic_block(ep, expr->pos);
1362 linearize_cond_branch(ep, cond, bb_true, bb_false);
1364 set_activeblock(ep, bb_true);
1365 src1 = linearize_expression(ep, expr_true);
1366 phi1 = alloc_phi(ep->active, src1, size);
1367 add_goto(ep, merge);
1369 set_activeblock(ep, bb_false);
1370 src2 = linearize_expression(ep, expr_false);
1371 phi2 = alloc_phi(ep->active, src2, size);
1372 set_activeblock(ep, merge);
1374 return add_join_conditional(ep, expr, phi1, phi2);
1377 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr)
1379 struct expression *shortcut;
1381 shortcut = alloc_const_expression(expr->pos, expr->op == SPECIAL_LOGICAL_OR);
1382 shortcut->ctype = expr->ctype;
1383 return linearize_conditional(ep, expr, expr->left, shortcut, expr->right);
1386 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr)
1388 static const int cmpop[] = {
1389 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT,
1390 [SPECIAL_EQUAL] = OP_SET_EQ,
1391 [SPECIAL_NOTEQUAL] = OP_SET_NE,
1392 [SPECIAL_GTE] = OP_SET_GE,
1393 [SPECIAL_LTE] = OP_SET_LE,
1394 [SPECIAL_UNSIGNED_LT] = OP_SET_B,
1395 [SPECIAL_UNSIGNED_GT] = OP_SET_A,
1396 [SPECIAL_UNSIGNED_LTE] = OP_SET_BE,
1397 [SPECIAL_UNSIGNED_GTE] = OP_SET_AE,
1400 pseudo_t src1 = linearize_expression(ep, expr->left);
1401 pseudo_t src2 = linearize_expression(ep, expr->right);
1402 pseudo_t dst = add_binary_op(ep, expr->left->ctype, cmpop[expr->op], src1, src2);
1403 return dst;
1407 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1409 pseudo_t cond;
1411 if (!expr || !bb_reachable(ep->active))
1412 return VOID;
1414 switch (expr->type) {
1416 case EXPR_STRING:
1417 case EXPR_VALUE:
1418 add_goto(ep, expr->value ? bb_true : bb_false);
1419 return VOID;
1421 case EXPR_FVALUE:
1422 add_goto(ep, expr->fvalue ? bb_true : bb_false);
1423 return VOID;
1425 case EXPR_LOGICAL:
1426 linearize_logical_branch(ep, expr, bb_true, bb_false);
1427 return VOID;
1429 case EXPR_COMPARE:
1430 cond = linearize_compare(ep, expr);
1431 add_branch(ep, expr, cond, bb_true, bb_false);
1432 break;
1434 case EXPR_PREOP:
1435 if (expr->op == '!')
1436 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true);
1437 /* fall through */
1438 default: {
1439 cond = linearize_expression(ep, expr);
1440 add_branch(ep, expr, cond, bb_true, bb_false);
1442 return VOID;
1445 return VOID;
1450 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1452 struct basic_block *next = alloc_basic_block(ep, expr->pos);
1454 if (expr->op == SPECIAL_LOGICAL_OR)
1455 linearize_cond_branch(ep, expr->left, bb_true, next);
1456 else
1457 linearize_cond_branch(ep, expr->left, next, bb_false);
1458 set_activeblock(ep, next);
1459 linearize_cond_branch(ep, expr->right, bb_true, bb_false);
1460 return VOID;
1463 static pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr)
1465 pseudo_t src;
1466 struct expression *orig = expr->cast_expression;
1468 if (!orig)
1469 return VOID;
1471 src = linearize_expression(ep, orig);
1472 return cast_pseudo(ep, src, orig->ctype, expr->ctype);
1475 static pseudo_t linearize_position(struct entrypoint *ep, struct expression *pos, struct access_data *ad)
1477 struct expression *init_expr = pos->init_expr;
1479 ad->offset = pos->init_offset;
1480 ad->source_type = base_type(init_expr->ctype);
1481 ad->result_type = init_expr->ctype;
1482 return linearize_initializer(ep, init_expr, ad);
1485 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *ad)
1487 switch (initializer->type) {
1488 case EXPR_INITIALIZER: {
1489 struct expression *expr;
1490 FOR_EACH_PTR(initializer->expr_list, expr) {
1491 linearize_initializer(ep, expr, ad);
1492 } END_FOR_EACH_PTR(expr);
1493 break;
1495 case EXPR_POS:
1496 linearize_position(ep, initializer, ad);
1497 break;
1498 default: {
1499 pseudo_t value = linearize_expression(ep, initializer);
1500 ad->source_type = base_type(initializer->ctype);
1501 ad->result_type = initializer->ctype;
1502 linearize_store_gen(ep, value, ad);
1506 return VOID;
1509 static void linearize_argument(struct entrypoint *ep, struct symbol *arg, int nr)
1511 struct access_data ad = { NULL, };
1513 ad.source_type = arg;
1514 ad.result_type = arg;
1515 ad.address = symbol_pseudo(ep, arg);
1516 linearize_store_gen(ep, argument_pseudo(ep, nr), &ad);
1517 finish_address_gen(ep, &ad);
1520 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
1522 if (!expr)
1523 return VOID;
1525 current_pos = expr->pos;
1526 switch (expr->type) {
1527 case EXPR_SYMBOL:
1528 linearize_one_symbol(ep, expr->symbol);
1529 return add_symbol_address(ep, expr->symbol);
1531 case EXPR_VALUE:
1532 return value_pseudo(expr->value);
1534 case EXPR_STRING: case EXPR_FVALUE: case EXPR_LABEL:
1535 return add_setval(ep, expr->ctype, expr);
1537 case EXPR_STATEMENT:
1538 return linearize_statement(ep, expr->statement);
1540 case EXPR_CALL:
1541 return linearize_call_expression(ep, expr);
1543 case EXPR_BINOP:
1544 return linearize_binop(ep, expr);
1546 case EXPR_LOGICAL:
1547 return linearize_logical(ep, expr);
1549 case EXPR_COMPARE:
1550 return linearize_compare(ep, expr);
1552 case EXPR_SELECT:
1553 return linearize_select(ep, expr);
1555 case EXPR_CONDITIONAL:
1556 if (!expr->cond_true)
1557 return linearize_short_conditional(ep, expr, expr->conditional, expr->cond_false);
1559 return linearize_conditional(ep, expr, expr->conditional,
1560 expr->cond_true, expr->cond_false);
1562 case EXPR_COMMA:
1563 linearize_expression(ep, expr->left);
1564 return linearize_expression(ep, expr->right);
1566 case EXPR_ASSIGNMENT:
1567 return linearize_assignment(ep, expr);
1569 case EXPR_PREOP:
1570 return linearize_preop(ep, expr);
1572 case EXPR_POSTOP:
1573 return linearize_postop(ep, expr);
1575 case EXPR_CAST:
1576 case EXPR_IMPLIED_CAST:
1577 return linearize_cast(ep, expr);
1579 case EXPR_SLICE:
1580 return linearize_slice(ep, expr);
1582 case EXPR_INITIALIZER:
1583 case EXPR_POS:
1584 warning(expr->pos, "unexpected initializer expression (%d %d)", expr->type, expr->op);
1585 return VOID;
1586 default:
1587 warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op);
1588 return VOID;
1590 return VOID;
1593 static void linearize_one_symbol(struct entrypoint *ep, struct symbol *sym)
1595 struct access_data ad = { NULL, };
1597 if (!sym || !sym->initializer || sym->initialized)
1598 return;
1600 /* We need to output these puppies some day too.. */
1601 if (sym->ctype.modifiers & (MOD_STATIC | MOD_TOPLEVEL))
1602 return;
1604 sym->initialized = 1;
1605 ad.address = symbol_pseudo(ep, sym);
1606 linearize_initializer(ep, sym->initializer, &ad);
1607 finish_address_gen(ep, &ad);
1610 static pseudo_t linearize_compound_statement(struct entrypoint *ep, struct statement *stmt)
1612 pseudo_t pseudo;
1613 struct statement *s;
1614 struct symbol *ret = stmt->ret;
1616 pseudo = VOID;
1617 FOR_EACH_PTR(stmt->stmts, s) {
1618 pseudo = linearize_statement(ep, s);
1619 } END_FOR_EACH_PTR(s);
1621 if (ret) {
1622 struct basic_block *bb = add_label(ep, ret);
1623 struct instruction *phi_node = first_instruction(bb->insns);
1625 if (!phi_node)
1626 return pseudo;
1628 if (pseudo_list_size(phi_node->phi_list)==1) {
1629 pseudo = first_pseudo(phi_node->phi_list);
1630 assert(pseudo->type == PSEUDO_PHI);
1631 return pseudo->def->src1;
1633 return phi_node->target;
1635 return pseudo;
1638 static pseudo_t linearize_context(struct entrypoint *ep, struct statement *stmt)
1640 struct instruction *insn = alloc_instruction(OP_CONTEXT, 0);
1641 struct expression *expr = stmt->expression;
1642 int value = 0;
1644 if (expr->type == EXPR_VALUE)
1645 value = expr->value;
1647 insn->increment = value;
1648 insn->context_expr = stmt->context;
1649 add_one_insn(ep, insn);
1650 return VOID;
1653 static pseudo_t linearize_range(struct entrypoint *ep, struct statement *stmt)
1655 struct instruction *insn = alloc_instruction(OP_RANGE, 0);
1657 use_pseudo(insn, linearize_expression(ep, stmt->range_expression), &insn->src1);
1658 use_pseudo(insn, linearize_expression(ep, stmt->range_low), &insn->src2);
1659 use_pseudo(insn, linearize_expression(ep, stmt->range_high), &insn->src3);
1660 add_one_insn(ep, insn);
1661 return VOID;
1664 ALLOCATOR(asm_rules, "asm rules");
1665 ALLOCATOR(asm_constraint, "asm constraints");
1667 static void add_asm_input(struct entrypoint *ep, struct instruction *insn, struct expression *expr,
1668 const char *constraint, const struct ident *ident)
1670 pseudo_t pseudo = linearize_expression(ep, expr);
1671 struct asm_constraint *rule = __alloc_asm_constraint(0);
1673 rule->ident = ident;
1674 rule->constraint = constraint;
1675 use_pseudo(insn, pseudo, &rule->pseudo);
1676 add_ptr_list(&insn->asm_rules->inputs, rule);
1679 static void add_asm_output(struct entrypoint *ep, struct instruction *insn, struct expression *expr,
1680 const char *constraint, const struct ident *ident)
1682 struct access_data ad = { NULL, };
1683 pseudo_t pseudo = alloc_pseudo(insn);
1684 struct asm_constraint *rule;
1686 if (!expr || !linearize_address_gen(ep, expr, &ad))
1687 return;
1688 linearize_store_gen(ep, pseudo, &ad);
1689 finish_address_gen(ep, &ad);
1690 rule = __alloc_asm_constraint(0);
1691 rule->ident = ident;
1692 rule->constraint = constraint;
1693 use_pseudo(insn, pseudo, &rule->pseudo);
1694 add_ptr_list(&insn->asm_rules->outputs, rule);
1697 static pseudo_t linearize_asm_statement(struct entrypoint *ep, struct statement *stmt)
1699 int state;
1700 struct expression *expr;
1701 struct instruction *insn;
1702 struct asm_rules *rules;
1703 const char *constraint;
1704 struct ident *ident;
1706 insn = alloc_instruction(OP_ASM, 0);
1707 expr = stmt->asm_string;
1708 if (!expr || expr->type != EXPR_STRING) {
1709 warning(stmt->pos, "expected string in inline asm");
1710 return VOID;
1712 insn->string = expr->string->data;
1714 rules = __alloc_asm_rules(0);
1715 insn->asm_rules = rules;
1717 /* Gather the inputs.. */
1718 state = 0;
1719 ident = NULL;
1720 constraint = NULL;
1721 FOR_EACH_PTR(stmt->asm_inputs, expr) {
1722 switch (state) {
1723 case 0: /* Identifier */
1724 state = 1;
1725 ident = (struct ident *)expr;
1726 continue;
1728 case 1: /* Constraint */
1729 state = 2;
1730 constraint = expr ? expr->string->data : "";
1731 continue;
1733 case 2: /* Expression */
1734 state = 0;
1735 add_asm_input(ep, insn, expr, constraint, ident);
1737 } END_FOR_EACH_PTR(expr);
1739 add_one_insn(ep, insn);
1741 /* Assign the outputs */
1742 state = 0;
1743 ident = NULL;
1744 constraint = NULL;
1745 FOR_EACH_PTR(stmt->asm_outputs, expr) {
1746 switch (state) {
1747 case 0: /* Identifier */
1748 state = 1;
1749 ident = (struct ident *)expr;
1750 continue;
1752 case 1: /* Constraint */
1753 state = 2;
1754 constraint = expr ? expr->string->data : "";
1755 continue;
1757 case 2:
1758 state = 0;
1759 add_asm_output(ep, insn, expr, constraint, ident);
1761 } END_FOR_EACH_PTR(expr);
1763 return VOID;
1766 static int multijmp_cmp(const void *_a, const void *_b)
1768 const struct multijmp *a = _a;
1769 const struct multijmp *b = _b;
1771 // "default" case?
1772 if (a->begin > a->end) {
1773 if (b->begin > b->end)
1774 return 0;
1775 return 1;
1777 if (b->begin > b->end)
1778 return -1;
1779 if (a->begin == b->begin) {
1780 if (a->end == b->end)
1781 return 0;
1782 return (a->end < b->end) ? -1 : 1;
1784 return a->begin < b->begin ? -1 : 1;
1787 static void sort_switch_cases(struct instruction *insn)
1789 sort_list((struct ptr_list **)&insn->multijmp_list, multijmp_cmp);
1792 static pseudo_t linearize_declaration(struct entrypoint *ep, struct statement *stmt)
1794 struct symbol *sym;
1796 concat_symbol_list(stmt->declaration, &ep->syms);
1798 FOR_EACH_PTR(stmt->declaration, sym) {
1799 linearize_one_symbol(ep, sym);
1800 } END_FOR_EACH_PTR(sym);
1801 return VOID;
1804 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
1806 struct basic_block *bb;
1808 if (!stmt)
1809 return VOID;
1811 bb = ep->active;
1812 if (bb && !bb->insns)
1813 bb->pos = stmt->pos;
1814 current_pos = stmt->pos;
1816 switch (stmt->type) {
1817 case STMT_NONE:
1818 break;
1820 case STMT_DECLARATION:
1821 return linearize_declaration(ep, stmt);
1823 case STMT_CONTEXT:
1824 return linearize_context(ep, stmt);
1826 case STMT_RANGE:
1827 return linearize_range(ep, stmt);
1829 case STMT_EXPRESSION:
1830 return linearize_expression(ep, stmt->expression);
1832 case STMT_ASM:
1833 return linearize_asm_statement(ep, stmt);
1835 case STMT_RETURN: {
1836 struct expression *expr = stmt->expression;
1837 struct basic_block *bb_return = get_bound_block(ep, stmt->ret_target);
1838 struct basic_block *active;
1839 pseudo_t src = linearize_expression(ep, expr);
1840 active = ep->active;
1841 if (active && src != &void_pseudo) {
1842 struct instruction *phi_node = first_instruction(bb_return->insns);
1843 pseudo_t phi;
1844 if (!phi_node) {
1845 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1846 phi_node->target = alloc_pseudo(phi_node);
1847 phi_node->bb = bb_return;
1848 add_instruction(&bb_return->insns, phi_node);
1850 phi = alloc_phi(active, src, type_size(expr->ctype));
1851 phi->ident = &return_ident;
1852 use_pseudo(phi_node, phi, add_pseudo(&phi_node->phi_list, phi));
1854 add_goto(ep, bb_return);
1855 return VOID;
1858 case STMT_CASE: {
1859 add_label(ep, stmt->case_label);
1860 linearize_statement(ep, stmt->case_statement);
1861 break;
1864 case STMT_LABEL: {
1865 struct symbol *label = stmt->label_identifier;
1867 if (label->used) {
1868 add_label(ep, label);
1869 linearize_statement(ep, stmt->label_statement);
1871 break;
1874 case STMT_GOTO: {
1875 struct symbol *sym;
1876 struct expression *expr;
1877 struct instruction *goto_ins;
1878 struct basic_block *active;
1879 pseudo_t pseudo;
1881 active = ep->active;
1882 if (!bb_reachable(active))
1883 break;
1885 if (stmt->goto_label) {
1886 add_goto(ep, get_bound_block(ep, stmt->goto_label));
1887 break;
1890 expr = stmt->goto_expression;
1891 if (!expr)
1892 break;
1894 /* This can happen as part of simplification */
1895 if (expr->type == EXPR_LABEL) {
1896 add_goto(ep, get_bound_block(ep, expr->label_symbol));
1897 break;
1900 pseudo = linearize_expression(ep, expr);
1901 goto_ins = alloc_instruction(OP_COMPUTEDGOTO, 0);
1902 use_pseudo(goto_ins, pseudo, &goto_ins->target);
1903 add_one_insn(ep, goto_ins);
1905 FOR_EACH_PTR(stmt->target_list, sym) {
1906 struct basic_block *bb_computed = get_bound_block(ep, sym);
1907 struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0);
1908 add_multijmp(&goto_ins->multijmp_list, jmp);
1909 add_bb(&bb_computed->parents, ep->active);
1910 add_bb(&active->children, bb_computed);
1911 } END_FOR_EACH_PTR(sym);
1913 finish_block(ep);
1914 break;
1917 case STMT_COMPOUND:
1918 return linearize_compound_statement(ep, stmt);
1921 * This could take 'likely/unlikely' into account, and
1922 * switch the arms around appropriately..
1924 case STMT_IF: {
1925 struct basic_block *bb_true, *bb_false, *endif;
1926 struct expression *cond = stmt->if_conditional;
1928 bb_true = alloc_basic_block(ep, stmt->pos);
1929 bb_false = endif = alloc_basic_block(ep, stmt->pos);
1931 linearize_cond_branch(ep, cond, bb_true, bb_false);
1933 set_activeblock(ep, bb_true);
1934 linearize_statement(ep, stmt->if_true);
1936 if (stmt->if_false) {
1937 endif = alloc_basic_block(ep, stmt->pos);
1938 add_goto(ep, endif);
1939 set_activeblock(ep, bb_false);
1940 linearize_statement(ep, stmt->if_false);
1942 set_activeblock(ep, endif);
1943 break;
1946 case STMT_SWITCH: {
1947 struct symbol *sym;
1948 struct instruction *switch_ins;
1949 struct basic_block *switch_end = alloc_basic_block(ep, stmt->pos);
1950 struct basic_block *active, *default_case;
1951 struct multijmp *jmp;
1952 pseudo_t pseudo;
1954 pseudo = linearize_expression(ep, stmt->switch_expression);
1956 active = ep->active;
1957 if (!bb_reachable(active))
1958 break;
1960 switch_ins = alloc_instruction(OP_SWITCH, 0);
1961 use_pseudo(switch_ins, pseudo, &switch_ins->cond);
1962 add_one_insn(ep, switch_ins);
1963 finish_block(ep);
1965 default_case = NULL;
1966 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
1967 struct statement *case_stmt = sym->stmt;
1968 struct basic_block *bb_case = get_bound_block(ep, sym);
1970 if (!case_stmt->case_expression) {
1971 default_case = bb_case;
1972 continue;
1973 } else {
1974 int begin, end;
1976 begin = end = case_stmt->case_expression->value;
1977 if (case_stmt->case_to)
1978 end = case_stmt->case_to->value;
1979 if (begin > end)
1980 jmp = alloc_multijmp(bb_case, end, begin);
1981 else
1982 jmp = alloc_multijmp(bb_case, begin, end);
1985 add_multijmp(&switch_ins->multijmp_list, jmp);
1986 add_bb(&bb_case->parents, active);
1987 add_bb(&active->children, bb_case);
1988 } END_FOR_EACH_PTR(sym);
1990 bind_label(stmt->switch_break, switch_end, stmt->pos);
1992 /* And linearize the actual statement */
1993 linearize_statement(ep, stmt->switch_statement);
1994 set_activeblock(ep, switch_end);
1996 if (!default_case)
1997 default_case = switch_end;
1999 jmp = alloc_multijmp(default_case, 1, 0);
2000 add_multijmp(&switch_ins->multijmp_list, jmp);
2001 add_bb(&default_case->parents, active);
2002 add_bb(&active->children, default_case);
2003 sort_switch_cases(switch_ins);
2005 break;
2008 case STMT_ITERATOR: {
2009 struct statement *pre_statement = stmt->iterator_pre_statement;
2010 struct expression *pre_condition = stmt->iterator_pre_condition;
2011 struct statement *statement = stmt->iterator_statement;
2012 struct statement *post_statement = stmt->iterator_post_statement;
2013 struct expression *post_condition = stmt->iterator_post_condition;
2014 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end;
2016 concat_symbol_list(stmt->iterator_syms, &ep->syms);
2017 linearize_statement(ep, pre_statement);
2019 loop_body = loop_top = alloc_basic_block(ep, stmt->pos);
2020 loop_continue = alloc_basic_block(ep, stmt->pos);
2021 loop_end = alloc_basic_block(ep, stmt->pos);
2023 /* An empty post-condition means that it's the same as the pre-condition */
2024 if (!post_condition) {
2025 loop_top = alloc_basic_block(ep, stmt->pos);
2026 set_activeblock(ep, loop_top);
2029 if (pre_condition)
2030 linearize_cond_branch(ep, pre_condition, loop_body, loop_end);
2032 bind_label(stmt->iterator_continue, loop_continue, stmt->pos);
2033 bind_label(stmt->iterator_break, loop_end, stmt->pos);
2035 set_activeblock(ep, loop_body);
2036 linearize_statement(ep, statement);
2037 add_goto(ep, loop_continue);
2039 set_activeblock(ep, loop_continue);
2040 linearize_statement(ep, post_statement);
2041 if (!post_condition)
2042 add_goto(ep, loop_top);
2043 else
2044 linearize_cond_branch(ep, post_condition, loop_top, loop_end);
2045 set_activeblock(ep, loop_end);
2046 break;
2049 default:
2050 break;
2052 return VOID;
2055 static struct entrypoint *linearize_fn(struct symbol *sym, struct symbol *base_type)
2057 struct entrypoint *ep;
2058 struct basic_block *bb;
2059 struct symbol *arg;
2060 struct instruction *entry;
2061 pseudo_t result;
2062 int i;
2064 if (!base_type->stmt)
2065 return NULL;
2067 ep = alloc_entrypoint();
2068 bb = alloc_basic_block(ep, sym->pos);
2070 ep->name = sym;
2071 set_activeblock(ep, bb);
2073 entry = alloc_instruction(OP_ENTRY, 0);
2074 add_one_insn(ep, entry);
2075 ep->entry = entry;
2077 concat_symbol_list(base_type->arguments, &ep->syms);
2079 /* FIXME!! We should do something else about varargs.. */
2080 i = 0;
2081 FOR_EACH_PTR(base_type->arguments, arg) {
2082 linearize_argument(ep, arg, ++i);
2083 } END_FOR_EACH_PTR(arg);
2085 result = linearize_statement(ep, base_type->stmt);
2086 if (bb_reachable(ep->active) && !bb_terminated(ep->active)) {
2087 struct symbol *ret_type = base_type->ctype.base_type;
2088 struct instruction *insn = alloc_typed_instruction(OP_RET, ret_type);
2090 if (type_size(ret_type) > 0)
2091 use_pseudo(insn, result, &insn->src);
2092 add_one_insn(ep, insn);
2096 * Do trivial flow simplification - branches to
2097 * branches, kill dead basicblocks etc
2099 kill_unreachable_bbs(ep);
2102 * Turn symbols into pseudos
2104 simplify_symbol_usage(ep);
2106 repeat:
2108 * Remove trivial instructions, and try to CSE
2109 * the rest.
2111 do {
2112 cleanup_and_cse(ep);
2113 pack_basic_blocks(ep);
2114 } while (repeat_phase & REPEAT_CSE);
2116 kill_unreachable_bbs(ep);
2117 vrfy_flow(ep);
2119 /* Cleanup */
2120 clear_symbol_pseudos(ep);
2122 /* And track pseudo register usage */
2123 track_pseudo_liveness(ep);
2126 * Some flow optimizations can only effectively
2127 * be done when we've done liveness analysis. But
2128 * if they trigger, we need to start all over
2129 * again
2131 if (simplify_flow(ep)) {
2132 clear_liveness(ep);
2133 goto repeat;
2136 /* Finally, add deathnotes to pseudos now that we have them */
2137 track_pseudo_death(ep);
2139 return ep;
2142 struct entrypoint *linearize_symbol(struct symbol *sym)
2144 struct symbol *base_type;
2146 if (!sym)
2147 return NULL;
2148 current_pos = sym->pos;
2149 base_type = sym->ctype.base_type;
2150 if (!base_type)
2151 return NULL;
2152 if (base_type->type == SYM_FN)
2153 return linearize_fn(sym, base_type);
2154 return NULL;