Totally re-do how we build up the initializer tree: make the
[smatch.git] / linearize.c
blob4db5b84db18537630006a4f8a699c13bd4bad480
1 /*
2 * Linearize - walk the statement tree (but _not_ the expressions)
3 * to generate a linear version of it and the basic blocks.
5 * NOTE! We're not interested in the actual sub-expressions yet,
6 * even though they can generate conditional branches and
7 * subroutine calls. That's all "local" behaviour.
9 * Copyright (C) 2004 Linus Torvalds
10 * Copyright (C) 2004 Christopher Li
13 #include <string.h>
14 #include <stdarg.h>
15 #include <stdlib.h>
16 #include <stdio.h>
18 #include "parse.h"
19 #include "expression.h"
20 #include "linearize.h"
22 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
23 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
25 static void add_setcc(struct entrypoint *ep, struct expression *expr, pseudo_t val);
26 static pseudo_t add_binary_op(struct entrypoint *ep, struct expression *expr, int op, pseudo_t left, pseudo_t right);
27 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val);
28 static pseudo_t add_const_value(struct entrypoint *ep, struct position pos, struct symbol *ctype, int val);
29 static pseudo_t add_load(struct entrypoint *ep, struct expression *expr, pseudo_t addr);
32 struct pseudo void_pseudo = {};
34 static struct instruction *alloc_instruction(int opcode, struct symbol *type)
36 struct instruction * insn = __alloc_instruction(0);
37 insn->type = type;
38 insn->opcode = opcode;
39 return insn;
42 static struct entrypoint *alloc_entrypoint(void)
44 return __alloc_entrypoint(0);
47 static struct basic_block *alloc_basic_block(void)
49 return __alloc_basic_block(0);
52 static struct multijmp* alloc_multijmp(struct basic_block *target, int begin, int end)
54 struct multijmp *multijmp = __alloc_multijmp(0);
55 multijmp->target = target;
56 multijmp->begin = begin;
57 multijmp->end = end;
58 return multijmp;
61 static struct phi* alloc_phi(struct basic_block *source, pseudo_t pseudo)
63 struct phi *phi = __alloc_phi(0);
64 phi->source = source;
65 phi->pseudo = pseudo;
66 return phi;
69 static void show_instruction(struct instruction *insn)
71 int op = insn->opcode;
73 switch (op) {
74 case OP_BADOP:
75 printf("\tAIEEE! (%d %d)\n", insn->target->nr, insn->src->nr);
76 break;
77 case OP_RET:
78 if (insn->type && insn->type != &void_ctype)
79 printf("\tret %%r%d\n", insn->src->nr);
80 else
81 printf("\tret\n");
82 break;
83 case OP_BR:
84 if (insn->bb_true && insn->bb_false) {
85 printf("\tbr\t%%r%d, .L%p, .L%p\n", insn->cond->nr, insn->bb_true, insn->bb_false);
86 break;
88 printf("\tbr\t.L%p\n", insn->bb_true ? insn->bb_true : insn->bb_false);
89 break;
91 case OP_SETVAL: {
92 struct expression *expr = insn->val;
93 switch (expr->type) {
94 case EXPR_VALUE:
95 printf("\t%%r%d <- %lld\n",
96 insn->target->nr, expr->value);
97 break;
98 case EXPR_FVALUE:
99 printf("\t%%r%d <- %Lf\n",
100 insn->target->nr, expr->fvalue);
101 break;
102 case EXPR_STRING:
103 printf("\t%%r%d <- %s\n",
104 insn->target->nr, show_string(expr->string));
105 break;
106 case EXPR_SYMBOL:
107 printf("\t%%r%d <- %s\n",
108 insn->target->nr, show_ident(expr->symbol->ident));
109 break;
110 case EXPR_LABEL:
111 printf("\t%%r%d <- .L%p\n",
112 insn->target->nr, expr->symbol->bb_target);
113 break;
114 default:
115 printf("\t%%r%d <- SETVAL EXPR TYPE %d\n",
116 insn->target->nr, expr->type);
118 break;
120 case OP_SWITCH: {
121 struct multijmp *jmp;
122 printf("\tswitch %%r%d", insn->cond->nr);
123 FOR_EACH_PTR(insn->multijmp_list, jmp) {
124 if (jmp->begin == jmp->end)
125 printf(", %d -> .L%p", jmp->begin, jmp->target);
126 else if (jmp->begin < jmp->end)
127 printf(", %d ... %d -> .L%p", jmp->begin, jmp->end, jmp->target);
128 else
129 printf(", default -> .L%p\n", jmp->target);
130 } END_FOR_EACH_PTR(jmp);
131 printf("\n");
132 break;
134 case OP_COMPUTEDGOTO: {
135 struct multijmp *jmp;
136 printf("\tjmp *%%r%d", insn->target->nr);
137 FOR_EACH_PTR(insn->multijmp_list, jmp) {
138 printf(", .L%p", jmp->target);
139 } END_FOR_EACH_PTR(jmp);
140 printf("\n");
141 break;
144 case OP_PHI: {
145 struct phi *phi;
146 const char *s = " ";
147 printf("\t%%r%d <- phi", insn->target->nr);
148 FOR_EACH_PTR(insn->phi_list, phi) {
149 printf("%s(%%r%d, .L%p)", s, phi->pseudo->nr, phi->source);
150 s = ", ";
151 } END_FOR_EACH_PTR(phi);
152 printf("\n");
153 break;
155 case OP_LOAD:
156 printf("\tload %%r%d <- [%%r%d]\n", insn->target->nr, insn->src->nr);
157 break;
158 case OP_STORE:
159 printf("\tstore %%r%d -> [%%r%d]\n", insn->target->nr, insn->src->nr);
160 break;
161 case OP_CALL: {
162 struct pseudo *arg;
163 printf("\t%%r%d <- CALL %%r%d", insn->target->nr, insn->func->nr);
164 FOR_EACH_PTR(insn->arguments, arg) {
165 printf(", %%r%d", arg->nr);
166 } END_FOR_EACH_PTR(arg);
167 printf("\n");
168 break;
170 case OP_CAST:
171 printf("\t%%r%d <- CAST(%d->%d) %%r%d\n",
172 insn->target->nr,
173 insn->orig_type->bit_size, insn->type->bit_size,
174 insn->src->nr);
175 break;
176 case OP_BINARY ... OP_BINARY_END: {
177 static const char *opname[] = {
178 [OP_ADD - OP_BINARY] = "add", [OP_SUB - OP_BINARY] = "sub",
179 [OP_MUL - OP_BINARY] = "mul", [OP_DIV - OP_BINARY] = "div",
180 [OP_MOD - OP_BINARY] = "mod", [OP_AND - OP_BINARY] = "and",
181 [OP_OR - OP_BINARY] = "or", [OP_XOR - OP_BINARY] = "xor",
182 [OP_SHL - OP_BINARY] = "shl", [OP_SHR - OP_BINARY] = "shr",
183 [OP_AND_BOOL - OP_BINARY] = "and-bool",
184 [OP_OR_BOOL - OP_BINARY] = "or-bool",
185 [OP_SEL - OP_BINARY] = "select",
187 printf("\t%%r%d <- %s %%r%d, %%r%d\n",
188 insn->target->nr,
189 opname[op - OP_BINARY], insn->src1->nr, insn->src2->nr);
190 break;
193 case OP_SLICE:
194 printf("\t%%r%d <- slice %%r%d, %d, %d\n",
195 insn->target->nr,
196 insn->base->nr, insn->from, insn->len);
197 break;
199 case OP_BINCMP ... OP_BINCMP_END: {
200 static const char *opname[] = {
201 [OP_SET_EQ - OP_BINCMP] = "seteq",
202 [OP_SET_NE - OP_BINCMP] = "setne",
203 [OP_SET_LE - OP_BINCMP] = "setle",
204 [OP_SET_GE - OP_BINCMP] = "setge",
205 [OP_SET_LT - OP_BINCMP] = "setlt",
206 [OP_SET_GT - OP_BINCMP] = "setgt",
207 [OP_SET_BE - OP_BINCMP] = "setbe",
208 [OP_SET_AE - OP_BINCMP] = "setae",
209 [OP_SET_A - OP_BINCMP] = "seta",
210 [OP_SET_B - OP_BINCMP] = "setb",
212 printf("\t%%r%d <- %s %%r%d, %%r%d\n",
213 insn->target->nr,
214 opname[op - OP_BINCMP], insn->src1->nr, insn->src2->nr);
215 break;
218 case OP_NOT: case OP_NEG:
219 printf("\t%%r%d <- %s %%r%d\n",
220 insn->target->nr,
221 op == OP_NOT ? "not" : "neg", insn->src1->nr);
222 break;
223 case OP_SETCC:
224 printf("\tsetcc %%r%d\n", insn->src->nr);
225 break;
226 default:
227 printf("\top %d ???\n", op);
231 static void show_bb(struct basic_block *bb)
233 struct instruction *insn;
235 printf("bb: %p\n", bb);
236 if (bb->parents) {
237 struct basic_block *from;
238 FOR_EACH_PTR(bb->parents, from) {
239 printf(" **from %p**\n", from);
240 } END_FOR_EACH_PTR(from);
242 FOR_EACH_PTR(bb->insns, insn) {
243 show_instruction(insn);
244 } END_FOR_EACH_PTR(insn);
245 if (!bb_terminated(bb))
246 printf("\tEND\n");
247 printf("\n");
250 void show_entry(struct entrypoint *ep)
252 struct symbol *sym;
253 struct basic_block *bb;
255 printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
257 FOR_EACH_PTR(ep->syms, sym) {
258 printf(" sym: %p %s\n", sym, show_ident(sym->ident));
259 } END_FOR_EACH_PTR(sym);
261 printf("\n");
263 FOR_EACH_PTR(ep->bbs, bb) {
264 if (bb == ep->entry)
265 printf("ENTRY:\n");
266 show_bb(bb);
267 } END_FOR_EACH_PTR(bb);
269 printf("\n");
272 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos)
274 if (label->bb_target)
275 warning(pos, "label '%s' already bound", show_ident(label->ident));
276 label->bb_target = bb;
279 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label)
281 struct basic_block *bb = label->bb_target;
283 if (!bb) {
284 label->bb_target = bb = alloc_basic_block();
285 bb->flags |= BB_REACHABLE;
287 return bb;
290 static void finish_block(struct entrypoint *ep)
292 struct basic_block *src = ep->active;
293 if (bb_reachable(src))
294 ep->active = NULL;
297 static void add_goto(struct entrypoint *ep, struct basic_block *dst)
299 struct basic_block *src = ep->active;
300 if (bb_reachable(src)) {
301 struct instruction *br = alloc_instruction(OP_BR, NULL);
302 br->bb_true = dst;
303 add_bb(&dst->parents, src);
304 add_instruction(&src->insns, br);
305 ep->active = NULL;
309 static void add_one_insn(struct entrypoint *ep, struct position pos, struct instruction *insn)
311 struct basic_block *bb = ep->active;
313 if (bb_reachable(bb))
314 add_instruction(&bb->insns, insn);
317 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb)
319 if (!bb_terminated(ep->active))
320 add_goto(ep, bb);
322 ep->active = bb;
323 if (bb_reachable(bb))
324 add_bb(&ep->bbs, bb);
327 static void add_setcc(struct entrypoint *ep, struct expression *expr, pseudo_t val)
329 struct basic_block *bb = ep->active;
331 if (bb_reachable(bb)) {
332 struct instruction *cc = alloc_instruction(OP_SETCC, &bool_ctype);
333 cc->src = val;
334 add_one_insn(ep, expr->pos, cc);
338 static void add_branch(struct entrypoint *ep, struct expression *expr, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false)
340 struct basic_block *bb = ep->active;
341 struct instruction *br;
343 if (bb_reachable(bb)) {
344 br = alloc_instruction(OP_BR, expr->ctype);
345 br->cond = cond;
346 br->bb_true = bb_true;
347 br->bb_false = bb_false;
348 add_bb(&bb_true->parents, bb);
349 add_bb(&bb_false->parents, bb);
350 add_one_insn(ep, expr->pos, br);
354 /* Dummy pseudo allocator */
355 static pseudo_t alloc_pseudo(void)
357 static int nr = 0;
358 struct pseudo * pseudo = __alloc_pseudo(0);
359 pseudo->nr = ++nr;
360 return pseudo;
364 * FIXME! Not all accesses are memory loads. We should
365 * check what kind of symbol is behind the dereference.
367 static pseudo_t linearize_address_gen(struct entrypoint *ep, struct expression *expr)
369 if (expr->type == EXPR_PREOP)
370 return linearize_expression(ep, expr->unop);
371 if (expr->type == EXPR_BITFIELD)
372 return linearize_expression(ep, expr->address);
373 warning(expr->pos, "generating address of non-lvalue");
374 return VOID;
377 static void linearize_store_gen(struct entrypoint *ep, pseudo_t value, struct expression *expr, pseudo_t addr)
379 struct instruction *store = alloc_instruction(OP_STORE, expr->ctype);
381 if (expr->type == EXPR_BITFIELD) {
382 unsigned long mask = ((1<<expr->nrbits)-1) << expr->bitpos;
383 pseudo_t andmask, ormask, shift, orig;
384 if (expr->bitpos) {
385 shift = add_const_value(ep, expr->pos, &uint_ctype, expr->bitpos);
386 value = add_binary_op(ep, expr, OP_SHL, value, shift);
388 orig = add_load(ep, expr, addr);
389 andmask = add_const_value(ep, expr->pos, &uint_ctype, ~mask);
390 orig = add_binary_op(ep, expr, OP_AND, orig, andmask);
391 ormask = add_const_value(ep, expr->pos, &uint_ctype, mask);
392 value = add_binary_op(ep, expr, OP_AND, value, ormask);
393 value = add_binary_op(ep, expr, OP_OR, orig, value);
396 store->target = value;
397 store->src = addr;
398 add_one_insn(ep, expr->pos, store);
401 static pseudo_t add_binary_op(struct entrypoint *ep, struct expression *expr, int op, pseudo_t left, pseudo_t right)
403 struct instruction *insn = alloc_instruction(op, expr->ctype);
404 pseudo_t target = alloc_pseudo();
405 insn->target = target;
406 insn->src1 = left;
407 insn->src2 = right;
408 add_one_insn(ep, expr->pos, insn);
409 return target;
412 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val)
414 struct instruction *insn = alloc_instruction(OP_SETVAL, ctype);
415 pseudo_t target = alloc_pseudo();
416 insn->target = target;
417 insn->val = val;
418 add_one_insn(ep, val->pos, insn);
419 return target;
422 static pseudo_t add_const_value(struct entrypoint *ep, struct position pos, struct symbol *ctype, int val)
424 struct expression *expr = alloc_const_expression(pos, val);
425 return add_setval(ep, ctype, expr);
428 static pseudo_t add_load(struct entrypoint *ep, struct expression *expr, pseudo_t addr)
430 pseudo_t new = alloc_pseudo();
431 struct instruction *insn = alloc_instruction(OP_LOAD, expr->ctype);
433 insn->target = new;
434 insn->src = addr;
435 add_one_insn(ep, expr->pos, insn);
436 return new;
439 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct expression *expr, pseudo_t addr)
441 pseudo_t new = add_load(ep, expr, addr);
442 if (expr->type == EXPR_PREOP)
443 return new;
445 if (expr->type == EXPR_BITFIELD) {
446 pseudo_t mask;
447 if (expr->bitpos) {
448 pseudo_t shift = add_const_value(ep, expr->pos, &uint_ctype, expr->bitpos);
449 new = add_binary_op(ep, expr, OP_SHR, new, shift);
451 mask = add_const_value(ep, expr->pos, &uint_ctype, (1<<expr->nrbits)-1);
452 return add_binary_op(ep, expr, OP_AND, new, mask);
455 warning(expr->pos, "loading unknown expression");
456 return new;
459 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
461 pseudo_t addr = linearize_address_gen(ep, expr);
462 return linearize_load_gen(ep, expr, addr);
465 /* FIXME: FP */
466 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
468 pseudo_t addr = linearize_address_gen(ep, expr->unop);
469 pseudo_t old, new, one;
470 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB;
472 old = linearize_load_gen(ep, expr->unop, addr);
473 one = add_const_value(ep, expr->pos, expr->ctype, 1);
474 new = add_binary_op(ep, expr, op, old, one);
475 linearize_store_gen(ep, new, expr->unop, addr);
476 return postop ? old : new;
479 static pseudo_t add_uniop(struct entrypoint *ep, struct expression *expr, int op, pseudo_t src)
481 pseudo_t new = alloc_pseudo();
482 struct instruction *insn = alloc_instruction(op, expr->ctype);
483 insn->target = new;
484 insn->src1 = src;
485 add_one_insn(ep, expr->pos, insn);
486 return new;
489 static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr)
491 pseudo_t pre = linearize_expression(ep, expr->base);
492 pseudo_t new = alloc_pseudo();
493 struct instruction *insn = alloc_instruction(OP_SLICE, expr->ctype);
494 insn->target = new;
495 insn->base = pre;
496 insn->from = expr->r_bitpos;
497 insn->len = expr->r_nrbits;
498 add_one_insn(ep, expr->pos, insn);
499 return new;
502 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
504 pseudo_t pre = linearize_expression(ep, expr->unop);
505 switch (expr->op) {
506 case '+':
507 return pre;
508 case '!': {
509 pseudo_t zero = add_const_value(ep, expr->pos, expr->ctype, 0);
510 return add_binary_op(ep, expr, OP_SET_EQ, pre, zero);
512 case '~':
513 return add_uniop(ep, expr, OP_NOT, pre);
514 case '-':
515 return add_uniop(ep, expr, OP_NEG, pre);
517 return VOID;
520 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
523 * '*' is an lvalue access, and is fundamentally different
524 * from an arithmetic operation. Maybe it should have an
525 * expression type of its own..
527 if (expr->op == '*')
528 return linearize_access(ep, expr);
529 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
530 return linearize_inc_dec(ep, expr, 0);
531 return linearize_regular_preop(ep, expr);
534 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr)
536 return linearize_inc_dec(ep, expr, 1);
539 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
541 struct expression *target = expr->left;
542 pseudo_t value, address;
544 value = linearize_expression(ep, expr->right);
545 address = linearize_address_gen(ep, target);
546 if (expr->op != '=') {
547 static const int opcode[] = {
548 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD,
549 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB,
550 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MUL,
551 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIV,
552 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MOD,
553 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL,
554 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_SHR,
555 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND,
556 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR,
557 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR
559 pseudo_t left = linearize_load_gen(ep, target, address);
560 value = add_binary_op(ep, expr, opcode[expr->op - SPECIAL_BASE], left, value);
562 linearize_store_gen(ep, value, target, address);
563 return value;
566 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr)
568 struct expression *arg, *fn;
569 struct instruction *insn = alloc_instruction(OP_CALL, expr->ctype);
570 pseudo_t retval;
572 if (!expr->ctype) {
573 warning(expr->pos, "call with no type!");
574 return VOID;
577 FOR_EACH_PTR(expr->args, arg) {
578 pseudo_t new = linearize_expression(ep, arg);
579 add_pseudo(&insn->arguments, new);
580 } END_FOR_EACH_PTR(arg);
582 fn = expr->fn;
583 if (fn->type == EXPR_PREOP) {
584 if (fn->unop->type == EXPR_SYMBOL) {
585 struct symbol *sym = fn->unop->symbol;
586 if (sym->ctype.base_type->type == SYM_FN)
587 fn = fn->unop;
590 insn->func = linearize_expression(ep, fn);
591 insn->target = retval = alloc_pseudo();
592 add_one_insn(ep, expr->pos, insn);
594 return retval;
597 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr)
599 pseudo_t src1, src2;
600 static const int opcode[] = {
601 ['+'] = OP_ADD, ['-'] = OP_SUB,
602 ['*'] = OP_MUL, ['/'] = OP_DIV,
603 ['%'] = OP_MOD, ['&'] = OP_AND,
604 ['|'] = OP_OR, ['^'] = OP_XOR,
605 [SPECIAL_LEFTSHIFT] = OP_SHL,
606 [SPECIAL_RIGHTSHIFT] = OP_SHR,
607 [SPECIAL_LOGICAL_AND] = OP_AND_BOOL,
608 [SPECIAL_LOGICAL_OR] = OP_OR_BOOL,
611 src1 = linearize_expression(ep, expr->left);
612 src2 = linearize_expression(ep, expr->right);
613 return add_binary_op(ep, expr, opcode[expr->op], src1, src2);
616 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
618 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
620 static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr)
622 pseudo_t cond, true, false;
624 true = NULL;
625 if (expr->cond_true)
626 true = linearize_expression(ep, expr->cond_true);
627 false = linearize_expression(ep, expr->cond_false);
628 cond = linearize_expression(ep, expr->conditional);
629 if (!true)
630 true = cond;
632 add_setcc(ep, expr, cond);
633 return add_binary_op(ep, expr, OP_SEL, true, false);
636 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr,
637 struct expression *cond, struct expression *expr_true,
638 struct expression *expr_false)
640 pseudo_t src1, src2, target;
641 struct basic_block *bb_true = alloc_basic_block();
642 struct basic_block *bb_false = alloc_basic_block();
643 struct basic_block *merge = alloc_basic_block();
645 if (expr_true) {
646 linearize_cond_branch(ep, cond, bb_true, bb_false);
648 set_activeblock(ep, bb_true);
649 src1 = linearize_expression(ep, expr_true);
650 bb_true = ep->active;
651 add_goto(ep, merge);
652 } else {
653 src1 = linearize_expression(ep, cond);
654 add_branch(ep, expr, src1, merge, bb_false);
657 set_activeblock(ep, bb_false);
658 src2 = linearize_expression(ep, expr_false);
659 bb_false = ep->active;
660 set_activeblock(ep, merge);
662 if (src1 != VOID && src2 != VOID) {
663 struct instruction *phi_node = alloc_instruction(OP_PHI, expr->ctype);
664 add_phi(&phi_node->phi_list, alloc_phi(bb_true, src1));
665 add_phi(&phi_node->phi_list, alloc_phi(bb_false, src2));
666 phi_node->target = target = alloc_pseudo();
667 add_one_insn(ep, expr->pos, phi_node);
668 set_activeblock(ep, alloc_basic_block());
669 return target;
672 return src1 != VOID ? src1 : src2;
675 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr)
677 struct expression *shortcut;
679 shortcut = alloc_const_expression(expr->pos, expr->op == SPECIAL_LOGICAL_OR);
680 shortcut->ctype = expr->ctype;
681 return linearize_conditional(ep, expr, expr->left, shortcut, expr->right);
684 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr)
686 static const int cmpop[] = {
687 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT,
688 [SPECIAL_EQUAL] = OP_SET_EQ,
689 [SPECIAL_NOTEQUAL] = OP_SET_NE,
690 [SPECIAL_GTE] = OP_SET_GE,
691 [SPECIAL_LTE] = OP_SET_LE,
692 [SPECIAL_UNSIGNED_LT] = OP_SET_B,
693 [SPECIAL_UNSIGNED_GT] = OP_SET_A,
694 [SPECIAL_UNSIGNED_LTE] = OP_SET_BE,
695 [SPECIAL_UNSIGNED_GTE] = OP_SET_AE,
698 pseudo_t src1 = linearize_expression(ep, expr->left);
699 pseudo_t src2 = linearize_expression(ep, expr->right);
700 return add_binary_op(ep, expr, cmpop[expr->op], src1, src2);
704 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
706 pseudo_t cond;
708 if (!expr || !bb_reachable(ep->active))
709 return VOID;
711 switch (expr->type) {
713 case EXPR_STRING:
714 case EXPR_VALUE:
715 add_goto(ep, expr->value ? bb_true : bb_false);
716 return VOID;
718 case EXPR_FVALUE:
719 add_goto(ep, expr->fvalue ? bb_true : bb_false);
720 return VOID;
722 case EXPR_LOGICAL:
723 linearize_logical_branch(ep, expr, bb_true, bb_false);
724 return VOID;
726 case EXPR_COMPARE:
727 cond = linearize_compare(ep, expr);
728 add_branch(ep, expr, cond, bb_true, bb_false);
729 break;
731 case EXPR_PREOP:
732 if (expr->op == '!')
733 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true);
734 /* fall through */
735 default: {
736 cond = linearize_expression(ep, expr);
737 add_branch(ep, expr, cond, bb_true, bb_false);
739 return VOID;
742 return VOID;
747 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
749 struct basic_block *next = alloc_basic_block();
751 if (expr->op == SPECIAL_LOGICAL_OR)
752 linearize_cond_branch(ep, expr->left, bb_true, next);
753 else
754 linearize_cond_branch(ep, expr->left, next, bb_false);
755 set_activeblock(ep, next);
756 linearize_cond_branch(ep, expr->right, bb_true, bb_false);
757 return VOID;
760 pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr)
762 pseudo_t src, result;
763 struct instruction *insn;
765 src = linearize_expression(ep, expr->cast_expression);
766 if (src == VOID)
767 return VOID;
768 insn = alloc_instruction(OP_CAST, expr->ctype);
769 result = alloc_pseudo();
770 insn->target = result;
771 insn->src = src;
772 insn->orig_type = expr->cast_expression->ctype;
773 add_one_insn(ep, expr->pos, insn);
774 return result;
777 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
779 if (!expr)
780 return VOID;
782 switch (expr->type) {
783 case EXPR_VALUE: case EXPR_STRING: case EXPR_SYMBOL: case EXPR_FVALUE: case EXPR_LABEL:
784 return add_setval(ep, expr->ctype, expr);
786 case EXPR_STATEMENT:
787 return linearize_statement(ep, expr->statement);
789 case EXPR_CALL:
790 return linearize_call_expression(ep, expr);
792 case EXPR_BINOP:
793 return linearize_binop(ep, expr);
795 case EXPR_LOGICAL:
796 return linearize_logical(ep, expr);
798 case EXPR_COMPARE:
799 return linearize_compare(ep, expr);
801 case EXPR_SELECT:
802 return linearize_select(ep, expr);
804 case EXPR_CONDITIONAL:
805 return linearize_conditional(ep, expr, expr->conditional,
806 expr->cond_true, expr->cond_false);
808 case EXPR_COMMA: {
809 linearize_expression(ep, expr->left);
810 return linearize_expression(ep, expr->right);
813 case EXPR_ASSIGNMENT:
814 return linearize_assignment(ep, expr);
816 case EXPR_PREOP:
817 return linearize_preop(ep, expr);
819 case EXPR_POSTOP:
820 return linearize_postop(ep, expr);
822 case EXPR_CAST:
823 return linearize_cast(ep, expr);
825 case EXPR_BITFIELD:
826 return linearize_access(ep, expr);
828 case EXPR_SLICE:
829 return linearize_slice(ep, expr);
831 default:
832 warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op);
833 return VOID;
835 return VOID;
838 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
840 if (!stmt)
841 return VOID;
843 switch (stmt->type) {
844 case STMT_NONE:
845 break;
847 case STMT_EXPRESSION:
848 return linearize_expression(ep, stmt->expression);
850 case STMT_ASM:
851 /* FIXME */
852 break;
854 case STMT_RETURN: {
855 struct expression *expr = stmt->expression;
856 struct basic_block *bb_return = stmt->ret_target->bb_target;
857 struct basic_block *active;
858 pseudo_t src = linearize_expression(ep, expr);
859 active = ep->active;
860 add_goto(ep, bb_return);
861 if (src != &void_pseudo) {
862 struct instruction *phi_node = first_instruction(bb_return->insns);
863 if (!phi_node) {
864 phi_node = alloc_instruction(OP_PHI, expr->ctype);
865 phi_node->target = alloc_pseudo();
866 add_instruction(&bb_return->insns, phi_node);
868 add_phi(&phi_node->phi_list, alloc_phi(active, src));
870 return VOID;
873 case STMT_CASE: {
874 struct basic_block *bb = get_bound_block(ep, stmt->case_label);
875 set_activeblock(ep, bb);
876 linearize_statement(ep, stmt->case_statement);
877 break;
880 case STMT_LABEL: {
881 struct symbol *label = stmt->label_identifier;
882 struct basic_block *bb;
884 if (label->used) {
885 bb = get_bound_block(ep, stmt->label_identifier);
886 set_activeblock(ep, bb);
887 linearize_statement(ep, stmt->label_statement);
889 break;
892 case STMT_GOTO: {
893 struct symbol *sym;
894 struct expression *expr;
895 struct instruction *goto_ins;
896 pseudo_t pseudo;
898 if (stmt->goto_label) {
899 add_goto(ep, get_bound_block(ep, stmt->goto_label));
900 break;
903 /* This can happen as part of simplification */
904 expr = stmt->goto_expression;
905 if (expr->type == EXPR_LABEL) {
906 add_goto(ep, get_bound_block(ep, expr->label_symbol));
907 break;
910 pseudo = linearize_expression(ep, expr);
911 goto_ins = alloc_instruction(OP_COMPUTEDGOTO, NULL);
912 add_one_insn(ep, stmt->pos, goto_ins);
913 goto_ins->target = pseudo;
915 FOR_EACH_PTR(stmt->target_list, sym) {
916 struct basic_block *bb_computed = get_bound_block(ep, sym);
917 struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0);
918 add_multijmp(&goto_ins->multijmp_list, jmp);
919 add_bb(&bb_computed->parents, ep->active);
920 } END_FOR_EACH_PTR(sym);
922 finish_block(ep);
923 break;
926 case STMT_COMPOUND: {
927 pseudo_t pseudo = NULL;
928 struct statement *s;
929 struct symbol *ret = stmt->ret;
930 concat_symbol_list(stmt->syms, &ep->syms);
931 if (ret)
932 ret->bb_target = alloc_basic_block();
933 FOR_EACH_PTR(stmt->stmts, s) {
934 pseudo = linearize_statement(ep, s);
935 } END_FOR_EACH_PTR(s);
936 if (ret) {
937 struct basic_block *bb = ret->bb_target;
938 struct instruction *phi = first_instruction(bb->insns);
940 if (!phi)
941 return pseudo;
943 set_activeblock(ep, bb);
944 if (phi_list_size(phi->phi_list)==1) {
945 pseudo = first_phi(phi->phi_list)->pseudo;
946 delete_last_instruction(&bb->insns);
947 return pseudo;
949 return phi->target;
951 return pseudo;
955 * This could take 'likely/unlikely' into account, and
956 * switch the arms around appropriately..
958 case STMT_IF: {
959 struct basic_block *bb_true, *bb_false, *endif;
960 struct expression *cond = stmt->if_conditional;
962 bb_true = alloc_basic_block();
963 bb_false = endif = alloc_basic_block();
965 linearize_cond_branch(ep, cond, bb_true, bb_false);
967 set_activeblock(ep, bb_true);
968 linearize_statement(ep, stmt->if_true);
970 if (stmt->if_false) {
971 endif = alloc_basic_block();
972 add_goto(ep, endif);
973 set_activeblock(ep, bb_false);
974 linearize_statement(ep, stmt->if_false);
976 set_activeblock(ep, endif);
977 break;
980 case STMT_SWITCH: {
981 struct symbol *sym;
982 struct instruction *switch_ins;
983 struct basic_block *switch_end = alloc_basic_block();
984 pseudo_t pseudo;
986 pseudo = linearize_expression(ep, stmt->switch_expression);
987 switch_ins = alloc_instruction(OP_SWITCH, NULL);
988 switch_ins->cond = pseudo;
989 add_one_insn(ep, stmt->pos, switch_ins);
991 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
992 struct statement *case_stmt = sym->stmt;
993 struct basic_block *bb_case = get_bound_block(ep, sym);
994 struct multijmp *jmp;
996 if (!case_stmt->case_expression) {
997 jmp = alloc_multijmp(bb_case, 1, 0);
998 } else {
999 int begin, end;
1001 begin = end = case_stmt->case_expression->value;
1002 if (case_stmt->case_to)
1003 end = case_stmt->case_to->value;
1004 if (begin > end)
1005 jmp = alloc_multijmp(bb_case, end, begin);
1006 else
1007 jmp = alloc_multijmp(bb_case, begin, end);
1010 add_multijmp(&switch_ins->multijmp_list, jmp);
1011 add_bb(&bb_case->parents, ep->active);
1012 } END_FOR_EACH_PTR(sym);
1014 bind_label(stmt->switch_break, switch_end, stmt->pos);
1016 /* And linearize the actual statement */
1017 linearize_statement(ep, stmt->switch_statement);
1018 set_activeblock(ep, switch_end);
1020 break;
1023 case STMT_ITERATOR: {
1024 struct statement *pre_statement = stmt->iterator_pre_statement;
1025 struct expression *pre_condition = stmt->iterator_pre_condition;
1026 struct statement *statement = stmt->iterator_statement;
1027 struct statement *post_statement = stmt->iterator_post_statement;
1028 struct expression *post_condition = stmt->iterator_post_condition;
1029 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end;
1031 concat_symbol_list(stmt->iterator_syms, &ep->syms);
1032 linearize_statement(ep, pre_statement);
1034 loop_body = loop_top = alloc_basic_block();
1035 loop_continue = alloc_basic_block();
1036 loop_end = alloc_basic_block();
1038 if (pre_condition == post_condition) {
1039 loop_top = alloc_basic_block();
1040 loop_top->flags |= BB_REACHABLE;
1041 set_activeblock(ep, loop_top);
1044 loop_top->flags |= BB_REACHABLE;
1045 if (pre_condition)
1046 linearize_cond_branch(ep, pre_condition, loop_body, loop_end);
1048 bind_label(stmt->iterator_continue, loop_continue, stmt->pos);
1049 bind_label(stmt->iterator_break, loop_end, stmt->pos);
1051 set_activeblock(ep, loop_body);
1052 linearize_statement(ep, statement);
1053 add_goto(ep, loop_continue);
1055 if (post_condition) {
1056 set_activeblock(ep, loop_continue);
1057 linearize_statement(ep, post_statement);
1058 if (pre_condition == post_condition)
1059 add_goto(ep, loop_top);
1060 else
1061 linearize_cond_branch(ep, post_condition, loop_top, loop_end);
1064 set_activeblock(ep, loop_end);
1065 break;
1068 default:
1069 break;
1071 return VOID;
1074 void mark_bb_reachable(struct basic_block *bb)
1076 struct basic_block *child;
1077 struct terminator_iterator term;
1078 struct basic_block_list *bbstack = NULL;
1080 if (!bb || bb->flags & BB_REACHABLE)
1081 return;
1083 add_bb(&bbstack, bb);
1084 while (bbstack) {
1085 bb = delete_last_basic_block(&bbstack);
1086 if (bb->flags & BB_REACHABLE)
1087 continue;
1088 bb->flags |= BB_REACHABLE;
1089 init_terminator_iterator(last_instruction(bb->insns), &term);
1090 while ((child=next_terminator_bb(&term)) != NULL) {
1091 if (!(child->flags & BB_REACHABLE))
1092 add_bb(&bbstack, child);
1097 void remove_unreachable_bbs(struct entrypoint *ep)
1099 struct basic_block *bb, *child;
1100 struct terminator_iterator term;
1102 FOR_EACH_PTR(ep->bbs, bb) {
1103 bb->flags &= ~BB_REACHABLE;
1104 } END_FOR_EACH_PTR(bb);
1106 mark_bb_reachable(ep->entry);
1107 FOR_EACH_PTR(ep->bbs, bb) {
1108 if (bb->flags & BB_REACHABLE)
1109 continue;
1110 init_terminator_iterator(last_instruction(bb->insns), &term);
1111 while ((child=next_terminator_bb(&term)) != NULL)
1112 replace_basic_block_list(child->parents, bb, NULL);
1113 DELETE_CURRENT_PTR(bb);
1114 }END_FOR_EACH_PTR(bb);
1117 void pack_basic_blocks(struct entrypoint *ep)
1119 struct basic_block *bb;
1121 remove_unreachable_bbs(ep);
1122 FOR_EACH_PTR(ep->bbs, bb) {
1123 struct terminator_iterator term;
1124 struct instruction *jmp;
1125 struct basic_block *target, *sibling, *parent;
1126 int parents;
1128 if (!is_branch_goto(jmp=last_instruction(bb->insns)))
1129 continue;
1131 target = jmp->bb_true ? jmp->bb_true : jmp->bb_false;
1132 if (target == bb)
1133 continue;
1134 parents = bb_list_size(target->parents);
1135 if (target == ep->entry)
1136 parents++;
1137 if (parents != 1 && jmp != first_instruction(bb->insns))
1138 continue;
1140 /* Transfer the parents' terminator to target directly. */
1141 replace_basic_block_list(target->parents, bb, NULL);
1142 FOR_EACH_PTR(bb->parents, parent) {
1143 init_terminator_iterator(last_instruction(parent->insns), &term);
1144 while ((sibling=next_terminator_bb(&term)) != NULL) {
1145 if (sibling == bb) {
1146 replace_terminator_bb(&term, target);
1147 add_bb(&target->parents, parent);
1150 } END_FOR_EACH_PTR(parent);
1152 /* Move the instructions to the target block. */
1153 delete_last_instruction(&bb->insns);
1154 if (bb->insns) {
1155 concat_instruction_list(target->insns, &bb->insns);
1156 free_instruction_list(&target->insns);
1157 target->insns = bb->insns;
1159 if (bb == ep->entry)
1160 ep->entry = target;
1161 DELETE_CURRENT_PTR(bb);
1162 }END_FOR_EACH_PTR(bb);
1165 struct entrypoint *linearize_symbol(struct symbol *sym)
1167 struct symbol *base_type;
1168 struct entrypoint *ret_ep = NULL;
1170 if (!sym)
1171 return NULL;
1172 base_type = sym->ctype.base_type;
1173 if (!base_type)
1174 return NULL;
1175 if (base_type->type == SYM_FN) {
1176 if (base_type->stmt) {
1177 struct entrypoint *ep = alloc_entrypoint();
1178 struct basic_block *bb = alloc_basic_block();
1179 pseudo_t result;
1181 ep->name = sym;
1182 ep->entry = bb;
1183 bb->flags |= BB_REACHABLE;
1184 set_activeblock(ep, bb);
1185 concat_symbol_list(base_type->arguments, &ep->syms);
1186 result = linearize_statement(ep, base_type->stmt);
1187 if (bb_reachable(ep->active) && !bb_terminated(ep->active)) {
1188 struct symbol *ret_type = base_type->ctype.base_type;
1189 struct instruction *insn = alloc_instruction(OP_RET, ret_type);
1190 struct position pos = base_type->stmt->pos;
1192 insn->src = result;
1193 add_one_insn(ep, pos, insn);
1195 pack_basic_blocks(ep);
1196 ret_ep = ep;
1200 return ret_ep;