Make sparse sources themselves be sparse-clean.
[smatch.git] / linearize.c
blobee5c143375cd09b86500273fc6054382afa41788
1 /*
2 * Linearize - walk the statement tree (but _not_ the expressions)
3 * to generate a linear version of it and the basic blocks.
5 * NOTE! We're not interested in the actual sub-expressions yet,
6 * even though they can generate conditional branches and
7 * subroutine calls. That's all "local" behaviour.
9 * Copyright (C) 2004 Linus Torvalds
10 * Copyright (C) 2004 Christopher Li
13 #include <string.h>
14 #include <stdarg.h>
15 #include <stdlib.h>
16 #include <stdio.h>
18 #include "parse.h"
19 #include "expression.h"
20 #include "linearize.h"
22 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
23 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
25 static pseudo_t add_binary_op(struct entrypoint *ep, struct expression *expr, int op, pseudo_t left, pseudo_t right);
26 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val);
27 static pseudo_t add_const_value(struct entrypoint *ep, struct position pos, struct symbol *ctype, int val);
28 static pseudo_t add_load(struct entrypoint *ep, struct expression *expr, pseudo_t addr);
31 struct pseudo void_pseudo = {};
33 static struct instruction *alloc_instruction(int opcode, struct symbol *type)
35 struct instruction * insn = __alloc_instruction(0);
36 insn->type = type;
37 insn->opcode = opcode;
38 return insn;
41 static struct entrypoint *alloc_entrypoint(void)
43 return __alloc_entrypoint(0);
46 static struct basic_block *alloc_basic_block(void)
48 return __alloc_basic_block(0);
51 static struct multijmp* alloc_multijmp(struct basic_block *target, int begin, int end)
53 struct multijmp *multijmp = __alloc_multijmp(0);
54 multijmp->target = target;
55 multijmp->begin = begin;
56 multijmp->end = end;
57 return multijmp;
60 static struct phi* alloc_phi(struct basic_block *source, pseudo_t pseudo)
62 struct phi *phi = __alloc_phi(0);
63 phi->source = source;
64 phi->pseudo = pseudo;
65 return phi;
68 static void show_instruction(struct instruction *insn)
70 int op = insn->opcode;
72 switch (op) {
73 case OP_BADOP:
74 printf("\tAIEEE! (%d %d)\n", insn->target->nr, insn->src->nr);
75 break;
76 case OP_RET:
77 if (insn->type && insn->type != &void_ctype)
78 printf("\tret %%r%d\n", insn->src->nr);
79 else
80 printf("\tret\n");
81 break;
82 case OP_BR:
83 if (insn->bb_true && insn->bb_false) {
84 printf("\tbr\t%%r%d, .L%p, .L%p\n", insn->cond->nr, insn->bb_true, insn->bb_false);
85 break;
87 printf("\tbr\t.L%p\n", insn->bb_true ? insn->bb_true : insn->bb_false);
88 break;
90 case OP_SETVAL: {
91 struct expression *expr = insn->val;
92 switch (expr->type) {
93 case EXPR_VALUE:
94 printf("\t%%r%d <- %lld\n",
95 insn->target->nr, expr->value);
96 break;
97 case EXPR_STRING:
98 printf("\t%%r%d <- %s\n",
99 insn->target->nr, show_string(expr->string));
100 break;
101 case EXPR_SYMBOL:
102 printf("\t%%r%d <- %s\n",
103 insn->target->nr, show_ident(expr->symbol->ident));
104 break;
105 default:
106 printf("\t SETVAL ?? ");
108 break;
110 case OP_SWITCH: {
111 struct multijmp *jmp;
112 printf("\tswitch %%r%d", insn->cond->nr);
113 FOR_EACH_PTR(insn->multijmp_list, jmp) {
114 if (jmp->begin == jmp->end)
115 printf(", %d -> .L%p", jmp->begin, jmp->target);
116 else if (jmp->begin < jmp->end)
117 printf(", %d ... %d -> .L%p", jmp->begin, jmp->end, jmp->target);
118 else
119 printf(", default -> .L%p\n", jmp->target);
120 } END_FOR_EACH_PTR;
121 printf("\n");
122 break;
125 case OP_PHI: {
126 struct phi *phi;
127 char *s = " ";
128 printf("\t%%r%d <- phi", insn->target->nr);
129 FOR_EACH_PTR(insn->phi_list, phi) {
130 printf("%s(%%r%d, .L%p)", s, phi->pseudo->nr, phi->source);
131 s = ", ";
132 } END_FOR_EACH_PTR;
133 printf("\n");
134 break;
136 case OP_LOAD:
137 printf("\tload %%r%d <- [%%r%d]\n", insn->target->nr, insn->src->nr);
138 break;
139 case OP_STORE:
140 printf("\tstore %%r%d -> [%%r%d]\n", insn->target->nr, insn->src->nr);
141 break;
142 case OP_CALL: {
143 struct pseudo *arg;
144 printf("\t%%r%d <- CALL %%r%d", insn->target->nr, insn->func->nr);
145 FOR_EACH_PTR(insn->arguments, arg) {
146 printf(", %%r%d", arg->nr);
147 } END_FOR_EACH_PTR;
148 printf("\n");
149 break;
151 case OP_CAST:
152 printf("\t%%r%d <- CAST(%d->%d) %%r%d\n",
153 insn->target->nr,
154 insn->orig_type->bit_size, insn->type->bit_size,
155 insn->src->nr);
156 break;
157 case OP_BINARY ... OP_BINARY_END:
158 case OP_LOGICAL ... OP_LOGICAL_END: {
159 static const char *opname[] = {
160 [OP_ADD - OP_BINARY] = "add", [OP_SUB - OP_BINARY] = "sub",
161 [OP_MUL - OP_BINARY] = "mul", [OP_DIV - OP_BINARY] = "div",
162 [OP_MOD - OP_BINARY] = "mod", [OP_AND - OP_BINARY] = "and",
163 [OP_OR - OP_BINARY] = "or", [OP_XOR - OP_BINARY] = "xor",
164 [OP_SHL - OP_BINARY] = "shl", [OP_SHR - OP_BINARY] = "shr",
166 printf("\t%%r%d <- %s %%r%d, %%r%d\n",
167 insn->target->nr,
168 opname[op - OP_BINARY], insn->src1->nr, insn->src2->nr);
169 break;
172 case OP_BINCMP ... OP_BINCMP_END: {
173 static const char *opname[] = {
174 [OP_SET_EQ - OP_BINCMP] = "seteq",
175 [OP_SET_NE - OP_BINCMP] = "setne",
176 [OP_SET_LE - OP_BINCMP] = "setle",
177 [OP_SET_GE - OP_BINCMP] = "setge",
178 [OP_SET_LT - OP_BINCMP] = "setlt",
179 [OP_SET_GT - OP_BINCMP] = "setgt",
181 printf("\t%%r%d <- %s %%r%d, %%r%d\n",
182 insn->target->nr,
183 opname[op - OP_BINCMP], insn->src1->nr, insn->src2->nr);
184 break;
187 case OP_NOT: case OP_NEG:
188 printf("\t%%r%d <- %s %%r%d\n",
189 insn->target->nr,
190 op == OP_NOT ? "not" : "neg", insn->src1->nr);
191 break;
192 default:
193 printf("\top %d ???\n", op);
197 static void show_bb(struct basic_block *bb)
199 struct instruction *insn;
201 printf("bb: %p\n", bb);
202 if (bb->parents) {
203 struct basic_block *from;
204 FOR_EACH_PTR(bb->parents, from) {
205 printf(" **from %p**\n", from);
206 } END_FOR_EACH_PTR;
208 FOR_EACH_PTR(bb->insns, insn) {
209 show_instruction(insn);
210 } END_FOR_EACH_PTR;
211 if (!bb_terminated(bb))
212 printf("\tEND\n");
213 printf("\n");
216 void show_entry(struct entrypoint *ep)
218 struct symbol *sym;
219 struct basic_block *bb;
221 printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
223 FOR_EACH_PTR(ep->syms, sym) {
224 printf(" sym: %p %s\n", sym, show_ident(sym->ident));
225 } END_FOR_EACH_PTR;
227 printf("\n");
229 FOR_EACH_PTR(ep->bbs, bb) {
230 show_bb(bb);
231 } END_FOR_EACH_PTR;
233 printf("\n");
236 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos)
238 if (label->bb_target)
239 warn(pos, "label already bound\n");
240 label->bb_target = bb;
243 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label)
245 struct basic_block *bb = label->bb_target;
247 if (!bb) {
248 label->bb_target = bb = alloc_basic_block();
249 bb->flags |= BB_REACHABLE;
251 return bb;
254 static void add_goto(struct entrypoint *ep, struct basic_block *dst)
256 struct basic_block *src = ep->active;
257 if (bb_reachable(src)) {
258 struct instruction *br = alloc_instruction(OP_BR, NULL);
259 br->bb_true = dst;
260 add_bb(&dst->parents, src);
261 add_instruction(&src->insns, br);
262 ep->active = NULL;
266 static void add_one_insn(struct entrypoint *ep, struct position pos, struct instruction *insn)
268 struct basic_block *bb = ep->active;
270 if (bb_reachable(bb))
271 add_instruction(&bb->insns, insn);
274 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb)
276 if (!bb_terminated(ep->active))
277 add_goto(ep, bb);
279 ep->active = bb;
280 if (bb_reachable(bb))
281 add_bb(&ep->bbs, bb);
284 static void add_branch(struct entrypoint *ep, struct expression *expr, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false)
286 struct basic_block *bb = ep->active;
287 struct instruction *br;
289 if (bb_reachable(bb)) {
290 br = alloc_instruction(OP_BR, expr->ctype);
291 br->cond = cond;
292 br->bb_true = bb_true;
293 br->bb_false = bb_false;
294 add_bb(&bb_true->parents, bb);
295 add_bb(&bb_false->parents, bb);
296 add_one_insn(ep, expr->pos, br);
300 /* Dummy pseudo allocator */
301 static pseudo_t alloc_pseudo(void)
303 static int nr = 0;
304 struct pseudo * pseudo = __alloc_pseudo(0);
305 pseudo->nr = ++nr;
306 return pseudo;
310 * FIXME! Not all accesses are memory loads. We should
311 * check what kind of symbol is behind the dereference.
313 static pseudo_t linearize_address_gen(struct entrypoint *ep, struct expression *expr)
315 if (expr->type == EXPR_PREOP)
316 return linearize_expression(ep, expr->unop);
317 if (expr->type == EXPR_BITFIELD)
318 return linearize_expression(ep, expr->address);
319 warn(expr->pos, "generating address of non-lvalue");
320 return VOID;
323 static void linearize_store_gen(struct entrypoint *ep, pseudo_t value, struct expression *expr, pseudo_t addr)
325 struct instruction *store = alloc_instruction(OP_STORE, expr->ctype);
327 if (expr->type == EXPR_BITFIELD) {
328 unsigned long mask = ((1<<expr->nrbits)-1) << expr->bitpos;
329 pseudo_t andmask, ormask, shift, orig;
330 if (expr->bitpos) {
331 shift = add_const_value(ep, expr->pos, &uint_ctype, expr->bitpos);
332 value = add_binary_op(ep, expr, OP_SHL, value, shift);
334 orig = add_load(ep, expr, addr);
335 andmask = add_const_value(ep, expr->pos, &uint_ctype, ~mask);
336 value = add_binary_op(ep, expr, OP_AND, orig, andmask);
337 ormask = add_const_value(ep, expr->pos, &uint_ctype, mask);
338 value = add_binary_op(ep, expr, OP_OR, orig, ormask);
341 store->target = value;
342 store->src = addr;
343 add_one_insn(ep, expr->pos, store);
346 static pseudo_t add_binary_op(struct entrypoint *ep, struct expression *expr, int op, pseudo_t left, pseudo_t right)
348 struct instruction *insn = alloc_instruction(op, expr->ctype);
349 pseudo_t target = alloc_pseudo();
350 insn->target = target;
351 insn->src1 = left;
352 insn->src2 = right;
353 add_one_insn(ep, expr->pos, insn);
354 return target;
357 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val)
359 struct instruction *insn = alloc_instruction(OP_SETVAL, ctype);
360 pseudo_t target = alloc_pseudo();
361 insn->target = target;
362 insn->val = val;
363 add_one_insn(ep, val->pos, insn);
364 return target;
367 static pseudo_t add_const_value(struct entrypoint *ep, struct position pos, struct symbol *ctype, int val)
369 struct expression *expr = alloc_const_expression(pos, val);
370 return add_setval(ep, ctype, expr);
373 static pseudo_t add_load(struct entrypoint *ep, struct expression *expr, pseudo_t addr)
375 pseudo_t new = alloc_pseudo();
376 struct instruction *insn = alloc_instruction(OP_LOAD, expr->ctype);
378 insn->target = new;
379 insn->src = addr;
380 add_one_insn(ep, expr->pos, insn);
381 return new;
384 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct expression *expr, pseudo_t addr)
386 pseudo_t new = add_load(ep, expr, addr);
387 if (expr->type == EXPR_PREOP)
388 return new;
390 if (expr->type == EXPR_BITFIELD) {
391 pseudo_t mask;
392 if (expr->bitpos) {
393 pseudo_t shift = add_const_value(ep, expr->pos, &uint_ctype, expr->bitpos);
394 new = add_binary_op(ep, expr, OP_SHR, new, shift);
396 mask = add_const_value(ep, expr->pos, &uint_ctype, (1<<expr->nrbits)-1);
397 return add_binary_op(ep, expr, OP_AND, new, mask);
400 warn(expr->pos, "loading unknown expression");
401 return new;
404 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
406 pseudo_t addr = linearize_address_gen(ep, expr);
407 return linearize_load_gen(ep, expr, addr);
410 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
412 pseudo_t addr = linearize_address_gen(ep, expr->unop);
413 pseudo_t old, new, one;
414 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB;
416 old = linearize_load_gen(ep, expr->unop, addr);
417 one = add_const_value(ep, expr->pos, expr->ctype, 1);
418 new = add_binary_op(ep, expr, op, old, one);
419 linearize_store_gen(ep, new, expr->unop, addr);
420 return postop ? old : new;
423 static pseudo_t add_uniop(struct entrypoint *ep, struct expression *expr, int op, pseudo_t src)
425 pseudo_t new = alloc_pseudo();
426 struct instruction *insn = alloc_instruction(op, expr->ctype);
427 insn->target = new;
428 insn->src1 = src;
429 add_one_insn(ep, expr->pos, insn);
430 return new;
433 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
435 pseudo_t pre = linearize_expression(ep, expr->unop);
436 switch (expr->op) {
437 case '+':
438 return pre;
439 case '!': {
440 pseudo_t zero = add_const_value(ep, expr->pos, expr->ctype, 0);
441 return add_binary_op(ep, expr, OP_SET_EQ, pre, zero);
443 case '~':
444 return add_uniop(ep, expr, OP_NOT, pre);
445 case '-':
446 return add_uniop(ep, expr, OP_NEG, pre);
448 return VOID;
451 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
454 * '*' is an lvalue access, and is fundamentally different
455 * from an arithmetic operation. Maybe it should have an
456 * expression type of its own..
458 if (expr->op == '*')
459 return linearize_access(ep, expr);
460 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
461 return linearize_inc_dec(ep, expr, 0);
462 return linearize_regular_preop(ep, expr);
465 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr)
467 return linearize_inc_dec(ep, expr, 1);
470 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
472 struct expression *target = expr->left;
473 pseudo_t value, address;
475 value = linearize_expression(ep, expr->right);
476 address = linearize_address_gen(ep, target);
477 if (expr->op != '=') {
478 static const int opcode[] = {
479 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD,
480 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB,
481 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MUL,
482 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIV,
483 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MOD,
484 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL,
485 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_SHR,
486 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND,
487 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR,
488 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR
490 pseudo_t left = linearize_load_gen(ep, target, address);
491 value = add_binary_op(ep, expr, opcode[expr->op - SPECIAL_BASE], left, value);
493 linearize_store_gen(ep, value, target, address);
494 return value;
497 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr)
499 struct expression *arg, *fn;
500 struct instruction *insn = alloc_instruction(OP_CALL, expr->ctype);
501 pseudo_t retval;
503 if (!expr->ctype) {
504 warn(expr->pos, "\tcall with no type!");
505 return VOID;
508 FOR_EACH_PTR(expr->args, arg) {
509 pseudo_t new = linearize_expression(ep, arg);
510 add_pseudo(&insn->arguments, new);
511 } END_FOR_EACH_PTR;
513 fn = expr->fn;
514 if (fn->type == EXPR_PREOP) {
515 if (fn->unop->type == EXPR_SYMBOL) {
516 struct symbol *sym = fn->unop->symbol;
517 if (sym->ctype.base_type->type == SYM_FN)
518 fn = fn->unop;
521 insn->func = linearize_expression(ep, fn);
522 insn->target = retval = alloc_pseudo();
523 add_one_insn(ep, expr->pos, insn);
525 return retval;
528 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr)
530 pseudo_t src1, src2;
531 static const int opcode[] = {
532 ['+'] = OP_ADD, ['-'] = OP_SUB,
533 ['*'] = OP_MUL, ['/'] = OP_DIV,
534 ['%'] = OP_MOD, ['&'] = OP_AND,
535 ['|'] = OP_OR, ['^'] = OP_XOR,
536 [SPECIAL_LEFTSHIFT] = OP_SHL,
537 [SPECIAL_RIGHTSHIFT] = OP_SHR,
540 src1 = linearize_expression(ep, expr->left);
541 src2 = linearize_expression(ep, expr->right);
542 return add_binary_op(ep, expr, opcode[expr->op], src1, src2);
545 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
547 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
549 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr,
550 struct expression *cond, struct expression *expr_true,
551 struct expression *expr_false)
553 pseudo_t src1, src2, target;
554 struct basic_block *bb_true = alloc_basic_block();
555 struct basic_block *bb_false = alloc_basic_block();
556 struct basic_block *merge = alloc_basic_block();
558 if (expr_true) {
559 linearize_cond_branch(ep, cond, bb_true, bb_false);
561 set_activeblock(ep, bb_true);
562 src1 = linearize_expression(ep, expr_true);
563 bb_true = ep->active;
564 add_goto(ep, merge);
565 } else {
566 src1 = linearize_expression(ep, cond);
567 add_branch(ep, expr, src1, merge, bb_false);
570 set_activeblock(ep, bb_false);
571 src2 = linearize_expression(ep, expr_false);
572 bb_false = ep->active;
573 set_activeblock(ep, merge);
575 if (src1 != VOID && src2 != VOID) {
576 struct instruction *phi_node = alloc_instruction(OP_PHI, expr->ctype);
577 add_phi(&phi_node->phi_list, alloc_phi(bb_true, src1));
578 add_phi(&phi_node->phi_list, alloc_phi(bb_false, src2));
579 phi_node->target = target = alloc_pseudo();
580 add_one_insn(ep, expr->pos, phi_node);
581 set_activeblock(ep, alloc_basic_block());
582 return target;
585 return src1 != VOID ? src1 : src2;
588 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr)
590 struct expression *shortcut;
592 shortcut = alloc_const_expression(expr->pos, expr->op == SPECIAL_LOGICAL_OR);
593 shortcut->ctype = expr->ctype;
594 return linearize_conditional(ep, expr, expr->left, shortcut, expr->right);
597 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr)
599 static const int cmpop[] = {
600 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT,
601 [SPECIAL_EQUAL] = OP_SET_EQ,
602 [SPECIAL_NOTEQUAL] = OP_SET_NE,
603 [SPECIAL_GTE] = OP_SET_GE,
604 [SPECIAL_LTE] = OP_SET_LE,
607 pseudo_t src1 = linearize_expression(ep, expr->left);
608 pseudo_t src2 = linearize_expression(ep, expr->right);
609 return add_binary_op(ep, expr, cmpop[expr->op], src1, src2);
613 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
615 pseudo_t cond;
617 if (!expr || !bb_reachable(ep->active))
618 return VOID;
620 switch (expr->type) {
622 case EXPR_STRING:
623 case EXPR_VALUE:
624 add_goto(ep, expr->value ? bb_true : bb_false);
625 return VOID;
627 case EXPR_LOGICAL:
628 linearize_logical_branch(ep, expr, bb_true, bb_false);
629 return VOID;
631 case EXPR_COMPARE:
632 cond = linearize_compare(ep, expr);
633 add_branch(ep, expr, cond, bb_true, bb_false);
634 break;
636 case EXPR_PREOP:
637 if (expr->op == '!')
638 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true);
639 /* fall through */
640 default: {
641 cond = linearize_expression(ep, expr);
642 add_branch(ep, expr, cond, bb_true, bb_false);
644 return VOID;
647 return VOID;
652 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
654 struct basic_block *next = alloc_basic_block();
656 if (expr->op == SPECIAL_LOGICAL_OR)
657 linearize_cond_branch(ep, expr->left, bb_true, next);
658 else
659 linearize_cond_branch(ep, expr->left, next, bb_false);
660 set_activeblock(ep, next);
661 linearize_cond_branch(ep, expr->right, bb_true, bb_false);
662 return VOID;
665 pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr)
667 pseudo_t src, result;
668 struct instruction *insn;
670 src = linearize_expression(ep, expr->cast_expression);
671 if (src == VOID)
672 return VOID;
673 insn = alloc_instruction(OP_CAST, expr->ctype);
674 result = alloc_pseudo();
675 insn->target = result;
676 insn->src = src;
677 insn->orig_type = expr->cast_expression->ctype;
678 add_one_insn(ep, expr->pos, insn);
679 return result;
682 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
684 if (!expr)
685 return VOID;
687 switch (expr->type) {
688 case EXPR_VALUE: case EXPR_STRING: case EXPR_SYMBOL:
689 return add_setval(ep, expr->ctype, expr);
691 case EXPR_STATEMENT:
692 return linearize_statement(ep, expr->statement);
694 case EXPR_CALL:
695 return linearize_call_expression(ep, expr);
697 case EXPR_BINOP:
698 return linearize_binop(ep, expr);
700 case EXPR_LOGICAL:
701 return linearize_logical(ep, expr);
703 case EXPR_COMPARE:
704 return linearize_compare(ep, expr);
706 case EXPR_CONDITIONAL:
707 return linearize_conditional(ep, expr, expr->conditional,
708 expr->cond_true, expr->cond_false);
710 case EXPR_COMMA: {
711 linearize_expression(ep, expr->left);
712 return linearize_expression(ep, expr->right);
715 case EXPR_ASSIGNMENT:
716 return linearize_assignment(ep, expr);
718 case EXPR_PREOP:
719 return linearize_preop(ep, expr);
721 case EXPR_POSTOP:
722 return linearize_postop(ep, expr);
724 case EXPR_CAST:
725 return linearize_cast(ep, expr);
727 case EXPR_BITFIELD:
728 return linearize_access(ep, expr);
730 default:
731 die("Unknown expression (%d %d)", expr->type, expr->op);
732 return VOID;
734 return VOID;
737 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
739 if (!stmt)
740 return VOID;
742 switch (stmt->type) {
743 case STMT_NONE:
744 break;
746 case STMT_EXPRESSION:
747 return linearize_expression(ep, stmt->expression);
749 case STMT_ASM:
750 /* FIXME */
751 break;
753 case STMT_RETURN: {
754 struct expression *expr = stmt->expression;
755 struct basic_block *bb_return = stmt->ret_target->bb_target;
756 struct basic_block *active;
757 pseudo_t src = linearize_expression(ep, expr);
758 active = ep->active;
759 add_goto(ep, bb_return);
760 if (src != &void_pseudo) {
761 struct instruction *phi_node = first_instruction(bb_return->insns);
762 if (!phi_node) {
763 phi_node = alloc_instruction(OP_PHI, expr->ctype);
764 phi_node->target = alloc_pseudo();
765 add_instruction(&bb_return->insns, phi_node);
767 add_phi(&phi_node->phi_list, alloc_phi(active, src));
769 return VOID;
772 case STMT_CASE: {
773 struct basic_block *bb = get_bound_block(ep, stmt->case_label);
774 set_activeblock(ep, bb);
775 linearize_statement(ep, stmt->case_statement);
776 break;
779 case STMT_LABEL: {
780 struct symbol *label = stmt->label_identifier;
781 struct basic_block *bb;
783 if (label->used) {
784 bb = get_bound_block(ep, stmt->label_identifier);
785 set_activeblock(ep, bb);
786 linearize_statement(ep, stmt->label_statement);
788 break;
791 case STMT_GOTO: {
792 add_goto(ep, get_bound_block(ep, stmt->goto_label));
793 break;
796 case STMT_COMPOUND: {
797 pseudo_t pseudo;
798 struct statement *s;
799 struct symbol *ret = stmt->ret;
800 concat_symbol_list(stmt->syms, &ep->syms);
801 if (ret)
802 ret->bb_target = alloc_basic_block();
803 FOR_EACH_PTR(stmt->stmts, s) {
804 pseudo = linearize_statement(ep, s);
805 } END_FOR_EACH_PTR;
806 if (ret) {
807 struct basic_block *bb = ret->bb_target;
808 struct instruction *phi = first_instruction(bb->insns);
810 if (!phi)
811 return pseudo;
813 set_activeblock(ep, bb);
814 if (phi_list_size(phi->phi_list)==1) {
815 pseudo = first_phi(phi->phi_list)->pseudo;
816 delete_last_instruction(&bb->insns);
817 return pseudo;
819 return phi->target;
821 return pseudo;
825 * This could take 'likely/unlikely' into account, and
826 * switch the arms around appropriately..
828 case STMT_IF: {
829 struct basic_block *bb_true, *bb_false, *endif;
830 struct expression *cond = stmt->if_conditional;
832 bb_true = alloc_basic_block();
833 bb_false = endif = alloc_basic_block();
835 linearize_cond_branch(ep, cond, bb_true, bb_false);
837 set_activeblock(ep, bb_true);
838 linearize_statement(ep, stmt->if_true);
840 if (stmt->if_false) {
841 endif = alloc_basic_block();
842 add_goto(ep, endif);
843 set_activeblock(ep, bb_false);
844 linearize_statement(ep, stmt->if_false);
846 set_activeblock(ep, endif);
847 break;
850 case STMT_SWITCH: {
851 struct symbol *sym;
852 struct instruction *switch_ins;
853 struct basic_block *switch_end = alloc_basic_block();
854 pseudo_t pseudo;
856 pseudo = linearize_expression(ep, stmt->switch_expression);
857 switch_ins = alloc_instruction(OP_SWITCH, NULL);
858 switch_ins->cond = pseudo;
859 add_one_insn(ep, stmt->pos, switch_ins);
861 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
862 struct statement *case_stmt = sym->stmt;
863 struct basic_block *bb_case = get_bound_block(ep, sym);
864 struct multijmp *jmp;
865 int begin, end;
866 if (!case_stmt->case_expression) {
867 jmp = alloc_multijmp(bb_case, 1, 0);
868 } else {
869 if (case_stmt->case_expression)
870 begin = end = case_stmt->case_expression->value;
871 if (case_stmt->case_to)
872 end = case_stmt->case_to->value;
873 if (begin > end)
874 jmp = alloc_multijmp(bb_case, end, begin);
875 else
876 jmp = alloc_multijmp(bb_case, begin, end);
879 add_multijmp(&switch_ins->multijmp_list, jmp);
880 add_bb(&bb_case->parents, ep->active);
881 } END_FOR_EACH_PTR;
883 bind_label(stmt->switch_break, switch_end, stmt->pos);
885 /* And linearize the actual statement */
886 linearize_statement(ep, stmt->switch_statement);
887 set_activeblock(ep, switch_end);
889 break;
892 case STMT_ITERATOR: {
893 struct statement *pre_statement = stmt->iterator_pre_statement;
894 struct expression *pre_condition = stmt->iterator_pre_condition;
895 struct statement *statement = stmt->iterator_statement;
896 struct statement *post_statement = stmt->iterator_post_statement;
897 struct expression *post_condition = stmt->iterator_post_condition;
898 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end;
900 concat_symbol_list(stmt->iterator_syms, &ep->syms);
901 linearize_statement(ep, pre_statement);
903 loop_body = loop_top = alloc_basic_block();
904 loop_continue = alloc_basic_block();
905 loop_end = alloc_basic_block();
907 if (!post_statement && (pre_condition == post_condition)) {
909 * If it is a while loop, optimize away the post_condition.
911 post_condition = NULL;
912 loop_body = loop_continue;
913 loop_continue = loop_top;
914 loop_top->flags |= BB_REACHABLE;
915 set_activeblock(ep, loop_top);
918 loop_top->flags |= BB_REACHABLE;
919 if (pre_condition)
920 linearize_cond_branch(ep, pre_condition, loop_body, loop_end);
922 bind_label(stmt->iterator_continue, loop_continue, stmt->pos);
923 bind_label(stmt->iterator_break, loop_end, stmt->pos);
925 set_activeblock(ep, loop_body);
926 linearize_statement(ep, statement);
927 add_goto(ep, loop_continue);
929 if (post_condition) {
930 set_activeblock(ep, loop_continue);
931 linearize_statement(ep, post_statement);
932 linearize_cond_branch(ep, post_condition, loop_top, loop_end);
935 set_activeblock(ep, loop_end);
936 break;
939 default:
940 break;
942 return VOID;
945 void mark_bb_reachable(struct basic_block *bb)
947 struct basic_block *child;
948 struct terminator_iterator term;
949 struct basic_block_list *bbstack = NULL;
951 if (!bb || bb->flags & BB_REACHABLE)
952 return;
954 add_bb(&bbstack, bb);
955 while (bbstack) {
956 bb = delete_last_basic_block(&bbstack);
957 if (bb->flags & BB_REACHABLE)
958 continue;
959 bb->flags |= BB_REACHABLE;
960 init_terminator_iterator(last_instruction(bb->insns), &term);
961 while ((child=next_terminator_bb(&term)) != NULL) {
962 if (!(child->flags & BB_REACHABLE))
963 add_bb(&bbstack, child);
968 void remove_unreachable_bbs(struct basic_block_list **bblist)
970 struct basic_block *bb, *child;
971 struct list_iterator iterator;
972 struct terminator_iterator term;
974 init_iterator((struct ptr_list **) bblist, &iterator, 0);
975 while((bb=next_basic_block(&iterator)) != NULL)
976 bb->flags &= ~BB_REACHABLE;
978 init_iterator((struct ptr_list **) bblist, &iterator, 0);
979 mark_bb_reachable(next_basic_block(&iterator));
980 while((bb=next_basic_block(&iterator)) != NULL) {
981 if (bb->flags & BB_REACHABLE)
982 continue;
983 init_terminator_iterator(last_instruction(bb->insns), &term);
984 while ((child=next_terminator_bb(&term)) != NULL)
985 replace_basic_block_list(&child->parents, bb, NULL);
986 delete_iterator(&iterator);
990 void pack_basic_blocks(struct basic_block_list **bblist)
992 struct basic_block *bb;
993 struct list_iterator iterator;
995 remove_unreachable_bbs(bblist);
996 init_bb_iterator(bblist, &iterator, 0);
997 while((bb=next_basic_block(&iterator)) != NULL) {
998 struct list_iterator it_parents;
999 struct terminator_iterator term;
1000 struct instruction *jmp;
1001 struct basic_block *target, *sibling, *parent;
1003 if (!is_branch_goto(jmp=last_instruction(bb->insns)))
1004 continue;
1006 target = jmp->bb_true ? jmp->bb_true : jmp->bb_false;
1007 if (target == bb)
1008 continue;
1009 if (bb_list_size(target->parents) != 1 && jmp != first_instruction(bb->insns))
1010 continue;
1012 /* Transfer the parents' terminator to target directly. */
1013 replace_basic_block_list(&target->parents, bb, NULL);
1014 init_bb_iterator(&bb->parents, &it_parents, 0);
1015 while((parent=next_basic_block(&it_parents)) != NULL) {
1016 init_terminator_iterator(last_instruction(parent->insns), &term);
1017 while ((sibling=next_terminator_bb(&term)) != NULL) {
1018 if (sibling == bb) {
1019 replace_terminator_bb(&term, target);
1020 add_bb(&target->parents, parent);
1025 /* Move the instructions to the target block. */
1026 delete_last_instruction(&bb->insns);
1027 if (bb->insns) {
1028 concat_instruction_list(target->insns, &bb->insns);
1029 free_instruction_list(&target->insns);
1030 target->insns = bb->insns;
1032 delete_iterator(&iterator);
1036 struct entrypoint *linearize_symbol(struct symbol *sym)
1038 struct symbol *base_type;
1039 struct entrypoint *ret_ep = NULL;
1041 if (!sym)
1042 return NULL;
1043 base_type = sym->ctype.base_type;
1044 if (!base_type)
1045 return NULL;
1046 if (base_type->type == SYM_FN) {
1047 if (base_type->stmt) {
1048 struct entrypoint *ep = alloc_entrypoint();
1049 struct basic_block *bb = alloc_basic_block();
1050 pseudo_t result;
1052 ep->name = sym;
1053 bb->flags |= BB_REACHABLE;
1054 set_activeblock(ep, bb);
1055 concat_symbol_list(base_type->arguments, &ep->syms);
1056 result = linearize_statement(ep, base_type->stmt);
1057 if (bb_reachable(ep->active) && !bb_terminated(ep->active)) {
1058 struct symbol *ret_type = base_type->ctype.base_type;
1059 struct instruction *insn = alloc_instruction(OP_RET, ret_type);
1060 struct position pos = base_type->stmt->pos;
1062 insn->src = result;
1063 add_one_insn(ep, pos, insn);
1065 pack_basic_blocks(&ep->bbs);
1066 ret_ep = ep;
1070 return ret_ep;