Show large integers as hex.
[smatch.git] / linearize.c
blob01403203787126a6961912327e3d8a07fd49fa73
1 /*
2 * Linearize - walk the statement tree (but _not_ the expressions)
3 * to generate a linear version of it and the basic blocks.
5 * NOTE! We're not interested in the actual sub-expressions yet,
6 * even though they can generate conditional branches and
7 * subroutine calls. That's all "local" behaviour.
9 * Copyright (C) 2004 Linus Torvalds
10 * Copyright (C) 2004 Christopher Li
13 #include <string.h>
14 #include <stdarg.h>
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <stddef.h>
19 #include "parse.h"
20 #include "expression.h"
21 #include "linearize.h"
23 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
24 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
26 static void add_setcc(struct entrypoint *ep, struct expression *expr, pseudo_t val);
27 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right);
28 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val);
30 struct access_data;
31 static pseudo_t add_load(struct entrypoint *ep, struct access_data *);
32 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *);
34 struct pseudo void_pseudo = {};
36 unsigned long bb_generation;
38 static struct instruction *alloc_instruction(int opcode, struct symbol *type)
40 struct instruction * insn = __alloc_instruction(0);
41 insn->type = type;
42 insn->opcode = opcode;
43 return insn;
46 static struct entrypoint *alloc_entrypoint(void)
48 bb_generation = 0;
49 return __alloc_entrypoint(0);
52 static struct basic_block *alloc_basic_block(struct position pos)
54 struct basic_block *bb = __alloc_basic_block(0);
55 bb->context = -1;
56 bb->pos = pos;
57 return bb;
60 static struct multijmp* alloc_multijmp(struct basic_block *target, int begin, int end)
62 struct multijmp *multijmp = __alloc_multijmp(0);
63 multijmp->target = target;
64 multijmp->begin = begin;
65 multijmp->end = end;
66 return multijmp;
69 static struct phi* alloc_phi(struct basic_block *source, pseudo_t pseudo)
71 struct phi *phi = __alloc_phi(0);
72 phi->source = source;
73 phi->pseudo = pseudo;
74 return phi;
77 static inline int regno(pseudo_t n)
79 int retval = -1;
80 if (n && n->type == PSEUDO_REG)
81 retval = n->nr;
82 return retval;
85 static const char *show_pseudo(pseudo_t pseudo)
87 static int n;
88 static char buffer[4][64];
89 char *buf;
91 if (!pseudo)
92 return "no pseudo";
93 if (pseudo == VOID)
94 return "VOID";
95 buf = buffer[3 & ++n];
96 switch(pseudo->type) {
97 case PSEUDO_SYM: {
98 struct symbol *sym = pseudo->sym;
99 struct expression *expr;
101 if (sym->bb_target) {
102 snprintf(buf, 64, ".L%p", sym->bb_target);
103 break;
105 if (sym->ident) {
106 snprintf(buf, 64, "%s:%p", show_ident(sym->ident), sym);
107 break;
109 expr = sym->initializer;
110 if (!expr) {
111 snprintf(buf, 64, "<anon sym: %d>", pseudo->nr);
112 break;
114 switch (expr->type) {
115 case EXPR_VALUE:
116 snprintf(buf, 64, "<symbol value: %lld>", expr->value);
117 break;
118 case EXPR_STRING:
119 return show_string(expr->string);
120 default:
121 snprintf(buf, 64, "<symbol expression: %d>", pseudo->nr);
122 break;
125 case PSEUDO_REG:
126 snprintf(buf, 64, "%%r%d", pseudo->nr);
127 break;
128 case PSEUDO_VAL: {
129 long long value = pseudo->value;
130 if (value > 1000 || value < -1000)
131 snprintf(buf, 64, "$%#llx", value);
132 else
133 snprintf(buf, 64, "$%lld", value);
134 break;
136 case PSEUDO_ARG:
137 snprintf(buf, 64, "%%arg%d", pseudo->nr);
138 break;
139 default:
140 snprintf(buf, 64, "<bad pseudo type %d>", pseudo->type);
142 return buf;
145 static void show_instruction(struct instruction *insn)
147 int op = insn->opcode;
149 switch (op) {
150 case OP_BADOP:
151 printf("\tAIEEE! (%s <- %s)\n", show_pseudo(insn->target), show_pseudo(insn->src));
152 break;
153 case OP_RET:
154 if (insn->type && insn->type != &void_ctype)
155 printf("\tret %s\n", show_pseudo(insn->src));
156 else
157 printf("\tret\n");
158 break;
159 case OP_BR:
160 if (insn->bb_true && insn->bb_false) {
161 printf("\tbr\t%s, .L%p, .L%p\n", show_pseudo(insn->cond), insn->bb_true, insn->bb_false);
162 break;
164 printf("\tbr\t.L%p\n", insn->bb_true ? insn->bb_true : insn->bb_false);
165 break;
167 case OP_SETVAL: {
168 struct expression *expr = insn->val;
169 struct symbol *sym = insn->symbol;
170 int target = regno(insn->target);
172 if (sym) {
173 if (sym->bb_target) {
174 printf("\t%%r%d <- .L%p\n", target, sym->bb_target);
175 break;
177 if (sym->ident) {
178 printf("\t%%r%d <- %s\n", target, show_ident(sym->ident));
179 break;
181 expr = sym->initializer;
182 if (!expr) {
183 printf("\t%%r%d <- %s\n", target, "anon symbol");
184 break;
188 if (!expr) {
189 printf("\t%%r%d <- %s\n", target, "<none>");
190 break;
193 switch (expr->type) {
194 case EXPR_VALUE:
195 printf("\t%%r%d <- %lld\n", target, expr->value);
196 break;
197 case EXPR_FVALUE:
198 printf("\t%%r%d <- %Lf\n", target, expr->fvalue);
199 break;
200 case EXPR_STRING:
201 printf("\t%%r%d <- %s\n", target, show_string(expr->string));
202 break;
203 case EXPR_SYMBOL:
204 printf("\t%%r%d <- %s\n", target, show_ident(expr->symbol->ident));
205 break;
206 case EXPR_LABEL:
207 printf("\t%%r%d <- .L%p\n", target, expr->symbol->bb_target);
208 break;
209 default:
210 printf("\t%%r%d <- SETVAL EXPR TYPE %d\n", target, expr->type);
212 break;
214 case OP_SWITCH: {
215 struct multijmp *jmp;
216 printf("\tswitch %s", show_pseudo(insn->cond));
217 FOR_EACH_PTR(insn->multijmp_list, jmp) {
218 if (jmp->begin == jmp->end)
219 printf(", %d -> .L%p", jmp->begin, jmp->target);
220 else if (jmp->begin < jmp->end)
221 printf(", %d ... %d -> .L%p", jmp->begin, jmp->end, jmp->target);
222 else
223 printf(", default -> .L%p\n", jmp->target);
224 } END_FOR_EACH_PTR(jmp);
225 printf("\n");
226 break;
228 case OP_COMPUTEDGOTO: {
229 struct multijmp *jmp;
230 printf("\tjmp *%s", show_pseudo(insn->target));
231 FOR_EACH_PTR(insn->multijmp_list, jmp) {
232 printf(", .L%p", jmp->target);
233 } END_FOR_EACH_PTR(jmp);
234 printf("\n");
235 break;
238 case OP_PHI: {
239 struct phi *phi;
240 const char *s = " ";
241 printf("\t%s <- phi", show_pseudo(insn->target));
242 FOR_EACH_PTR(insn->phi_list, phi) {
243 printf("%s(%s, .L%p)", s, show_pseudo(phi->pseudo), phi->source);
244 s = ", ";
245 } END_FOR_EACH_PTR(phi);
246 printf("\n");
247 break;
249 case OP_LOAD:
250 printf("\tload %s <- %d.%d.%d[%s]\n", show_pseudo(insn->target), insn->offset,
251 insn->type->bit_offset, insn->type->bit_size, show_pseudo(insn->src));
252 break;
253 case OP_STORE:
254 printf("\tstore %s -> %d.%d.%d[%s]\n", show_pseudo(insn->target), insn->offset,
255 insn->type->bit_offset, insn->type->bit_size, show_pseudo(insn->src));
256 break;
257 case OP_CALL: {
258 struct pseudo *arg;
259 printf("\t%s <- CALL %s", show_pseudo(insn->target), show_pseudo(insn->func));
260 FOR_EACH_PTR(insn->arguments, arg) {
261 printf(", %s", show_pseudo(arg));
262 } END_FOR_EACH_PTR(arg);
263 printf("\n");
264 break;
266 case OP_CAST:
267 printf("\t%%r%d <- CAST(%d->%d) %s\n",
268 regno(insn->target),
269 insn->orig_type->bit_size, insn->type->bit_size,
270 show_pseudo(insn->src));
271 break;
272 case OP_BINARY ... OP_BINARY_END: {
273 static const char *opname[] = {
274 [OP_ADD - OP_BINARY] = "add", [OP_SUB - OP_BINARY] = "sub",
275 [OP_MUL - OP_BINARY] = "mul", [OP_DIV - OP_BINARY] = "div",
276 [OP_MOD - OP_BINARY] = "mod", [OP_AND - OP_BINARY] = "and",
277 [OP_OR - OP_BINARY] = "or", [OP_XOR - OP_BINARY] = "xor",
278 [OP_SHL - OP_BINARY] = "shl", [OP_SHR - OP_BINARY] = "shr",
279 [OP_AND_BOOL - OP_BINARY] = "and-bool",
280 [OP_OR_BOOL - OP_BINARY] = "or-bool",
281 [OP_SEL - OP_BINARY] = "select",
283 printf("\t%%r%d <- %s %s, %s\n",
284 regno(insn->target),
285 opname[op - OP_BINARY], show_pseudo(insn->src1), show_pseudo(insn->src2));
286 break;
289 case OP_SLICE:
290 printf("\t%%r%d <- slice %s, %d, %d\n",
291 regno(insn->target),
292 show_pseudo(insn->base), insn->from, insn->len);
293 break;
295 case OP_BINCMP ... OP_BINCMP_END: {
296 static const char *opname[] = {
297 [OP_SET_EQ - OP_BINCMP] = "seteq",
298 [OP_SET_NE - OP_BINCMP] = "setne",
299 [OP_SET_LE - OP_BINCMP] = "setle",
300 [OP_SET_GE - OP_BINCMP] = "setge",
301 [OP_SET_LT - OP_BINCMP] = "setlt",
302 [OP_SET_GT - OP_BINCMP] = "setgt",
303 [OP_SET_BE - OP_BINCMP] = "setbe",
304 [OP_SET_AE - OP_BINCMP] = "setae",
305 [OP_SET_A - OP_BINCMP] = "seta",
306 [OP_SET_B - OP_BINCMP] = "setb",
308 printf("\t%%r%d <- %s %s, %s\n",
309 regno(insn->target),
310 opname[op - OP_BINCMP], show_pseudo(insn->src1), show_pseudo(insn->src2));
311 break;
314 case OP_MOV:
315 printf("\t%%r%d <- %s\n",
316 regno(insn->target), show_pseudo(insn->src));
317 break;
318 case OP_NOT: case OP_NEG:
319 printf("\t%%r%d <- %s %s\n",
320 regno(insn->target),
321 op == OP_NOT ? "not" : "neg", show_pseudo(insn->src1));
322 break;
323 case OP_SETCC:
324 printf("\tsetcc %s\n", show_pseudo(insn->src));
325 break;
326 case OP_CONTEXT:
327 printf("\tcontext %d\n", insn->increment);
328 break;
329 case OP_SNOP:
330 printf("\tnop (%s -> %d.%d.%d[%s])\n", show_pseudo(insn->target), insn->offset,
331 insn->type->bit_offset, insn->type->bit_size, show_pseudo(insn->src));
332 break;
333 case OP_LNOP:
334 printf("\tnop (%s <- %d.%d.%d[%s])\n", show_pseudo(insn->target), insn->offset,
335 insn->type->bit_offset, insn->type->bit_size, show_pseudo(insn->src));
336 break;
337 default:
338 printf("\top %d ???\n", op);
342 static void show_bb(struct basic_block *bb)
344 struct instruction *insn;
346 printf("bb: %p\n", bb);
347 printf(" %s:%d:%d\n", input_streams[bb->pos.stream].name, bb->pos.line,bb->pos.pos);
348 if (bb->parents) {
349 struct basic_block *from;
350 FOR_EACH_PTR(bb->parents, from) {
351 printf(" **from %p (%s:%d:%d)**\n", from,
352 input_streams[from->pos.stream].name, from->pos.line, from->pos.pos);
353 } END_FOR_EACH_PTR(from);
355 FOR_EACH_PTR(bb->insns, insn) {
356 show_instruction(insn);
357 } END_FOR_EACH_PTR(insn);
358 if (!bb_terminated(bb))
359 printf("\tEND\n");
360 printf("\n");
363 #define container(ptr, type, member) \
364 (type *)((void *)(ptr) - offsetof(type, member))
366 static void show_symbol_usage(pseudo_t pseudo)
368 if (pseudo) {
369 pseudo_t *pp;
370 FOR_EACH_PTR(pseudo->users, pp) {
371 struct instruction *insn = container(pp, struct instruction, src);
372 show_instruction(insn);
373 } END_FOR_EACH_PTR(pp);
377 void show_entry(struct entrypoint *ep)
379 struct symbol *sym;
380 struct basic_block *bb;
382 printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
384 FOR_EACH_PTR(ep->syms, sym) {
385 printf(" sym: %p %s\n", sym, show_ident(sym->ident));
386 if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE))
387 printf("\texternal visibility\n");
388 show_symbol_usage(sym->pseudo);
389 } END_FOR_EACH_PTR(sym);
391 printf("\n");
393 FOR_EACH_PTR(ep->bbs, bb) {
394 if (bb == ep->entry)
395 printf("ENTRY:\n");
396 show_bb(bb);
397 } END_FOR_EACH_PTR(bb);
399 printf("\n");
402 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos)
404 if (label->bb_target)
405 warning(pos, "label '%s' already bound", show_ident(label->ident));
406 label->bb_target = bb;
409 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label)
411 struct basic_block *bb = label->bb_target;
413 if (!bb) {
414 label->bb_target = bb = alloc_basic_block(label->pos);
415 bb->flags |= BB_REACHABLE;
417 return bb;
420 static void finish_block(struct entrypoint *ep)
422 struct basic_block *src = ep->active;
423 if (bb_reachable(src))
424 ep->active = NULL;
427 static void add_goto(struct entrypoint *ep, struct basic_block *dst)
429 struct basic_block *src = ep->active;
430 if (bb_reachable(src)) {
431 struct instruction *br = alloc_instruction(OP_BR, NULL);
432 br->bb_true = dst;
433 add_bb(&dst->parents, src);
434 br->bb = src;
435 add_instruction(&src->insns, br);
436 ep->active = NULL;
440 static inline void use_pseudo(struct instruction *insn, pseudo_t p, pseudo_t *pp)
442 *pp = p;
443 if (p && p->type != PSEUDO_VOID)
444 add_ptr_list((struct ptr_list **)&p->users, pp);
447 static void add_one_insn(struct entrypoint *ep, struct instruction *insn)
449 struct basic_block *bb = ep->active;
451 if (bb_reachable(bb)) {
452 insn->bb = bb;
453 add_instruction(&bb->insns, insn);
457 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb)
459 if (!bb_terminated(ep->active))
460 add_goto(ep, bb);
462 ep->active = bb;
463 if (bb_reachable(bb))
464 add_bb(&ep->bbs, bb);
467 static void add_setcc(struct entrypoint *ep, struct expression *expr, pseudo_t val)
469 struct basic_block *bb = ep->active;
471 if (bb_reachable(bb)) {
472 struct instruction *cc = alloc_instruction(OP_SETCC, &bool_ctype);
473 use_pseudo(cc, val, &cc->src);
474 add_one_insn(ep, cc);
478 static void add_branch(struct entrypoint *ep, struct expression *expr, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false)
480 struct basic_block *bb = ep->active;
481 struct instruction *br;
483 if (bb_reachable(bb)) {
484 br = alloc_instruction(OP_BR, expr->ctype);
485 use_pseudo(br, cond, &br->cond);
486 br->bb_true = bb_true;
487 br->bb_false = bb_false;
488 add_bb(&bb_true->parents, bb);
489 add_bb(&bb_false->parents, bb);
490 add_one_insn(ep, br);
494 /* Dummy pseudo allocator */
495 static pseudo_t alloc_pseudo(struct instruction *def)
497 static int nr = 0;
498 struct pseudo * pseudo = __alloc_pseudo(0);
499 pseudo->type = PSEUDO_REG;
500 pseudo->nr = ++nr;
501 pseudo->usage = 1;
502 pseudo->def = def;
503 return pseudo;
506 static pseudo_t symbol_pseudo(struct symbol *sym)
508 pseudo_t pseudo = sym->pseudo;
510 if (!pseudo) {
511 pseudo = __alloc_pseudo(0);
512 pseudo->type = PSEUDO_SYM;
513 pseudo->sym = sym;
514 sym->pseudo = pseudo;
516 /* Symbol pseudos have neither nr, usage nor def */
517 return pseudo;
520 static pseudo_t value_pseudo(long long val)
522 pseudo_t pseudo = __alloc_pseudo(0);
523 pseudo->type = PSEUDO_VAL;
524 pseudo->value = val;
525 /* Value pseudos have neither nr, usage nor def */
526 return pseudo;
529 static pseudo_t argument_pseudo(int nr)
531 pseudo_t pseudo = __alloc_pseudo(0);
532 pseudo->type = PSEUDO_ARG;
533 pseudo->nr = nr;
534 /* Argument pseudos have neither usage nor def */
535 return pseudo;
539 * We carry the "access_data" structure around for any accesses,
540 * which simplifies things a lot. It contains all the access
541 * information in one place.
543 struct access_data {
544 struct symbol *ctype;
545 pseudo_t address; // pseudo containing address ..
546 pseudo_t origval; // pseudo for original value ..
547 unsigned int offset, alignment; // byte offset
548 unsigned int bit_size, bit_offset; // which bits
549 struct position pos;
552 static void finish_address_gen(struct entrypoint *ep, struct access_data *ad)
556 static int linearize_simple_address(struct entrypoint *ep,
557 struct expression *addr,
558 struct access_data *ad)
560 if (addr->type == EXPR_SYMBOL) {
561 ad->address = symbol_pseudo(addr->symbol);
562 return 1;
564 if (addr->type == EXPR_BINOP) {
565 if (addr->right->type == EXPR_VALUE) {
566 if (addr->op == '+') {
567 ad->offset += get_expression_value(addr->right);
568 return linearize_simple_address(ep, addr->left, ad);
572 ad->address = linearize_expression(ep, addr);
573 return 1;
576 static int linearize_address_gen(struct entrypoint *ep,
577 struct expression *expr,
578 struct access_data *ad)
580 struct symbol *ctype = expr->ctype;
582 if (!ctype)
583 return 0;
584 ad->pos = expr->pos;
585 ad->ctype = ctype;
586 ad->bit_size = ctype->bit_size;
587 ad->alignment = ctype->ctype.alignment;
588 ad->bit_offset = ctype->bit_offset;
589 if (expr->type == EXPR_PREOP && expr->op == '*')
590 return linearize_simple_address(ep, expr->unop, ad);
592 warning(expr->pos, "generating address of non-lvalue (%d)", expr->type);
593 return 0;
596 static pseudo_t add_load(struct entrypoint *ep, struct access_data *ad)
598 struct instruction *insn;
599 pseudo_t new;
601 new = ad->origval;
602 if (new) {
603 new->usage++;
604 return new;
607 insn = alloc_instruction(OP_LOAD, ad->ctype);
608 new = alloc_pseudo(insn);
609 ad->origval = new;
611 insn->target = new;
612 insn->offset = ad->offset;
613 use_pseudo(insn, ad->address, &insn->src);
614 add_one_insn(ep, insn);
615 return new;
618 static void add_store(struct entrypoint *ep, struct access_data *ad, pseudo_t value)
620 struct basic_block *bb = ep->active;
622 if (bb_reachable(bb)) {
623 struct instruction *store = alloc_instruction(OP_STORE, ad->ctype);
624 store->offset = ad->offset;
625 use_pseudo(store, value, &store->target);
626 use_pseudo(store, ad->address, &store->src);
627 add_one_insn(ep, store);
631 /* Target-dependent, really.. */
632 #define OFFSET_ALIGN 8
633 #define MUST_ALIGN 0
635 static struct symbol *base_type(struct symbol *s)
637 if (s->type == SYM_NODE)
638 s = s->ctype.base_type;
639 if (s->type == SYM_BITFIELD)
640 s = s->ctype.base_type;
641 return s;
644 static pseudo_t linearize_store_gen(struct entrypoint *ep,
645 pseudo_t value,
646 struct access_data *ad)
648 unsigned long mask;
649 pseudo_t shifted, ormask, orig, value_mask, newval;
651 /* Bogus tests, but you get the idea.. */
652 if (ad->bit_offset & (OFFSET_ALIGN-1))
653 goto unaligned;
654 if (ad->bit_size & (ad->bit_size-1))
655 goto unaligned;
656 if (ad->bit_size & (OFFSET_ALIGN-1))
657 goto unaligned;
658 if (MUST_ALIGN && ((ad->bit_size >> 3) & (ad->alignment-1)))
659 goto unaligned;
661 add_store(ep, ad, value);
662 return value;
664 unaligned:
665 mask = ((1<<ad->bit_size)-1) << ad->bit_offset;
666 shifted = value;
667 if (ad->bit_offset) {
668 pseudo_t shift;
669 shift = value_pseudo(ad->bit_offset);
670 shifted = add_binary_op(ep, ad->ctype, OP_SHL, value, shift);
672 ad->ctype = base_type(ad->ctype);
673 orig = add_load(ep, ad);
674 ormask = value_pseudo(~mask);
675 value_mask = add_binary_op(ep, ad->ctype, OP_AND, orig, ormask);
676 newval = add_binary_op(ep, ad->ctype, OP_OR, orig, value_mask);
678 add_store(ep, ad, newval);
679 return value;
682 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right)
684 struct instruction *insn = alloc_instruction(op, ctype);
685 pseudo_t target = alloc_pseudo(insn);
686 insn->target = target;
687 use_pseudo(insn, left, &insn->src1);
688 use_pseudo(insn, right, &insn->src2);
689 add_one_insn(ep, insn);
690 return target;
693 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val)
695 struct instruction *insn = alloc_instruction(OP_SETVAL, ctype);
696 pseudo_t target = alloc_pseudo(insn);
697 insn->target = target;
698 insn->val = val;
699 if (!val)
700 insn->symbol = ctype;
701 add_one_insn(ep, insn);
702 return target;
705 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct access_data *ad)
707 pseudo_t new = add_load(ep, ad);
709 if (ad->bit_offset) {
710 pseudo_t shift = value_pseudo(ad->bit_offset);
711 pseudo_t newval = add_binary_op(ep, ad->ctype, OP_SHR, new, shift);
712 new = newval;
715 return new;
718 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
720 struct access_data ad = { NULL, };
721 pseudo_t value;
723 if (!linearize_address_gen(ep, expr, &ad))
724 return VOID;
725 value = linearize_load_gen(ep, &ad);
726 finish_address_gen(ep, &ad);
727 return value;
730 /* FIXME: FP */
731 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
733 struct access_data ad = { NULL, };
734 pseudo_t old, new, one;
735 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB;
737 if (!linearize_address_gen(ep, expr->unop, &ad))
738 return VOID;
740 old = linearize_load_gen(ep, &ad);
741 one = value_pseudo(1);
742 new = add_binary_op(ep, expr->ctype, op, old, one);
743 linearize_store_gen(ep, new, &ad);
744 finish_address_gen(ep, &ad);
745 return postop ? old : new;
748 static pseudo_t add_uniop(struct entrypoint *ep, struct expression *expr, int op, pseudo_t src)
750 struct instruction *insn = alloc_instruction(op, expr->ctype);
751 pseudo_t new = alloc_pseudo(insn);
753 insn->target = new;
754 use_pseudo(insn, src, &insn->src1);
755 add_one_insn(ep, insn);
756 return new;
759 static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr)
761 pseudo_t pre = linearize_expression(ep, expr->base);
762 struct instruction *insn = alloc_instruction(OP_SLICE, expr->ctype);
763 pseudo_t new = alloc_pseudo(insn);
765 insn->target = new;
766 insn->from = expr->r_bitpos;
767 insn->len = expr->r_nrbits;
768 use_pseudo(insn, pre, &insn->base);
769 add_one_insn(ep, insn);
770 return new;
773 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
775 pseudo_t pre = linearize_expression(ep, expr->unop);
776 switch (expr->op) {
777 case '+':
778 return pre;
779 case '!': {
780 pseudo_t zero = value_pseudo(0);
781 return add_binary_op(ep, expr->ctype, OP_SET_EQ, pre, zero);
783 case '~':
784 return add_uniop(ep, expr, OP_NOT, pre);
785 case '-':
786 return add_uniop(ep, expr, OP_NEG, pre);
788 return VOID;
791 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
794 * '*' is an lvalue access, and is fundamentally different
795 * from an arithmetic operation. Maybe it should have an
796 * expression type of its own..
798 if (expr->op == '*')
799 return linearize_access(ep, expr);
800 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
801 return linearize_inc_dec(ep, expr, 0);
802 return linearize_regular_preop(ep, expr);
805 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr)
807 return linearize_inc_dec(ep, expr, 1);
810 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
812 struct access_data ad = { NULL, };
813 struct expression *target = expr->left;
814 pseudo_t value;
816 value = linearize_expression(ep, expr->right);
817 if (!linearize_address_gen(ep, target, &ad))
818 return VOID;
819 if (expr->op != '=') {
820 pseudo_t oldvalue = linearize_load_gen(ep, &ad);
821 pseudo_t dst;
822 static const int op_trans[] = {
823 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD,
824 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB,
825 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MUL,
826 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIV,
827 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MOD,
828 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL,
829 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_SHR,
830 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND,
831 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR,
832 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR
834 dst = add_binary_op(ep, expr->ctype, op_trans[expr->op - SPECIAL_BASE], oldvalue, value);
835 value = dst;
837 value = linearize_store_gen(ep, value, &ad);
838 finish_address_gen(ep, &ad);
839 return value;
842 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr)
844 struct expression *arg, *fn;
845 struct instruction *insn = alloc_instruction(OP_CALL, expr->ctype);
846 pseudo_t retval, call;
847 int context_diff;
849 if (!expr->ctype) {
850 warning(expr->pos, "call with no type!");
851 return VOID;
854 FOR_EACH_PTR(expr->args, arg) {
855 pseudo_t new = linearize_expression(ep, arg);
856 use_pseudo(insn, new, add_pseudo(&insn->arguments, new));
857 } END_FOR_EACH_PTR(arg);
859 fn = expr->fn;
861 context_diff = 0;
862 if (fn->ctype) {
863 int in = fn->ctype->ctype.in_context;
864 int out = fn->ctype->ctype.out_context;
865 if (in < 0 || out < 0)
866 in = out = 0;
867 context_diff = out - in;
870 if (fn->type == EXPR_PREOP) {
871 if (fn->unop->type == EXPR_SYMBOL) {
872 struct symbol *sym = fn->unop->symbol;
873 if (sym->ctype.base_type->type == SYM_FN)
874 fn = fn->unop;
877 if (fn->type == EXPR_SYMBOL) {
878 call = symbol_pseudo(fn->symbol);
879 } else {
880 call = linearize_expression(ep, fn);
882 use_pseudo(insn, call, &insn->func);
883 retval = VOID;
884 if (expr->ctype != &void_ctype)
885 retval = alloc_pseudo(insn);
886 insn->target = retval;
887 add_one_insn(ep, insn);
889 if (context_diff) {
890 insn = alloc_instruction(OP_CONTEXT, &void_ctype);
891 insn->increment = context_diff;
892 add_one_insn(ep, insn);
895 return retval;
898 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr)
900 pseudo_t src1, src2, dst;
901 static const int opcode[] = {
902 ['+'] = OP_ADD, ['-'] = OP_SUB,
903 ['*'] = OP_MUL, ['/'] = OP_DIV,
904 ['%'] = OP_MOD, ['&'] = OP_AND,
905 ['|'] = OP_OR, ['^'] = OP_XOR,
906 [SPECIAL_LEFTSHIFT] = OP_SHL,
907 [SPECIAL_RIGHTSHIFT] = OP_SHR,
908 [SPECIAL_LOGICAL_AND] = OP_AND_BOOL,
909 [SPECIAL_LOGICAL_OR] = OP_OR_BOOL,
912 src1 = linearize_expression(ep, expr->left);
913 src2 = linearize_expression(ep, expr->right);
914 dst = add_binary_op(ep, expr->ctype, opcode[expr->op], src1, src2);
915 return dst;
918 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
920 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
922 static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr)
924 pseudo_t cond, true, false, res;
926 true = linearize_expression(ep, expr->cond_true);
927 false = linearize_expression(ep, expr->cond_false);
928 cond = linearize_expression(ep, expr->conditional);
930 add_setcc(ep, expr, cond);
931 if (!expr->cond_true)
932 true = cond;
933 res = add_binary_op(ep, expr->ctype, OP_SEL, true, false);
934 return res;
937 static pseudo_t copy_pseudo(struct entrypoint *ep, struct expression *expr, pseudo_t src)
939 struct basic_block *bb = ep->active;
941 if (src->type != PSEUDO_REG)
942 return src;
944 if (bb_reachable(bb)) {
945 struct instruction *new = alloc_instruction(OP_MOV, expr->ctype);
946 pseudo_t dst = alloc_pseudo(src->def);
947 new->target = dst;
948 use_pseudo(new, src, &new->src);
949 new->bb = bb;
950 add_instruction(&bb->insns, new);
951 return dst;
953 return VOID;
956 static pseudo_t add_join_conditional(struct entrypoint *ep, struct expression *expr,
957 pseudo_t src1, struct basic_block *bb1,
958 pseudo_t src2, struct basic_block *bb2)
960 pseudo_t target;
961 struct instruction *phi_node;
962 struct phi *phi1, *phi2;
964 if (src1 == VOID || !bb_reachable(bb1))
965 return src2;
966 if (src2 == VOID || !bb_reachable(bb2))
967 return src1;
968 phi_node = alloc_instruction(OP_PHI, expr->ctype);
969 phi1 = alloc_phi(bb1, src1);
970 phi2 = alloc_phi(bb2, src2);
971 add_phi(&phi_node->phi_list, phi1);
972 add_phi(&phi_node->phi_list, phi2);
973 phi_node->target = target = alloc_pseudo(phi_node);
974 use_pseudo(phi_node, src1, &phi1->pseudo);
975 use_pseudo(phi_node, src2, &phi2->pseudo);
976 add_one_insn(ep, phi_node);
977 return target;
980 static pseudo_t linearize_short_conditional(struct entrypoint *ep, struct expression *expr,
981 struct expression *cond,
982 struct expression *expr_false)
984 pseudo_t tst, src1, src2;
985 struct basic_block *bb_true;
986 struct basic_block *bb_false = alloc_basic_block(expr_false->pos);
987 struct basic_block *merge = alloc_basic_block(expr->pos);
989 tst = linearize_expression(ep, cond);
990 src1 = copy_pseudo(ep, expr, tst);
991 bb_true = ep->active;
992 add_branch(ep, expr, tst, merge, bb_false);
994 set_activeblock(ep, bb_false);
995 src2 = linearize_expression(ep, expr_false);
996 bb_false = ep->active;
997 set_activeblock(ep, merge);
999 return add_join_conditional(ep, expr, src1, bb_true, src2, bb_false);
1002 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr,
1003 struct expression *cond,
1004 struct expression *expr_true,
1005 struct expression *expr_false)
1007 pseudo_t src1, src2;
1008 struct basic_block *bb_true = alloc_basic_block(expr_true->pos);
1009 struct basic_block *bb_false = alloc_basic_block(expr_false->pos);
1010 struct basic_block *merge = alloc_basic_block(expr->pos);
1012 linearize_cond_branch(ep, cond, bb_true, bb_false);
1014 set_activeblock(ep, bb_true);
1015 src1 = linearize_expression(ep, expr_true);
1016 bb_true = ep->active;
1017 add_goto(ep, merge);
1019 set_activeblock(ep, bb_false);
1020 src2 = linearize_expression(ep, expr_false);
1021 bb_false = ep->active;
1022 set_activeblock(ep, merge);
1024 return add_join_conditional(ep, expr, src1, bb_true, src2, bb_false);
1027 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr)
1029 struct expression *shortcut;
1031 shortcut = alloc_const_expression(expr->pos, expr->op == SPECIAL_LOGICAL_OR);
1032 shortcut->ctype = expr->ctype;
1033 return linearize_conditional(ep, expr, expr->left, shortcut, expr->right);
1036 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr)
1038 static const int cmpop[] = {
1039 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT,
1040 [SPECIAL_EQUAL] = OP_SET_EQ,
1041 [SPECIAL_NOTEQUAL] = OP_SET_NE,
1042 [SPECIAL_GTE] = OP_SET_GE,
1043 [SPECIAL_LTE] = OP_SET_LE,
1044 [SPECIAL_UNSIGNED_LT] = OP_SET_B,
1045 [SPECIAL_UNSIGNED_GT] = OP_SET_A,
1046 [SPECIAL_UNSIGNED_LTE] = OP_SET_BE,
1047 [SPECIAL_UNSIGNED_GTE] = OP_SET_AE,
1050 pseudo_t src1 = linearize_expression(ep, expr->left);
1051 pseudo_t src2 = linearize_expression(ep, expr->right);
1052 pseudo_t dst = add_binary_op(ep, expr->ctype, cmpop[expr->op], src1, src2);
1053 return dst;
1057 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1059 pseudo_t cond;
1061 if (!expr || !bb_reachable(ep->active))
1062 return VOID;
1064 switch (expr->type) {
1066 case EXPR_STRING:
1067 case EXPR_VALUE:
1068 add_goto(ep, expr->value ? bb_true : bb_false);
1069 return VOID;
1071 case EXPR_FVALUE:
1072 add_goto(ep, expr->fvalue ? bb_true : bb_false);
1073 return VOID;
1075 case EXPR_LOGICAL:
1076 linearize_logical_branch(ep, expr, bb_true, bb_false);
1077 return VOID;
1079 case EXPR_COMPARE:
1080 cond = linearize_compare(ep, expr);
1081 add_branch(ep, expr, cond, bb_true, bb_false);
1082 break;
1084 case EXPR_PREOP:
1085 if (expr->op == '!')
1086 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true);
1087 /* fall through */
1088 default: {
1089 cond = linearize_expression(ep, expr);
1090 add_branch(ep, expr, cond, bb_true, bb_false);
1092 return VOID;
1095 return VOID;
1100 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1102 struct basic_block *next = alloc_basic_block(expr->pos);
1104 if (expr->op == SPECIAL_LOGICAL_OR)
1105 linearize_cond_branch(ep, expr->left, bb_true, next);
1106 else
1107 linearize_cond_branch(ep, expr->left, next, bb_false);
1108 set_activeblock(ep, next);
1109 linearize_cond_branch(ep, expr->right, bb_true, bb_false);
1110 return VOID;
1113 pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr)
1115 pseudo_t src, result;
1116 struct instruction *insn;
1118 src = linearize_expression(ep, expr->cast_expression);
1119 if (src == VOID)
1120 return VOID;
1121 if (expr->ctype->bit_size < 0)
1122 return VOID;
1123 insn = alloc_instruction(OP_CAST, expr->ctype);
1124 result = alloc_pseudo(insn);
1125 insn->target = result;
1126 insn->orig_type = expr->cast_expression->ctype;
1127 use_pseudo(insn, src, &insn->src);
1128 add_one_insn(ep, insn);
1129 return result;
1132 pseudo_t linearize_position(struct entrypoint *ep, struct expression *pos, struct access_data *ad)
1134 struct expression *init_expr = pos->init_expr;
1135 pseudo_t value = linearize_expression(ep, init_expr);
1137 ad->offset = pos->init_offset;
1138 ad->ctype = init_expr->ctype;
1139 linearize_store_gen(ep, value, ad);
1140 return VOID;
1143 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *ad)
1145 switch (initializer->type) {
1146 case EXPR_INITIALIZER: {
1147 struct expression *expr;
1148 FOR_EACH_PTR(initializer->expr_list, expr) {
1149 linearize_initializer(ep, expr, ad);
1150 } END_FOR_EACH_PTR(expr);
1151 break;
1153 case EXPR_POS:
1154 linearize_position(ep, initializer, ad);
1155 break;
1156 default: {
1157 pseudo_t value = linearize_expression(ep, initializer);
1158 ad->ctype = initializer->ctype;
1159 linearize_store_gen(ep, value, ad);
1163 return VOID;
1166 void linearize_argument(struct entrypoint *ep, struct symbol *arg, int nr)
1168 struct access_data ad = { NULL, };
1170 ad.ctype = arg;
1171 ad.address = symbol_pseudo(arg);
1172 linearize_store_gen(ep, argument_pseudo(nr), &ad);
1173 finish_address_gen(ep, &ad);
1176 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
1178 if (!expr)
1179 return VOID;
1181 switch (expr->type) {
1182 case EXPR_SYMBOL:
1183 return add_setval(ep, expr->symbol, NULL);
1185 case EXPR_VALUE:
1186 return value_pseudo(expr->value);
1188 case EXPR_STRING: case EXPR_FVALUE: case EXPR_LABEL:
1189 return add_setval(ep, expr->ctype, expr);
1191 case EXPR_STATEMENT:
1192 return linearize_statement(ep, expr->statement);
1194 case EXPR_CALL:
1195 return linearize_call_expression(ep, expr);
1197 case EXPR_BINOP:
1198 return linearize_binop(ep, expr);
1200 case EXPR_LOGICAL:
1201 return linearize_logical(ep, expr);
1203 case EXPR_COMPARE:
1204 return linearize_compare(ep, expr);
1206 case EXPR_SELECT:
1207 return linearize_select(ep, expr);
1209 case EXPR_CONDITIONAL:
1210 if (!expr->cond_true)
1211 return linearize_short_conditional(ep, expr, expr->conditional, expr->cond_false);
1213 return linearize_conditional(ep, expr, expr->conditional,
1214 expr->cond_true, expr->cond_false);
1216 case EXPR_COMMA:
1217 linearize_expression(ep, expr->left);
1218 return linearize_expression(ep, expr->right);
1220 case EXPR_ASSIGNMENT:
1221 return linearize_assignment(ep, expr);
1223 case EXPR_PREOP:
1224 return linearize_preop(ep, expr);
1226 case EXPR_POSTOP:
1227 return linearize_postop(ep, expr);
1229 case EXPR_CAST:
1230 case EXPR_IMPLIED_CAST:
1231 return linearize_cast(ep, expr);
1233 case EXPR_SLICE:
1234 return linearize_slice(ep, expr);
1236 case EXPR_INITIALIZER:
1237 case EXPR_POS:
1238 warning(expr->pos, "unexpected initializer expression (%d %d)", expr->type, expr->op);
1239 return VOID;
1240 default:
1241 warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op);
1242 return VOID;
1244 return VOID;
1247 static void linearize_one_symbol(struct entrypoint *ep, struct symbol *sym)
1249 struct access_data ad = { NULL, };
1251 if (!sym->initializer)
1252 return;
1254 ad.address = symbol_pseudo(sym);
1255 linearize_initializer(ep, sym->initializer, &ad);
1256 finish_address_gen(ep, &ad);
1259 static pseudo_t linearize_compound_statement(struct entrypoint *ep, struct statement *stmt)
1261 pseudo_t pseudo;
1262 struct statement *s;
1263 struct symbol *sym;
1264 struct symbol *ret = stmt->ret;
1266 concat_symbol_list(stmt->syms, &ep->syms);
1268 FOR_EACH_PTR(stmt->syms, sym) {
1269 linearize_one_symbol(ep, sym);
1270 } END_FOR_EACH_PTR(sym);
1272 pseudo = VOID;
1273 FOR_EACH_PTR(stmt->stmts, s) {
1274 pseudo = linearize_statement(ep, s);
1275 } END_FOR_EACH_PTR(s);
1277 if (ret) {
1278 struct basic_block *bb = get_bound_block(ep, ret);
1279 struct instruction *phi = first_instruction(bb->insns);
1281 set_activeblock(ep, bb);
1282 if (!phi)
1283 return pseudo;
1285 if (phi_list_size(phi->phi_list)==1) {
1286 pseudo = first_phi(phi->phi_list)->pseudo;
1287 delete_last_instruction(&bb->insns);
1288 return pseudo;
1290 return phi->target;
1292 return pseudo;
1296 pseudo_t linearize_internal(struct entrypoint *ep, struct statement *stmt)
1298 struct instruction *insn = alloc_instruction(OP_CONTEXT, &void_ctype);
1299 struct expression *expr = stmt->expression;
1300 int value = 0;
1302 if (expr->type == EXPR_VALUE)
1303 value = expr->value;
1305 insn->increment = value;
1306 add_one_insn(ep, insn);
1307 return VOID;
1310 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
1312 if (!stmt)
1313 return VOID;
1315 switch (stmt->type) {
1316 case STMT_NONE:
1317 break;
1319 case STMT_INTERNAL:
1320 return linearize_internal(ep, stmt);
1322 case STMT_EXPRESSION:
1323 return linearize_expression(ep, stmt->expression);
1325 case STMT_ASM:
1326 /* FIXME */
1327 break;
1329 case STMT_RETURN: {
1330 struct expression *expr = stmt->expression;
1331 struct basic_block *bb_return = get_bound_block(ep, stmt->ret_target);
1332 struct basic_block *active;
1333 pseudo_t src = linearize_expression(ep, expr);
1334 active = ep->active;
1335 add_goto(ep, bb_return);
1336 if (active && src != &void_pseudo) {
1337 struct instruction *phi_node = first_instruction(bb_return->insns);
1338 struct phi *phi;
1339 if (!phi_node) {
1340 phi_node = alloc_instruction(OP_PHI, expr->ctype);
1341 phi_node->target = alloc_pseudo(phi_node);
1342 phi_node->bb = bb_return;
1343 add_instruction(&bb_return->insns, phi_node);
1345 phi = alloc_phi(active, src);
1346 use_pseudo(phi_node, src, &phi->pseudo);
1347 add_phi(&phi_node->phi_list, phi);
1349 return VOID;
1352 case STMT_CASE: {
1353 struct basic_block *bb = get_bound_block(ep, stmt->case_label);
1354 set_activeblock(ep, bb);
1355 linearize_statement(ep, stmt->case_statement);
1356 break;
1359 case STMT_LABEL: {
1360 struct symbol *label = stmt->label_identifier;
1361 struct basic_block *bb;
1363 if (label->used) {
1364 bb = get_bound_block(ep, stmt->label_identifier);
1365 set_activeblock(ep, bb);
1366 linearize_statement(ep, stmt->label_statement);
1368 break;
1371 case STMT_GOTO: {
1372 struct symbol *sym;
1373 struct expression *expr;
1374 struct instruction *goto_ins;
1375 pseudo_t pseudo;
1377 if (stmt->goto_label) {
1378 add_goto(ep, get_bound_block(ep, stmt->goto_label));
1379 break;
1382 expr = stmt->goto_expression;
1383 if (!expr)
1384 break;
1386 /* This can happen as part of simplification */
1387 if (expr->type == EXPR_LABEL) {
1388 add_goto(ep, get_bound_block(ep, expr->label_symbol));
1389 break;
1392 pseudo = linearize_expression(ep, expr);
1393 goto_ins = alloc_instruction(OP_COMPUTEDGOTO, NULL);
1394 use_pseudo(goto_ins, pseudo, &goto_ins->target);
1395 add_one_insn(ep, goto_ins);
1397 FOR_EACH_PTR(stmt->target_list, sym) {
1398 struct basic_block *bb_computed = get_bound_block(ep, sym);
1399 struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0);
1400 add_multijmp(&goto_ins->multijmp_list, jmp);
1401 add_bb(&bb_computed->parents, ep->active);
1402 } END_FOR_EACH_PTR(sym);
1404 finish_block(ep);
1405 break;
1408 case STMT_COMPOUND:
1409 return linearize_compound_statement(ep, stmt);
1412 * This could take 'likely/unlikely' into account, and
1413 * switch the arms around appropriately..
1415 case STMT_IF: {
1416 struct basic_block *bb_true, *bb_false, *endif;
1417 struct expression *cond = stmt->if_conditional;
1419 bb_true = alloc_basic_block(stmt->pos);
1420 bb_false = endif = alloc_basic_block(stmt->pos);
1422 linearize_cond_branch(ep, cond, bb_true, bb_false);
1424 set_activeblock(ep, bb_true);
1425 linearize_statement(ep, stmt->if_true);
1427 if (stmt->if_false) {
1428 endif = alloc_basic_block(stmt->pos);
1429 add_goto(ep, endif);
1430 set_activeblock(ep, bb_false);
1431 linearize_statement(ep, stmt->if_false);
1433 set_activeblock(ep, endif);
1434 break;
1437 case STMT_SWITCH: {
1438 struct symbol *sym;
1439 struct instruction *switch_ins;
1440 struct basic_block *switch_end = alloc_basic_block(stmt->pos);
1441 pseudo_t pseudo;
1443 pseudo = linearize_expression(ep, stmt->switch_expression);
1444 switch_ins = alloc_instruction(OP_SWITCH, NULL);
1445 use_pseudo(switch_ins, pseudo, &switch_ins->cond);
1446 add_one_insn(ep, switch_ins);
1448 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
1449 struct statement *case_stmt = sym->stmt;
1450 struct basic_block *bb_case = get_bound_block(ep, sym);
1451 struct multijmp *jmp;
1453 if (!case_stmt->case_expression) {
1454 jmp = alloc_multijmp(bb_case, 1, 0);
1455 } else {
1456 int begin, end;
1458 begin = end = case_stmt->case_expression->value;
1459 if (case_stmt->case_to)
1460 end = case_stmt->case_to->value;
1461 if (begin > end)
1462 jmp = alloc_multijmp(bb_case, end, begin);
1463 else
1464 jmp = alloc_multijmp(bb_case, begin, end);
1467 add_multijmp(&switch_ins->multijmp_list, jmp);
1468 add_bb(&bb_case->parents, ep->active);
1469 } END_FOR_EACH_PTR(sym);
1471 bind_label(stmt->switch_break, switch_end, stmt->pos);
1473 /* And linearize the actual statement */
1474 linearize_statement(ep, stmt->switch_statement);
1475 set_activeblock(ep, switch_end);
1477 break;
1480 case STMT_ITERATOR: {
1481 struct statement *pre_statement = stmt->iterator_pre_statement;
1482 struct expression *pre_condition = stmt->iterator_pre_condition;
1483 struct statement *statement = stmt->iterator_statement;
1484 struct statement *post_statement = stmt->iterator_post_statement;
1485 struct expression *post_condition = stmt->iterator_post_condition;
1486 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end;
1488 concat_symbol_list(stmt->iterator_syms, &ep->syms);
1489 linearize_statement(ep, pre_statement);
1491 loop_body = loop_top = alloc_basic_block(stmt->pos);
1492 loop_continue = alloc_basic_block(stmt->pos);
1493 loop_end = alloc_basic_block(stmt->pos);
1495 if (pre_condition == post_condition) {
1496 loop_top = alloc_basic_block(stmt->pos);
1497 loop_top->flags |= BB_REACHABLE;
1498 set_activeblock(ep, loop_top);
1501 loop_top->flags |= BB_REACHABLE;
1502 if (pre_condition)
1503 linearize_cond_branch(ep, pre_condition, loop_body, loop_end);
1505 bind_label(stmt->iterator_continue, loop_continue, stmt->pos);
1506 bind_label(stmt->iterator_break, loop_end, stmt->pos);
1508 set_activeblock(ep, loop_body);
1509 linearize_statement(ep, statement);
1510 add_goto(ep, loop_continue);
1512 if (post_condition) {
1513 set_activeblock(ep, loop_continue);
1514 linearize_statement(ep, post_statement);
1515 if (pre_condition == post_condition)
1516 add_goto(ep, loop_top);
1517 else
1518 linearize_cond_branch(ep, post_condition, loop_top, loop_end);
1521 set_activeblock(ep, loop_end);
1522 break;
1525 default:
1526 break;
1528 return VOID;
1531 void mark_bb_reachable(struct basic_block *bb)
1533 struct basic_block *child;
1534 struct terminator_iterator term;
1535 struct basic_block_list *bbstack = NULL;
1537 if (!bb || bb->flags & BB_REACHABLE)
1538 return;
1540 add_bb(&bbstack, bb);
1541 while (bbstack) {
1542 bb = delete_last_basic_block(&bbstack);
1543 if (bb->flags & BB_REACHABLE)
1544 continue;
1545 bb->flags |= BB_REACHABLE;
1546 init_terminator_iterator(last_instruction(bb->insns), &term);
1547 while ((child=next_terminator_bb(&term)) != NULL) {
1548 if (!(child->flags & BB_REACHABLE))
1549 add_bb(&bbstack, child);
1554 void remove_unreachable_bbs(struct entrypoint *ep)
1556 struct basic_block *bb, *child;
1557 struct terminator_iterator term;
1559 FOR_EACH_PTR(ep->bbs, bb) {
1560 bb->flags &= ~BB_REACHABLE;
1561 } END_FOR_EACH_PTR(bb);
1563 mark_bb_reachable(ep->entry);
1564 FOR_EACH_PTR(ep->bbs, bb) {
1565 if (bb->flags & BB_REACHABLE)
1566 continue;
1567 init_terminator_iterator(last_instruction(bb->insns), &term);
1568 while ((child=next_terminator_bb(&term)) != NULL)
1569 replace_basic_block_list(child->parents, bb, NULL);
1570 DELETE_CURRENT_PTR(bb);
1571 }END_FOR_EACH_PTR(bb);
1574 void pack_basic_blocks(struct entrypoint *ep)
1576 struct basic_block *bb;
1578 remove_unreachable_bbs(ep);
1579 FOR_EACH_PTR(ep->bbs, bb) {
1580 struct terminator_iterator term;
1581 struct instruction *jmp;
1582 struct basic_block *target, *sibling, *parent;
1583 int parents;
1585 if (!is_branch_goto(jmp=last_instruction(bb->insns)))
1586 continue;
1588 target = jmp->bb_true ? jmp->bb_true : jmp->bb_false;
1589 if (target == bb)
1590 continue;
1591 parents = bb_list_size(target->parents);
1592 if (target == ep->entry)
1593 parents++;
1594 if (parents != 1 && jmp != first_instruction(bb->insns))
1595 continue;
1597 /* Transfer the parents' terminator to target directly. */
1598 replace_basic_block_list(target->parents, bb, NULL);
1599 FOR_EACH_PTR(bb->parents, parent) {
1600 init_terminator_iterator(last_instruction(parent->insns), &term);
1601 while ((sibling=next_terminator_bb(&term)) != NULL) {
1602 if (sibling == bb) {
1603 replace_terminator_bb(&term, target);
1604 add_bb(&target->parents, parent);
1607 } END_FOR_EACH_PTR(parent);
1609 /* Move the instructions to the target block. */
1610 delete_last_instruction(&bb->insns);
1611 if (bb->insns) {
1612 concat_instruction_list(target->insns, &bb->insns);
1613 free_instruction_list(&target->insns);
1614 target->insns = bb->insns;
1616 if (bb == ep->entry)
1617 ep->entry = target;
1618 DELETE_CURRENT_PTR(bb);
1619 }END_FOR_EACH_PTR(bb);
1623 * Dammit, if we have a phi-node followed by a conditional
1624 * branch on that phi-node, we should damn well be able to
1625 * do something about the source. Maybe.
1627 static void rewrite_branch(struct basic_block *bb,
1628 struct basic_block **ptr,
1629 struct basic_block *old,
1630 struct basic_block *new)
1632 *ptr = new;
1633 add_bb(&new->parents, bb);
1635 * FIXME!!! We should probably also remove bb from "old->parents",
1636 * but we need to be careful that bb isn't a parent some other way.
1641 * Return the known truth value of a pseudo, or -1 if
1642 * it's not known.
1644 static int pseudo_truth_value(pseudo_t pseudo)
1646 switch (pseudo->type) {
1647 case PSEUDO_VAL:
1648 return !!pseudo->value;
1650 case PSEUDO_REG: {
1651 struct instruction *insn = pseudo->def;
1652 if (insn->opcode == OP_SETVAL && insn->target == pseudo) {
1653 struct expression *expr = insn->val;
1655 /* A symbol address is always considered true.. */
1656 if (!expr)
1657 return 1;
1658 if (expr->type == EXPR_VALUE)
1659 return !!expr->value;
1662 /* Fall through */
1663 default:
1664 return -1;
1669 static void try_to_simplify_bb(struct entrypoint *ep, struct basic_block *bb,
1670 struct instruction *first, struct instruction *second)
1672 struct phi *phi;
1674 FOR_EACH_PTR(first->phi_list, phi) {
1675 struct basic_block *source = phi->source, *target;
1676 pseudo_t pseudo = phi->pseudo;
1677 struct instruction *br;
1678 int true;
1680 br = last_instruction(source->insns);
1681 if (!br)
1682 continue;
1683 if (br->opcode != OP_BR)
1684 continue;
1686 true = pseudo_truth_value(pseudo);
1687 if (true < 0)
1688 continue;
1689 target = true ? second->bb_true : second->bb_false;
1690 if (br->bb_true == bb)
1691 rewrite_branch(source, &br->bb_true, bb, target);
1692 if (br->bb_false == bb)
1693 rewrite_branch(source, &br->bb_false, bb, target);
1694 } END_FOR_EACH_PTR(phi);
1697 static inline int linearize_insn_list(struct instruction_list *list, struct instruction **arr, int nr)
1699 return linearize_ptr_list((struct ptr_list *)list, (void **)arr, nr);
1702 static void simplify_phi_nodes(struct entrypoint *ep)
1704 struct basic_block *bb;
1706 FOR_EACH_PTR(ep->bbs, bb) {
1707 struct instruction *insns[2], *first, *second;
1709 if (linearize_insn_list(bb->insns, insns, 2) < 2)
1710 continue;
1712 first = insns[0];
1713 second = insns[1];
1715 if (first->opcode != OP_PHI)
1716 continue;
1717 if (second->opcode != OP_BR)
1718 continue;
1719 if (first->target != second->cond)
1720 continue;
1721 try_to_simplify_bb(ep, bb, first, second);
1722 } END_FOR_EACH_PTR(bb);
1725 static void create_phi_copy(struct basic_block *bb, struct instruction *phi,
1726 pseudo_t src, pseudo_t dst)
1728 struct instruction *insn = last_instruction(bb->insns);
1729 struct instruction *new = alloc_instruction(OP_MOV, phi->type);
1731 delete_last_instruction(&bb->insns);
1732 new->target = dst;
1733 use_pseudo(new, src, &new->src);
1734 new->bb = bb;
1735 add_instruction(&bb->insns, new);
1737 /* And add back the last instruction */
1738 add_instruction(&bb->insns, insn);
1741 static void remove_one_phi_node(struct entrypoint *ep,
1742 struct basic_block *bb,
1743 struct instruction *insn)
1745 struct phi *node;
1746 pseudo_t target = insn->target;
1748 FOR_EACH_PTR(insn->phi_list, node) {
1749 create_phi_copy(node->source, insn, node->pseudo, target);
1750 } END_FOR_EACH_PTR(node);
1753 static void remove_phi_nodes(struct entrypoint *ep)
1755 struct basic_block *bb;
1756 FOR_EACH_PTR(ep->bbs, bb) {
1757 struct instruction *insn = first_instruction(bb->insns);
1758 if (insn && insn->opcode == OP_PHI)
1759 remove_one_phi_node(ep, bb, insn);
1760 } END_FOR_EACH_PTR(bb);
1763 static inline void concat_user_list(struct pseudo_ptr_list *src, struct pseudo_ptr_list **dst)
1765 concat_ptr_list((struct ptr_list *)src, (struct ptr_list **)dst);
1768 static void convert_load_insn(struct instruction *insn, pseudo_t src)
1770 pseudo_t target, *usep;
1773 * Go through the "insn->users" list and replace them all..
1775 target = insn->target;
1776 FOR_EACH_PTR(target->users, usep) {
1777 *usep = src;
1778 } END_FOR_EACH_PTR(usep);
1779 concat_user_list(target->users, &src->users);
1781 /* Turn the load into a no-op */
1782 insn->opcode = OP_LNOP;
1785 static int overlapping_memop(struct instruction *a, struct instruction *b)
1787 unsigned int a_start = (a->offset << 3) + a->type->bit_offset;
1788 unsigned int b_start = (b->offset << 3) + b->type->bit_offset;
1789 unsigned int a_size = a->type->bit_size;
1790 unsigned int b_size = b->type->bit_size;
1792 if (a_size + a_start <= b_start)
1793 return 0;
1794 if (b_size + b_start <= a_start)
1795 return 0;
1796 return 1;
1799 static inline int same_memop(struct instruction *a, struct instruction *b)
1801 return a->offset == b->offset &&
1802 a->type->bit_size == b->type->bit_size &&
1803 a->type->bit_offset == b->type->bit_offset;
1806 static int find_dominating_parents(pseudo_t pseudo, struct instruction *insn,
1807 struct basic_block *bb, unsigned long generation, struct phi_list **dominators)
1809 struct basic_block *parent;
1811 FOR_EACH_PTR(bb->parents, parent) {
1812 struct instruction *one, *dom;
1813 if (parent->generation == generation)
1814 continue;
1815 parent->generation = generation;
1816 dom = NULL;
1817 FOR_EACH_PTR(parent->insns, one) {
1818 if (one == insn)
1819 dom = NULL;
1820 if (one->opcode != OP_STORE)
1821 continue;
1822 if (one->src != pseudo)
1823 continue;
1824 if (!same_memop(insn, one)) {
1825 if (!overlapping_memop(insn, one))
1826 continue;
1827 return 0;
1829 dom = one;
1830 } END_FOR_EACH_PTR(one);
1832 if (dom) {
1833 add_phi(dominators, alloc_phi(parent, dom->target));
1834 continue;
1837 if (!find_dominating_parents(pseudo, insn, parent, generation, dominators))
1838 return 0;
1839 } END_FOR_EACH_PTR(parent);
1840 return 1;
1843 static int find_dominating_stores(pseudo_t pseudo, struct instruction *insn, unsigned long generation)
1845 struct basic_block *bb = insn->bb;
1846 struct instruction *one, *dom = NULL;
1847 struct phi_list *dominators;
1849 /* Unreachable load? Undo it */
1850 if (!bb) {
1851 insn->opcode = OP_LNOP;
1852 return 1;
1855 FOR_EACH_PTR(bb->insns, one) {
1856 if (one == insn)
1857 goto found;
1858 if (one->opcode != OP_STORE)
1859 continue;
1860 if (one->src != pseudo)
1861 continue;
1862 if (!same_memop(one, insn)) {
1863 if (!overlapping_memop(one, insn))
1864 continue;
1865 return 0;
1867 dom = one;
1868 } END_FOR_EACH_PTR(one);
1869 /* Whaa? */
1870 warning(pseudo->sym->pos, "unable to find symbol read");
1871 return 0;
1872 found:
1873 if (dom) {
1874 convert_load_insn(insn, dom->target);
1875 return 1;
1878 /* Ok, go find the parents */
1879 bb->generation = generation;
1881 dominators = NULL;
1882 if (!find_dominating_parents(pseudo, insn, bb, generation, &dominators))
1883 return 0;
1885 /* This happens with initial assignments to structures etc.. */
1886 if (!dominators)
1887 return 0;
1890 * If we find just one dominating instruction, we
1891 * can turn it into a direct thing. Otherwise we'll
1892 * have to turn the load into a phi-node of the
1893 * dominators.
1895 if (phi_list_size(dominators) == 1) {
1896 convert_load_insn(insn, first_phi(dominators)->pseudo);
1897 } else {
1898 pseudo_t new = alloc_pseudo(insn);
1899 convert_load_insn(insn, new);
1900 insn->opcode = OP_PHI;
1901 insn->target = new;
1902 insn->phi_list = dominators;
1904 return 1;
1907 static void simplify_one_symbol(struct symbol *sym)
1909 pseudo_t pseudo, src, *pp;
1910 struct instruction *def;
1911 int all;
1913 /* External visibility? Never mind */
1914 if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE))
1915 return;
1917 /* Never used as a symbol? */
1918 pseudo = sym->pseudo;
1919 if (!pseudo)
1920 return;
1922 def = NULL;
1923 FOR_EACH_PTR(pseudo->users, pp) {
1924 /* We know that the symbol-pseudo use is the "src" in the instruction */
1925 struct instruction *insn = container(pp, struct instruction, src);
1927 switch (insn->opcode) {
1928 case OP_STORE:
1929 if (def)
1930 goto multi_def;
1931 def = insn;
1932 break;
1933 case OP_LOAD:
1934 break;
1935 default:
1936 warning(sym->pos, "symbol '%s' pseudo used in unexpected way", show_ident(sym->ident));
1938 if (insn->offset)
1939 goto complex_def;
1940 } END_FOR_EACH_PTR(pp);
1943 * Goodie, we have a single store (if even that) in the whole
1944 * thing. Replace all loads with moves from the pseudo,
1945 * replace the store with a def.
1947 src = VOID;
1948 if (def) {
1949 src = def->target;
1951 /* Turn the store into a no-op */
1952 def->opcode = OP_SNOP;
1954 FOR_EACH_PTR(pseudo->users, pp) {
1955 struct instruction *insn = container(pp, struct instruction, src);
1956 if (insn->opcode == OP_LOAD)
1957 convert_load_insn(insn, src);
1958 } END_FOR_EACH_PTR(pp);
1959 return;
1961 multi_def:
1962 complex_def:
1964 all = 1;
1965 FOR_EACH_PTR(pseudo->users, pp) {
1966 struct instruction *insn = container(pp, struct instruction, src);
1967 if (insn->opcode == OP_LOAD)
1968 all &= find_dominating_stores(pseudo, insn, ++bb_generation);
1969 } END_FOR_EACH_PTR(pp);
1971 /* If we converted all the loads, remove the stores. They are dead */
1972 if (all) {
1973 FOR_EACH_PTR(pseudo->users, pp) {
1974 struct instruction *insn = container(pp, struct instruction, src);
1975 if (insn->opcode == OP_STORE)
1976 insn->opcode = OP_SNOP;
1977 } END_FOR_EACH_PTR(pp);
1980 return;
1983 static void simplify_symbol_usage(struct entrypoint *ep)
1985 struct symbol *sym;
1986 FOR_EACH_PTR(ep->syms, sym) {
1987 simplify_one_symbol(sym);
1988 } END_FOR_EACH_PTR(sym);
1991 struct entrypoint *linearize_symbol(struct symbol *sym)
1993 struct symbol *base_type;
1994 struct entrypoint *ret_ep = NULL;
1996 if (!sym)
1997 return NULL;
1998 base_type = sym->ctype.base_type;
1999 if (!base_type)
2000 return NULL;
2001 if (base_type->type == SYM_FN) {
2002 if (base_type->stmt) {
2003 struct entrypoint *ep = alloc_entrypoint();
2004 struct basic_block *bb = alloc_basic_block(sym->pos);
2005 struct symbol *arg;
2006 pseudo_t result;
2007 int i;
2009 ep->name = sym;
2010 ep->entry = bb;
2011 bb->flags |= BB_REACHABLE;
2012 set_activeblock(ep, bb);
2013 concat_symbol_list(base_type->arguments, &ep->syms);
2015 /* FIXME!! We should do something else about varargs.. */
2016 i = 0;
2017 FOR_EACH_PTR(base_type->arguments, arg) {
2018 linearize_argument(ep, arg, ++i);
2019 } END_FOR_EACH_PTR(arg);
2021 result = linearize_statement(ep, base_type->stmt);
2022 if (bb_reachable(ep->active) && !bb_terminated(ep->active)) {
2023 struct symbol *ret_type = base_type->ctype.base_type;
2024 struct instruction *insn = alloc_instruction(OP_RET, ret_type);
2026 use_pseudo(insn, result, &insn->src);
2027 add_one_insn(ep, insn);
2030 simplify_symbol_usage(ep);
2033 * Questionable conditional branch simplification.
2034 * This short-circuits branches to conditional branches,
2035 * and leaves the phi-nodes "dangling" in the old
2036 * basic block - the nodes are no longer attached to
2037 * where the uses are. But it can still be considered
2038 * SSA if you just look at it sideways..
2040 simplify_phi_nodes(ep);
2042 #if 0
2044 * WARNING!! The removal of phi nodes will make the
2045 * tree no longer valid SSA format. We do it here
2046 * to make the linearized code look more like real
2047 * assembly code, but we should do all the SSA-based
2048 * optimizations (CSE etc) _before_ this point.
2050 remove_phi_nodes(ep);
2051 #endif
2054 * This packs the basic blocks, and also destroys the
2055 * instruction "bb" pointer information, so we must
2056 * do it last.
2058 pack_basic_blocks(ep);
2060 ret_ep = ep;
2064 return ret_ep;