Generate code for conditional branches.
[smatch.git] / linearize.c
blob9b77c291c1f6805925aedb33bb7329c65945e77b
/*
 * Linearize - walk the statement tree (but _not_ the expressions)
 * to generate a linear version of it and the basic blocks.
 *
 * NOTE! We're not interested in the actual sub-expressions yet,
 * even though they can generate conditional branches and
 * subroutine calls. That's all "local" behaviour.
 *
 * Copyright (C) 2004 Linus Torvalds
 * Copyright (C) 2004 Christopher Li
 */
13 #include <string.h>
14 #include <stdarg.h>
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <assert.h>
19 #include "parse.h"
20 #include "expression.h"
21 #include "linearize.h"
22 #include "flow.h"
23 #include "target.h"
25 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
26 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
28 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right);
29 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val);
30 static void linearize_one_symbol(struct entrypoint *ep, struct symbol *sym);
32 struct access_data;
33 static pseudo_t add_load(struct entrypoint *ep, struct access_data *);
34 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *);
36 struct pseudo void_pseudo = {};
38 static struct instruction *alloc_instruction(int opcode, int size)
40 struct instruction * insn = __alloc_instruction(0);
41 insn->opcode = opcode;
42 insn->size = size;
43 return insn;
46 static inline int type_size(struct symbol *type)
48 return type ? type->bit_size > 0 ? type->bit_size : 0 : 0;
/* Allocate an instruction whose size is taken from 'type' (0 if none). */
static struct instruction *alloc_typed_instruction(int opcode, struct symbol *type)
{
	int bits = type_size(type);

	return alloc_instruction(opcode, bits);
}
/* Thin wrapper around the entrypoint allocator. */
static struct entrypoint *alloc_entrypoint(void)
{
	struct entrypoint *ep = __alloc_entrypoint(0);

	return ep;
}
61 static struct basic_block *alloc_basic_block(struct entrypoint *ep, struct position pos)
63 struct basic_block *bb = __alloc_basic_block(0);
64 bb->context = -1;
65 bb->pos = pos;
66 bb->ep = ep;
67 return bb;
70 static struct multijmp* alloc_multijmp(struct basic_block *target, int begin, int end)
72 struct multijmp *multijmp = __alloc_multijmp(0);
73 multijmp->target = target;
74 multijmp->begin = begin;
75 multijmp->end = end;
76 return multijmp;
79 static inline int regno(pseudo_t n)
81 int retval = -1;
82 if (n && n->type == PSEUDO_REG)
83 retval = n->nr;
84 return retval;
87 const char *show_pseudo(pseudo_t pseudo)
89 static int n;
90 static char buffer[4][64];
91 char *buf;
92 int i;
94 if (!pseudo)
95 return "no pseudo";
96 if (pseudo == VOID)
97 return "VOID";
98 buf = buffer[3 & ++n];
99 switch(pseudo->type) {
100 case PSEUDO_SYM: {
101 struct symbol *sym = pseudo->sym;
102 struct expression *expr;
104 if (sym->bb_target) {
105 snprintf(buf, 64, ".L%p", sym->bb_target);
106 break;
108 if (sym->ident) {
109 snprintf(buf, 64, "%s", show_ident(sym->ident));
110 break;
112 expr = sym->initializer;
113 snprintf(buf, 64, "<anon symbol:%p>", sym);
114 switch (expr->type) {
115 case EXPR_VALUE:
116 snprintf(buf, 64, "<symbol value: %lld>", expr->value);
117 break;
118 case EXPR_STRING:
119 return show_string(expr->string);
120 default:
121 break;
123 break;
125 case PSEUDO_REG:
126 i = snprintf(buf, 64, "%%r%d", pseudo->nr);
127 if (pseudo->ident)
128 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
129 break;
130 case PSEUDO_VAL: {
131 long long value = pseudo->value;
132 if (value > 1000 || value < -1000)
133 snprintf(buf, 64, "$%#llx", value);
134 else
135 snprintf(buf, 64, "$%lld", value);
136 break;
138 case PSEUDO_ARG:
139 snprintf(buf, 64, "%%arg%d", pseudo->nr);
140 break;
141 case PSEUDO_PHI:
142 i = snprintf(buf, 64, "%%phi%d", pseudo->nr);
143 if (pseudo->ident)
144 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
145 break;
146 default:
147 snprintf(buf, 64, "<bad pseudo type %d>", pseudo->type);
149 return buf;
152 static const char* opcodes[] = {
153 [OP_BADOP] = "bad_op",
155 /* Fn entrypoint */
156 [OP_ENTRY] = "<entry-point>",
158 /* Terminator */
159 [OP_RET] = "ret",
160 [OP_BR] = "br",
161 [OP_SWITCH] = "switch",
162 [OP_INVOKE] = "invoke",
163 [OP_COMPUTEDGOTO] = "jmp *",
164 [OP_UNWIND] = "unwind",
166 /* Binary */
167 [OP_ADD] = "add",
168 [OP_SUB] = "sub",
169 [OP_MUL] = "mul",
170 [OP_DIV] = "div",
171 [OP_MOD] = "mod",
172 [OP_SHL] = "shl",
173 [OP_SHR] = "shr",
175 /* Logical */
176 [OP_AND] = "and",
177 [OP_OR] = "or",
178 [OP_XOR] = "xor",
179 [OP_AND_BOOL] = "and-bool",
180 [OP_OR_BOOL] = "or-bool",
182 /* Binary comparison */
183 [OP_SET_EQ] = "seteq",
184 [OP_SET_NE] = "setne",
185 [OP_SET_LE] = "setle",
186 [OP_SET_GE] = "setge",
187 [OP_SET_LT] = "setlt",
188 [OP_SET_GT] = "setgt",
189 [OP_SET_B] = "setb",
190 [OP_SET_A] = "seta",
191 [OP_SET_BE] = "setbe",
192 [OP_SET_AE] = "setae",
194 /* Uni */
195 [OP_NOT] = "not",
196 [OP_NEG] = "neg",
198 /* Special three-input */
199 [OP_SEL] = "select",
201 /* Memory */
202 [OP_MALLOC] = "malloc",
203 [OP_FREE] = "free",
204 [OP_ALLOCA] = "alloca",
205 [OP_LOAD] = "load",
206 [OP_STORE] = "store",
207 [OP_SETVAL] = "set",
208 [OP_GET_ELEMENT_PTR] = "getelem",
210 /* Other */
211 [OP_PHI] = "phi",
212 [OP_PHISOURCE] = "phisrc",
213 [OP_CAST] = "cast",
214 [OP_PTRCAST] = "ptrcast",
215 [OP_CALL] = "call",
216 [OP_VANEXT] = "va_next",
217 [OP_VAARG] = "va_arg",
218 [OP_SLICE] = "slice",
219 [OP_SNOP] = "snop",
220 [OP_LNOP] = "lnop",
221 [OP_NOP] = "nop",
222 [OP_DEATHNOTE] = "dead",
223 [OP_ASM] = "asm",
225 /* Sparse tagging (line numbers, context, whatever) */
226 [OP_CONTEXT] = "context",
229 void show_instruction(struct instruction *insn)
231 int opcode = insn->opcode;
232 static char buffer[1024] = "\t";
233 char *buf;
235 buf = buffer+1;
236 if (!insn->bb) {
237 if (verbose < 2)
238 return;
239 buf += sprintf(buf, "# ");
242 if (opcode < sizeof(opcodes)/sizeof(char *)) {
243 const char *op = opcodes[opcode];
244 if (!op)
245 buf += sprintf(buf, "opcode:%d", opcode);
246 else
247 buf += sprintf(buf, "%s", op);
248 if (insn->size)
249 buf += sprintf(buf, ".%d", insn->size);
250 memset(buf, ' ', 20);
251 buf++;
254 if (buf < buffer + 12)
255 buf = buffer + 12;
256 switch (opcode) {
257 case OP_RET:
258 if (insn->src && insn->src != VOID)
259 buf += sprintf(buf, "%s", show_pseudo(insn->src));
260 break;
261 case OP_BR:
262 if (insn->bb_true && insn->bb_false) {
263 buf += sprintf(buf, "%s, .L%p, .L%p", show_pseudo(insn->cond), insn->bb_true, insn->bb_false);
264 break;
266 buf += sprintf(buf, ".L%p", insn->bb_true ? insn->bb_true : insn->bb_false);
267 break;
269 case OP_SETVAL: {
270 struct expression *expr = insn->val;
271 pseudo_t pseudo = insn->symbol;
272 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
273 if (pseudo) {
274 struct symbol *sym = pseudo->sym;
275 if (!sym) {
276 buf += sprintf(buf, "%s", show_pseudo(pseudo));
277 break;
279 if (sym->bb_target) {
280 buf += sprintf(buf, ".L%p", sym->bb_target);
281 break;
283 if (sym->ident) {
284 buf += sprintf(buf, "%s", show_ident(sym->ident));
285 break;
287 buf += sprintf(buf, "<anon symbol:%p>", sym);
288 break;
291 if (!expr) {
292 buf += sprintf(buf, "%s", "<none>");
293 break;
296 switch (expr->type) {
297 case EXPR_VALUE:
298 buf += sprintf(buf, "%lld", expr->value);
299 break;
300 case EXPR_FVALUE:
301 buf += sprintf(buf, "%Lf", expr->fvalue);
302 break;
303 case EXPR_STRING:
304 buf += sprintf(buf, "%.40s", show_string(expr->string));
305 break;
306 case EXPR_SYMBOL:
307 buf += sprintf(buf, "%s", show_ident(expr->symbol->ident));
308 break;
309 case EXPR_LABEL:
310 buf += sprintf(buf, ".L%p", expr->symbol->bb_target);
311 break;
312 default:
313 buf += sprintf(buf, "SETVAL EXPR TYPE %d", expr->type);
315 break;
317 case OP_SWITCH: {
318 struct multijmp *jmp;
319 buf += sprintf(buf, "%s", show_pseudo(insn->target));
320 FOR_EACH_PTR(insn->multijmp_list, jmp) {
321 if (jmp->begin == jmp->end)
322 buf += sprintf(buf, ", %d -> .L%p", jmp->begin, jmp->target);
323 else if (jmp->begin < jmp->end)
324 buf += sprintf(buf, ", %d ... %d -> .L%p", jmp->begin, jmp->end, jmp->target);
325 else
326 buf += sprintf(buf, ", default -> .L%p", jmp->target);
327 } END_FOR_EACH_PTR(jmp);
328 break;
330 case OP_COMPUTEDGOTO: {
331 struct multijmp *jmp;
332 buf += sprintf(buf, "%s", show_pseudo(insn->target));
333 FOR_EACH_PTR(insn->multijmp_list, jmp) {
334 buf += sprintf(buf, ", .L%p", jmp->target);
335 } END_FOR_EACH_PTR(jmp);
336 break;
339 case OP_PHISOURCE: {
340 struct instruction *phi;
341 buf += sprintf(buf, "%s <- %s ", show_pseudo(insn->target), show_pseudo(insn->phi_src));
342 FOR_EACH_PTR(insn->phi_users, phi) {
343 buf += sprintf(buf, " (%s)", show_pseudo(phi->target));
344 } END_FOR_EACH_PTR(phi);
345 break;
348 case OP_PHI: {
349 pseudo_t phi;
350 const char *s = " <-";
351 buf += sprintf(buf, "%s", show_pseudo(insn->target));
352 FOR_EACH_PTR(insn->phi_list, phi) {
353 buf += sprintf(buf, "%s %s", s, show_pseudo(phi));
354 s = ",";
355 } END_FOR_EACH_PTR(phi);
356 break;
358 case OP_LOAD: case OP_LNOP:
359 buf += sprintf(buf, "%s <- %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
360 break;
361 case OP_STORE: case OP_SNOP:
362 buf += sprintf(buf, "%s -> %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
363 break;
364 case OP_CALL: {
365 struct pseudo *arg;
366 if (insn->target && insn->target != VOID)
367 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
368 buf += sprintf(buf, "%s", show_pseudo(insn->func));
369 FOR_EACH_PTR(insn->arguments, arg) {
370 buf += sprintf(buf, ", %s", show_pseudo(arg));
371 } END_FOR_EACH_PTR(arg);
372 break;
374 case OP_CAST:
375 case OP_PTRCAST:
376 buf += sprintf(buf, "%s <- (%d) %s",
377 show_pseudo(insn->target),
378 type_size(insn->orig_type),
379 show_pseudo(insn->src));
380 break;
381 case OP_BINARY ... OP_BINARY_END:
382 case OP_BINCMP ... OP_BINCMP_END:
383 buf += sprintf(buf, "%s <- %s, %s", show_pseudo(insn->target), show_pseudo(insn->src1), show_pseudo(insn->src2));
384 break;
386 case OP_SEL:
387 buf += sprintf(buf, "%s <- %s, %s, %s", show_pseudo(insn->target),
388 show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
389 break;
391 case OP_SLICE:
392 buf += sprintf(buf, "%s <- %s, %d, %d", show_pseudo(insn->target), show_pseudo(insn->base), insn->from, insn->len);
393 break;
395 case OP_NOT: case OP_NEG:
396 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
397 break;
399 case OP_CONTEXT:
400 buf += sprintf(buf, "%d", insn->increment);
401 break;
402 case OP_NOP:
403 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
404 break;
405 case OP_DEATHNOTE:
406 buf += sprintf(buf, "%s", show_pseudo(insn->target));
407 break;
408 case OP_ASM:
409 buf += sprintf(buf, "\"%s\"", insn->string);
410 if (insn->outputs) {
411 pseudo_t pseudo;
412 buf += sprintf(buf, " (");
413 FOR_EACH_PTR(insn->outputs, pseudo) {
414 buf += sprintf(buf, " %s", show_pseudo(pseudo));
415 } END_FOR_EACH_PTR(pseudo);
416 buf += sprintf(buf, " ) <-");
418 if (insn->inputs) {
419 pseudo_t pseudo;
420 buf += sprintf(buf, " (");
421 FOR_EACH_PTR(insn->inputs, pseudo) {
422 buf += sprintf(buf, " %s", show_pseudo(pseudo));
423 } END_FOR_EACH_PTR(pseudo);
424 buf += sprintf(buf, " )");
426 break;
427 default:
428 break;
430 do { --buf; } while (*buf == ' ');
431 *++buf = 0;
432 printf("%s\n", buffer);
435 void show_bb(struct basic_block *bb)
437 struct instruction *insn;
439 printf(".L%p:\n", bb);
440 if (verbose) {
441 pseudo_t needs, defines;
442 printf("%s:%d\n", input_streams[bb->pos.stream].name, bb->pos.line);
444 FOR_EACH_PTR(bb->needs, needs) {
445 struct instruction *def = needs->def;
446 if (def->opcode != OP_PHI) {
447 printf(" **uses %s (from .L%p)**\n", show_pseudo(needs), def->bb);
448 } else {
449 pseudo_t phi;
450 const char *sep = " ";
451 printf(" **uses %s (from", show_pseudo(needs));
452 FOR_EACH_PTR(def->phi_list, phi) {
453 if (phi == VOID)
454 continue;
455 printf("%s(%s:.L%p)", sep, show_pseudo(phi), phi->def->bb);
456 sep = ", ";
457 } END_FOR_EACH_PTR(phi);
458 printf(")**\n");
460 } END_FOR_EACH_PTR(needs);
462 FOR_EACH_PTR(bb->defines, defines) {
463 printf(" **defines %s **\n", show_pseudo(defines));
464 } END_FOR_EACH_PTR(defines);
466 if (bb->parents) {
467 struct basic_block *from;
468 FOR_EACH_PTR(bb->parents, from) {
469 printf(" **from %p (%s:%d:%d)**\n", from,
470 input_streams[from->pos.stream].name, from->pos.line, from->pos.pos);
471 } END_FOR_EACH_PTR(from);
474 if (bb->children) {
475 struct basic_block *to;
476 FOR_EACH_PTR(bb->children, to) {
477 printf(" **to %p (%s:%d:%d)**\n", to,
478 input_streams[to->pos.stream].name, to->pos.line, to->pos.pos);
479 } END_FOR_EACH_PTR(to);
483 FOR_EACH_PTR(bb->insns, insn) {
484 show_instruction(insn);
485 } END_FOR_EACH_PTR(insn);
486 if (!bb_terminated(bb))
487 printf("\tEND\n");
490 static void show_symbol_usage(pseudo_t pseudo)
492 if (pseudo) {
493 pseudo_t *pp;
494 FOR_EACH_PTR(pseudo->users, pp) {
495 struct instruction *insn = container(pp, struct instruction, src);
496 show_instruction(insn);
497 } END_FOR_EACH_PTR(pp);
501 void show_entry(struct entrypoint *ep)
503 struct symbol *sym;
504 struct basic_block *bb;
506 printf("%s:\n", show_ident(ep->name->ident));
508 if (verbose) {
509 printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
511 FOR_EACH_PTR(ep->syms, sym) {
512 if (!sym->pseudo)
513 continue;
514 if (!sym->pseudo->users)
515 continue;
516 printf(" sym: %p %s\n", sym, show_ident(sym->ident));
517 if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE))
518 printf("\texternal visibility\n");
519 show_symbol_usage(sym->pseudo);
520 } END_FOR_EACH_PTR(sym);
522 printf("\n");
525 FOR_EACH_PTR(ep->bbs, bb) {
526 if (!bb)
527 continue;
528 if (!bb->parents && !bb->children && !bb->insns && verbose < 2)
529 continue;
530 show_bb(bb);
531 printf("\n");
532 } END_FOR_EACH_PTR(bb);
534 printf("\n");
537 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos)
539 if (label->bb_target)
540 warning(pos, "label '%s' already bound", show_ident(label->ident));
541 label->bb_target = bb;
544 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label)
546 struct basic_block *bb = label->bb_target;
548 if (!bb) {
549 bb = alloc_basic_block(ep, label->pos);
550 label->bb_target = bb;
552 return bb;
555 static void finish_block(struct entrypoint *ep)
557 struct basic_block *src = ep->active;
558 if (bb_reachable(src))
559 ep->active = NULL;
562 static void add_goto(struct entrypoint *ep, struct basic_block *dst)
564 struct basic_block *src = ep->active;
565 if (bb_reachable(src)) {
566 struct instruction *br = alloc_instruction(OP_BR, 0);
567 br->bb_true = dst;
568 add_bb(&dst->parents, src);
569 add_bb(&src->children, dst);
570 br->bb = src;
571 add_instruction(&src->insns, br);
572 ep->active = NULL;
576 static void add_one_insn(struct entrypoint *ep, struct instruction *insn)
578 struct basic_block *bb = ep->active;
580 if (bb_reachable(bb)) {
581 insn->bb = bb;
582 add_instruction(&bb->insns, insn);
586 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb)
588 if (!bb_terminated(ep->active))
589 add_goto(ep, bb);
591 ep->active = bb;
592 if (bb_reachable(bb))
593 add_bb(&ep->bbs, bb);
596 static void remove_parent(struct basic_block *child, struct basic_block *parent)
598 remove_bb_from_list(&child->parents, parent, 1);
599 if (!child->parents)
600 kill_bb(child);
603 /* Change a "switch" into a branch */
604 void insert_branch(struct basic_block *bb, struct instruction *jmp, struct basic_block *target)
606 struct instruction *br, *old;
607 struct basic_block *child;
609 /* Remove the switch */
610 old = delete_last_instruction(&bb->insns);
611 assert(old == jmp);
613 br = alloc_instruction(OP_BR, 0);
614 br->bb = bb;
615 br->bb_true = target;
616 add_instruction(&bb->insns, br);
618 FOR_EACH_PTR(bb->children, child) {
619 if (child == target) {
620 target = NULL; /* Trigger just once */
621 continue;
623 DELETE_CURRENT_PTR(child);
624 remove_parent(child, bb);
625 } END_FOR_EACH_PTR(child);
626 PACK_PTR_LIST(&bb->children);
630 void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi_node, pseudo_t true, pseudo_t false)
632 pseudo_t target;
633 struct instruction *select;
635 /* Remove the 'br' */
636 delete_last_instruction(&bb->insns);
638 select = alloc_instruction(OP_SEL, phi_node->size);
639 select->bb = bb;
641 assert(br->cond);
642 use_pseudo(br->cond, &select->src1);
644 target = phi_node->target;
645 assert(target->def == phi_node);
646 select->target = target;
647 target->def = select;
649 use_pseudo(true, &select->src2);
650 use_pseudo(false, &select->src3);
652 add_instruction(&bb->insns, select);
653 add_instruction(&bb->insns, br);
656 static inline int bb_empty(struct basic_block *bb)
658 return !bb->insns;
661 /* Add a label to the currently active block, return new active block */
662 static struct basic_block * add_label(struct entrypoint *ep, struct symbol *label)
664 struct basic_block *bb = label->bb_target;
666 if (bb) {
667 set_activeblock(ep, bb);
668 return bb;
670 bb = ep->active;
671 if (!bb_reachable(bb) || !bb_empty(bb)) {
672 bb = alloc_basic_block(ep, label->pos);
673 set_activeblock(ep, bb);
675 label->bb_target = bb;
676 return bb;
679 static void add_branch(struct entrypoint *ep, struct expression *expr, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false)
681 struct basic_block *bb = ep->active;
682 struct instruction *br;
684 if (bb_reachable(bb)) {
685 br = alloc_instruction(OP_BR, 0);
686 use_pseudo(cond, &br->cond);
687 br->bb_true = bb_true;
688 br->bb_false = bb_false;
689 add_bb(&bb_true->parents, bb);
690 add_bb(&bb_false->parents, bb);
691 add_bb(&bb->children, bb_true);
692 add_bb(&bb->children, bb_false);
693 add_one_insn(ep, br);
697 /* Dummy pseudo allocator */
698 pseudo_t alloc_pseudo(struct instruction *def)
700 static int nr = 0;
701 struct pseudo * pseudo = __alloc_pseudo(0);
702 pseudo->type = PSEUDO_REG;
703 pseudo->nr = ++nr;
704 pseudo->def = def;
705 return pseudo;
708 static void clear_symbol_pseudos(struct entrypoint *ep)
710 struct symbol *sym;
712 FOR_EACH_PTR(ep->accesses, sym) {
713 sym->pseudo = NULL;
714 } END_FOR_EACH_PTR(sym);
717 static pseudo_t symbol_pseudo(struct entrypoint *ep, struct symbol *sym)
719 pseudo_t pseudo;
721 if (!sym)
722 return VOID;
724 pseudo = sym->pseudo;
725 if (!pseudo) {
726 pseudo = __alloc_pseudo(0);
727 pseudo->type = PSEUDO_SYM;
728 pseudo->sym = sym;
729 pseudo->ident = sym->ident;
730 sym->pseudo = pseudo;
731 add_symbol(&ep->accesses, sym);
733 /* Symbol pseudos have neither nr, usage nor def */
734 return pseudo;
737 pseudo_t value_pseudo(long long val)
739 #define MAX_VAL_HASH 64
740 static struct pseudo_list *prev[MAX_VAL_HASH];
741 int hash = val & (MAX_VAL_HASH-1);
742 struct pseudo_list **list = prev + hash;
743 pseudo_t pseudo;
745 FOR_EACH_PTR(*list, pseudo) {
746 if (pseudo->value == val)
747 return pseudo;
748 } END_FOR_EACH_PTR(pseudo);
750 pseudo = __alloc_pseudo(0);
751 pseudo->type = PSEUDO_VAL;
752 pseudo->value = val;
753 add_pseudo(list, pseudo);
755 /* Value pseudos have neither nr, usage nor def */
756 return pseudo;
759 static pseudo_t argument_pseudo(struct entrypoint *ep, int nr)
761 pseudo_t pseudo = __alloc_pseudo(0);
762 pseudo->type = PSEUDO_ARG;
763 pseudo->nr = nr;
764 pseudo->def = ep->entry;
765 /* Argument pseudos have neither usage nor def */
766 return pseudo;
769 pseudo_t alloc_phi(struct basic_block *source, pseudo_t pseudo, int size)
771 struct instruction *insn = alloc_instruction(OP_PHISOURCE, size);
772 pseudo_t phi = __alloc_pseudo(0);
773 static int nr = 0;
775 phi->type = PSEUDO_PHI;
776 phi->nr = ++nr;
777 phi->def = insn;
779 use_pseudo(pseudo, &insn->phi_src);
780 insn->bb = source;
781 insn->target = phi;
782 add_instruction(&source->insns, insn);
783 return phi;
787 * We carry the "access_data" structure around for any accesses,
788 * which simplifies things a lot. It contains all the access
789 * information in one place.
791 struct access_data {
792 struct symbol *result_type; // result ctype
793 struct symbol *source_type; // source ctype
794 pseudo_t address; // pseudo containing address ..
795 pseudo_t origval; // pseudo for original value ..
796 unsigned int offset, alignment; // byte offset
797 unsigned int bit_size, bit_offset; // which bits
798 struct position pos;
/* Called once an access is complete; currently there is nothing to do. */
static void finish_address_gen(struct entrypoint *ep, struct access_data *ad)
{
}
805 static int linearize_simple_address(struct entrypoint *ep,
806 struct expression *addr,
807 struct access_data *ad)
809 if (addr->type == EXPR_SYMBOL) {
810 linearize_one_symbol(ep, addr->symbol);
811 ad->address = symbol_pseudo(ep, addr->symbol);
812 return 1;
814 if (addr->type == EXPR_BINOP) {
815 if (addr->right->type == EXPR_VALUE) {
816 if (addr->op == '+') {
817 ad->offset += get_expression_value(addr->right);
818 return linearize_simple_address(ep, addr->left, ad);
822 ad->address = linearize_expression(ep, addr);
823 return 1;
826 static struct symbol *base_type(struct symbol *sym)
828 struct symbol *base = sym;
830 if (sym) {
831 if (sym->type == SYM_NODE)
832 base = base->ctype.base_type;
833 if (base->type == SYM_BITFIELD)
834 return base->ctype.base_type;
836 return sym;
839 static int linearize_address_gen(struct entrypoint *ep,
840 struct expression *expr,
841 struct access_data *ad)
843 struct symbol *ctype = expr->ctype;
845 if (!ctype)
846 return 0;
847 ad->pos = expr->pos;
848 ad->result_type = ctype;
849 ad->source_type = base_type(ctype);
850 ad->bit_size = ctype->bit_size;
851 ad->alignment = ctype->ctype.alignment;
852 ad->bit_offset = ctype->bit_offset;
853 if (expr->type == EXPR_PREOP && expr->op == '*')
854 return linearize_simple_address(ep, expr->unop, ad);
856 warning(expr->pos, "generating address of non-lvalue (%d)", expr->type);
857 return 0;
860 static pseudo_t add_load(struct entrypoint *ep, struct access_data *ad)
862 struct instruction *insn;
863 pseudo_t new;
865 new = ad->origval;
866 if (0 && new)
867 return new;
869 insn = alloc_typed_instruction(OP_LOAD, ad->source_type);
870 new = alloc_pseudo(insn);
871 ad->origval = new;
873 insn->target = new;
874 insn->offset = ad->offset;
875 use_pseudo(ad->address, &insn->src);
876 add_one_insn(ep, insn);
877 return new;
880 static void add_store(struct entrypoint *ep, struct access_data *ad, pseudo_t value)
882 struct basic_block *bb = ep->active;
884 if (bb_reachable(bb)) {
885 struct instruction *store = alloc_typed_instruction(OP_STORE, ad->source_type);
886 store->offset = ad->offset;
887 use_pseudo(value, &store->target);
888 use_pseudo(ad->address, &store->src);
889 add_one_insn(ep, store);
893 static pseudo_t linearize_store_gen(struct entrypoint *ep,
894 pseudo_t value,
895 struct access_data *ad)
897 pseudo_t store = value;
899 if (type_size(ad->source_type) != type_size(ad->result_type)) {
900 pseudo_t orig = add_load(ep, ad);
901 int shift = ad->bit_offset;
902 unsigned long long mask = (1ULL << ad->bit_size)-1;
904 if (shift) {
905 store = add_binary_op(ep, ad->source_type, OP_SHL, value, value_pseudo(shift));
906 mask <<= shift;
908 orig = add_binary_op(ep, ad->source_type, OP_AND, orig, value_pseudo(~mask));
909 store = add_binary_op(ep, ad->source_type, OP_OR, orig, store);
911 add_store(ep, ad, store);
912 return value;
915 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right)
917 struct instruction *insn = alloc_typed_instruction(op, ctype);
918 pseudo_t target = alloc_pseudo(insn);
919 insn->target = target;
920 use_pseudo(left, &insn->src1);
921 use_pseudo(right, &insn->src2);
922 add_one_insn(ep, insn);
923 return target;
926 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val)
928 struct instruction *insn = alloc_typed_instruction(OP_SETVAL, ctype);
929 pseudo_t target = alloc_pseudo(insn);
930 insn->target = target;
931 insn->val = val;
932 if (!val) {
933 pseudo_t addr = symbol_pseudo(ep, ctype);
934 use_pseudo(addr, &insn->symbol);
935 insn->size = bits_in_pointer;
937 add_one_insn(ep, insn);
938 return target;
941 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct access_data *ad)
943 pseudo_t new = add_load(ep, ad);
945 if (ad->bit_offset) {
946 pseudo_t shift = value_pseudo(ad->bit_offset);
947 pseudo_t newval = add_binary_op(ep, ad->source_type, OP_SHR, new, shift);
948 new = newval;
951 return new;
954 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
956 struct access_data ad = { NULL, };
957 pseudo_t value;
959 if (!linearize_address_gen(ep, expr, &ad))
960 return VOID;
961 value = linearize_load_gen(ep, &ad);
962 finish_address_gen(ep, &ad);
963 return value;
966 /* FIXME: FP */
967 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
969 struct access_data ad = { NULL, };
970 pseudo_t old, new, one;
971 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB;
973 if (!linearize_address_gen(ep, expr->unop, &ad))
974 return VOID;
976 old = linearize_load_gen(ep, &ad);
977 one = value_pseudo(expr->op_value);
978 new = add_binary_op(ep, expr->ctype, op, old, one);
979 linearize_store_gen(ep, new, &ad);
980 finish_address_gen(ep, &ad);
981 return postop ? old : new;
984 static pseudo_t add_uniop(struct entrypoint *ep, struct expression *expr, int op, pseudo_t src)
986 struct instruction *insn = alloc_typed_instruction(op, expr->ctype);
987 pseudo_t new = alloc_pseudo(insn);
989 insn->target = new;
990 use_pseudo(src, &insn->src1);
991 add_one_insn(ep, insn);
992 return new;
995 static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr)
997 pseudo_t pre = linearize_expression(ep, expr->base);
998 struct instruction *insn = alloc_typed_instruction(OP_SLICE, expr->ctype);
999 pseudo_t new = alloc_pseudo(insn);
1001 insn->target = new;
1002 insn->from = expr->r_bitpos;
1003 insn->len = expr->r_nrbits;
1004 use_pseudo(pre, &insn->base);
1005 add_one_insn(ep, insn);
1006 return new;
1009 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
1011 pseudo_t pre = linearize_expression(ep, expr->unop);
1012 switch (expr->op) {
1013 case '+':
1014 return pre;
1015 case '!': {
1016 pseudo_t zero = value_pseudo(0);
1017 return add_binary_op(ep, expr->ctype, OP_SET_EQ, pre, zero);
1019 case '~':
1020 return add_uniop(ep, expr, OP_NOT, pre);
1021 case '-':
1022 return add_uniop(ep, expr, OP_NEG, pre);
1024 return VOID;
1027 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
1030 * '*' is an lvalue access, and is fundamentally different
1031 * from an arithmetic operation. Maybe it should have an
1032 * expression type of its own..
1034 if (expr->op == '*')
1035 return linearize_access(ep, expr);
1036 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
1037 return linearize_inc_dec(ep, expr, 0);
1038 return linearize_regular_preop(ep, expr);
1041 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr)
1043 return linearize_inc_dec(ep, expr, 1);
1046 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
1048 struct access_data ad = { NULL, };
1049 struct expression *target = expr->left;
1050 pseudo_t value;
1052 value = linearize_expression(ep, expr->right);
1053 if (!linearize_address_gen(ep, target, &ad))
1054 return VOID;
1055 if (expr->op != '=') {
1056 pseudo_t oldvalue = linearize_load_gen(ep, &ad);
1057 pseudo_t dst;
1058 static const int op_trans[] = {
1059 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD,
1060 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB,
1061 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MUL,
1062 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIV,
1063 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MOD,
1064 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL,
1065 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_SHR,
1066 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND,
1067 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR,
1068 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR
1070 dst = add_binary_op(ep, expr->ctype, op_trans[expr->op - SPECIAL_BASE], oldvalue, value);
1071 value = dst;
1073 value = linearize_store_gen(ep, value, &ad);
1074 finish_address_gen(ep, &ad);
1075 return value;
1078 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr)
1080 struct expression *arg, *fn;
1081 struct instruction *insn = alloc_typed_instruction(OP_CALL, expr->ctype);
1082 pseudo_t retval, call;
1083 int context_diff;
1085 if (!expr->ctype) {
1086 warning(expr->pos, "call with no type!");
1087 return VOID;
1090 FOR_EACH_PTR(expr->args, arg) {
1091 pseudo_t new = linearize_expression(ep, arg);
1092 use_pseudo(new, add_pseudo(&insn->arguments, new));
1093 } END_FOR_EACH_PTR(arg);
1095 fn = expr->fn;
1097 context_diff = 0;
1098 if (fn->ctype) {
1099 int in = fn->ctype->ctype.in_context;
1100 int out = fn->ctype->ctype.out_context;
1101 if (in < 0 || out < 0)
1102 in = out = 0;
1103 context_diff = out - in;
1106 if (fn->type == EXPR_PREOP) {
1107 if (fn->unop->type == EXPR_SYMBOL) {
1108 struct symbol *sym = fn->unop->symbol;
1109 if (sym->ctype.base_type->type == SYM_FN)
1110 fn = fn->unop;
1113 if (fn->type == EXPR_SYMBOL) {
1114 call = symbol_pseudo(ep, fn->symbol);
1115 } else {
1116 call = linearize_expression(ep, fn);
1118 use_pseudo(call, &insn->func);
1119 retval = VOID;
1120 if (expr->ctype != &void_ctype)
1121 retval = alloc_pseudo(insn);
1122 insn->target = retval;
1123 add_one_insn(ep, insn);
1125 if (context_diff) {
1126 insn = alloc_instruction(OP_CONTEXT, 0);
1127 insn->increment = context_diff;
1128 add_one_insn(ep, insn);
1131 return retval;
1134 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr)
1136 pseudo_t src1, src2, dst;
1137 static const int opcode[] = {
1138 ['+'] = OP_ADD, ['-'] = OP_SUB,
1139 ['*'] = OP_MUL, ['/'] = OP_DIV,
1140 ['%'] = OP_MOD, ['&'] = OP_AND,
1141 ['|'] = OP_OR, ['^'] = OP_XOR,
1142 [SPECIAL_LEFTSHIFT] = OP_SHL,
1143 [SPECIAL_RIGHTSHIFT] = OP_SHR,
1144 [SPECIAL_LOGICAL_AND] = OP_AND_BOOL,
1145 [SPECIAL_LOGICAL_OR] = OP_OR_BOOL,
1148 src1 = linearize_expression(ep, expr->left);
1149 src2 = linearize_expression(ep, expr->right);
1150 dst = add_binary_op(ep, expr->ctype, opcode[expr->op], src1, src2);
1151 return dst;
1154 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1156 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1158 static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr)
1160 pseudo_t cond, true, false, res;
1161 struct instruction *insn;
1163 true = linearize_expression(ep, expr->cond_true);
1164 false = linearize_expression(ep, expr->cond_false);
1165 cond = linearize_expression(ep, expr->conditional);
1167 insn = alloc_typed_instruction(OP_SEL, expr->ctype);
1168 if (!expr->cond_true)
1169 true = cond;
1170 use_pseudo(cond, &insn->src1);
1171 use_pseudo(true, &insn->src2);
1172 use_pseudo(false, &insn->src3);
1174 res = alloc_pseudo(insn);
1175 insn->target = res;
1176 add_one_insn(ep, insn);
1177 return res;
1180 static pseudo_t add_join_conditional(struct entrypoint *ep, struct expression *expr,
1181 pseudo_t phi1, pseudo_t phi2)
1183 pseudo_t target;
1184 struct instruction *phi_node;
1186 if (phi1 == VOID)
1187 return phi2;
1188 if (phi2 == VOID)
1189 return phi1;
1191 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1192 use_pseudo(phi1, add_pseudo(&phi_node->phi_list, phi1));
1193 use_pseudo(phi2, add_pseudo(&phi_node->phi_list, phi2));
1194 phi_node->target = target = alloc_pseudo(phi_node);
1195 add_one_insn(ep, phi_node);
1196 return target;
1199 static pseudo_t linearize_short_conditional(struct entrypoint *ep, struct expression *expr,
1200 struct expression *cond,
1201 struct expression *expr_false)
1203 pseudo_t src1, src2;
1204 struct basic_block *bb_false = alloc_basic_block(ep, expr_false->pos);
1205 struct basic_block *merge = alloc_basic_block(ep, expr->pos);
1206 pseudo_t phi1, phi2;
1207 int size = type_size(expr->ctype);
1209 src1 = linearize_expression(ep, cond);
1210 phi1 = alloc_phi(ep->active, src1, size);
1211 add_branch(ep, expr, src1, merge, bb_false);
1213 set_activeblock(ep, bb_false);
1214 src2 = linearize_expression(ep, expr_false);
1215 phi2 = alloc_phi(ep->active, src2, size);
1216 set_activeblock(ep, merge);
1218 return add_join_conditional(ep, expr, phi1, phi2);
1221 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr,
1222 struct expression *cond,
1223 struct expression *expr_true,
1224 struct expression *expr_false)
1226 pseudo_t src1, src2;
1227 pseudo_t phi1, phi2;
1228 struct basic_block *bb_true = alloc_basic_block(ep, expr_true->pos);
1229 struct basic_block *bb_false = alloc_basic_block(ep, expr_false->pos);
1230 struct basic_block *merge = alloc_basic_block(ep, expr->pos);
1231 int size = type_size(expr->ctype);
1233 linearize_cond_branch(ep, cond, bb_true, bb_false);
1235 set_activeblock(ep, bb_true);
1236 src1 = linearize_expression(ep, expr_true);
1237 phi1 = alloc_phi(ep->active, src1, size);
1238 add_goto(ep, merge);
1240 set_activeblock(ep, bb_false);
1241 src2 = linearize_expression(ep, expr_false);
1242 phi2 = alloc_phi(ep->active, src2, size);
1243 set_activeblock(ep, merge);
1245 return add_join_conditional(ep, expr, phi1, phi2);
1248 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr)
1250 struct expression *shortcut;
1252 shortcut = alloc_const_expression(expr->pos, expr->op == SPECIAL_LOGICAL_OR);
1253 shortcut->ctype = expr->ctype;
1254 return linearize_conditional(ep, expr, expr->left, shortcut, expr->right);
1257 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr)
1259 static const int cmpop[] = {
1260 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT,
1261 [SPECIAL_EQUAL] = OP_SET_EQ,
1262 [SPECIAL_NOTEQUAL] = OP_SET_NE,
1263 [SPECIAL_GTE] = OP_SET_GE,
1264 [SPECIAL_LTE] = OP_SET_LE,
1265 [SPECIAL_UNSIGNED_LT] = OP_SET_B,
1266 [SPECIAL_UNSIGNED_GT] = OP_SET_A,
1267 [SPECIAL_UNSIGNED_LTE] = OP_SET_BE,
1268 [SPECIAL_UNSIGNED_GTE] = OP_SET_AE,
1271 pseudo_t src1 = linearize_expression(ep, expr->left);
1272 pseudo_t src2 = linearize_expression(ep, expr->right);
1273 pseudo_t dst = add_binary_op(ep, expr->ctype, cmpop[expr->op], src1, src2);
1274 return dst;
1278 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1280 pseudo_t cond;
1282 if (!expr || !bb_reachable(ep->active))
1283 return VOID;
1285 switch (expr->type) {
1287 case EXPR_STRING:
1288 case EXPR_VALUE:
1289 add_goto(ep, expr->value ? bb_true : bb_false);
1290 return VOID;
1292 case EXPR_FVALUE:
1293 add_goto(ep, expr->fvalue ? bb_true : bb_false);
1294 return VOID;
1296 case EXPR_LOGICAL:
1297 linearize_logical_branch(ep, expr, bb_true, bb_false);
1298 return VOID;
1300 case EXPR_COMPARE:
1301 cond = linearize_compare(ep, expr);
1302 add_branch(ep, expr, cond, bb_true, bb_false);
1303 break;
1305 case EXPR_PREOP:
1306 if (expr->op == '!')
1307 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true);
1308 /* fall through */
1309 default: {
1310 cond = linearize_expression(ep, expr);
1311 add_branch(ep, expr, cond, bb_true, bb_false);
1313 return VOID;
1316 return VOID;
1321 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1323 struct basic_block *next = alloc_basic_block(ep, expr->pos);
1325 if (expr->op == SPECIAL_LOGICAL_OR)
1326 linearize_cond_branch(ep, expr->left, bb_true, next);
1327 else
1328 linearize_cond_branch(ep, expr->left, next, bb_false);
1329 set_activeblock(ep, next);
1330 linearize_cond_branch(ep, expr->right, bb_true, bb_false);
1331 return VOID;
1335 * Casts to pointers are "less safe" than other casts, since
1336 * they imply type-unsafe accesses. "void *" is a special
1337 * case, since you can't access through it anyway without another
1338 * cast.
1340 static struct instruction *alloc_cast_instruction(struct symbol *ctype)
1342 int opcode = OP_CAST;
1343 struct symbol *base = ctype;
1345 if (base->type == SYM_NODE)
1346 base = base->ctype.base_type;
1347 if (base->type == SYM_PTR) {
1348 base = base->ctype.base_type;
1349 if (base != &void_ctype)
1350 opcode = OP_PTRCAST;
1352 return alloc_typed_instruction(opcode, ctype);
1355 pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr)
1357 pseudo_t src, result;
1358 struct instruction *insn;
1360 src = linearize_expression(ep, expr->cast_expression);
1361 if (src == VOID)
1362 return VOID;
1363 if (!expr->ctype)
1364 return VOID;
1365 if (expr->ctype->bit_size < 0)
1366 return VOID;
1368 insn = alloc_cast_instruction(expr->ctype);
1369 result = alloc_pseudo(insn);
1370 insn->target = result;
1371 insn->orig_type = expr->cast_expression->ctype;
1372 use_pseudo(src, &insn->src);
1373 add_one_insn(ep, insn);
1374 return result;
1377 pseudo_t linearize_position(struct entrypoint *ep, struct expression *pos, struct access_data *ad)
1379 struct expression *init_expr = pos->init_expr;
1380 pseudo_t value = linearize_expression(ep, init_expr);
1382 ad->offset = pos->init_offset;
1383 ad->source_type = base_type(init_expr->ctype);
1384 ad->result_type = init_expr->ctype;
1385 linearize_store_gen(ep, value, ad);
1386 return VOID;
1389 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *ad)
1391 switch (initializer->type) {
1392 case EXPR_INITIALIZER: {
1393 struct expression *expr;
1394 FOR_EACH_PTR(initializer->expr_list, expr) {
1395 linearize_initializer(ep, expr, ad);
1396 } END_FOR_EACH_PTR(expr);
1397 break;
1399 case EXPR_POS:
1400 linearize_position(ep, initializer, ad);
1401 break;
1402 default: {
1403 pseudo_t value = linearize_expression(ep, initializer);
1404 ad->source_type = base_type(initializer->ctype);
1405 ad->result_type = initializer->ctype;
1406 linearize_store_gen(ep, value, ad);
1410 return VOID;
1413 void linearize_argument(struct entrypoint *ep, struct symbol *arg, int nr)
1415 struct access_data ad = { NULL, };
1417 ad.source_type = arg;
1418 ad.result_type = arg;
1419 ad.address = symbol_pseudo(ep, arg);
1420 linearize_store_gen(ep, argument_pseudo(ep, nr), &ad);
1421 finish_address_gen(ep, &ad);
1424 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
1426 if (!expr)
1427 return VOID;
1429 switch (expr->type) {
1430 case EXPR_SYMBOL:
1431 linearize_one_symbol(ep, expr->symbol);
1432 return add_setval(ep, expr->symbol, NULL);
1434 case EXPR_VALUE:
1435 return value_pseudo(expr->value);
1437 case EXPR_STRING: case EXPR_FVALUE: case EXPR_LABEL:
1438 return add_setval(ep, expr->ctype, expr);
1440 case EXPR_STATEMENT:
1441 return linearize_statement(ep, expr->statement);
1443 case EXPR_CALL:
1444 return linearize_call_expression(ep, expr);
1446 case EXPR_BINOP:
1447 return linearize_binop(ep, expr);
1449 case EXPR_LOGICAL:
1450 return linearize_logical(ep, expr);
1452 case EXPR_COMPARE:
1453 return linearize_compare(ep, expr);
1455 case EXPR_SELECT:
1456 return linearize_select(ep, expr);
1458 case EXPR_CONDITIONAL:
1459 if (!expr->cond_true)
1460 return linearize_short_conditional(ep, expr, expr->conditional, expr->cond_false);
1462 return linearize_conditional(ep, expr, expr->conditional,
1463 expr->cond_true, expr->cond_false);
1465 case EXPR_COMMA:
1466 linearize_expression(ep, expr->left);
1467 return linearize_expression(ep, expr->right);
1469 case EXPR_ASSIGNMENT:
1470 return linearize_assignment(ep, expr);
1472 case EXPR_PREOP:
1473 return linearize_preop(ep, expr);
1475 case EXPR_POSTOP:
1476 return linearize_postop(ep, expr);
1478 case EXPR_CAST:
1479 case EXPR_IMPLIED_CAST:
1480 return linearize_cast(ep, expr);
1482 case EXPR_SLICE:
1483 return linearize_slice(ep, expr);
1485 case EXPR_INITIALIZER:
1486 case EXPR_POS:
1487 warning(expr->pos, "unexpected initializer expression (%d %d)", expr->type, expr->op);
1488 return VOID;
1489 default:
1490 warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op);
1491 return VOID;
1493 return VOID;
1496 static void linearize_one_symbol(struct entrypoint *ep, struct symbol *sym)
1498 struct access_data ad = { NULL, };
1500 if (!sym || !sym->initializer || sym->initialized)
1501 return;
1503 /* We need to output these puppies some day too.. */
1504 if (sym->ctype.modifiers & (MOD_STATIC | MOD_TOPLEVEL))
1505 return;
1507 sym->initialized = 1;
1508 ad.address = symbol_pseudo(ep, sym);
1509 linearize_initializer(ep, sym->initializer, &ad);
1510 finish_address_gen(ep, &ad);
1513 static pseudo_t linearize_compound_statement(struct entrypoint *ep, struct statement *stmt)
1515 pseudo_t pseudo;
1516 struct statement *s;
1517 struct symbol *sym;
1518 struct symbol *ret = stmt->ret;
1520 concat_symbol_list(stmt->syms, &ep->syms);
1522 FOR_EACH_PTR(stmt->syms, sym) {
1523 linearize_one_symbol(ep, sym);
1524 } END_FOR_EACH_PTR(sym);
1526 pseudo = VOID;
1527 FOR_EACH_PTR(stmt->stmts, s) {
1528 pseudo = linearize_statement(ep, s);
1529 } END_FOR_EACH_PTR(s);
1531 if (ret) {
1532 struct basic_block *bb = add_label(ep, ret);
1533 struct instruction *phi_node = first_instruction(bb->insns);
1535 if (!phi_node)
1536 return pseudo;
1538 if (pseudo_list_size(phi_node->phi_list)==1) {
1539 pseudo = first_pseudo(phi_node->phi_list);
1540 assert(pseudo->type == PSEUDO_PHI);
1541 return pseudo->def->src1;
1543 return phi_node->target;
1545 return pseudo;
1548 pseudo_t linearize_internal(struct entrypoint *ep, struct statement *stmt)
1550 struct instruction *insn = alloc_instruction(OP_CONTEXT, 0);
1551 struct expression *expr = stmt->expression;
1552 int value = 0;
1554 if (expr->type == EXPR_VALUE)
1555 value = expr->value;
1557 insn->increment = value;
1558 add_one_insn(ep, insn);
1559 return VOID;
1562 static void add_asm_input(struct entrypoint *ep, struct instruction *insn, struct expression *expr)
1564 pseudo_t pseudo = linearize_expression(ep, expr);
1566 use_pseudo(pseudo, add_pseudo(&insn->inputs, pseudo));
1569 static void add_asm_output(struct entrypoint *ep, struct instruction *insn, struct expression *expr)
1571 struct access_data ad = { NULL, };
1572 pseudo_t pseudo = alloc_pseudo(insn);
1574 if (!linearize_address_gen(ep, expr, &ad))
1575 return;
1576 linearize_store_gen(ep, pseudo, &ad);
1577 finish_address_gen(ep, &ad);
1578 add_pseudo(&insn->outputs, pseudo);
1581 pseudo_t linearize_asm_statement(struct entrypoint *ep, struct statement *stmt)
1583 int even_odd;
1584 struct expression *expr;
1585 struct instruction *insn;
1587 insn = alloc_instruction(OP_ASM, 0);
1588 expr = stmt->asm_string;
1589 if (!expr || expr->type != EXPR_STRING) {
1590 warning(stmt->pos, "expected string in inline asm");
1591 return VOID;
1593 insn->string = expr->string->data;
1595 /* Gather the inputs.. */
1596 even_odd = 0;
1597 FOR_EACH_PTR(stmt->asm_inputs, expr) {
1598 even_odd = 1 - even_odd;
1600 /* FIXME! We ignore the constraints for now.. */
1601 if (even_odd)
1602 continue;
1603 add_asm_input(ep, insn, expr);
1604 } END_FOR_EACH_PTR(expr);
1606 add_one_insn(ep, insn);
1608 /* Assign the outputs */
1609 even_odd = 0;
1610 FOR_EACH_PTR(stmt->asm_outputs, expr) {
1611 even_odd = 1 - even_odd;
1613 /* FIXME! We ignore the constraints for now.. */
1614 if (even_odd)
1615 continue;
1616 add_asm_output(ep, insn, expr);
1617 } END_FOR_EACH_PTR(expr);
1619 return VOID;
1622 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
1624 struct basic_block *bb;
1626 if (!stmt)
1627 return VOID;
1629 bb = ep->active;
1630 if (bb && !bb->insns)
1631 bb->pos = stmt->pos;
1633 switch (stmt->type) {
1634 case STMT_NONE:
1635 break;
1637 case STMT_INTERNAL:
1638 return linearize_internal(ep, stmt);
1640 case STMT_EXPRESSION:
1641 return linearize_expression(ep, stmt->expression);
1643 case STMT_ASM:
1644 return linearize_asm_statement(ep, stmt);
1646 case STMT_RETURN: {
1647 struct expression *expr = stmt->expression;
1648 struct basic_block *bb_return = get_bound_block(ep, stmt->ret_target);
1649 struct basic_block *active;
1650 pseudo_t src = linearize_expression(ep, expr);
1651 active = ep->active;
1652 if (active && src != &void_pseudo) {
1653 struct instruction *phi_node = first_instruction(bb_return->insns);
1654 pseudo_t phi;
1655 if (!phi_node) {
1656 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1657 phi_node->target = alloc_pseudo(phi_node);
1658 phi_node->bb = bb_return;
1659 add_instruction(&bb_return->insns, phi_node);
1661 phi = alloc_phi(active, src, type_size(expr->ctype));
1662 phi->ident = &return_ident;
1663 use_pseudo(phi, add_pseudo(&phi_node->phi_list, phi));
1665 add_goto(ep, bb_return);
1666 return VOID;
1669 case STMT_CASE: {
1670 add_label(ep, stmt->case_label);
1671 linearize_statement(ep, stmt->case_statement);
1672 break;
1675 case STMT_LABEL: {
1676 struct symbol *label = stmt->label_identifier;
1678 if (label->used) {
1679 add_label(ep, label);
1680 linearize_statement(ep, stmt->label_statement);
1682 break;
1685 case STMT_GOTO: {
1686 struct symbol *sym;
1687 struct expression *expr;
1688 struct instruction *goto_ins;
1689 struct basic_block *active;
1690 pseudo_t pseudo;
1692 active = ep->active;
1693 if (!bb_reachable(active))
1694 break;
1696 if (stmt->goto_label) {
1697 add_goto(ep, get_bound_block(ep, stmt->goto_label));
1698 break;
1701 expr = stmt->goto_expression;
1702 if (!expr)
1703 break;
1705 /* This can happen as part of simplification */
1706 if (expr->type == EXPR_LABEL) {
1707 add_goto(ep, get_bound_block(ep, expr->label_symbol));
1708 break;
1711 pseudo = linearize_expression(ep, expr);
1712 goto_ins = alloc_instruction(OP_COMPUTEDGOTO, 0);
1713 use_pseudo(pseudo, &goto_ins->target);
1714 add_one_insn(ep, goto_ins);
1716 FOR_EACH_PTR(stmt->target_list, sym) {
1717 struct basic_block *bb_computed = get_bound_block(ep, sym);
1718 struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0);
1719 add_multijmp(&goto_ins->multijmp_list, jmp);
1720 add_bb(&bb_computed->parents, ep->active);
1721 add_bb(&active->children, bb_computed);
1722 } END_FOR_EACH_PTR(sym);
1724 finish_block(ep);
1725 break;
1728 case STMT_COMPOUND:
1729 return linearize_compound_statement(ep, stmt);
1732 * This could take 'likely/unlikely' into account, and
1733 * switch the arms around appropriately..
1735 case STMT_IF: {
1736 struct basic_block *bb_true, *bb_false, *endif;
1737 struct expression *cond = stmt->if_conditional;
1739 bb_true = alloc_basic_block(ep, stmt->pos);
1740 bb_false = endif = alloc_basic_block(ep, stmt->pos);
1742 linearize_cond_branch(ep, cond, bb_true, bb_false);
1744 set_activeblock(ep, bb_true);
1745 linearize_statement(ep, stmt->if_true);
1747 if (stmt->if_false) {
1748 endif = alloc_basic_block(ep, stmt->pos);
1749 add_goto(ep, endif);
1750 set_activeblock(ep, bb_false);
1751 linearize_statement(ep, stmt->if_false);
1753 set_activeblock(ep, endif);
1754 break;
1757 case STMT_SWITCH: {
1758 struct symbol *sym;
1759 struct instruction *switch_ins;
1760 struct basic_block *switch_end = alloc_basic_block(ep, stmt->pos);
1761 struct basic_block *active, *default_case;
1762 struct multijmp *jmp;
1763 pseudo_t pseudo;
1765 pseudo = linearize_expression(ep, stmt->switch_expression);
1767 active = ep->active;
1768 if (!bb_reachable(active))
1769 break;
1771 switch_ins = alloc_instruction(OP_SWITCH, 0);
1772 use_pseudo(pseudo, &switch_ins->cond);
1773 add_one_insn(ep, switch_ins);
1774 finish_block(ep);
1776 default_case = NULL;
1777 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
1778 struct statement *case_stmt = sym->stmt;
1779 struct basic_block *bb_case = get_bound_block(ep, sym);
1781 if (!case_stmt->case_expression) {
1782 default_case = bb_case;
1783 continue;
1784 } else {
1785 int begin, end;
1787 begin = end = case_stmt->case_expression->value;
1788 if (case_stmt->case_to)
1789 end = case_stmt->case_to->value;
1790 if (begin > end)
1791 jmp = alloc_multijmp(bb_case, end, begin);
1792 else
1793 jmp = alloc_multijmp(bb_case, begin, end);
1796 add_multijmp(&switch_ins->multijmp_list, jmp);
1797 add_bb(&bb_case->parents, active);
1798 add_bb(&active->children, bb_case);
1799 } END_FOR_EACH_PTR(sym);
1801 bind_label(stmt->switch_break, switch_end, stmt->pos);
1803 /* And linearize the actual statement */
1804 linearize_statement(ep, stmt->switch_statement);
1805 set_activeblock(ep, switch_end);
1807 if (!default_case)
1808 default_case = switch_end;
1810 jmp = alloc_multijmp(default_case, 1, 0);
1811 add_multijmp(&switch_ins->multijmp_list, jmp);
1812 add_bb(&default_case->parents, active);
1813 add_bb(&active->children, default_case);
1815 break;
1818 case STMT_ITERATOR: {
1819 struct statement *pre_statement = stmt->iterator_pre_statement;
1820 struct expression *pre_condition = stmt->iterator_pre_condition;
1821 struct statement *statement = stmt->iterator_statement;
1822 struct statement *post_statement = stmt->iterator_post_statement;
1823 struct expression *post_condition = stmt->iterator_post_condition;
1824 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end;
1826 concat_symbol_list(stmt->iterator_syms, &ep->syms);
1827 linearize_statement(ep, pre_statement);
1829 loop_body = loop_top = alloc_basic_block(ep, stmt->pos);
1830 loop_continue = alloc_basic_block(ep, stmt->pos);
1831 loop_end = alloc_basic_block(ep, stmt->pos);
1833 if (pre_condition == post_condition) {
1834 loop_top = alloc_basic_block(ep, stmt->pos);
1835 set_activeblock(ep, loop_top);
1838 if (pre_condition)
1839 linearize_cond_branch(ep, pre_condition, loop_body, loop_end);
1841 bind_label(stmt->iterator_continue, loop_continue, stmt->pos);
1842 bind_label(stmt->iterator_break, loop_end, stmt->pos);
1844 set_activeblock(ep, loop_body);
1845 linearize_statement(ep, statement);
1846 add_goto(ep, loop_continue);
1848 set_activeblock(ep, loop_continue);
1849 linearize_statement(ep, post_statement);
1850 if (!post_condition || pre_condition == post_condition)
1851 add_goto(ep, loop_top);
1852 else
1853 linearize_cond_branch(ep, post_condition, loop_top, loop_end);
1854 set_activeblock(ep, loop_end);
1855 break;
1858 default:
1859 break;
1861 return VOID;
1864 static struct entrypoint *linearize_fn(struct symbol *sym, struct symbol *base_type)
1866 struct entrypoint *ep;
1867 struct basic_block *bb;
1868 struct symbol *arg;
1869 struct instruction *entry;
1870 pseudo_t result;
1871 int i;
1873 if (!base_type->stmt)
1874 return NULL;
1876 ep = alloc_entrypoint();
1877 bb = alloc_basic_block(ep, sym->pos);
1879 ep->name = sym;
1880 set_activeblock(ep, bb);
1882 entry = alloc_instruction(OP_ENTRY, 0);
1883 add_one_insn(ep, entry);
1884 ep->entry = entry;
1886 concat_symbol_list(base_type->arguments, &ep->syms);
1888 /* FIXME!! We should do something else about varargs.. */
1889 i = 0;
1890 FOR_EACH_PTR(base_type->arguments, arg) {
1891 linearize_argument(ep, arg, ++i);
1892 } END_FOR_EACH_PTR(arg);
1894 result = linearize_statement(ep, base_type->stmt);
1895 if (bb_reachable(ep->active) && !bb_terminated(ep->active)) {
1896 struct symbol *ret_type = base_type->ctype.base_type;
1897 struct instruction *insn = alloc_typed_instruction(OP_RET, ret_type);
1899 if (type_size(ret_type) > 0)
1900 use_pseudo(result, &insn->src);
1901 add_one_insn(ep, insn);
1904 merge_phi_sources = 1;
1907 * Do trivial flow simplification - branches to
1908 * branches, kill dead basicblocks etc
1910 kill_unreachable_bbs(ep);
1913 * Turn symbols into pseudos
1915 simplify_symbol_usage(ep);
1917 repeat:
1919 * Remove trivial instructions, and try to CSE
1920 * the rest.
1922 do {
1923 cleanup_and_cse(ep);
1924 pack_basic_blocks(ep);
1925 } while (repeat_phase & REPEAT_CSE);
1927 kill_unreachable_bbs(ep);
1928 vrfy_flow(ep);
1930 /* Cleanup */
1931 clear_symbol_pseudos(ep);
1933 /* And track pseudo register usage */
1934 track_pseudo_liveness(ep);
1937 * Some flow optimizations can only effectively
1938 * be done when we've done liveness analysis. But
1939 * if they trigger, we need to start all over
1940 * again
1942 if (simplify_flow(ep)) {
1943 clear_liveness(ep);
1944 goto repeat;
1947 /* Finally, add deathnotes to pseudos now that we have them */
1948 track_pseudo_death(ep);
1950 return ep;
1953 struct entrypoint *linearize_symbol(struct symbol *sym)
1955 struct symbol *base_type;
1957 if (!sym)
1958 return NULL;
1959 base_type = sym->ctype.base_type;
1960 if (!base_type)
1961 return NULL;
1962 if (base_type->type == SYM_FN)
1963 return linearize_fn(sym, base_type);
1964 return NULL;