Simplify constant unops
[smatch.git] / linearize.c
blob3ebf62062a9d3d0624aa8fef2ae994b2b0718e8f
1 /*
2 * Linearize - walk the statement tree (but _not_ the expressions)
3 * to generate a linear version of it and the basic blocks.
5 * NOTE! We're not interested in the actual sub-expressions yet,
6 * even though they can generate conditional branches and
7 * subroutine calls. That's all "local" behaviour.
9 * Copyright (C) 2004 Linus Torvalds
10 * Copyright (C) 2004 Christopher Li
13 #include <string.h>
14 #include <stdarg.h>
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <assert.h>
19 #include "parse.h"
20 #include "expression.h"
21 #include "linearize.h"
22 #include "flow.h"
23 #include "target.h"
25 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
26 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
28 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right);
29 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val);
31 struct access_data;
32 static pseudo_t add_load(struct entrypoint *ep, struct access_data *);
33 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *);
35 struct pseudo void_pseudo = {};
37 static struct instruction *alloc_instruction(int opcode, int size)
39 struct instruction * insn = __alloc_instruction(0);
40 insn->opcode = opcode;
41 insn->size = size;
42 return insn;
45 static inline int type_size(struct symbol *type)
47 return type ? type->bit_size > 0 ? type->bit_size : 0 : 0;
/* Allocate an instruction whose size is taken from the given type. */
static struct instruction *alloc_typed_instruction(int opcode, struct symbol *type)
{
	int bits = type_size(type);

	return alloc_instruction(opcode, bits);
}
/* Allocate a new entrypoint through the project allocator. */
static struct entrypoint *alloc_entrypoint(void)
{
	struct entrypoint *ep = __alloc_entrypoint(0);

	return ep;
}
60 static struct basic_block *alloc_basic_block(struct entrypoint *ep, struct position pos)
62 struct basic_block *bb = __alloc_basic_block(0);
63 bb->context = -1;
64 bb->pos = pos;
65 bb->ep = ep;
66 return bb;
69 static struct multijmp* alloc_multijmp(struct basic_block *target, int begin, int end)
71 struct multijmp *multijmp = __alloc_multijmp(0);
72 multijmp->target = target;
73 multijmp->begin = begin;
74 multijmp->end = end;
75 return multijmp;
78 static inline int regno(pseudo_t n)
80 int retval = -1;
81 if (n && n->type == PSEUDO_REG)
82 retval = n->nr;
83 return retval;
86 const char *show_pseudo(pseudo_t pseudo)
88 static int n;
89 static char buffer[4][64];
90 char *buf;
91 int i;
93 if (!pseudo)
94 return "no pseudo";
95 if (pseudo == VOID)
96 return "VOID";
97 buf = buffer[3 & ++n];
98 switch(pseudo->type) {
99 case PSEUDO_SYM: {
100 struct symbol *sym = pseudo->sym;
101 struct expression *expr;
103 if (sym->bb_target) {
104 snprintf(buf, 64, ".L%p", sym->bb_target);
105 break;
107 if (sym->ident) {
108 snprintf(buf, 64, "%s", show_ident(sym->ident));
109 break;
111 expr = sym->initializer;
112 if (!expr) {
113 snprintf(buf, 64, "<anon sym: %d>", pseudo->nr);
114 break;
116 switch (expr->type) {
117 case EXPR_VALUE:
118 snprintf(buf, 64, "<symbol value: %lld>", expr->value);
119 break;
120 case EXPR_STRING:
121 return show_string(expr->string);
122 default:
123 snprintf(buf, 64, "<symbol expression: %d>", pseudo->nr);
124 break;
127 case PSEUDO_REG:
128 i = snprintf(buf, 64, "%%r%d", pseudo->nr);
129 if (pseudo->ident)
130 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
131 break;
132 case PSEUDO_VAL: {
133 long long value = pseudo->value;
134 if (value > 1000 || value < -1000)
135 snprintf(buf, 64, "$%#llx", value);
136 else
137 snprintf(buf, 64, "$%lld", value);
138 break;
140 case PSEUDO_ARG:
141 snprintf(buf, 64, "%%arg%d", pseudo->nr);
142 break;
143 case PSEUDO_PHI:
144 i = snprintf(buf, 64, "%%phi%d", pseudo->nr);
145 if (pseudo->ident)
146 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
147 break;
148 default:
149 snprintf(buf, 64, "<bad pseudo type %d>", pseudo->type);
151 return buf;
154 static const char* opcodes[] = {
155 [OP_BADOP] = "bad_op",
157 /* Fn entrypoint */
158 [OP_ENTRY] = "<entry-point>",
160 /* Terminator */
161 [OP_RET] = "ret",
162 [OP_BR] = "br",
163 [OP_SWITCH] = "switch",
164 [OP_INVOKE] = "invoke",
165 [OP_COMPUTEDGOTO] = "jmp *",
166 [OP_UNWIND] = "unwind",
168 /* Binary */
169 [OP_ADD] = "add",
170 [OP_SUB] = "sub",
171 [OP_MUL] = "mul",
172 [OP_DIV] = "div",
173 [OP_MOD] = "mod",
174 [OP_SHL] = "shl",
175 [OP_SHR] = "shr",
177 /* Logical */
178 [OP_AND] = "and",
179 [OP_OR] = "or",
180 [OP_XOR] = "xor",
181 [OP_AND_BOOL] = "and-bool",
182 [OP_OR_BOOL] = "or-bool",
184 /* Binary comparison */
185 [OP_SET_EQ] = "seteq",
186 [OP_SET_NE] = "setne",
187 [OP_SET_LE] = "setle",
188 [OP_SET_GE] = "setge",
189 [OP_SET_LT] = "setlt",
190 [OP_SET_GT] = "setgt",
191 [OP_SET_B] = "setb",
192 [OP_SET_A] = "seta",
193 [OP_SET_BE] = "setbe",
194 [OP_SET_AE] = "setae",
196 /* Uni */
197 [OP_NOT] = "not",
198 [OP_NEG] = "neg",
200 /* Special three-input */
201 [OP_SEL] = "select",
203 /* Memory */
204 [OP_MALLOC] = "malloc",
205 [OP_FREE] = "free",
206 [OP_ALLOCA] = "alloca",
207 [OP_LOAD] = "load",
208 [OP_STORE] = "store",
209 [OP_SETVAL] = "set",
210 [OP_GET_ELEMENT_PTR] = "getelem",
212 /* Other */
213 [OP_PHI] = "phi",
214 [OP_PHISOURCE] = "phisrc",
215 [OP_CAST] = "cast",
216 [OP_PTRCAST] = "ptrcast",
217 [OP_CALL] = "call",
218 [OP_VANEXT] = "va_next",
219 [OP_VAARG] = "va_arg",
220 [OP_SLICE] = "slice",
221 [OP_SNOP] = "snop",
222 [OP_LNOP] = "lnop",
223 [OP_NOP] = "nop",
224 [OP_DEATHNOTE] = "dead",
225 [OP_ASM] = "asm",
227 /* Sparse tagging (line numbers, context, whatever) */
228 [OP_CONTEXT] = "context",
231 void show_instruction(struct instruction *insn)
233 int opcode = insn->opcode;
234 static char buffer[1024] = "\t";
235 char *buf;
237 buf = buffer+1;
238 if (!insn->bb) {
239 if (verbose < 2)
240 return;
241 buf += sprintf(buf, "# ");
244 if (opcode < sizeof(opcodes)/sizeof(char *)) {
245 const char *op = opcodes[opcode];
246 if (!op)
247 buf += sprintf(buf, "opcode:%d", opcode);
248 else
249 buf += sprintf(buf, "%s", op);
250 if (insn->size)
251 buf += sprintf(buf, ".%d", insn->size);
252 memset(buf, ' ', 20);
253 buf++;
256 if (buf < buffer + 12)
257 buf = buffer + 12;
258 switch (opcode) {
259 case OP_RET:
260 if (insn->src && insn->src != VOID)
261 buf += sprintf(buf, "%s", show_pseudo(insn->src));
262 break;
263 case OP_BR:
264 if (insn->bb_true && insn->bb_false) {
265 buf += sprintf(buf, "%s, .L%p, .L%p", show_pseudo(insn->cond), insn->bb_true, insn->bb_false);
266 break;
268 buf += sprintf(buf, ".L%p", insn->bb_true ? insn->bb_true : insn->bb_false);
269 break;
271 case OP_SETVAL: {
272 struct expression *expr = insn->val;
273 pseudo_t pseudo = insn->symbol;
274 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
275 if (pseudo) {
276 struct symbol *sym = pseudo->sym;
277 if (!sym) {
278 buf += sprintf(buf, "%s", show_pseudo(pseudo));
279 break;
281 if (sym->bb_target) {
282 buf += sprintf(buf, ".L%p", sym->bb_target);
283 break;
285 if (sym->ident) {
286 buf += sprintf(buf, "%s", show_ident(sym->ident));
287 break;
289 expr = sym->initializer;
290 if (!expr) {
291 buf += sprintf(buf, "%s", "anon symbol");
292 break;
296 if (!expr) {
297 buf += sprintf(buf, "%s", "<none>");
298 break;
301 switch (expr->type) {
302 case EXPR_VALUE:
303 buf += sprintf(buf, "%lld", expr->value);
304 break;
305 case EXPR_FVALUE:
306 buf += sprintf(buf, "%Lf", expr->fvalue);
307 break;
308 case EXPR_STRING:
309 buf += sprintf(buf, "%.40s", show_string(expr->string));
310 break;
311 case EXPR_SYMBOL:
312 buf += sprintf(buf, "%s", show_ident(expr->symbol->ident));
313 break;
314 case EXPR_LABEL:
315 buf += sprintf(buf, ".L%p", expr->symbol->bb_target);
316 break;
317 default:
318 buf += sprintf(buf, "SETVAL EXPR TYPE %d", expr->type);
320 break;
322 case OP_SWITCH: {
323 struct multijmp *jmp;
324 buf += sprintf(buf, "%s", show_pseudo(insn->target));
325 FOR_EACH_PTR(insn->multijmp_list, jmp) {
326 if (jmp->begin == jmp->end)
327 buf += sprintf(buf, ", %d -> .L%p", jmp->begin, jmp->target);
328 else if (jmp->begin < jmp->end)
329 buf += sprintf(buf, ", %d ... %d -> .L%p", jmp->begin, jmp->end, jmp->target);
330 else
331 buf += sprintf(buf, ", default -> .L%p", jmp->target);
332 } END_FOR_EACH_PTR(jmp);
333 break;
335 case OP_COMPUTEDGOTO: {
336 struct multijmp *jmp;
337 buf += sprintf(buf, "%s", show_pseudo(insn->target));
338 FOR_EACH_PTR(insn->multijmp_list, jmp) {
339 buf += sprintf(buf, ", .L%p", jmp->target);
340 } END_FOR_EACH_PTR(jmp);
341 break;
344 case OP_PHISOURCE:
345 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
346 break;
348 case OP_PHI: {
349 pseudo_t phi;
350 const char *s = " <-";
351 buf += sprintf(buf, "%s", show_pseudo(insn->target));
352 FOR_EACH_PTR(insn->phi_list, phi) {
353 buf += sprintf(buf, "%s %s", s, show_pseudo(phi));
354 s = ",";
355 } END_FOR_EACH_PTR(phi);
356 break;
358 case OP_LOAD: case OP_LNOP:
359 buf += sprintf(buf, "%s <- %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
360 break;
361 case OP_STORE: case OP_SNOP:
362 buf += sprintf(buf, "%s -> %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
363 break;
364 case OP_CALL: {
365 struct pseudo *arg;
366 if (insn->target && insn->target != VOID)
367 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
368 buf += sprintf(buf, "%s", show_pseudo(insn->func));
369 FOR_EACH_PTR(insn->arguments, arg) {
370 buf += sprintf(buf, ", %s", show_pseudo(arg));
371 } END_FOR_EACH_PTR(arg);
372 break;
374 case OP_CAST:
375 case OP_PTRCAST:
376 buf += sprintf(buf, "%s <- (%d) %s",
377 show_pseudo(insn->target),
378 type_size(insn->orig_type),
379 show_pseudo(insn->src));
380 break;
381 case OP_BINARY ... OP_BINARY_END:
382 case OP_BINCMP ... OP_BINCMP_END:
383 buf += sprintf(buf, "%s <- %s, %s", show_pseudo(insn->target), show_pseudo(insn->src1), show_pseudo(insn->src2));
384 break;
386 case OP_SEL:
387 buf += sprintf(buf, "%s <- %s, %s, %s", show_pseudo(insn->target),
388 show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
389 break;
391 case OP_SLICE:
392 buf += sprintf(buf, "%s <- %s, %d, %d", show_pseudo(insn->target), show_pseudo(insn->base), insn->from, insn->len);
393 break;
395 case OP_NOT: case OP_NEG:
396 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
397 break;
399 case OP_CONTEXT:
400 buf += sprintf(buf, "%d", insn->increment);
401 break;
402 case OP_NOP:
403 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
404 break;
405 case OP_DEATHNOTE:
406 buf += sprintf(buf, "%s", show_pseudo(insn->target));
407 break;
408 case OP_ASM:
409 buf += sprintf(buf, "\"%s\"", insn->string);
410 if (insn->outputs) {
411 pseudo_t pseudo;
412 buf += sprintf(buf, " (");
413 FOR_EACH_PTR(insn->outputs, pseudo) {
414 buf += sprintf(buf, " %s", show_pseudo(pseudo));
415 } END_FOR_EACH_PTR(pseudo);
416 buf += sprintf(buf, " ) <-");
418 if (insn->inputs) {
419 pseudo_t pseudo;
420 buf += sprintf(buf, " (");
421 FOR_EACH_PTR(insn->inputs, pseudo) {
422 buf += sprintf(buf, " %s", show_pseudo(pseudo));
423 } END_FOR_EACH_PTR(pseudo);
424 buf += sprintf(buf, " )");
426 break;
427 default:
428 break;
430 do { --buf; } while (*buf == ' ');
431 *++buf = 0;
432 printf("%s\n", buffer);
435 static void show_bb(struct basic_block *bb)
437 struct instruction *insn;
439 printf(".L%p:\n", bb);
440 if (verbose) {
441 pseudo_t needs, defines;
442 printf("%s:%d\n", input_streams[bb->pos.stream].name, bb->pos.line);
444 FOR_EACH_PTR(bb->needs, needs) {
445 struct instruction *def = needs->def;
446 if (def->opcode != OP_PHI) {
447 printf(" **uses %s (from .L%p)**\n", show_pseudo(needs), def->bb);
448 } else {
449 pseudo_t phi;
450 const char *sep = " ";
451 printf(" **uses %s (from", show_pseudo(needs));
452 FOR_EACH_PTR(def->phi_list, phi) {
453 if (phi == VOID)
454 continue;
455 printf("%s(%s:.L%p)", sep, show_pseudo(phi), phi->def->bb);
456 sep = ", ";
457 } END_FOR_EACH_PTR(phi);
458 printf(")**\n");
460 } END_FOR_EACH_PTR(needs);
462 FOR_EACH_PTR(bb->defines, defines) {
463 printf(" **defines %s **\n", show_pseudo(defines));
464 } END_FOR_EACH_PTR(defines);
466 if (bb->parents) {
467 struct basic_block *from;
468 FOR_EACH_PTR(bb->parents, from) {
469 printf(" **from %p (%s:%d:%d)**\n", from,
470 input_streams[from->pos.stream].name, from->pos.line, from->pos.pos);
471 } END_FOR_EACH_PTR(from);
474 if (bb->children) {
475 struct basic_block *to;
476 FOR_EACH_PTR(bb->children, to) {
477 printf(" **to %p (%s:%d:%d)**\n", to,
478 input_streams[to->pos.stream].name, to->pos.line, to->pos.pos);
479 } END_FOR_EACH_PTR(to);
483 FOR_EACH_PTR(bb->insns, insn) {
484 show_instruction(insn);
485 } END_FOR_EACH_PTR(insn);
486 if (!bb_terminated(bb))
487 printf("\tEND\n");
488 printf("\n");
491 static void show_symbol_usage(pseudo_t pseudo)
493 if (pseudo) {
494 pseudo_t *pp;
495 FOR_EACH_PTR(pseudo->users, pp) {
496 struct instruction *insn = container(pp, struct instruction, src);
497 show_instruction(insn);
498 } END_FOR_EACH_PTR(pp);
502 void show_entry(struct entrypoint *ep)
504 struct symbol *sym;
505 struct basic_block *bb;
507 printf("%s:\n", show_ident(ep->name->ident));
509 if (verbose) {
510 printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
512 FOR_EACH_PTR(ep->syms, sym) {
513 if (!sym->pseudo)
514 continue;
515 if (!sym->pseudo->users)
516 continue;
517 printf(" sym: %p %s\n", sym, show_ident(sym->ident));
518 if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE))
519 printf("\texternal visibility\n");
520 show_symbol_usage(sym->pseudo);
521 } END_FOR_EACH_PTR(sym);
523 printf("\n");
526 FOR_EACH_PTR(ep->bbs, bb) {
527 if (!bb)
528 continue;
529 if (!bb->parents && !bb->children && !bb->insns && verbose < 2)
530 continue;
531 show_bb(bb);
532 } END_FOR_EACH_PTR(bb);
534 printf("\n");
537 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos)
539 if (label->bb_target)
540 warning(pos, "label '%s' already bound", show_ident(label->ident));
541 label->bb_target = bb;
544 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label)
546 struct basic_block *bb = label->bb_target;
548 if (!bb) {
549 bb = alloc_basic_block(ep, label->pos);
550 label->bb_target = bb;
552 return bb;
555 static void finish_block(struct entrypoint *ep)
557 struct basic_block *src = ep->active;
558 if (bb_reachable(src))
559 ep->active = NULL;
562 static void add_goto(struct entrypoint *ep, struct basic_block *dst)
564 struct basic_block *src = ep->active;
565 if (bb_reachable(src)) {
566 struct instruction *br = alloc_instruction(OP_BR, 0);
567 br->bb_true = dst;
568 add_bb(&dst->parents, src);
569 add_bb(&src->children, dst);
570 br->bb = src;
571 add_instruction(&src->insns, br);
572 ep->active = NULL;
576 static void add_one_insn(struct entrypoint *ep, struct instruction *insn)
578 struct basic_block *bb = ep->active;
580 if (bb_reachable(bb)) {
581 insn->bb = bb;
582 add_instruction(&bb->insns, insn);
586 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb)
588 if (!bb_terminated(ep->active))
589 add_goto(ep, bb);
591 ep->active = bb;
592 if (bb_reachable(bb))
593 add_bb(&ep->bbs, bb);
596 static void remove_parent(struct basic_block *child, struct basic_block *parent)
598 remove_bb_from_list(&child->parents, parent, 1);
599 if (!child->parents)
600 kill_bb(child);
603 /* Change a "switch" into a branch */
604 void insert_branch(struct basic_block *bb, struct instruction *jmp, struct basic_block *target)
606 struct instruction *br, *old;
607 struct basic_block *child;
609 /* Remove the switch */
610 old = delete_last_instruction(&bb->insns);
611 assert(old == jmp);
613 br = alloc_instruction(OP_BR, 0);
614 br->bb = bb;
615 br->bb_true = target;
616 add_instruction(&bb->insns, br);
618 FOR_EACH_PTR(bb->children, child) {
619 if (child == target) {
620 target = NULL; /* Trigger just once */
621 continue;
623 DELETE_CURRENT_PTR(child);
624 remove_parent(child, bb);
625 } END_FOR_EACH_PTR(child);
626 PACK_PTR_LIST(&bb->children);
630 void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi_node, pseudo_t true, pseudo_t false)
632 pseudo_t target;
633 struct instruction *select;
635 /* Remove the 'br' */
636 delete_last_instruction(&bb->insns);
638 select = alloc_instruction(OP_SEL, phi_node->size);
639 select->bb = bb;
641 assert(br->cond);
642 use_pseudo(br->cond, &select->src1);
644 target = phi_node->target;
645 assert(target->def == phi_node);
646 select->target = target;
647 target->def = select;
649 use_pseudo(true, &select->src2);
650 use_pseudo(false, &select->src3);
652 add_instruction(&bb->insns, select);
653 add_instruction(&bb->insns, br);
656 static inline int bb_empty(struct basic_block *bb)
658 return !bb->insns;
661 /* Add a label to the currently active block, return new active block */
662 static struct basic_block * add_label(struct entrypoint *ep, struct symbol *label)
664 struct basic_block *bb = label->bb_target;
666 if (bb) {
667 set_activeblock(ep, bb);
668 return bb;
670 bb = ep->active;
671 if (!bb_reachable(bb) || !bb_empty(bb)) {
672 bb = alloc_basic_block(ep, label->pos);
673 set_activeblock(ep, bb);
675 label->bb_target = bb;
676 return bb;
679 static void add_branch(struct entrypoint *ep, struct expression *expr, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false)
681 struct basic_block *bb = ep->active;
682 struct instruction *br;
684 if (bb_reachable(bb)) {
685 br = alloc_instruction(OP_BR, 0);
686 use_pseudo(cond, &br->cond);
687 br->bb_true = bb_true;
688 br->bb_false = bb_false;
689 add_bb(&bb_true->parents, bb);
690 add_bb(&bb_false->parents, bb);
691 add_bb(&bb->children, bb_true);
692 add_bb(&bb->children, bb_false);
693 add_one_insn(ep, br);
697 /* Dummy pseudo allocator */
698 pseudo_t alloc_pseudo(struct instruction *def)
700 static int nr = 0;
701 struct pseudo * pseudo = __alloc_pseudo(0);
702 pseudo->type = PSEUDO_REG;
703 pseudo->nr = ++nr;
704 pseudo->def = def;
705 return pseudo;
708 static void clear_symbol_pseudos(struct entrypoint *ep)
710 struct symbol *sym;
712 FOR_EACH_PTR(ep->accesses, sym) {
713 sym->pseudo = NULL;
714 } END_FOR_EACH_PTR(sym);
717 static pseudo_t symbol_pseudo(struct entrypoint *ep, struct symbol *sym)
719 pseudo_t pseudo;
721 if (!sym)
722 return VOID;
724 pseudo = sym->pseudo;
725 if (!pseudo) {
726 pseudo = __alloc_pseudo(0);
727 pseudo->type = PSEUDO_SYM;
728 pseudo->sym = sym;
729 pseudo->ident = sym->ident;
730 sym->pseudo = pseudo;
731 add_symbol(&ep->accesses, sym);
733 /* Symbol pseudos have neither nr, usage nor def */
734 return pseudo;
737 pseudo_t value_pseudo(long long val)
739 #define MAX_VAL_HASH 64
740 static struct pseudo_list *prev[MAX_VAL_HASH];
741 int hash = val & (MAX_VAL_HASH-1);
742 struct pseudo_list **list = prev + hash;
743 pseudo_t pseudo;
745 FOR_EACH_PTR(*list, pseudo) {
746 if (pseudo->value == val)
747 return pseudo;
748 } END_FOR_EACH_PTR(pseudo);
750 pseudo = __alloc_pseudo(0);
751 pseudo->type = PSEUDO_VAL;
752 pseudo->value = val;
753 add_pseudo(list, pseudo);
755 /* Value pseudos have neither nr, usage nor def */
756 return pseudo;
759 static pseudo_t argument_pseudo(struct entrypoint *ep, int nr)
761 pseudo_t pseudo = __alloc_pseudo(0);
762 pseudo->type = PSEUDO_ARG;
763 pseudo->nr = nr;
764 pseudo->def = ep->entry;
765 /* Argument pseudos have neither usage nor def */
766 return pseudo;
769 pseudo_t alloc_phi(struct basic_block *source, pseudo_t pseudo, int size)
771 struct instruction *insn = alloc_instruction(OP_PHISOURCE, size);
772 pseudo_t phi = __alloc_pseudo(0);
773 static int nr = 0;
775 phi->type = PSEUDO_PHI;
776 phi->nr = ++nr;
777 phi->def = insn;
779 use_pseudo(pseudo, &insn->src1);
780 insn->bb = source;
781 insn->target = phi;
782 add_instruction(&source->insns, insn);
783 return phi;
787 * We carry the "access_data" structure around for any accesses,
788 * which simplifies things a lot. It contains all the access
789 * information in one place.
791 struct access_data {
792 struct symbol *result_type; // result ctype
793 struct symbol *source_type; // source ctype
794 pseudo_t address; // pseudo containing address ..
795 pseudo_t origval; // pseudo for original value ..
796 unsigned int offset, alignment; // byte offset
797 unsigned int bit_size, bit_offset; // which bits
798 struct position pos;
/*
 * Called by the access helpers once they are done with 'ad'.
 * The body is empty: there is currently nothing to finish.
 */
static void finish_address_gen(struct entrypoint *ep, struct access_data *ad)
{
}
805 static int linearize_simple_address(struct entrypoint *ep,
806 struct expression *addr,
807 struct access_data *ad)
809 if (addr->type == EXPR_SYMBOL) {
810 ad->address = symbol_pseudo(ep, addr->symbol);
811 return 1;
813 if (addr->type == EXPR_BINOP) {
814 if (addr->right->type == EXPR_VALUE) {
815 if (addr->op == '+') {
816 ad->offset += get_expression_value(addr->right);
817 return linearize_simple_address(ep, addr->left, ad);
821 ad->address = linearize_expression(ep, addr);
822 return 1;
825 static struct symbol *base_type(struct symbol *sym)
827 struct symbol *base = sym;
829 if (sym) {
830 if (sym->type == SYM_NODE)
831 base = base->ctype.base_type;
832 if (base->type == SYM_BITFIELD)
833 return base->ctype.base_type;
835 return sym;
838 static int linearize_address_gen(struct entrypoint *ep,
839 struct expression *expr,
840 struct access_data *ad)
842 struct symbol *ctype = expr->ctype;
844 if (!ctype)
845 return 0;
846 ad->pos = expr->pos;
847 ad->result_type = ctype;
848 ad->source_type = base_type(ctype);
849 ad->bit_size = ctype->bit_size;
850 ad->alignment = ctype->ctype.alignment;
851 ad->bit_offset = ctype->bit_offset;
852 if (expr->type == EXPR_PREOP && expr->op == '*')
853 return linearize_simple_address(ep, expr->unop, ad);
855 warning(expr->pos, "generating address of non-lvalue (%d)", expr->type);
856 return 0;
859 static pseudo_t add_load(struct entrypoint *ep, struct access_data *ad)
861 struct instruction *insn;
862 pseudo_t new;
864 new = ad->origval;
865 if (0 && new)
866 return new;
868 insn = alloc_typed_instruction(OP_LOAD, ad->source_type);
869 new = alloc_pseudo(insn);
870 ad->origval = new;
872 insn->target = new;
873 insn->offset = ad->offset;
874 use_pseudo(ad->address, &insn->src);
875 add_one_insn(ep, insn);
876 return new;
879 static void add_store(struct entrypoint *ep, struct access_data *ad, pseudo_t value)
881 struct basic_block *bb = ep->active;
883 if (bb_reachable(bb)) {
884 struct instruction *store = alloc_typed_instruction(OP_STORE, ad->source_type);
885 store->offset = ad->offset;
886 use_pseudo(value, &store->target);
887 use_pseudo(ad->address, &store->src);
888 add_one_insn(ep, store);
892 static pseudo_t linearize_store_gen(struct entrypoint *ep,
893 pseudo_t value,
894 struct access_data *ad)
896 pseudo_t store = value;
898 if (type_size(ad->source_type) != type_size(ad->result_type)) {
899 pseudo_t orig = add_load(ep, ad);
900 int shift = ad->bit_offset;
901 unsigned long long mask = (1ULL << ad->bit_size)-1;
903 if (shift) {
904 store = add_binary_op(ep, ad->source_type, OP_SHL, value, value_pseudo(shift));
905 mask <<= shift;
907 orig = add_binary_op(ep, ad->source_type, OP_AND, orig, value_pseudo(~mask));
908 store = add_binary_op(ep, ad->source_type, OP_OR, orig, store);
910 add_store(ep, ad, store);
911 return value;
914 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right)
916 struct instruction *insn = alloc_typed_instruction(op, ctype);
917 pseudo_t target = alloc_pseudo(insn);
918 insn->target = target;
919 use_pseudo(left, &insn->src1);
920 use_pseudo(right, &insn->src2);
921 add_one_insn(ep, insn);
922 return target;
925 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val)
927 struct instruction *insn = alloc_typed_instruction(OP_SETVAL, ctype);
928 pseudo_t target = alloc_pseudo(insn);
929 insn->target = target;
930 insn->val = val;
931 if (!val) {
932 pseudo_t addr = symbol_pseudo(ep, ctype);
933 use_pseudo(addr, &insn->symbol);
934 insn->size = bits_in_pointer;
936 add_one_insn(ep, insn);
937 return target;
940 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct access_data *ad)
942 pseudo_t new = add_load(ep, ad);
944 if (ad->bit_offset) {
945 pseudo_t shift = value_pseudo(ad->bit_offset);
946 pseudo_t newval = add_binary_op(ep, ad->source_type, OP_SHR, new, shift);
947 new = newval;
950 return new;
953 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
955 struct access_data ad = { NULL, };
956 pseudo_t value;
958 if (!linearize_address_gen(ep, expr, &ad))
959 return VOID;
960 value = linearize_load_gen(ep, &ad);
961 finish_address_gen(ep, &ad);
962 return value;
965 /* FIXME: FP */
966 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
968 struct access_data ad = { NULL, };
969 pseudo_t old, new, one;
970 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB;
972 if (!linearize_address_gen(ep, expr->unop, &ad))
973 return VOID;
975 old = linearize_load_gen(ep, &ad);
976 one = value_pseudo(expr->op_value);
977 new = add_binary_op(ep, expr->ctype, op, old, one);
978 linearize_store_gen(ep, new, &ad);
979 finish_address_gen(ep, &ad);
980 return postop ? old : new;
983 static pseudo_t add_uniop(struct entrypoint *ep, struct expression *expr, int op, pseudo_t src)
985 struct instruction *insn = alloc_typed_instruction(op, expr->ctype);
986 pseudo_t new = alloc_pseudo(insn);
988 insn->target = new;
989 use_pseudo(src, &insn->src1);
990 add_one_insn(ep, insn);
991 return new;
994 static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr)
996 pseudo_t pre = linearize_expression(ep, expr->base);
997 struct instruction *insn = alloc_typed_instruction(OP_SLICE, expr->ctype);
998 pseudo_t new = alloc_pseudo(insn);
1000 insn->target = new;
1001 insn->from = expr->r_bitpos;
1002 insn->len = expr->r_nrbits;
1003 use_pseudo(pre, &insn->base);
1004 add_one_insn(ep, insn);
1005 return new;
1008 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
1010 pseudo_t pre = linearize_expression(ep, expr->unop);
1011 switch (expr->op) {
1012 case '+':
1013 return pre;
1014 case '!': {
1015 pseudo_t zero = value_pseudo(0);
1016 return add_binary_op(ep, expr->ctype, OP_SET_EQ, pre, zero);
1018 case '~':
1019 return add_uniop(ep, expr, OP_NOT, pre);
1020 case '-':
1021 return add_uniop(ep, expr, OP_NEG, pre);
1023 return VOID;
1026 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
1029 * '*' is an lvalue access, and is fundamentally different
1030 * from an arithmetic operation. Maybe it should have an
1031 * expression type of its own..
1033 if (expr->op == '*')
1034 return linearize_access(ep, expr);
1035 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
1036 return linearize_inc_dec(ep, expr, 0);
1037 return linearize_regular_preop(ep, expr);
1040 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr)
1042 return linearize_inc_dec(ep, expr, 1);
1045 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
1047 struct access_data ad = { NULL, };
1048 struct expression *target = expr->left;
1049 pseudo_t value;
1051 value = linearize_expression(ep, expr->right);
1052 if (!linearize_address_gen(ep, target, &ad))
1053 return VOID;
1054 if (expr->op != '=') {
1055 pseudo_t oldvalue = linearize_load_gen(ep, &ad);
1056 pseudo_t dst;
1057 static const int op_trans[] = {
1058 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD,
1059 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB,
1060 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MUL,
1061 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIV,
1062 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MOD,
1063 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL,
1064 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_SHR,
1065 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND,
1066 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR,
1067 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR
1069 dst = add_binary_op(ep, expr->ctype, op_trans[expr->op - SPECIAL_BASE], oldvalue, value);
1070 value = dst;
1072 value = linearize_store_gen(ep, value, &ad);
1073 finish_address_gen(ep, &ad);
1074 return value;
1077 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr)
1079 struct expression *arg, *fn;
1080 struct instruction *insn = alloc_typed_instruction(OP_CALL, expr->ctype);
1081 pseudo_t retval, call;
1082 int context_diff;
1084 if (!expr->ctype) {
1085 warning(expr->pos, "call with no type!");
1086 return VOID;
1089 FOR_EACH_PTR(expr->args, arg) {
1090 pseudo_t new = linearize_expression(ep, arg);
1091 use_pseudo(new, add_pseudo(&insn->arguments, new));
1092 } END_FOR_EACH_PTR(arg);
1094 fn = expr->fn;
1096 context_diff = 0;
1097 if (fn->ctype) {
1098 int in = fn->ctype->ctype.in_context;
1099 int out = fn->ctype->ctype.out_context;
1100 if (in < 0 || out < 0)
1101 in = out = 0;
1102 context_diff = out - in;
1105 if (fn->type == EXPR_PREOP) {
1106 if (fn->unop->type == EXPR_SYMBOL) {
1107 struct symbol *sym = fn->unop->symbol;
1108 if (sym->ctype.base_type->type == SYM_FN)
1109 fn = fn->unop;
1112 if (fn->type == EXPR_SYMBOL) {
1113 call = symbol_pseudo(ep, fn->symbol);
1114 } else {
1115 call = linearize_expression(ep, fn);
1117 use_pseudo(call, &insn->func);
1118 retval = VOID;
1119 if (expr->ctype != &void_ctype)
1120 retval = alloc_pseudo(insn);
1121 insn->target = retval;
1122 add_one_insn(ep, insn);
1124 if (context_diff) {
1125 insn = alloc_instruction(OP_CONTEXT, 0);
1126 insn->increment = context_diff;
1127 add_one_insn(ep, insn);
1130 return retval;
1133 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr)
1135 pseudo_t src1, src2, dst;
1136 static const int opcode[] = {
1137 ['+'] = OP_ADD, ['-'] = OP_SUB,
1138 ['*'] = OP_MUL, ['/'] = OP_DIV,
1139 ['%'] = OP_MOD, ['&'] = OP_AND,
1140 ['|'] = OP_OR, ['^'] = OP_XOR,
1141 [SPECIAL_LEFTSHIFT] = OP_SHL,
1142 [SPECIAL_RIGHTSHIFT] = OP_SHR,
1143 [SPECIAL_LOGICAL_AND] = OP_AND_BOOL,
1144 [SPECIAL_LOGICAL_OR] = OP_OR_BOOL,
1147 src1 = linearize_expression(ep, expr->left);
1148 src2 = linearize_expression(ep, expr->right);
1149 dst = add_binary_op(ep, expr->ctype, opcode[expr->op], src1, src2);
1150 return dst;
1153 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1155 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1157 static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr)
1159 pseudo_t cond, true, false, res;
1160 struct instruction *insn;
1162 true = linearize_expression(ep, expr->cond_true);
1163 false = linearize_expression(ep, expr->cond_false);
1164 cond = linearize_expression(ep, expr->conditional);
1166 insn = alloc_typed_instruction(OP_SEL, expr->ctype);
1167 if (!expr->cond_true)
1168 true = cond;
1169 use_pseudo(cond, &insn->src1);
1170 use_pseudo(true, &insn->src2);
1171 use_pseudo(false, &insn->src3);
1173 res = alloc_pseudo(insn);
1174 insn->target = res;
1175 add_one_insn(ep, insn);
1176 return res;
1179 static pseudo_t add_join_conditional(struct entrypoint *ep, struct expression *expr,
1180 pseudo_t phi1, pseudo_t phi2)
1182 pseudo_t target;
1183 struct instruction *phi_node;
1185 if (phi1 == VOID)
1186 return phi2;
1187 if (phi2 == VOID)
1188 return phi1;
1190 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1191 use_pseudo(phi1, add_pseudo(&phi_node->phi_list, phi1));
1192 use_pseudo(phi2, add_pseudo(&phi_node->phi_list, phi2));
1193 phi_node->target = target = alloc_pseudo(phi_node);
1194 add_one_insn(ep, phi_node);
1195 return target;
1198 static pseudo_t linearize_short_conditional(struct entrypoint *ep, struct expression *expr,
1199 struct expression *cond,
1200 struct expression *expr_false)
1202 pseudo_t src1, src2;
1203 struct basic_block *bb_false = alloc_basic_block(ep, expr_false->pos);
1204 struct basic_block *merge = alloc_basic_block(ep, expr->pos);
1205 pseudo_t phi1, phi2;
1206 int size = type_size(expr->ctype);
1208 src1 = linearize_expression(ep, cond);
1209 phi1 = alloc_phi(ep->active, src1, size);
1210 add_branch(ep, expr, src1, merge, bb_false);
1212 set_activeblock(ep, bb_false);
1213 src2 = linearize_expression(ep, expr_false);
1214 phi2 = alloc_phi(ep->active, src2, size);
1215 set_activeblock(ep, merge);
1217 return add_join_conditional(ep, expr, phi1, phi2);
1220 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr,
1221 struct expression *cond,
1222 struct expression *expr_true,
1223 struct expression *expr_false)
1225 pseudo_t src1, src2;
1226 pseudo_t phi1, phi2;
1227 struct basic_block *bb_true = alloc_basic_block(ep, expr_true->pos);
1228 struct basic_block *bb_false = alloc_basic_block(ep, expr_false->pos);
1229 struct basic_block *merge = alloc_basic_block(ep, expr->pos);
1230 int size = type_size(expr->ctype);
1232 linearize_cond_branch(ep, cond, bb_true, bb_false);
1234 set_activeblock(ep, bb_true);
1235 src1 = linearize_expression(ep, expr_true);
1236 phi1 = alloc_phi(ep->active, src1, size);
1237 add_goto(ep, merge);
1239 set_activeblock(ep, bb_false);
1240 src2 = linearize_expression(ep, expr_false);
1241 phi2 = alloc_phi(ep->active, src2, size);
1242 set_activeblock(ep, merge);
1244 return add_join_conditional(ep, expr, phi1, phi2);
1247 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr)
1249 struct expression *shortcut;
1251 shortcut = alloc_const_expression(expr->pos, expr->op == SPECIAL_LOGICAL_OR);
1252 shortcut->ctype = expr->ctype;
1253 return linearize_conditional(ep, expr, expr->left, shortcut, expr->right);
1256 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr)
1258 static const int cmpop[] = {
1259 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT,
1260 [SPECIAL_EQUAL] = OP_SET_EQ,
1261 [SPECIAL_NOTEQUAL] = OP_SET_NE,
1262 [SPECIAL_GTE] = OP_SET_GE,
1263 [SPECIAL_LTE] = OP_SET_LE,
1264 [SPECIAL_UNSIGNED_LT] = OP_SET_B,
1265 [SPECIAL_UNSIGNED_GT] = OP_SET_A,
1266 [SPECIAL_UNSIGNED_LTE] = OP_SET_BE,
1267 [SPECIAL_UNSIGNED_GTE] = OP_SET_AE,
1270 pseudo_t src1 = linearize_expression(ep, expr->left);
1271 pseudo_t src2 = linearize_expression(ep, expr->right);
1272 pseudo_t dst = add_binary_op(ep, expr->ctype, cmpop[expr->op], src1, src2);
1273 return dst;
1277 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1279 pseudo_t cond;
1281 if (!expr || !bb_reachable(ep->active))
1282 return VOID;
1284 switch (expr->type) {
1286 case EXPR_STRING:
1287 case EXPR_VALUE:
1288 add_goto(ep, expr->value ? bb_true : bb_false);
1289 return VOID;
1291 case EXPR_FVALUE:
1292 add_goto(ep, expr->fvalue ? bb_true : bb_false);
1293 return VOID;
1295 case EXPR_LOGICAL:
1296 linearize_logical_branch(ep, expr, bb_true, bb_false);
1297 return VOID;
1299 case EXPR_COMPARE:
1300 cond = linearize_compare(ep, expr);
1301 add_branch(ep, expr, cond, bb_true, bb_false);
1302 break;
1304 case EXPR_PREOP:
1305 if (expr->op == '!')
1306 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true);
1307 /* fall through */
1308 default: {
1309 cond = linearize_expression(ep, expr);
1310 add_branch(ep, expr, cond, bb_true, bb_false);
1312 return VOID;
1315 return VOID;
1320 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1322 struct basic_block *next = alloc_basic_block(ep, expr->pos);
1324 if (expr->op == SPECIAL_LOGICAL_OR)
1325 linearize_cond_branch(ep, expr->left, bb_true, next);
1326 else
1327 linearize_cond_branch(ep, expr->left, next, bb_false);
1328 set_activeblock(ep, next);
1329 linearize_cond_branch(ep, expr->right, bb_true, bb_false);
1330 return VOID;
1334 * Casts to pointers are "less safe" than other casts, since
1335 * they imply type-unsafe accesses. "void *" is a special
1336 * case, since you can't access through it anyway without another
1337 * cast.
1339 static struct instruction *alloc_cast_instruction(struct symbol *ctype)
1341 int opcode = OP_CAST;
1342 struct symbol *base = ctype;
1344 if (base->type == SYM_NODE)
1345 base = base->ctype.base_type;
1346 if (base->type == SYM_PTR) {
1347 base = base->ctype.base_type;
1348 if (base != &void_ctype)
1349 opcode = OP_PTRCAST;
1351 return alloc_typed_instruction(opcode, ctype);
1354 pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr)
1356 pseudo_t src, result;
1357 struct instruction *insn;
1359 src = linearize_expression(ep, expr->cast_expression);
1360 if (src == VOID)
1361 return VOID;
1362 if (!expr->ctype)
1363 return VOID;
1364 if (expr->ctype->bit_size < 0)
1365 return VOID;
1367 insn = alloc_cast_instruction(expr->ctype);
1368 result = alloc_pseudo(insn);
1369 insn->target = result;
1370 insn->orig_type = expr->cast_expression->ctype;
1371 use_pseudo(src, &insn->src);
1372 add_one_insn(ep, insn);
1373 return result;
1376 pseudo_t linearize_position(struct entrypoint *ep, struct expression *pos, struct access_data *ad)
1378 struct expression *init_expr = pos->init_expr;
1379 pseudo_t value = linearize_expression(ep, init_expr);
1381 ad->offset = pos->init_offset;
1382 ad->source_type = base_type(init_expr->ctype);
1383 ad->result_type = init_expr->ctype;
1384 linearize_store_gen(ep, value, ad);
1385 return VOID;
1388 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *ad)
1390 switch (initializer->type) {
1391 case EXPR_INITIALIZER: {
1392 struct expression *expr;
1393 FOR_EACH_PTR(initializer->expr_list, expr) {
1394 linearize_initializer(ep, expr, ad);
1395 } END_FOR_EACH_PTR(expr);
1396 break;
1398 case EXPR_POS:
1399 linearize_position(ep, initializer, ad);
1400 break;
1401 default: {
1402 pseudo_t value = linearize_expression(ep, initializer);
1403 ad->source_type = base_type(initializer->ctype);
1404 ad->result_type = initializer->ctype;
1405 linearize_store_gen(ep, value, ad);
1409 return VOID;
1412 void linearize_argument(struct entrypoint *ep, struct symbol *arg, int nr)
1414 struct access_data ad = { NULL, };
1416 ad.source_type = arg;
1417 ad.result_type = arg;
1418 ad.address = symbol_pseudo(ep, arg);
1419 linearize_store_gen(ep, argument_pseudo(ep, nr), &ad);
1420 finish_address_gen(ep, &ad);
1423 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
1425 if (!expr)
1426 return VOID;
1428 switch (expr->type) {
1429 case EXPR_SYMBOL:
1430 return add_setval(ep, expr->symbol, NULL);
1432 case EXPR_VALUE:
1433 return value_pseudo(expr->value);
1435 case EXPR_STRING: case EXPR_FVALUE: case EXPR_LABEL:
1436 return add_setval(ep, expr->ctype, expr);
1438 case EXPR_STATEMENT:
1439 return linearize_statement(ep, expr->statement);
1441 case EXPR_CALL:
1442 return linearize_call_expression(ep, expr);
1444 case EXPR_BINOP:
1445 return linearize_binop(ep, expr);
1447 case EXPR_LOGICAL:
1448 return linearize_logical(ep, expr);
1450 case EXPR_COMPARE:
1451 return linearize_compare(ep, expr);
1453 case EXPR_SELECT:
1454 return linearize_select(ep, expr);
1456 case EXPR_CONDITIONAL:
1457 if (!expr->cond_true)
1458 return linearize_short_conditional(ep, expr, expr->conditional, expr->cond_false);
1460 return linearize_conditional(ep, expr, expr->conditional,
1461 expr->cond_true, expr->cond_false);
1463 case EXPR_COMMA:
1464 linearize_expression(ep, expr->left);
1465 return linearize_expression(ep, expr->right);
1467 case EXPR_ASSIGNMENT:
1468 return linearize_assignment(ep, expr);
1470 case EXPR_PREOP:
1471 return linearize_preop(ep, expr);
1473 case EXPR_POSTOP:
1474 return linearize_postop(ep, expr);
1476 case EXPR_CAST:
1477 case EXPR_IMPLIED_CAST:
1478 return linearize_cast(ep, expr);
1480 case EXPR_SLICE:
1481 return linearize_slice(ep, expr);
1483 case EXPR_INITIALIZER:
1484 case EXPR_POS:
1485 warning(expr->pos, "unexpected initializer expression (%d %d)", expr->type, expr->op);
1486 return VOID;
1487 default:
1488 warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op);
1489 return VOID;
1491 return VOID;
1494 static void linearize_one_symbol(struct entrypoint *ep, struct symbol *sym)
1496 struct access_data ad = { NULL, };
1498 if (!sym->initializer)
1499 return;
1501 ad.address = symbol_pseudo(ep, sym);
1502 linearize_initializer(ep, sym->initializer, &ad);
1503 finish_address_gen(ep, &ad);
1506 static pseudo_t linearize_compound_statement(struct entrypoint *ep, struct statement *stmt)
1508 pseudo_t pseudo;
1509 struct statement *s;
1510 struct symbol *sym;
1511 struct symbol *ret = stmt->ret;
1513 concat_symbol_list(stmt->syms, &ep->syms);
1515 FOR_EACH_PTR(stmt->syms, sym) {
1516 linearize_one_symbol(ep, sym);
1517 } END_FOR_EACH_PTR(sym);
1519 pseudo = VOID;
1520 FOR_EACH_PTR(stmt->stmts, s) {
1521 pseudo = linearize_statement(ep, s);
1522 } END_FOR_EACH_PTR(s);
1524 if (ret) {
1525 struct basic_block *bb = add_label(ep, ret);
1526 struct instruction *phi_node = first_instruction(bb->insns);
1528 if (!phi_node)
1529 return pseudo;
1531 if (pseudo_list_size(phi_node->phi_list)==1) {
1532 pseudo = first_pseudo(phi_node->phi_list);
1533 assert(pseudo->type == PSEUDO_PHI);
1534 return pseudo->def->src1;
1536 return phi_node->target;
1538 return pseudo;
1541 pseudo_t linearize_internal(struct entrypoint *ep, struct statement *stmt)
1543 struct instruction *insn = alloc_instruction(OP_CONTEXT, 0);
1544 struct expression *expr = stmt->expression;
1545 int value = 0;
1547 if (expr->type == EXPR_VALUE)
1548 value = expr->value;
1550 insn->increment = value;
1551 add_one_insn(ep, insn);
1552 return VOID;
1555 static void add_asm_input(struct entrypoint *ep, struct instruction *insn, struct expression *expr)
1557 pseudo_t pseudo = linearize_expression(ep, expr);
1559 use_pseudo(pseudo, add_pseudo(&insn->inputs, pseudo));
1562 static void add_asm_output(struct entrypoint *ep, struct instruction *insn, struct expression *expr)
1564 struct access_data ad = { NULL, };
1565 pseudo_t pseudo = alloc_pseudo(insn);
1567 if (!linearize_address_gen(ep, expr, &ad))
1568 return;
1569 linearize_store_gen(ep, pseudo, &ad);
1570 finish_address_gen(ep, &ad);
1571 add_pseudo(&insn->outputs, pseudo);
1574 pseudo_t linearize_asm_statement(struct entrypoint *ep, struct statement *stmt)
1576 int even_odd;
1577 struct expression *expr;
1578 struct instruction *insn;
1580 insn = alloc_instruction(OP_ASM, 0);
1581 expr = stmt->asm_string;
1582 if (!expr || expr->type != EXPR_STRING) {
1583 warning(stmt->pos, "expected string in inline asm");
1584 return VOID;
1586 insn->string = expr->string->data;
1588 /* Gather the inputs.. */
1589 even_odd = 0;
1590 FOR_EACH_PTR(stmt->asm_inputs, expr) {
1591 even_odd = 1 - even_odd;
1593 /* FIXME! We ignore the constraints for now.. */
1594 if (even_odd)
1595 continue;
1596 add_asm_input(ep, insn, expr);
1597 } END_FOR_EACH_PTR(expr);
1599 add_one_insn(ep, insn);
1601 /* Assign the outputs */
1602 even_odd = 0;
1603 FOR_EACH_PTR(stmt->asm_outputs, expr) {
1604 even_odd = 1 - even_odd;
1606 /* FIXME! We ignore the constraints for now.. */
1607 if (even_odd)
1608 continue;
1609 add_asm_output(ep, insn, expr);
1610 } END_FOR_EACH_PTR(expr);
1612 return VOID;
/*
 * Linearize one statement into the entrypoint's basic blocks,
 * dispatching on stmt->type.
 *
 * A NULL statement yields VOID.  STMT_INTERNAL, STMT_EXPRESSION,
 * STMT_ASM and STMT_COMPOUND return whatever their helper returns;
 * every other statement kind returns VOID.
 */
1615 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
1617 struct basic_block *bb;
1619 if (!stmt)
1620 return VOID;
/* An active block with no instructions yet takes its position from this statement. */
1622 bb = ep->active;
1623 if (bb && !bb->insns)
1624 bb->pos = stmt->pos;
1626 switch (stmt->type) {
1627 case STMT_NONE:
1628 break;
1630 case STMT_INTERNAL:
1631 return linearize_internal(ep, stmt);
1633 case STMT_EXPRESSION:
1634 return linearize_expression(ep, stmt->expression);
1636 case STMT_ASM:
1637 return linearize_asm_statement(ep, stmt);
/*
 * Return: evaluate the return expression.  If a block is still active
 * and the value is not the void pseudo, wrap it in a phi source and add
 * that to the OP_PHI at the head of the return block, creating the phi
 * node the first time.  Then jump to the return block.
 */
1639 case STMT_RETURN: {
1640 struct expression *expr = stmt->expression;
1641 struct basic_block *bb_return = get_bound_block(ep, stmt->ret_target);
1642 struct basic_block *active;
1643 pseudo_t src = linearize_expression(ep, expr);
1644 active = ep->active;
1645 if (active && src != &void_pseudo) {
1646 struct instruction *phi_node = first_instruction(bb_return->insns);
1647 pseudo_t phi;
1648 if (!phi_node) {
1649 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1650 phi_node->target = alloc_pseudo(phi_node);
1651 phi_node->bb = bb_return;
1652 add_instruction(&bb_return->insns, phi_node);
1654 phi = alloc_phi(active, src, type_size(expr->ctype));
1655 phi->ident = &return_ident;
1656 use_pseudo(phi, add_pseudo(&phi_node->phi_list, phi));
1658 add_goto(ep, bb_return);
1659 return VOID;
/* Case label: place the label, then linearize the statement it labels. */
1662 case STMT_CASE: {
1663 add_label(ep, stmt->case_label);
1664 linearize_statement(ep, stmt->case_statement);
1665 break;
/* Ordinary label: only handled when the label is marked as used. */
1668 case STMT_LABEL: {
1669 struct symbol *label = stmt->label_identifier;
1671 if (label->used) {
1672 add_label(ep, label);
1673 linearize_statement(ep, stmt->label_statement);
1675 break;
/*
 * Goto: nothing is emitted from an unreachable block.  A goto with a
 * label, or with an EXPR_LABEL expression, becomes a plain jump.  Any
 * other expression becomes an OP_COMPUTEDGOTO with one multijmp per
 * symbol in stmt->target_list, each linked as a child of this block.
 */
1678 case STMT_GOTO: {
1679 struct symbol *sym;
1680 struct expression *expr;
1681 struct instruction *goto_ins;
1682 struct basic_block *active;
1683 pseudo_t pseudo;
1685 active = ep->active;
1686 if (!bb_reachable(active))
1687 break;
1689 if (stmt->goto_label) {
1690 add_goto(ep, get_bound_block(ep, stmt->goto_label));
1691 break;
1694 expr = stmt->goto_expression;
1695 if (!expr)
1696 break;
1698 /* This can happen as part of simplification */
1699 if (expr->type == EXPR_LABEL) {
1700 add_goto(ep, get_bound_block(ep, expr->label_symbol));
1701 break;
1704 pseudo = linearize_expression(ep, expr);
1705 goto_ins = alloc_instruction(OP_COMPUTEDGOTO, 0);
1706 use_pseudo(pseudo, &goto_ins->target);
1707 add_one_insn(ep, goto_ins);
1709 FOR_EACH_PTR(stmt->target_list, sym) {
1710 struct basic_block *bb_computed = get_bound_block(ep, sym);
1711 struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0);
1712 add_multijmp(&goto_ins->multijmp_list, jmp);
1713 add_bb(&bb_computed->parents, ep->active);
1714 add_bb(&active->children, bb_computed);
1715 } END_FOR_EACH_PTR(sym);
1717 finish_block(ep);
1718 break;
1721 case STMT_COMPOUND:
1722 return linearize_compound_statement(ep, stmt);
1725 * This could take 'likely/unlikely' into account, and
1726 * switch the arms around appropriately..
/*
 * If: bb_false and endif start out as the same block.  Only when there
 * is an else arm does endif become a separate block, reached by a jump
 * from the end of the true arm.
 */
1728 case STMT_IF: {
1729 struct basic_block *bb_true, *bb_false, *endif;
1730 struct expression *cond = stmt->if_conditional;
1732 bb_true = alloc_basic_block(ep, stmt->pos);
1733 bb_false = endif = alloc_basic_block(ep, stmt->pos);
1735 linearize_cond_branch(ep, cond, bb_true, bb_false);
1737 set_activeblock(ep, bb_true);
1738 linearize_statement(ep, stmt->if_true);
1740 if (stmt->if_false) {
1741 endif = alloc_basic_block(ep, stmt->pos);
1742 add_goto(ep, endif);
1743 set_activeblock(ep, bb_false);
1744 linearize_statement(ep, stmt->if_false);
1746 set_activeblock(ep, endif);
1747 break;
/*
 * Switch: the controlling expression is evaluated first; nothing more
 * is emitted if the block is unreachable.  An OP_SWITCH ends the block
 * and gets one multijmp per case symbol, with begin/end swapped if the
 * range is written backwards.  The break label is bound to switch_end,
 * the body is linearized, and finally the default target (switch_end
 * when there is no default case) is added with the range (1, 0).
 * NOTE(review): (1, 0) presumably marks an empty range - confirm
 * against the users of multijmp.
 */
1750 case STMT_SWITCH: {
1751 struct symbol *sym;
1752 struct instruction *switch_ins;
1753 struct basic_block *switch_end = alloc_basic_block(ep, stmt->pos);
1754 struct basic_block *active, *default_case;
1755 struct multijmp *jmp;
1756 pseudo_t pseudo;
1758 pseudo = linearize_expression(ep, stmt->switch_expression);
1760 active = ep->active;
1761 if (!bb_reachable(active))
1762 break;
1764 switch_ins = alloc_instruction(OP_SWITCH, 0);
1765 use_pseudo(pseudo, &switch_ins->cond);
1766 add_one_insn(ep, switch_ins);
1767 finish_block(ep);
1769 default_case = NULL;
1770 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
1771 struct statement *case_stmt = sym->stmt;
1772 struct basic_block *bb_case = get_bound_block(ep, sym);
1774 if (!case_stmt->case_expression) {
1775 default_case = bb_case;
1776 continue;
1777 } else {
1778 int begin, end;
1780 begin = end = case_stmt->case_expression->value;
1781 if (case_stmt->case_to)
1782 end = case_stmt->case_to->value;
1783 if (begin > end)
1784 jmp = alloc_multijmp(bb_case, end, begin);
1785 else
1786 jmp = alloc_multijmp(bb_case, begin, end);
1789 add_multijmp(&switch_ins->multijmp_list, jmp);
1790 add_bb(&bb_case->parents, active);
1791 add_bb(&active->children, bb_case);
1792 } END_FOR_EACH_PTR(sym);
1794 bind_label(stmt->switch_break, switch_end, stmt->pos);
1796 /* And linearize the actual statement */
1797 linearize_statement(ep, stmt->switch_statement);
1798 set_activeblock(ep, switch_end);
1800 if (!default_case)
1801 default_case = switch_end;
1803 jmp = alloc_multijmp(default_case, 1, 0);
1804 add_multijmp(&switch_ins->multijmp_list, jmp);
1805 add_bb(&default_case->parents, active);
1806 add_bb(&active->children, default_case);
1808 break;
/*
 * Loop: the pre-statement runs first.  loop_top is where the loop is
 * re-entered and initially equals loop_body; it becomes a separate,
 * activated block when the pre- and post-condition are the same
 * expression.  The continue label is bound to loop_continue and the
 * break label to loop_end.  After the body and the post-statement,
 * control goes back to loop_top, either unconditionally or by
 * branching on the post-condition.
 */
1811 case STMT_ITERATOR: {
1812 struct statement *pre_statement = stmt->iterator_pre_statement;
1813 struct expression *pre_condition = stmt->iterator_pre_condition;
1814 struct statement *statement = stmt->iterator_statement;
1815 struct statement *post_statement = stmt->iterator_post_statement;
1816 struct expression *post_condition = stmt->iterator_post_condition;
1817 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end;
1819 concat_symbol_list(stmt->iterator_syms, &ep->syms);
1820 linearize_statement(ep, pre_statement);
1822 loop_body = loop_top = alloc_basic_block(ep, stmt->pos);
1823 loop_continue = alloc_basic_block(ep, stmt->pos);
1824 loop_end = alloc_basic_block(ep, stmt->pos);
1826 if (pre_condition == post_condition) {
1827 loop_top = alloc_basic_block(ep, stmt->pos);
1828 set_activeblock(ep, loop_top);
1831 if (pre_condition)
1832 linearize_cond_branch(ep, pre_condition, loop_body, loop_end);
1834 bind_label(stmt->iterator_continue, loop_continue, stmt->pos);
1835 bind_label(stmt->iterator_break, loop_end, stmt->pos);
1837 set_activeblock(ep, loop_body);
1838 linearize_statement(ep, statement);
1839 add_goto(ep, loop_continue);
1841 set_activeblock(ep, loop_continue);
1842 linearize_statement(ep, post_statement);
1843 if (!post_condition || pre_condition == post_condition)
1844 add_goto(ep, loop_top);
1845 else
1846 linearize_cond_branch(ep, post_condition, loop_top, loop_end);
1847 set_activeblock(ep, loop_end);
1848 break;
1851 default:
1852 break;
1854 return VOID;
1857 static struct entrypoint *linearize_fn(struct symbol *sym, struct symbol *base_type)
1859 struct entrypoint *ep;
1860 struct basic_block *bb;
1861 struct symbol *arg;
1862 struct instruction *entry;
1863 pseudo_t result;
1864 int i;
1866 if (!base_type->stmt)
1867 return NULL;
1869 ep = alloc_entrypoint();
1870 bb = alloc_basic_block(ep, sym->pos);
1872 ep->name = sym;
1873 set_activeblock(ep, bb);
1875 entry = alloc_instruction(OP_ENTRY, 0);
1876 add_one_insn(ep, entry);
1877 ep->entry = entry;
1879 concat_symbol_list(base_type->arguments, &ep->syms);
1881 /* FIXME!! We should do something else about varargs.. */
1882 i = 0;
1883 FOR_EACH_PTR(base_type->arguments, arg) {
1884 linearize_argument(ep, arg, ++i);
1885 } END_FOR_EACH_PTR(arg);
1887 result = linearize_statement(ep, base_type->stmt);
1888 if (bb_reachable(ep->active) && !bb_terminated(ep->active)) {
1889 struct symbol *ret_type = base_type->ctype.base_type;
1890 struct instruction *insn = alloc_typed_instruction(OP_RET, ret_type);
1892 if (type_size(ret_type) > 0)
1893 use_pseudo(result, &insn->src);
1894 add_one_insn(ep, insn);
1897 merge_phi_sources = 1;
1899 repeat:
1901 * Do trivial flow simplification - branches to
1902 * branches, kill dead basicblocks etc
1904 kill_unreachable_bbs(ep);
1907 * Turn symbols into pseudos
1909 simplify_symbol_usage(ep);
1912 * Remove trivial instructions, and try to CSE
1913 * the rest.
1915 do {
1916 cleanup_and_cse(ep);
1917 pack_basic_blocks(ep);
1918 } while (repeat_phase & REPEAT_CSE);
1920 vrfy_flow(ep);
1922 /* Cleanup */
1923 clear_symbol_pseudos(ep);
1925 /* And track pseudo register usage */
1926 track_pseudo_liveness(ep);
1929 * Some flow optimizations can only effectively
1930 * be done when we've done liveness analysis. But
1931 * if they trigger, we need to start all over
1932 * again
1934 if (simplify_flow(ep)) {
1935 clear_liveness(ep);
1936 goto repeat;
1939 /* Finally, add deathnotes to pseudos now that we have them */
1940 track_pseudo_death(ep);
1942 return ep;
1945 struct entrypoint *linearize_symbol(struct symbol *sym)
1947 struct symbol *base_type;
1949 if (!sym)
1950 return NULL;
1951 base_type = sym->ctype.base_type;
1952 if (!base_type)
1953 return NULL;
1954 if (base_type->type == SYM_FN)
1955 return linearize_fn(sym, base_type);
1956 return NULL;