A sparse "context" instruction has side effects. Don't allow
[smatch.git] / linearize.c
blob53faeedac6795770736499e5997ecb503267e94f
/*
 * Linearize - walk the statement tree (but _not_ the expressions)
 * to generate a linear version of it and the basic blocks.
 *
 * NOTE! We're not interested in the actual sub-expressions yet,
 * even though they can generate conditional branches and
 * subroutine calls. That's all "local" behaviour.
 *
 * Copyright (C) 2004 Linus Torvalds
 * Copyright (C) 2004 Christopher Li
 */
13 #include <string.h>
14 #include <stdarg.h>
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <assert.h>
19 #include "parse.h"
20 #include "expression.h"
21 #include "linearize.h"
22 #include "flow.h"
23 #include "target.h"
25 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
26 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
28 static void add_setcc(struct entrypoint *ep, struct expression *expr, pseudo_t val);
29 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right);
30 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val);
32 struct access_data;
33 static pseudo_t add_load(struct entrypoint *ep, struct access_data *);
34 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *);
36 struct pseudo void_pseudo = {};
38 static struct instruction *alloc_instruction(int opcode, int size)
40 struct instruction * insn = __alloc_instruction(0);
41 insn->opcode = opcode;
42 insn->size = size;
43 return insn;
46 static struct instruction *alloc_typed_instruction(int opcode, struct symbol *type)
48 int size;
50 size = type ? type->bit_size : 0;
51 if (size < 0)
52 size = 0;
53 return alloc_instruction(opcode, size);
/* Allocate a new entrypoint through the project allocator. */
static struct entrypoint *alloc_entrypoint(void)
{
	struct entrypoint *ep = __alloc_entrypoint(0);

	return ep;
}
61 static struct basic_block *alloc_basic_block(struct entrypoint *ep, struct position pos)
63 struct basic_block *bb = __alloc_basic_block(0);
64 bb->context = -1;
65 bb->pos = pos;
66 bb->ep = ep;
67 return bb;
70 static struct multijmp* alloc_multijmp(struct basic_block *target, int begin, int end)
72 struct multijmp *multijmp = __alloc_multijmp(0);
73 multijmp->target = target;
74 multijmp->begin = begin;
75 multijmp->end = end;
76 return multijmp;
79 static inline int regno(pseudo_t n)
81 int retval = -1;
82 if (n && n->type == PSEUDO_REG)
83 retval = n->nr;
84 return retval;
87 static const char *show_pseudo(pseudo_t pseudo)
89 static int n;
90 static char buffer[4][64];
91 char *buf;
92 int i;
94 if (!pseudo)
95 return "no pseudo";
96 if (pseudo == VOID)
97 return "VOID";
98 buf = buffer[3 & ++n];
99 switch(pseudo->type) {
100 case PSEUDO_SYM: {
101 struct symbol *sym = pseudo->sym;
102 struct expression *expr;
104 if (sym->bb_target) {
105 snprintf(buf, 64, ".L%p", sym->bb_target);
106 break;
108 if (sym->ident) {
109 snprintf(buf, 64, "%s", show_ident(sym->ident));
110 break;
112 expr = sym->initializer;
113 if (!expr) {
114 snprintf(buf, 64, "<anon sym: %d>", pseudo->nr);
115 break;
117 switch (expr->type) {
118 case EXPR_VALUE:
119 snprintf(buf, 64, "<symbol value: %lld>", expr->value);
120 break;
121 case EXPR_STRING:
122 return show_string(expr->string);
123 default:
124 snprintf(buf, 64, "<symbol expression: %d>", pseudo->nr);
125 break;
128 case PSEUDO_REG:
129 i = snprintf(buf, 64, "%%r%d", pseudo->nr);
130 if (pseudo->ident)
131 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
132 break;
133 case PSEUDO_VAL: {
134 long long value = pseudo->value;
135 if (value > 1000 || value < -1000)
136 snprintf(buf, 64, "$%#llx", value);
137 else
138 snprintf(buf, 64, "$%lld", value);
139 break;
141 case PSEUDO_ARG:
142 snprintf(buf, 64, "%%arg%d", pseudo->nr);
143 break;
144 case PSEUDO_PHI:
145 i = snprintf(buf, 64, "%%phi%d", pseudo->nr);
146 if (pseudo->ident)
147 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
148 break;
149 default:
150 snprintf(buf, 64, "<bad pseudo type %d>", pseudo->type);
152 return buf;
155 static const char* opcodes[] = {
156 [OP_BADOP] = "bad_op",
157 /* Terminator */
158 [OP_RET] = "ret",
159 [OP_BR] = "br",
160 [OP_SWITCH] = "switch",
161 [OP_INVOKE] = "invoke",
162 [OP_COMPUTEDGOTO] = "jmp *",
163 [OP_UNWIND] = "unwind",
165 /* Binary */
166 [OP_ADD] = "add",
167 [OP_SUB] = "sub",
168 [OP_MUL] = "mul",
169 [OP_DIV] = "div",
170 [OP_MOD] = "mod",
171 [OP_SHL] = "shl",
172 [OP_SHR] = "shr",
174 /* Logical */
175 [OP_AND] = "and",
176 [OP_OR] = "or",
177 [OP_XOR] = "xor",
178 [OP_AND_BOOL] = "and-bool",
179 [OP_OR_BOOL] = "or-bool",
181 /* Binary comparison */
182 [OP_SET_EQ] = "seteq",
183 [OP_SET_NE] = "setne",
184 [OP_SET_LE] = "setle",
185 [OP_SET_GE] = "setge",
186 [OP_SET_LT] = "setlt",
187 [OP_SET_GT] = "setgt",
188 [OP_SET_B] = "setb",
189 [OP_SET_A] = "seta",
190 [OP_SET_BE] = "setbe",
191 [OP_SET_AE] = "setae",
193 /* Uni */
194 [OP_NOT] = "not",
195 [OP_NEG] = "neg",
197 /* Setcc - always in combination with a select or conditional branch */
198 [OP_SETCC] = "setcc",
199 [OP_SEL] = "select",
201 /* Memory */
202 [OP_MALLOC] = "malloc",
203 [OP_FREE] = "free",
204 [OP_ALLOCA] = "alloca",
205 [OP_LOAD] = "load",
206 [OP_STORE] = "store",
207 [OP_SETVAL] = "set",
208 [OP_GET_ELEMENT_PTR] = "getelem",
210 /* Other */
211 [OP_PHI] = "phi",
212 [OP_PHISOURCE] = "phisrc",
213 [OP_CAST] = "cast",
214 [OP_PTRCAST] = "ptrcast",
215 [OP_CALL] = "call",
216 [OP_VANEXT] = "va_next",
217 [OP_VAARG] = "va_arg",
218 [OP_SLICE] = "slice",
219 [OP_SNOP] = "snop",
220 [OP_LNOP] = "lnop",
221 [OP_NOP] = "nop",
223 /* Sparse tagging (line numbers, context, whatever) */
224 [OP_CONTEXT] = "context",
227 void show_instruction(struct instruction *insn)
229 int opcode = insn->opcode;
230 static char buffer[1024] = "\t";
231 char *buf;
233 buf = buffer+1;
234 if (!insn->bb) {
235 if (verbose < 2)
236 return;
237 buf += sprintf(buf, "# ");
240 if (opcode < sizeof(opcodes)/sizeof(char *)) {
241 const char *op = opcodes[opcode];
242 if (!op)
243 buf += sprintf(buf, "opcode:%d", opcode);
244 else
245 buf += sprintf(buf, "%s", op);
246 if (insn->size)
247 buf += sprintf(buf, ".%d", insn->size);
248 memset(buf, ' ', 20);
249 buf++;
252 if (buf < buffer + 12)
253 buf = buffer + 12;
254 switch (opcode) {
255 case OP_RET:
256 if (insn->src && insn->src != VOID)
257 buf += sprintf(buf, "%s", show_pseudo(insn->src));
258 break;
259 case OP_BR:
260 if (insn->bb_true && insn->bb_false) {
261 buf += sprintf(buf, "%s, .L%p, .L%p", show_pseudo(insn->cond), insn->bb_true, insn->bb_false);
262 break;
264 buf += sprintf(buf, ".L%p", insn->bb_true ? insn->bb_true : insn->bb_false);
265 break;
267 case OP_SETVAL: {
268 struct expression *expr = insn->val;
269 pseudo_t pseudo = insn->symbol;
270 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
271 if (pseudo) {
272 struct symbol *sym = pseudo->sym;
273 if (!sym) {
274 buf += sprintf(buf, "%s", show_pseudo(pseudo));
275 break;
277 if (sym->bb_target) {
278 buf += sprintf(buf, ".L%p", sym->bb_target);
279 break;
281 if (sym->ident) {
282 buf += sprintf(buf, "%s", show_ident(sym->ident));
283 break;
285 expr = sym->initializer;
286 if (!expr) {
287 buf += sprintf(buf, "%s", "anon symbol");
288 break;
292 if (!expr) {
293 buf += sprintf(buf, "%s", "<none>");
294 break;
297 switch (expr->type) {
298 case EXPR_VALUE:
299 buf += sprintf(buf, "%lld", expr->value);
300 break;
301 case EXPR_FVALUE:
302 buf += sprintf(buf, "%Lf", expr->fvalue);
303 break;
304 case EXPR_STRING:
305 buf += sprintf(buf, "%.40s", show_string(expr->string));
306 break;
307 case EXPR_SYMBOL:
308 buf += sprintf(buf, "%s", show_ident(expr->symbol->ident));
309 break;
310 case EXPR_LABEL:
311 buf += sprintf(buf, ".L%p", expr->symbol->bb_target);
312 break;
313 default:
314 buf += sprintf(buf, "SETVAL EXPR TYPE %d", expr->type);
316 break;
318 case OP_SWITCH: {
319 struct multijmp *jmp;
320 buf += sprintf(buf, "%s", show_pseudo(insn->target));
321 FOR_EACH_PTR(insn->multijmp_list, jmp) {
322 if (jmp->begin == jmp->end)
323 buf += sprintf(buf, ", %d -> .L%p", jmp->begin, jmp->target);
324 else if (jmp->begin < jmp->end)
325 buf += sprintf(buf, ", %d ... %d -> .L%p", jmp->begin, jmp->end, jmp->target);
326 else
327 buf += sprintf(buf, ", default -> .L%p", jmp->target);
328 } END_FOR_EACH_PTR(jmp);
329 break;
331 case OP_COMPUTEDGOTO: {
332 struct multijmp *jmp;
333 buf += sprintf(buf, "%s", show_pseudo(insn->target));
334 FOR_EACH_PTR(insn->multijmp_list, jmp) {
335 buf += sprintf(buf, ", .L%p", jmp->target);
336 } END_FOR_EACH_PTR(jmp);
337 break;
340 case OP_PHISOURCE:
341 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
342 break;
344 case OP_PHI: {
345 pseudo_t phi;
346 const char *s = " <-";
347 buf += sprintf(buf, "%s", show_pseudo(insn->target));
348 FOR_EACH_PTR(insn->phi_list, phi) {
349 buf += sprintf(buf, "%s %s", s, show_pseudo(phi));
350 s = ",";
351 } END_FOR_EACH_PTR(phi);
352 break;
354 case OP_LOAD: case OP_LNOP:
355 buf += sprintf(buf, "%s <- %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
356 break;
357 case OP_STORE: case OP_SNOP:
358 buf += sprintf(buf, "%s -> %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
359 break;
360 case OP_CALL: {
361 struct pseudo *arg;
362 if (insn->target && insn->target != VOID)
363 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
364 buf += sprintf(buf, "%s", show_pseudo(insn->func));
365 FOR_EACH_PTR(insn->arguments, arg) {
366 buf += sprintf(buf, ", %s", show_pseudo(arg));
367 } END_FOR_EACH_PTR(arg);
368 break;
370 case OP_CAST:
371 case OP_PTRCAST:
372 buf += sprintf(buf, "%s <- (%d) %s", show_pseudo(insn->target), insn->orig_type->bit_size, show_pseudo(insn->src));
373 break;
374 case OP_BINARY ... OP_BINARY_END:
375 case OP_BINCMP ... OP_BINCMP_END:
376 case OP_SEL:
377 buf += sprintf(buf, "%s <- %s, %s", show_pseudo(insn->target), show_pseudo(insn->src1), show_pseudo(insn->src2));
378 break;
380 case OP_SLICE:
381 buf += sprintf(buf, "%s <- %s, %d, %d", show_pseudo(insn->target), show_pseudo(insn->base), insn->from, insn->len);
382 break;
384 case OP_NOT: case OP_NEG:
385 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
386 break;
388 case OP_SETCC:
389 buf += sprintf(buf, "%s", show_pseudo(insn->src));
390 break;
391 case OP_CONTEXT:
392 buf += sprintf(buf, "%d", insn->increment);
393 break;
394 case OP_NOP:
395 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
396 break;
397 default:
398 break;
400 do { --buf; } while (*buf == ' ');
401 *++buf = 0;
402 printf("%s\n", buffer);
405 static void show_bb(struct basic_block *bb)
407 struct instruction *insn;
409 printf(".L%p:\n", bb);
410 if (verbose) {
411 pseudo_t needs, defines;
412 printf("%s:%d\n", input_streams[bb->pos.stream].name, bb->pos.line);
414 FOR_EACH_PTR(bb->needs, needs) {
415 struct instruction *def = needs->def;
416 if (def->opcode != OP_PHI) {
417 printf(" **uses %s (from .L%p)**\n", show_pseudo(needs), def->bb);
418 } else {
419 pseudo_t phi;
420 const char *sep = " ";
421 printf(" **uses %s (from", show_pseudo(needs));
422 FOR_EACH_PTR(def->phi_list, phi) {
423 if (phi == VOID)
424 continue;
425 printf("%s(%s:.L%p)", sep, show_pseudo(phi), phi->def->bb);
426 sep = ", ";
427 } END_FOR_EACH_PTR(phi);
428 printf(")**\n");
430 } END_FOR_EACH_PTR(needs);
432 FOR_EACH_PTR(bb->defines, defines) {
433 printf(" **defines %s **\n", show_pseudo(defines));
434 } END_FOR_EACH_PTR(defines);
436 if (bb->parents) {
437 struct basic_block *from;
438 FOR_EACH_PTR(bb->parents, from) {
439 printf(" **from %p (%s:%d:%d)**\n", from,
440 input_streams[from->pos.stream].name, from->pos.line, from->pos.pos);
441 } END_FOR_EACH_PTR(from);
444 if (bb->children) {
445 struct basic_block *to;
446 FOR_EACH_PTR(bb->children, to) {
447 printf(" **to %p (%s:%d:%d)**\n", to,
448 input_streams[to->pos.stream].name, to->pos.line, to->pos.pos);
449 } END_FOR_EACH_PTR(to);
453 FOR_EACH_PTR(bb->insns, insn) {
454 show_instruction(insn);
455 } END_FOR_EACH_PTR(insn);
456 if (!bb_terminated(bb))
457 printf("\tEND\n");
458 printf("\n");
461 static void show_symbol_usage(pseudo_t pseudo)
463 if (pseudo) {
464 pseudo_t *pp;
465 FOR_EACH_PTR(pseudo->users, pp) {
466 struct instruction *insn = container(pp, struct instruction, src);
467 show_instruction(insn);
468 } END_FOR_EACH_PTR(pp);
472 void show_entry(struct entrypoint *ep)
474 struct symbol *sym;
475 struct basic_block *bb;
477 printf("%s:\n", show_ident(ep->name->ident));
479 if (verbose) {
480 printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
482 FOR_EACH_PTR(ep->syms, sym) {
483 if (!sym->pseudo)
484 continue;
485 if (!sym->pseudo->users)
486 continue;
487 printf(" sym: %p %s\n", sym, show_ident(sym->ident));
488 if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE))
489 printf("\texternal visibility\n");
490 show_symbol_usage(sym->pseudo);
491 } END_FOR_EACH_PTR(sym);
493 printf("\n");
496 FOR_EACH_PTR(ep->bbs, bb) {
497 if (!bb)
498 continue;
499 if (!bb->parents && !bb->children && !bb->insns && verbose < 2)
500 continue;
501 if (bb == ep->entry)
502 printf("ENTRY:\n");
503 show_bb(bb);
504 } END_FOR_EACH_PTR(bb);
506 printf("\n");
509 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos)
511 if (label->bb_target)
512 warning(pos, "label '%s' already bound", show_ident(label->ident));
513 label->bb_target = bb;
516 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label)
518 struct basic_block *bb = label->bb_target;
520 if (!bb) {
521 bb = alloc_basic_block(ep, label->pos);
522 label->bb_target = bb;
524 return bb;
527 static void finish_block(struct entrypoint *ep)
529 struct basic_block *src = ep->active;
530 if (bb_reachable(src))
531 ep->active = NULL;
534 static void add_goto(struct entrypoint *ep, struct basic_block *dst)
536 struct basic_block *src = ep->active;
537 if (bb_reachable(src)) {
538 struct instruction *br = alloc_instruction(OP_BR, 0);
539 br->bb_true = dst;
540 add_bb(&dst->parents, src);
541 add_bb(&src->children, dst);
542 br->bb = src;
543 add_instruction(&src->insns, br);
544 ep->active = NULL;
548 static void add_one_insn(struct entrypoint *ep, struct instruction *insn)
550 struct basic_block *bb = ep->active;
552 if (bb_reachable(bb)) {
553 insn->bb = bb;
554 add_instruction(&bb->insns, insn);
558 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb)
560 if (!bb_terminated(ep->active))
561 add_goto(ep, bb);
563 ep->active = bb;
564 if (bb_reachable(bb))
565 add_bb(&ep->bbs, bb);
568 static void remove_parent(struct basic_block *child, struct basic_block *parent)
570 remove_bb_from_list(&child->parents, parent, 0);
571 if (!child->parents)
572 kill_bb(child);
575 /* Change a "switch" into a branch */
576 void insert_branch(struct basic_block *bb, struct instruction *jmp, struct basic_block *target)
578 struct instruction *br, *old;
579 struct basic_block *child;
581 /* Remove the switch */
582 old = delete_last_instruction(&bb->insns);
583 assert(old == jmp);
585 br = alloc_instruction(OP_BR, 0);
586 br->bb = bb;
587 br->bb_true = target;
588 add_instruction(&bb->insns, br);
590 FOR_EACH_PTR(bb->children, child) {
591 if (child == target) {
592 target = NULL; /* Trigger just once */
593 continue;
595 DELETE_CURRENT_PTR(child);
596 remove_parent(child, bb);
597 } END_FOR_EACH_PTR(child);
598 PACK_PTR_LIST(&bb->children);
602 void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi_node, pseudo_t true, pseudo_t false)
604 pseudo_t target;
605 struct instruction *setcc, *select;
607 /* Remove the 'br' */
608 delete_last_instruction(&bb->insns);
610 setcc = alloc_instruction(OP_SETCC, 1);
611 setcc->bb = bb;
612 assert(br->cond);
613 use_pseudo(br->cond, &setcc->src);
615 select = alloc_instruction(OP_SEL, phi_node->size);
616 select->bb = bb;
618 target = phi_node->target;
619 assert(target->def == phi_node);
620 select->target = target;
621 target->def = select;
623 use_pseudo(true, &select->src1);
624 use_pseudo(false, &select->src2);
626 add_instruction(&bb->insns, setcc);
627 add_instruction(&bb->insns, select);
628 add_instruction(&bb->insns, br);
631 static inline int bb_empty(struct basic_block *bb)
633 return !bb->insns;
636 /* Add a label to the currently active block, return new active block */
637 static struct basic_block * add_label(struct entrypoint *ep, struct symbol *label)
639 struct basic_block *bb = label->bb_target;
641 if (bb) {
642 set_activeblock(ep, bb);
643 return bb;
645 bb = ep->active;
646 if (!bb_reachable(bb) || !bb_empty(bb)) {
647 bb = alloc_basic_block(ep, label->pos);
648 set_activeblock(ep, bb);
650 label->bb_target = bb;
651 return bb;
654 static void add_setcc(struct entrypoint *ep, struct expression *expr, pseudo_t val)
656 struct basic_block *bb = ep->active;
658 if (bb_reachable(bb)) {
659 struct instruction *cc = alloc_instruction(OP_SETCC, 1);
660 use_pseudo(val, &cc->src);
661 assert(val);
662 add_one_insn(ep, cc);
666 static void add_branch(struct entrypoint *ep, struct expression *expr, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false)
668 struct basic_block *bb = ep->active;
669 struct instruction *br;
671 if (bb_reachable(bb)) {
672 br = alloc_instruction(OP_BR, 0);
673 use_pseudo(cond, &br->cond);
674 br->bb_true = bb_true;
675 br->bb_false = bb_false;
676 add_bb(&bb_true->parents, bb);
677 add_bb(&bb_false->parents, bb);
678 add_bb(&bb->children, bb_true);
679 add_bb(&bb->children, bb_false);
680 add_one_insn(ep, br);
684 /* Dummy pseudo allocator */
685 pseudo_t alloc_pseudo(struct instruction *def)
687 static int nr = 0;
688 struct pseudo * pseudo = __alloc_pseudo(0);
689 pseudo->type = PSEUDO_REG;
690 pseudo->nr = ++nr;
691 pseudo->def = def;
692 return pseudo;
695 static void clear_symbol_pseudos(struct entrypoint *ep)
697 struct symbol *sym;
699 FOR_EACH_PTR(ep->accesses, sym) {
700 sym->pseudo = NULL;
701 } END_FOR_EACH_PTR(sym);
704 static pseudo_t symbol_pseudo(struct entrypoint *ep, struct symbol *sym)
706 pseudo_t pseudo;
708 if (!sym)
709 return VOID;
711 pseudo = sym->pseudo;
712 if (!pseudo) {
713 pseudo = __alloc_pseudo(0);
714 pseudo->type = PSEUDO_SYM;
715 pseudo->sym = sym;
716 pseudo->ident = sym->ident;
717 sym->pseudo = pseudo;
718 add_symbol(&ep->accesses, sym);
720 /* Symbol pseudos have neither nr, usage nor def */
721 return pseudo;
724 pseudo_t value_pseudo(long long val)
726 #define MAX_VAL_HASH 64
727 static struct pseudo_list *prev[MAX_VAL_HASH];
728 int hash = val & (MAX_VAL_HASH-1);
729 struct pseudo_list **list = prev + hash;
730 pseudo_t pseudo;
732 FOR_EACH_PTR(*list, pseudo) {
733 if (pseudo->value == val)
734 return pseudo;
735 } END_FOR_EACH_PTR(pseudo);
737 pseudo = __alloc_pseudo(0);
738 pseudo->type = PSEUDO_VAL;
739 pseudo->value = val;
740 add_pseudo(list, pseudo);
742 /* Value pseudos have neither nr, usage nor def */
743 return pseudo;
746 static pseudo_t argument_pseudo(int nr)
748 pseudo_t pseudo = __alloc_pseudo(0);
749 pseudo->type = PSEUDO_ARG;
750 pseudo->nr = nr;
751 /* Argument pseudos have neither usage nor def */
752 return pseudo;
755 pseudo_t alloc_phi(struct basic_block *source, pseudo_t pseudo, int size)
757 struct instruction *insn = alloc_instruction(OP_PHISOURCE, size);
758 pseudo_t phi = __alloc_pseudo(0);
759 static int nr = 0;
761 phi->type = PSEUDO_PHI;
762 phi->nr = ++nr;
763 phi->def = insn;
765 use_pseudo(pseudo, &insn->src1);
766 insn->bb = source;
767 insn->target = phi;
768 add_instruction(&source->insns, insn);
769 return phi;
773 * We carry the "access_data" structure around for any accesses,
774 * which simplifies things a lot. It contains all the access
775 * information in one place.
777 struct access_data {
778 struct symbol *result_type; // result ctype
779 struct symbol *source_type; // source ctype
780 pseudo_t address; // pseudo containing address ..
781 pseudo_t origval; // pseudo for original value ..
782 unsigned int offset, alignment; // byte offset
783 unsigned int bit_size, bit_offset; // which bits
784 struct position pos;
/*
 * Called once an access described by 'ad' has been fully emitted.
 * Currently there is nothing to clean up.
 */
static void finish_address_gen(struct entrypoint *ep, struct access_data *ad)
{
	/* intentionally empty */
}
791 static int linearize_simple_address(struct entrypoint *ep,
792 struct expression *addr,
793 struct access_data *ad)
795 if (addr->type == EXPR_SYMBOL) {
796 ad->address = symbol_pseudo(ep, addr->symbol);
797 return 1;
799 if (addr->type == EXPR_BINOP) {
800 if (addr->right->type == EXPR_VALUE) {
801 if (addr->op == '+') {
802 ad->offset += get_expression_value(addr->right);
803 return linearize_simple_address(ep, addr->left, ad);
807 ad->address = linearize_expression(ep, addr);
808 return 1;
811 static struct symbol *base_type(struct symbol *sym)
813 struct symbol *base = sym;
815 if (sym->type == SYM_NODE)
816 base = base->ctype.base_type;
817 if (base->type == SYM_BITFIELD)
818 return base->ctype.base_type;
819 return sym;
822 static int linearize_address_gen(struct entrypoint *ep,
823 struct expression *expr,
824 struct access_data *ad)
826 struct symbol *ctype = expr->ctype;
828 if (!ctype)
829 return 0;
830 ad->pos = expr->pos;
831 ad->result_type = ctype;
832 ad->source_type = base_type(ctype);
833 ad->bit_size = ctype->bit_size;
834 ad->alignment = ctype->ctype.alignment;
835 ad->bit_offset = ctype->bit_offset;
836 if (expr->type == EXPR_PREOP && expr->op == '*')
837 return linearize_simple_address(ep, expr->unop, ad);
839 warning(expr->pos, "generating address of non-lvalue (%d)", expr->type);
840 return 0;
843 static pseudo_t add_load(struct entrypoint *ep, struct access_data *ad)
845 struct instruction *insn;
846 pseudo_t new;
848 new = ad->origval;
849 if (0 && new)
850 return new;
852 insn = alloc_typed_instruction(OP_LOAD, ad->source_type);
853 new = alloc_pseudo(insn);
854 ad->origval = new;
856 insn->target = new;
857 insn->offset = ad->offset;
858 use_pseudo(ad->address, &insn->src);
859 add_one_insn(ep, insn);
860 return new;
863 static void add_store(struct entrypoint *ep, struct access_data *ad, pseudo_t value)
865 struct basic_block *bb = ep->active;
867 if (bb_reachable(bb)) {
868 struct instruction *store = alloc_typed_instruction(OP_STORE, ad->source_type);
869 store->offset = ad->offset;
870 use_pseudo(value, &store->target);
871 use_pseudo(ad->address, &store->src);
872 add_one_insn(ep, store);
876 static pseudo_t linearize_store_gen(struct entrypoint *ep,
877 pseudo_t value,
878 struct access_data *ad)
880 pseudo_t store = value;
882 if (ad->source_type->bit_size != ad->result_type->bit_size) {
883 pseudo_t orig = add_load(ep, ad);
884 int shift = ad->bit_offset;
885 unsigned long long mask = (1ULL << ad->bit_size)-1;
887 if (shift) {
888 store = add_binary_op(ep, ad->source_type, OP_SHL, value, value_pseudo(shift));
889 mask <<= shift;
891 orig = add_binary_op(ep, ad->source_type, OP_AND, orig, value_pseudo(~mask));
892 store = add_binary_op(ep, ad->source_type, OP_OR, orig, store);
894 add_store(ep, ad, store);
895 return value;
898 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right)
900 struct instruction *insn = alloc_typed_instruction(op, ctype);
901 pseudo_t target = alloc_pseudo(insn);
902 insn->target = target;
903 use_pseudo(left, &insn->src1);
904 use_pseudo(right, &insn->src2);
905 add_one_insn(ep, insn);
906 return target;
909 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val)
911 struct instruction *insn = alloc_typed_instruction(OP_SETVAL, ctype);
912 pseudo_t target = alloc_pseudo(insn);
913 insn->target = target;
914 insn->val = val;
915 if (!val) {
916 pseudo_t addr = symbol_pseudo(ep, ctype);
917 use_pseudo(addr, &insn->symbol);
918 insn->size = bits_in_pointer;
920 add_one_insn(ep, insn);
921 return target;
924 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct access_data *ad)
926 pseudo_t new = add_load(ep, ad);
928 if (ad->bit_offset) {
929 pseudo_t shift = value_pseudo(ad->bit_offset);
930 pseudo_t newval = add_binary_op(ep, ad->source_type, OP_SHR, new, shift);
931 new = newval;
934 return new;
937 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
939 struct access_data ad = { NULL, };
940 pseudo_t value;
942 if (!linearize_address_gen(ep, expr, &ad))
943 return VOID;
944 value = linearize_load_gen(ep, &ad);
945 finish_address_gen(ep, &ad);
946 return value;
949 /* FIXME: FP */
950 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
952 struct access_data ad = { NULL, };
953 pseudo_t old, new, one;
954 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB;
956 if (!linearize_address_gen(ep, expr->unop, &ad))
957 return VOID;
959 old = linearize_load_gen(ep, &ad);
960 one = value_pseudo(expr->op_value);
961 new = add_binary_op(ep, expr->ctype, op, old, one);
962 linearize_store_gen(ep, new, &ad);
963 finish_address_gen(ep, &ad);
964 return postop ? old : new;
967 static pseudo_t add_uniop(struct entrypoint *ep, struct expression *expr, int op, pseudo_t src)
969 struct instruction *insn = alloc_typed_instruction(op, expr->ctype);
970 pseudo_t new = alloc_pseudo(insn);
972 insn->target = new;
973 use_pseudo(src, &insn->src1);
974 add_one_insn(ep, insn);
975 return new;
978 static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr)
980 pseudo_t pre = linearize_expression(ep, expr->base);
981 struct instruction *insn = alloc_typed_instruction(OP_SLICE, expr->ctype);
982 pseudo_t new = alloc_pseudo(insn);
984 insn->target = new;
985 insn->from = expr->r_bitpos;
986 insn->len = expr->r_nrbits;
987 use_pseudo(pre, &insn->base);
988 add_one_insn(ep, insn);
989 return new;
992 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
994 pseudo_t pre = linearize_expression(ep, expr->unop);
995 switch (expr->op) {
996 case '+':
997 return pre;
998 case '!': {
999 pseudo_t zero = value_pseudo(0);
1000 return add_binary_op(ep, expr->ctype, OP_SET_EQ, pre, zero);
1002 case '~':
1003 return add_uniop(ep, expr, OP_NOT, pre);
1004 case '-':
1005 return add_uniop(ep, expr, OP_NEG, pre);
1007 return VOID;
1010 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
1013 * '*' is an lvalue access, and is fundamentally different
1014 * from an arithmetic operation. Maybe it should have an
1015 * expression type of its own..
1017 if (expr->op == '*')
1018 return linearize_access(ep, expr);
1019 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
1020 return linearize_inc_dec(ep, expr, 0);
1021 return linearize_regular_preop(ep, expr);
1024 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr)
1026 return linearize_inc_dec(ep, expr, 1);
1029 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
1031 struct access_data ad = { NULL, };
1032 struct expression *target = expr->left;
1033 pseudo_t value;
1035 value = linearize_expression(ep, expr->right);
1036 if (!linearize_address_gen(ep, target, &ad))
1037 return VOID;
1038 if (expr->op != '=') {
1039 pseudo_t oldvalue = linearize_load_gen(ep, &ad);
1040 pseudo_t dst;
1041 static const int op_trans[] = {
1042 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD,
1043 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB,
1044 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MUL,
1045 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIV,
1046 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MOD,
1047 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL,
1048 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_SHR,
1049 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND,
1050 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR,
1051 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR
1053 dst = add_binary_op(ep, expr->ctype, op_trans[expr->op - SPECIAL_BASE], oldvalue, value);
1054 value = dst;
1056 value = linearize_store_gen(ep, value, &ad);
1057 finish_address_gen(ep, &ad);
1058 return value;
1061 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr)
1063 struct expression *arg, *fn;
1064 struct instruction *insn = alloc_typed_instruction(OP_CALL, expr->ctype);
1065 pseudo_t retval, call;
1066 int context_diff;
1068 if (!expr->ctype) {
1069 warning(expr->pos, "call with no type!");
1070 return VOID;
1073 FOR_EACH_PTR(expr->args, arg) {
1074 pseudo_t new = linearize_expression(ep, arg);
1075 use_pseudo(new, add_pseudo(&insn->arguments, new));
1076 } END_FOR_EACH_PTR(arg);
1078 fn = expr->fn;
1080 context_diff = 0;
1081 if (fn->ctype) {
1082 int in = fn->ctype->ctype.in_context;
1083 int out = fn->ctype->ctype.out_context;
1084 if (in < 0 || out < 0)
1085 in = out = 0;
1086 context_diff = out - in;
1089 if (fn->type == EXPR_PREOP) {
1090 if (fn->unop->type == EXPR_SYMBOL) {
1091 struct symbol *sym = fn->unop->symbol;
1092 if (sym->ctype.base_type->type == SYM_FN)
1093 fn = fn->unop;
1096 if (fn->type == EXPR_SYMBOL) {
1097 call = symbol_pseudo(ep, fn->symbol);
1098 } else {
1099 call = linearize_expression(ep, fn);
1101 use_pseudo(call, &insn->func);
1102 retval = VOID;
1103 if (expr->ctype != &void_ctype)
1104 retval = alloc_pseudo(insn);
1105 insn->target = retval;
1106 add_one_insn(ep, insn);
1108 if (context_diff) {
1109 insn = alloc_instruction(OP_CONTEXT, 0);
1110 insn->increment = context_diff;
1111 add_one_insn(ep, insn);
1114 return retval;
1117 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr)
1119 pseudo_t src1, src2, dst;
1120 static const int opcode[] = {
1121 ['+'] = OP_ADD, ['-'] = OP_SUB,
1122 ['*'] = OP_MUL, ['/'] = OP_DIV,
1123 ['%'] = OP_MOD, ['&'] = OP_AND,
1124 ['|'] = OP_OR, ['^'] = OP_XOR,
1125 [SPECIAL_LEFTSHIFT] = OP_SHL,
1126 [SPECIAL_RIGHTSHIFT] = OP_SHR,
1127 [SPECIAL_LOGICAL_AND] = OP_AND_BOOL,
1128 [SPECIAL_LOGICAL_OR] = OP_OR_BOOL,
1131 src1 = linearize_expression(ep, expr->left);
1132 src2 = linearize_expression(ep, expr->right);
1133 dst = add_binary_op(ep, expr->ctype, opcode[expr->op], src1, src2);
1134 return dst;
1137 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1139 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1141 static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr)
1143 pseudo_t cond, true, false, res;
1145 true = linearize_expression(ep, expr->cond_true);
1146 false = linearize_expression(ep, expr->cond_false);
1147 cond = linearize_expression(ep, expr->conditional);
1149 add_setcc(ep, expr, cond);
1150 if (!expr->cond_true)
1151 true = cond;
1152 res = add_binary_op(ep, expr->ctype, OP_SEL, true, false);
1153 return res;
1156 static pseudo_t add_join_conditional(struct entrypoint *ep, struct expression *expr,
1157 pseudo_t phi1, pseudo_t phi2)
1159 pseudo_t target;
1160 struct instruction *phi_node;
1162 if (phi1 == VOID)
1163 return phi2;
1164 if (phi2 == VOID)
1165 return phi1;
1167 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1168 use_pseudo(phi1, add_pseudo(&phi_node->phi_list, phi1));
1169 use_pseudo(phi2, add_pseudo(&phi_node->phi_list, phi2));
1170 phi_node->target = target = alloc_pseudo(phi_node);
1171 add_one_insn(ep, phi_node);
1172 return target;
1175 static pseudo_t linearize_short_conditional(struct entrypoint *ep, struct expression *expr,
1176 struct expression *cond,
1177 struct expression *expr_false)
1179 pseudo_t src1, src2;
1180 struct basic_block *bb_false = alloc_basic_block(ep, expr_false->pos);
1181 struct basic_block *merge = alloc_basic_block(ep, expr->pos);
1182 pseudo_t phi1, phi2;
1183 int size = expr->ctype->bit_size;
1185 src1 = linearize_expression(ep, cond);
1186 phi1 = alloc_phi(ep->active, src1, size);
1187 add_branch(ep, expr, src1, merge, bb_false);
1189 set_activeblock(ep, bb_false);
1190 src2 = linearize_expression(ep, expr_false);
1191 phi2 = alloc_phi(ep->active, src2, size);
1192 set_activeblock(ep, merge);
1194 return add_join_conditional(ep, expr, phi1, phi2);
1197 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr,
1198 struct expression *cond,
1199 struct expression *expr_true,
1200 struct expression *expr_false)
1202 pseudo_t src1, src2;
1203 pseudo_t phi1, phi2;
1204 struct basic_block *bb_true = alloc_basic_block(ep, expr_true->pos);
1205 struct basic_block *bb_false = alloc_basic_block(ep, expr_false->pos);
1206 struct basic_block *merge = alloc_basic_block(ep, expr->pos);
1207 int size = expr->ctype->bit_size;
1209 linearize_cond_branch(ep, cond, bb_true, bb_false);
1211 set_activeblock(ep, bb_true);
1212 src1 = linearize_expression(ep, expr_true);
1213 phi1 = alloc_phi(ep->active, src1, size);
1214 add_goto(ep, merge);
1216 set_activeblock(ep, bb_false);
1217 src2 = linearize_expression(ep, expr_false);
1218 phi2 = alloc_phi(ep->active, src2, size);
1219 set_activeblock(ep, merge);
1221 return add_join_conditional(ep, expr, phi1, phi2);
1224 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr)
1226 struct expression *shortcut;
1228 shortcut = alloc_const_expression(expr->pos, expr->op == SPECIAL_LOGICAL_OR);
1229 shortcut->ctype = expr->ctype;
1230 return linearize_conditional(ep, expr, expr->left, shortcut, expr->right);
1233 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr)
1235 static const int cmpop[] = {
1236 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT,
1237 [SPECIAL_EQUAL] = OP_SET_EQ,
1238 [SPECIAL_NOTEQUAL] = OP_SET_NE,
1239 [SPECIAL_GTE] = OP_SET_GE,
1240 [SPECIAL_LTE] = OP_SET_LE,
1241 [SPECIAL_UNSIGNED_LT] = OP_SET_B,
1242 [SPECIAL_UNSIGNED_GT] = OP_SET_A,
1243 [SPECIAL_UNSIGNED_LTE] = OP_SET_BE,
1244 [SPECIAL_UNSIGNED_GTE] = OP_SET_AE,
1247 pseudo_t src1 = linearize_expression(ep, expr->left);
1248 pseudo_t src2 = linearize_expression(ep, expr->right);
1249 pseudo_t dst = add_binary_op(ep, expr->ctype, cmpop[expr->op], src1, src2);
1250 return dst;
1254 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1256 pseudo_t cond;
1258 if (!expr || !bb_reachable(ep->active))
1259 return VOID;
1261 switch (expr->type) {
1263 case EXPR_STRING:
1264 case EXPR_VALUE:
1265 add_goto(ep, expr->value ? bb_true : bb_false);
1266 return VOID;
1268 case EXPR_FVALUE:
1269 add_goto(ep, expr->fvalue ? bb_true : bb_false);
1270 return VOID;
1272 case EXPR_LOGICAL:
1273 linearize_logical_branch(ep, expr, bb_true, bb_false);
1274 return VOID;
1276 case EXPR_COMPARE:
1277 cond = linearize_compare(ep, expr);
1278 add_branch(ep, expr, cond, bb_true, bb_false);
1279 break;
1281 case EXPR_PREOP:
1282 if (expr->op == '!')
1283 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true);
1284 /* fall through */
1285 default: {
1286 cond = linearize_expression(ep, expr);
1287 add_branch(ep, expr, cond, bb_true, bb_false);
1289 return VOID;
1292 return VOID;
1297 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1299 struct basic_block *next = alloc_basic_block(ep, expr->pos);
1301 if (expr->op == SPECIAL_LOGICAL_OR)
1302 linearize_cond_branch(ep, expr->left, bb_true, next);
1303 else
1304 linearize_cond_branch(ep, expr->left, next, bb_false);
1305 set_activeblock(ep, next);
1306 linearize_cond_branch(ep, expr->right, bb_true, bb_false);
1307 return VOID;
1311 * Casts to pointers are "less safe" than other casts, since
1312 * they imply type-unsafe accesses. "void *" is a special
1313 * case, since you can't access through it anyway without another
1314 * cast.
1316 static struct instruction *alloc_cast_instruction(struct symbol *ctype)
1318 int opcode = OP_CAST;
1319 struct symbol *base = ctype;
1321 if (base->type == SYM_NODE)
1322 base = base->ctype.base_type;
1323 if (base->type == SYM_PTR) {
1324 base = base->ctype.base_type;
1325 if (base != &void_ctype)
1326 opcode = OP_PTRCAST;
1328 return alloc_typed_instruction(opcode, ctype);
1331 pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr)
1333 pseudo_t src, result;
1334 struct instruction *insn;
1336 src = linearize_expression(ep, expr->cast_expression);
1337 if (src == VOID)
1338 return VOID;
1339 if (expr->ctype->bit_size < 0)
1340 return VOID;
1342 insn = alloc_cast_instruction(expr->ctype);
1343 result = alloc_pseudo(insn);
1344 insn->target = result;
1345 insn->orig_type = expr->cast_expression->ctype;
1346 use_pseudo(src, &insn->src);
1347 add_one_insn(ep, insn);
1348 return result;
1351 pseudo_t linearize_position(struct entrypoint *ep, struct expression *pos, struct access_data *ad)
1353 struct expression *init_expr = pos->init_expr;
1354 pseudo_t value = linearize_expression(ep, init_expr);
1356 ad->offset = pos->init_offset;
1357 ad->source_type = base_type(init_expr->ctype);
1358 ad->result_type = init_expr->ctype;
1359 linearize_store_gen(ep, value, ad);
1360 return VOID;
1363 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *ad)
1365 switch (initializer->type) {
1366 case EXPR_INITIALIZER: {
1367 struct expression *expr;
1368 FOR_EACH_PTR(initializer->expr_list, expr) {
1369 linearize_initializer(ep, expr, ad);
1370 } END_FOR_EACH_PTR(expr);
1371 break;
1373 case EXPR_POS:
1374 linearize_position(ep, initializer, ad);
1375 break;
1376 default: {
1377 pseudo_t value = linearize_expression(ep, initializer);
1378 ad->source_type = base_type(initializer->ctype);
1379 ad->result_type = initializer->ctype;
1380 linearize_store_gen(ep, value, ad);
1384 return VOID;
1387 void linearize_argument(struct entrypoint *ep, struct symbol *arg, int nr)
1389 struct access_data ad = { NULL, };
1391 ad.source_type = arg;
1392 ad.result_type = arg;
1393 ad.address = symbol_pseudo(ep, arg);
1394 linearize_store_gen(ep, argument_pseudo(nr), &ad);
1395 finish_address_gen(ep, &ad);
1398 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
1400 if (!expr)
1401 return VOID;
1403 switch (expr->type) {
1404 case EXPR_SYMBOL:
1405 return add_setval(ep, expr->symbol, NULL);
1407 case EXPR_VALUE:
1408 return value_pseudo(expr->value);
1410 case EXPR_STRING: case EXPR_FVALUE: case EXPR_LABEL:
1411 return add_setval(ep, expr->ctype, expr);
1413 case EXPR_STATEMENT:
1414 return linearize_statement(ep, expr->statement);
1416 case EXPR_CALL:
1417 return linearize_call_expression(ep, expr);
1419 case EXPR_BINOP:
1420 return linearize_binop(ep, expr);
1422 case EXPR_LOGICAL:
1423 return linearize_logical(ep, expr);
1425 case EXPR_COMPARE:
1426 return linearize_compare(ep, expr);
1428 case EXPR_SELECT:
1429 return linearize_select(ep, expr);
1431 case EXPR_CONDITIONAL:
1432 if (!expr->cond_true)
1433 return linearize_short_conditional(ep, expr, expr->conditional, expr->cond_false);
1435 return linearize_conditional(ep, expr, expr->conditional,
1436 expr->cond_true, expr->cond_false);
1438 case EXPR_COMMA:
1439 linearize_expression(ep, expr->left);
1440 return linearize_expression(ep, expr->right);
1442 case EXPR_ASSIGNMENT:
1443 return linearize_assignment(ep, expr);
1445 case EXPR_PREOP:
1446 return linearize_preop(ep, expr);
1448 case EXPR_POSTOP:
1449 return linearize_postop(ep, expr);
1451 case EXPR_CAST:
1452 case EXPR_IMPLIED_CAST:
1453 return linearize_cast(ep, expr);
1455 case EXPR_SLICE:
1456 return linearize_slice(ep, expr);
1458 case EXPR_INITIALIZER:
1459 case EXPR_POS:
1460 warning(expr->pos, "unexpected initializer expression (%d %d)", expr->type, expr->op);
1461 return VOID;
1462 default:
1463 warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op);
1464 return VOID;
1466 return VOID;
1469 static void linearize_one_symbol(struct entrypoint *ep, struct symbol *sym)
1471 struct access_data ad = { NULL, };
1473 if (!sym->initializer)
1474 return;
1476 ad.address = symbol_pseudo(ep, sym);
1477 linearize_initializer(ep, sym->initializer, &ad);
1478 finish_address_gen(ep, &ad);
1481 static pseudo_t linearize_compound_statement(struct entrypoint *ep, struct statement *stmt)
1483 pseudo_t pseudo;
1484 struct statement *s;
1485 struct symbol *sym;
1486 struct symbol *ret = stmt->ret;
1488 concat_symbol_list(stmt->syms, &ep->syms);
1490 FOR_EACH_PTR(stmt->syms, sym) {
1491 linearize_one_symbol(ep, sym);
1492 } END_FOR_EACH_PTR(sym);
1494 pseudo = VOID;
1495 FOR_EACH_PTR(stmt->stmts, s) {
1496 pseudo = linearize_statement(ep, s);
1497 } END_FOR_EACH_PTR(s);
1499 if (ret) {
1500 struct basic_block *bb = add_label(ep, ret);
1501 struct instruction *phi_node = first_instruction(bb->insns);
1503 if (!phi_node)
1504 return pseudo;
1506 if (pseudo_list_size(phi_node->phi_list)==1) {
1507 pseudo = first_pseudo(phi_node->phi_list);
1508 assert(pseudo->type == PSEUDO_PHI);
1509 return pseudo->def->src1;
1511 return phi_node->target;
1513 return pseudo;
1517 pseudo_t linearize_internal(struct entrypoint *ep, struct statement *stmt)
1519 struct instruction *insn = alloc_instruction(OP_CONTEXT, 0);
1520 struct expression *expr = stmt->expression;
1521 int value = 0;
1523 if (expr->type == EXPR_VALUE)
1524 value = expr->value;
1526 insn->increment = value;
1527 add_one_insn(ep, insn);
1528 return VOID;
1531 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
1533 struct basic_block *bb;
1535 if (!stmt)
1536 return VOID;
1538 bb = ep->active;
1539 if (bb && !bb->insns)
1540 bb->pos = stmt->pos;
1542 switch (stmt->type) {
1543 case STMT_NONE:
1544 break;
1546 case STMT_INTERNAL:
1547 return linearize_internal(ep, stmt);
1549 case STMT_EXPRESSION:
1550 return linearize_expression(ep, stmt->expression);
1552 case STMT_ASM:
1553 /* FIXME */
1554 break;
1556 case STMT_RETURN: {
1557 struct expression *expr = stmt->expression;
1558 struct basic_block *bb_return = get_bound_block(ep, stmt->ret_target);
1559 struct basic_block *active;
1560 pseudo_t src = linearize_expression(ep, expr);
1561 active = ep->active;
1562 if (active && src != &void_pseudo) {
1563 struct instruction *phi_node = first_instruction(bb_return->insns);
1564 pseudo_t phi;
1565 if (!phi_node) {
1566 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1567 phi_node->target = alloc_pseudo(phi_node);
1568 phi_node->bb = bb_return;
1569 add_instruction(&bb_return->insns, phi_node);
1571 phi = alloc_phi(active, src, expr->ctype->bit_size);
1572 phi->ident = &return_ident;
1573 use_pseudo(phi, add_pseudo(&phi_node->phi_list, phi));
1575 add_goto(ep, bb_return);
1576 return VOID;
1579 case STMT_CASE: {
1580 add_label(ep, stmt->case_label);
1581 linearize_statement(ep, stmt->case_statement);
1582 break;
1585 case STMT_LABEL: {
1586 struct symbol *label = stmt->label_identifier;
1588 if (label->used) {
1589 add_label(ep, label);
1590 linearize_statement(ep, stmt->label_statement);
1592 break;
1595 case STMT_GOTO: {
1596 struct symbol *sym;
1597 struct expression *expr;
1598 struct instruction *goto_ins;
1599 struct basic_block *active;
1600 pseudo_t pseudo;
1602 active = ep->active;
1603 if (!bb_reachable(active))
1604 break;
1606 if (stmt->goto_label) {
1607 add_goto(ep, get_bound_block(ep, stmt->goto_label));
1608 break;
1611 expr = stmt->goto_expression;
1612 if (!expr)
1613 break;
1615 /* This can happen as part of simplification */
1616 if (expr->type == EXPR_LABEL) {
1617 add_goto(ep, get_bound_block(ep, expr->label_symbol));
1618 break;
1621 pseudo = linearize_expression(ep, expr);
1622 goto_ins = alloc_instruction(OP_COMPUTEDGOTO, 0);
1623 use_pseudo(pseudo, &goto_ins->target);
1624 add_one_insn(ep, goto_ins);
1626 FOR_EACH_PTR(stmt->target_list, sym) {
1627 struct basic_block *bb_computed = get_bound_block(ep, sym);
1628 struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0);
1629 add_multijmp(&goto_ins->multijmp_list, jmp);
1630 add_bb(&bb_computed->parents, ep->active);
1631 add_bb(&active->children, bb_computed);
1632 } END_FOR_EACH_PTR(sym);
1634 finish_block(ep);
1635 break;
1638 case STMT_COMPOUND:
1639 return linearize_compound_statement(ep, stmt);
1642 * This could take 'likely/unlikely' into account, and
1643 * switch the arms around appropriately..
1645 case STMT_IF: {
1646 struct basic_block *bb_true, *bb_false, *endif;
1647 struct expression *cond = stmt->if_conditional;
1649 bb_true = alloc_basic_block(ep, stmt->pos);
1650 bb_false = endif = alloc_basic_block(ep, stmt->pos);
1652 linearize_cond_branch(ep, cond, bb_true, bb_false);
1654 set_activeblock(ep, bb_true);
1655 linearize_statement(ep, stmt->if_true);
1657 if (stmt->if_false) {
1658 endif = alloc_basic_block(ep, stmt->pos);
1659 add_goto(ep, endif);
1660 set_activeblock(ep, bb_false);
1661 linearize_statement(ep, stmt->if_false);
1663 set_activeblock(ep, endif);
1664 break;
1667 case STMT_SWITCH: {
1668 struct symbol *sym;
1669 struct instruction *switch_ins;
1670 struct basic_block *switch_end = alloc_basic_block(ep, stmt->pos);
1671 struct basic_block *active, *default_case;
1672 struct multijmp *jmp;
1673 pseudo_t pseudo;
1675 pseudo = linearize_expression(ep, stmt->switch_expression);
1677 active = ep->active;
1678 if (!bb_reachable(active))
1679 break;
1681 switch_ins = alloc_instruction(OP_SWITCH, 0);
1682 use_pseudo(pseudo, &switch_ins->cond);
1683 add_one_insn(ep, switch_ins);
1684 finish_block(ep);
1686 default_case = NULL;
1687 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
1688 struct statement *case_stmt = sym->stmt;
1689 struct basic_block *bb_case = get_bound_block(ep, sym);
1691 if (!case_stmt->case_expression) {
1692 default_case = bb_case;
1693 continue;
1694 } else {
1695 int begin, end;
1697 begin = end = case_stmt->case_expression->value;
1698 if (case_stmt->case_to)
1699 end = case_stmt->case_to->value;
1700 if (begin > end)
1701 jmp = alloc_multijmp(bb_case, end, begin);
1702 else
1703 jmp = alloc_multijmp(bb_case, begin, end);
1706 add_multijmp(&switch_ins->multijmp_list, jmp);
1707 add_bb(&bb_case->parents, active);
1708 add_bb(&active->children, bb_case);
1709 } END_FOR_EACH_PTR(sym);
1711 bind_label(stmt->switch_break, switch_end, stmt->pos);
1713 /* And linearize the actual statement */
1714 linearize_statement(ep, stmt->switch_statement);
1715 set_activeblock(ep, switch_end);
1717 if (!default_case)
1718 default_case = switch_end;
1720 jmp = alloc_multijmp(default_case, 1, 0);
1721 add_multijmp(&switch_ins->multijmp_list, jmp);
1722 add_bb(&default_case->parents, active);
1723 add_bb(&active->children, default_case);
1725 break;
1728 case STMT_ITERATOR: {
1729 struct statement *pre_statement = stmt->iterator_pre_statement;
1730 struct expression *pre_condition = stmt->iterator_pre_condition;
1731 struct statement *statement = stmt->iterator_statement;
1732 struct statement *post_statement = stmt->iterator_post_statement;
1733 struct expression *post_condition = stmt->iterator_post_condition;
1734 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end;
1736 concat_symbol_list(stmt->iterator_syms, &ep->syms);
1737 linearize_statement(ep, pre_statement);
1739 loop_body = loop_top = alloc_basic_block(ep, stmt->pos);
1740 loop_continue = alloc_basic_block(ep, stmt->pos);
1741 loop_end = alloc_basic_block(ep, stmt->pos);
1743 if (pre_condition == post_condition) {
1744 loop_top = alloc_basic_block(ep, stmt->pos);
1745 set_activeblock(ep, loop_top);
1748 if (pre_condition)
1749 linearize_cond_branch(ep, pre_condition, loop_body, loop_end);
1751 bind_label(stmt->iterator_continue, loop_continue, stmt->pos);
1752 bind_label(stmt->iterator_break, loop_end, stmt->pos);
1754 set_activeblock(ep, loop_body);
1755 linearize_statement(ep, statement);
1756 add_goto(ep, loop_continue);
1758 if (post_condition) {
1759 set_activeblock(ep, loop_continue);
1760 linearize_statement(ep, post_statement);
1761 if (pre_condition == post_condition)
1762 add_goto(ep, loop_top);
1763 else
1764 linearize_cond_branch(ep, post_condition, loop_top, loop_end);
1767 set_activeblock(ep, loop_end);
1768 break;
1771 default:
1772 break;
1774 return VOID;
1777 static struct entrypoint *linearize_fn(struct symbol *sym, struct symbol *base_type)
1779 struct entrypoint *ep;
1780 struct basic_block *bb;
1781 struct symbol *arg;
1782 pseudo_t result;
1783 int i;
1785 if (!base_type->stmt)
1786 return NULL;
1788 ep = alloc_entrypoint();
1789 bb = alloc_basic_block(ep, sym->pos);
1791 ep->name = sym;
1792 ep->entry = bb;
1793 set_activeblock(ep, bb);
1794 concat_symbol_list(base_type->arguments, &ep->syms);
1796 /* FIXME!! We should do something else about varargs.. */
1797 i = 0;
1798 FOR_EACH_PTR(base_type->arguments, arg) {
1799 linearize_argument(ep, arg, ++i);
1800 } END_FOR_EACH_PTR(arg);
1802 result = linearize_statement(ep, base_type->stmt);
1803 if (bb_reachable(ep->active) && !bb_terminated(ep->active)) {
1804 struct symbol *ret_type = base_type->ctype.base_type;
1805 struct instruction *insn = alloc_typed_instruction(OP_RET, ret_type);
1807 if (ret_type->bit_size > 0)
1808 use_pseudo(result, &insn->src);
1809 add_one_insn(ep, insn);
1812 merge_phi_sources = 1;
1814 repeat:
1816 * Do trivial flow simplification - branches to
1817 * branches, kill dead basicblocks etc
1819 kill_unreachable_bbs(ep);
1822 * Turn symbols into pseudos
1824 simplify_symbol_usage(ep);
1827 * Remove trivial instructions, and try to CSE
1828 * the rest.
1830 do {
1831 cleanup_and_cse(ep);
1832 simplify_flow(ep);
1833 pack_basic_blocks(ep);
1834 } while (repeat_phase & REPEAT_CSE);
1836 vrfy_flow(ep);
1838 /* Cleanup */
1839 clear_symbol_pseudos(ep);
1841 /* And track pseudo register usage */
1842 track_pseudo_liveness(ep);
1845 * Some flow optimizations can only effectively
1846 * be done when we've done liveness analysis. But
1847 * if they trigger, we need to start all over
1848 * again
1850 if (simplify_flow(ep)) {
1851 clear_liveness(ep);
1852 goto repeat;
1855 return ep;
1858 struct entrypoint *linearize_symbol(struct symbol *sym)
1860 struct symbol *base_type;
1862 if (!sym)
1863 return NULL;
1864 base_type = sym->ctype.base_type;
1865 if (!base_type)
1866 return NULL;
1867 if (base_type->type == SYM_FN)
1868 return linearize_fn(sym, base_type);
1869 return NULL;