Do slightly better on casts.
[smatch.git] / linearize.c
blob037b3f57560c05867094adf9b60b8fd83ea59829
/*
 * Linearize - walk the statement tree (but _not_ the expressions)
 * to generate a linear version of it and the basic blocks.
 *
 * NOTE! We're not interested in the actual sub-expressions yet,
 * even though they can generate conditional branches and
 * subroutine calls. That's all "local" behaviour.
 *
 * Copyright (C) 2004 Linus Torvalds
 * Copyright (C) 2004 Christopher Li
 */
13 #include <string.h>
14 #include <stdarg.h>
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <assert.h>
19 #include "parse.h"
20 #include "expression.h"
21 #include "linearize.h"
22 #include "flow.h"
23 #include "target.h"
25 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
26 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
28 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right);
29 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val);
30 static void linearize_one_symbol(struct entrypoint *ep, struct symbol *sym);
32 struct access_data;
33 static pseudo_t add_load(struct entrypoint *ep, struct access_data *);
34 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *);
36 struct pseudo void_pseudo = {};
38 static struct instruction *alloc_instruction(int opcode, int size)
40 struct instruction * insn = __alloc_instruction(0);
41 insn->opcode = opcode;
42 insn->size = size;
43 return insn;
46 static inline int type_size(struct symbol *type)
48 return type ? type->bit_size > 0 ? type->bit_size : 0 : 0;
/* Allocate an instruction whose size is taken from the given type. */
static struct instruction *alloc_typed_instruction(int opcode, struct symbol *type)
{
	int bits = type_size(type);

	return alloc_instruction(opcode, bits);
}
/* Allocate a fresh entrypoint via __alloc_entrypoint(). */
static struct entrypoint *alloc_entrypoint(void)
{
	struct entrypoint *ep = __alloc_entrypoint(0);

	return ep;
}
61 static struct basic_block *alloc_basic_block(struct entrypoint *ep, struct position pos)
63 struct basic_block *bb = __alloc_basic_block(0);
64 bb->context = -1;
65 bb->pos = pos;
66 bb->ep = ep;
67 return bb;
70 static struct multijmp* alloc_multijmp(struct basic_block *target, int begin, int end)
72 struct multijmp *multijmp = __alloc_multijmp(0);
73 multijmp->target = target;
74 multijmp->begin = begin;
75 multijmp->end = end;
76 return multijmp;
79 static inline int regno(pseudo_t n)
81 int retval = -1;
82 if (n && n->type == PSEUDO_REG)
83 retval = n->nr;
84 return retval;
87 const char *show_pseudo(pseudo_t pseudo)
89 static int n;
90 static char buffer[4][64];
91 char *buf;
92 int i;
94 if (!pseudo)
95 return "no pseudo";
96 if (pseudo == VOID)
97 return "VOID";
98 buf = buffer[3 & ++n];
99 switch(pseudo->type) {
100 case PSEUDO_SYM: {
101 struct symbol *sym = pseudo->sym;
102 struct expression *expr;
104 if (sym->bb_target) {
105 snprintf(buf, 64, ".L%p", sym->bb_target);
106 break;
108 if (sym->ident) {
109 snprintf(buf, 64, "%s", show_ident(sym->ident));
110 break;
112 expr = sym->initializer;
113 snprintf(buf, 64, "<anon symbol:%p>", sym);
114 switch (expr->type) {
115 case EXPR_VALUE:
116 snprintf(buf, 64, "<symbol value: %lld>", expr->value);
117 break;
118 case EXPR_STRING:
119 return show_string(expr->string);
120 default:
121 break;
123 break;
125 case PSEUDO_REG:
126 i = snprintf(buf, 64, "%%r%d", pseudo->nr);
127 if (pseudo->ident)
128 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
129 break;
130 case PSEUDO_VAL: {
131 long long value = pseudo->value;
132 if (value > 1000 || value < -1000)
133 snprintf(buf, 64, "$%#llx", value);
134 else
135 snprintf(buf, 64, "$%lld", value);
136 break;
138 case PSEUDO_ARG:
139 snprintf(buf, 64, "%%arg%d", pseudo->nr);
140 break;
141 case PSEUDO_PHI:
142 i = snprintf(buf, 64, "%%phi%d", pseudo->nr);
143 if (pseudo->ident)
144 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
145 break;
146 default:
147 snprintf(buf, 64, "<bad pseudo type %d>", pseudo->type);
149 return buf;
152 static const char* opcodes[] = {
153 [OP_BADOP] = "bad_op",
155 /* Fn entrypoint */
156 [OP_ENTRY] = "<entry-point>",
158 /* Terminator */
159 [OP_RET] = "ret",
160 [OP_BR] = "br",
161 [OP_SWITCH] = "switch",
162 [OP_INVOKE] = "invoke",
163 [OP_COMPUTEDGOTO] = "jmp *",
164 [OP_UNWIND] = "unwind",
166 /* Binary */
167 [OP_ADD] = "add",
168 [OP_SUB] = "sub",
169 [OP_MUL] = "mul",
170 [OP_DIV] = "div",
171 [OP_MOD] = "mod",
172 [OP_SHL] = "shl",
173 [OP_SHR] = "shr",
175 /* Logical */
176 [OP_AND] = "and",
177 [OP_OR] = "or",
178 [OP_XOR] = "xor",
179 [OP_AND_BOOL] = "and-bool",
180 [OP_OR_BOOL] = "or-bool",
182 /* Binary comparison */
183 [OP_SET_EQ] = "seteq",
184 [OP_SET_NE] = "setne",
185 [OP_SET_LE] = "setle",
186 [OP_SET_GE] = "setge",
187 [OP_SET_LT] = "setlt",
188 [OP_SET_GT] = "setgt",
189 [OP_SET_B] = "setb",
190 [OP_SET_A] = "seta",
191 [OP_SET_BE] = "setbe",
192 [OP_SET_AE] = "setae",
194 /* Uni */
195 [OP_NOT] = "not",
196 [OP_NEG] = "neg",
198 /* Special three-input */
199 [OP_SEL] = "select",
201 /* Memory */
202 [OP_MALLOC] = "malloc",
203 [OP_FREE] = "free",
204 [OP_ALLOCA] = "alloca",
205 [OP_LOAD] = "load",
206 [OP_STORE] = "store",
207 [OP_SETVAL] = "set",
208 [OP_GET_ELEMENT_PTR] = "getelem",
210 /* Other */
211 [OP_PHI] = "phi",
212 [OP_PHISOURCE] = "phisrc",
213 [OP_CAST] = "cast",
214 [OP_PTRCAST] = "ptrcast",
215 [OP_CALL] = "call",
216 [OP_VANEXT] = "va_next",
217 [OP_VAARG] = "va_arg",
218 [OP_SLICE] = "slice",
219 [OP_SNOP] = "snop",
220 [OP_LNOP] = "lnop",
221 [OP_NOP] = "nop",
222 [OP_DEATHNOTE] = "dead",
223 [OP_ASM] = "asm",
225 /* Sparse tagging (line numbers, context, whatever) */
226 [OP_CONTEXT] = "context",
229 const char *show_instruction(struct instruction *insn)
231 int opcode = insn->opcode;
232 static char buffer[1024];
233 char *buf;
235 buf = buffer;
236 if (!insn->bb)
237 buf += sprintf(buf, "# ");
239 if (opcode < sizeof(opcodes)/sizeof(char *)) {
240 const char *op = opcodes[opcode];
241 if (!op)
242 buf += sprintf(buf, "opcode:%d", opcode);
243 else
244 buf += sprintf(buf, "%s", op);
245 if (insn->size)
246 buf += sprintf(buf, ".%d", insn->size);
247 memset(buf, ' ', 20);
248 buf++;
251 if (buf < buffer + 12)
252 buf = buffer + 12;
253 switch (opcode) {
254 case OP_RET:
255 if (insn->src && insn->src != VOID)
256 buf += sprintf(buf, "%s", show_pseudo(insn->src));
257 break;
258 case OP_BR:
259 if (insn->bb_true && insn->bb_false) {
260 buf += sprintf(buf, "%s, .L%p, .L%p", show_pseudo(insn->cond), insn->bb_true, insn->bb_false);
261 break;
263 buf += sprintf(buf, ".L%p", insn->bb_true ? insn->bb_true : insn->bb_false);
264 break;
266 case OP_SETVAL: {
267 struct expression *expr = insn->val;
268 pseudo_t pseudo = insn->symbol;
269 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
270 if (pseudo) {
271 struct symbol *sym = pseudo->sym;
272 if (!sym) {
273 buf += sprintf(buf, "%s", show_pseudo(pseudo));
274 break;
276 if (sym->bb_target) {
277 buf += sprintf(buf, ".L%p", sym->bb_target);
278 break;
280 if (sym->ident) {
281 buf += sprintf(buf, "%s", show_ident(sym->ident));
282 break;
284 buf += sprintf(buf, "<anon symbol:%p>", sym);
285 break;
288 if (!expr) {
289 buf += sprintf(buf, "%s", "<none>");
290 break;
293 switch (expr->type) {
294 case EXPR_VALUE:
295 buf += sprintf(buf, "%lld", expr->value);
296 break;
297 case EXPR_FVALUE:
298 buf += sprintf(buf, "%Lf", expr->fvalue);
299 break;
300 case EXPR_STRING:
301 buf += sprintf(buf, "%.40s", show_string(expr->string));
302 break;
303 case EXPR_SYMBOL:
304 buf += sprintf(buf, "%s", show_ident(expr->symbol->ident));
305 break;
306 case EXPR_LABEL:
307 buf += sprintf(buf, ".L%p", expr->symbol->bb_target);
308 break;
309 default:
310 buf += sprintf(buf, "SETVAL EXPR TYPE %d", expr->type);
312 break;
314 case OP_SWITCH: {
315 struct multijmp *jmp;
316 buf += sprintf(buf, "%s", show_pseudo(insn->target));
317 FOR_EACH_PTR(insn->multijmp_list, jmp) {
318 if (jmp->begin == jmp->end)
319 buf += sprintf(buf, ", %d -> .L%p", jmp->begin, jmp->target);
320 else if (jmp->begin < jmp->end)
321 buf += sprintf(buf, ", %d ... %d -> .L%p", jmp->begin, jmp->end, jmp->target);
322 else
323 buf += sprintf(buf, ", default -> .L%p", jmp->target);
324 } END_FOR_EACH_PTR(jmp);
325 break;
327 case OP_COMPUTEDGOTO: {
328 struct multijmp *jmp;
329 buf += sprintf(buf, "%s", show_pseudo(insn->target));
330 FOR_EACH_PTR(insn->multijmp_list, jmp) {
331 buf += sprintf(buf, ", .L%p", jmp->target);
332 } END_FOR_EACH_PTR(jmp);
333 break;
336 case OP_PHISOURCE: {
337 struct instruction *phi;
338 buf += sprintf(buf, "%s <- %s ", show_pseudo(insn->target), show_pseudo(insn->phi_src));
339 FOR_EACH_PTR(insn->phi_users, phi) {
340 buf += sprintf(buf, " (%s)", show_pseudo(phi->target));
341 } END_FOR_EACH_PTR(phi);
342 break;
345 case OP_PHI: {
346 pseudo_t phi;
347 const char *s = " <-";
348 buf += sprintf(buf, "%s", show_pseudo(insn->target));
349 FOR_EACH_PTR(insn->phi_list, phi) {
350 buf += sprintf(buf, "%s %s", s, show_pseudo(phi));
351 s = ",";
352 } END_FOR_EACH_PTR(phi);
353 break;
355 case OP_LOAD: case OP_LNOP:
356 buf += sprintf(buf, "%s <- %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
357 break;
358 case OP_STORE: case OP_SNOP:
359 buf += sprintf(buf, "%s -> %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
360 break;
361 case OP_CALL: {
362 struct pseudo *arg;
363 if (insn->target && insn->target != VOID)
364 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
365 buf += sprintf(buf, "%s", show_pseudo(insn->func));
366 FOR_EACH_PTR(insn->arguments, arg) {
367 buf += sprintf(buf, ", %s", show_pseudo(arg));
368 } END_FOR_EACH_PTR(arg);
369 break;
371 case OP_CAST:
372 case OP_PTRCAST:
373 buf += sprintf(buf, "%s <- (%d) %s",
374 show_pseudo(insn->target),
375 type_size(insn->orig_type),
376 show_pseudo(insn->src));
377 break;
378 case OP_BINARY ... OP_BINARY_END:
379 case OP_BINCMP ... OP_BINCMP_END:
380 buf += sprintf(buf, "%s <- %s, %s", show_pseudo(insn->target), show_pseudo(insn->src1), show_pseudo(insn->src2));
381 break;
383 case OP_SEL:
384 buf += sprintf(buf, "%s <- %s, %s, %s", show_pseudo(insn->target),
385 show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
386 break;
388 case OP_SLICE:
389 buf += sprintf(buf, "%s <- %s, %d, %d", show_pseudo(insn->target), show_pseudo(insn->base), insn->from, insn->len);
390 break;
392 case OP_NOT: case OP_NEG:
393 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
394 break;
396 case OP_CONTEXT:
397 buf += sprintf(buf, "%d", insn->increment);
398 break;
399 case OP_NOP:
400 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
401 break;
402 case OP_DEATHNOTE:
403 buf += sprintf(buf, "%s", show_pseudo(insn->target));
404 break;
405 case OP_ASM:
406 buf += sprintf(buf, "\"%s\"", insn->string);
407 if (insn->outputs) {
408 pseudo_t pseudo;
409 buf += sprintf(buf, " (");
410 FOR_EACH_PTR(insn->outputs, pseudo) {
411 buf += sprintf(buf, " %s", show_pseudo(pseudo));
412 } END_FOR_EACH_PTR(pseudo);
413 buf += sprintf(buf, " ) <-");
415 if (insn->inputs) {
416 pseudo_t pseudo;
417 buf += sprintf(buf, " (");
418 FOR_EACH_PTR(insn->inputs, pseudo) {
419 buf += sprintf(buf, " %s", show_pseudo(pseudo));
420 } END_FOR_EACH_PTR(pseudo);
421 buf += sprintf(buf, " )");
423 break;
424 default:
425 break;
427 do { --buf; } while (*buf == ' ');
428 *++buf = 0;
429 return buffer;
432 void show_bb(struct basic_block *bb)
434 struct instruction *insn;
436 printf(".L%p:\n", bb);
437 if (verbose) {
438 pseudo_t needs, defines;
439 printf("%s:%d\n", stream_name(bb->pos.stream), bb->pos.line);
441 FOR_EACH_PTR(bb->needs, needs) {
442 struct instruction *def = needs->def;
443 if (def->opcode != OP_PHI) {
444 printf(" **uses %s (from .L%p)**\n", show_pseudo(needs), def->bb);
445 } else {
446 pseudo_t phi;
447 const char *sep = " ";
448 printf(" **uses %s (from", show_pseudo(needs));
449 FOR_EACH_PTR(def->phi_list, phi) {
450 if (phi == VOID)
451 continue;
452 printf("%s(%s:.L%p)", sep, show_pseudo(phi), phi->def->bb);
453 sep = ", ";
454 } END_FOR_EACH_PTR(phi);
455 printf(")**\n");
457 } END_FOR_EACH_PTR(needs);
459 FOR_EACH_PTR(bb->defines, defines) {
460 printf(" **defines %s **\n", show_pseudo(defines));
461 } END_FOR_EACH_PTR(defines);
463 if (bb->parents) {
464 struct basic_block *from;
465 FOR_EACH_PTR(bb->parents, from) {
466 printf(" **from %p (%s:%d:%d)**\n", from,
467 stream_name(from->pos.stream), from->pos.line, from->pos.pos);
468 } END_FOR_EACH_PTR(from);
471 if (bb->children) {
472 struct basic_block *to;
473 FOR_EACH_PTR(bb->children, to) {
474 printf(" **to %p (%s:%d:%d)**\n", to,
475 stream_name(to->pos.stream), to->pos.line, to->pos.pos);
476 } END_FOR_EACH_PTR(to);
480 FOR_EACH_PTR(bb->insns, insn) {
481 if (!insn->bb && verbose < 2)
482 continue;
483 printf("\t%s\n", show_instruction(insn));
484 } END_FOR_EACH_PTR(insn);
485 if (!bb_terminated(bb))
486 printf("\tEND\n");
489 static void show_symbol_usage(pseudo_t pseudo)
491 if (pseudo) {
492 pseudo_t *pp;
493 FOR_EACH_PTR(pseudo->users, pp) {
494 struct instruction *insn = container(pp, struct instruction, src);
495 printf("\t%s\n", show_instruction(insn));
496 } END_FOR_EACH_PTR(pp);
500 void show_entry(struct entrypoint *ep)
502 struct symbol *sym;
503 struct basic_block *bb;
505 printf("%s:\n", show_ident(ep->name->ident));
507 if (verbose) {
508 printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
510 FOR_EACH_PTR(ep->syms, sym) {
511 if (!sym->pseudo)
512 continue;
513 if (!sym->pseudo->users)
514 continue;
515 printf(" sym: %p %s\n", sym, show_ident(sym->ident));
516 if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE))
517 printf("\texternal visibility\n");
518 show_symbol_usage(sym->pseudo);
519 } END_FOR_EACH_PTR(sym);
521 printf("\n");
524 FOR_EACH_PTR(ep->bbs, bb) {
525 if (!bb)
526 continue;
527 if (!bb->parents && !bb->children && !bb->insns && verbose < 2)
528 continue;
529 show_bb(bb);
530 printf("\n");
531 } END_FOR_EACH_PTR(bb);
533 printf("\n");
536 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos)
538 if (label->bb_target)
539 warning(pos, "label '%s' already bound", show_ident(label->ident));
540 label->bb_target = bb;
543 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label)
545 struct basic_block *bb = label->bb_target;
547 if (!bb) {
548 bb = alloc_basic_block(ep, label->pos);
549 label->bb_target = bb;
551 return bb;
554 static void finish_block(struct entrypoint *ep)
556 struct basic_block *src = ep->active;
557 if (bb_reachable(src))
558 ep->active = NULL;
561 static void add_goto(struct entrypoint *ep, struct basic_block *dst)
563 struct basic_block *src = ep->active;
564 if (bb_reachable(src)) {
565 struct instruction *br = alloc_instruction(OP_BR, 0);
566 br->bb_true = dst;
567 add_bb(&dst->parents, src);
568 add_bb(&src->children, dst);
569 br->bb = src;
570 add_instruction(&src->insns, br);
571 ep->active = NULL;
575 static void add_one_insn(struct entrypoint *ep, struct instruction *insn)
577 struct basic_block *bb = ep->active;
579 if (bb_reachable(bb)) {
580 insn->bb = bb;
581 add_instruction(&bb->insns, insn);
585 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb)
587 if (!bb_terminated(ep->active))
588 add_goto(ep, bb);
590 ep->active = bb;
591 if (bb_reachable(bb))
592 add_bb(&ep->bbs, bb);
595 static void remove_parent(struct basic_block *child, struct basic_block *parent)
597 remove_bb_from_list(&child->parents, parent, 1);
598 if (!child->parents)
599 kill_bb(child);
602 /* Change a "switch" into a branch */
603 void insert_branch(struct basic_block *bb, struct instruction *jmp, struct basic_block *target)
605 struct instruction *br, *old;
606 struct basic_block *child;
608 /* Remove the switch */
609 old = delete_last_instruction(&bb->insns);
610 assert(old == jmp);
612 br = alloc_instruction(OP_BR, 0);
613 br->bb = bb;
614 br->bb_true = target;
615 add_instruction(&bb->insns, br);
617 FOR_EACH_PTR(bb->children, child) {
618 if (child == target) {
619 target = NULL; /* Trigger just once */
620 continue;
622 DELETE_CURRENT_PTR(child);
623 remove_parent(child, bb);
624 } END_FOR_EACH_PTR(child);
625 PACK_PTR_LIST(&bb->children);
629 void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi_node, pseudo_t true, pseudo_t false)
631 pseudo_t target;
632 struct instruction *select;
634 /* Remove the 'br' */
635 delete_last_instruction(&bb->insns);
637 select = alloc_instruction(OP_SEL, phi_node->size);
638 select->bb = bb;
640 assert(br->cond);
641 use_pseudo(br->cond, &select->src1);
643 target = phi_node->target;
644 assert(target->def == phi_node);
645 select->target = target;
646 target->def = select;
648 use_pseudo(true, &select->src2);
649 use_pseudo(false, &select->src3);
651 add_instruction(&bb->insns, select);
652 add_instruction(&bb->insns, br);
655 static inline int bb_empty(struct basic_block *bb)
657 return !bb->insns;
660 /* Add a label to the currently active block, return new active block */
661 static struct basic_block * add_label(struct entrypoint *ep, struct symbol *label)
663 struct basic_block *bb = label->bb_target;
665 if (bb) {
666 set_activeblock(ep, bb);
667 return bb;
669 bb = ep->active;
670 if (!bb_reachable(bb) || !bb_empty(bb)) {
671 bb = alloc_basic_block(ep, label->pos);
672 set_activeblock(ep, bb);
674 label->bb_target = bb;
675 return bb;
678 static void add_branch(struct entrypoint *ep, struct expression *expr, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false)
680 struct basic_block *bb = ep->active;
681 struct instruction *br;
683 if (bb_reachable(bb)) {
684 br = alloc_instruction(OP_BR, 0);
685 use_pseudo(cond, &br->cond);
686 br->bb_true = bb_true;
687 br->bb_false = bb_false;
688 add_bb(&bb_true->parents, bb);
689 add_bb(&bb_false->parents, bb);
690 add_bb(&bb->children, bb_true);
691 add_bb(&bb->children, bb_false);
692 add_one_insn(ep, br);
696 /* Dummy pseudo allocator */
697 pseudo_t alloc_pseudo(struct instruction *def)
699 static int nr = 0;
700 struct pseudo * pseudo = __alloc_pseudo(0);
701 pseudo->type = PSEUDO_REG;
702 pseudo->nr = ++nr;
703 pseudo->def = def;
704 return pseudo;
707 static void clear_symbol_pseudos(struct entrypoint *ep)
709 struct symbol *sym;
711 FOR_EACH_PTR(ep->accesses, sym) {
712 sym->pseudo = NULL;
713 } END_FOR_EACH_PTR(sym);
716 static pseudo_t symbol_pseudo(struct entrypoint *ep, struct symbol *sym)
718 pseudo_t pseudo;
720 if (!sym)
721 return VOID;
723 pseudo = sym->pseudo;
724 if (!pseudo) {
725 pseudo = __alloc_pseudo(0);
726 pseudo->type = PSEUDO_SYM;
727 pseudo->sym = sym;
728 pseudo->ident = sym->ident;
729 sym->pseudo = pseudo;
730 add_symbol(&ep->accesses, sym);
732 /* Symbol pseudos have neither nr, usage nor def */
733 return pseudo;
736 pseudo_t value_pseudo(long long val)
738 #define MAX_VAL_HASH 64
739 static struct pseudo_list *prev[MAX_VAL_HASH];
740 int hash = val & (MAX_VAL_HASH-1);
741 struct pseudo_list **list = prev + hash;
742 pseudo_t pseudo;
744 FOR_EACH_PTR(*list, pseudo) {
745 if (pseudo->value == val)
746 return pseudo;
747 } END_FOR_EACH_PTR(pseudo);
749 pseudo = __alloc_pseudo(0);
750 pseudo->type = PSEUDO_VAL;
751 pseudo->value = val;
752 add_pseudo(list, pseudo);
754 /* Value pseudos have neither nr, usage nor def */
755 return pseudo;
758 static pseudo_t argument_pseudo(struct entrypoint *ep, int nr)
760 pseudo_t pseudo = __alloc_pseudo(0);
761 struct instruction *entry = ep->entry;
763 pseudo->type = PSEUDO_ARG;
764 pseudo->nr = nr;
765 pseudo->def = entry;
766 add_pseudo(&entry->arg_list, pseudo);
768 /* Argument pseudos have neither usage nor def */
769 return pseudo;
772 pseudo_t alloc_phi(struct basic_block *source, pseudo_t pseudo, int size)
774 struct instruction *insn = alloc_instruction(OP_PHISOURCE, size);
775 pseudo_t phi = __alloc_pseudo(0);
776 static int nr = 0;
778 phi->type = PSEUDO_PHI;
779 phi->nr = ++nr;
780 phi->def = insn;
782 use_pseudo(pseudo, &insn->phi_src);
783 insn->bb = source;
784 insn->target = phi;
785 add_instruction(&source->insns, insn);
786 return phi;
790 * We carry the "access_data" structure around for any accesses,
791 * which simplifies things a lot. It contains all the access
792 * information in one place.
794 struct access_data {
795 struct symbol *result_type; // result ctype
796 struct symbol *source_type; // source ctype
797 pseudo_t address; // pseudo containing address ..
798 pseudo_t origval; // pseudo for original value ..
799 unsigned int offset, alignment; // byte offset
800 unsigned int bit_size, bit_offset; // which bits
801 struct position pos;
/*
 * Called after an access has been linearized.  Currently there is
 * nothing to do.
 */
static void finish_address_gen(struct entrypoint *ep, struct access_data *ad)
{
	/* nothing */
}
808 static int linearize_simple_address(struct entrypoint *ep,
809 struct expression *addr,
810 struct access_data *ad)
812 if (addr->type == EXPR_SYMBOL) {
813 linearize_one_symbol(ep, addr->symbol);
814 ad->address = symbol_pseudo(ep, addr->symbol);
815 return 1;
817 if (addr->type == EXPR_BINOP) {
818 if (addr->right->type == EXPR_VALUE) {
819 if (addr->op == '+') {
820 ad->offset += get_expression_value(addr->right);
821 return linearize_simple_address(ep, addr->left, ad);
825 ad->address = linearize_expression(ep, addr);
826 return 1;
829 static struct symbol *base_type(struct symbol *sym)
831 struct symbol *base = sym;
833 if (sym) {
834 if (sym->type == SYM_NODE)
835 base = base->ctype.base_type;
836 if (base->type == SYM_BITFIELD)
837 return base->ctype.base_type;
839 return sym;
842 static int linearize_address_gen(struct entrypoint *ep,
843 struct expression *expr,
844 struct access_data *ad)
846 struct symbol *ctype = expr->ctype;
848 if (!ctype)
849 return 0;
850 ad->pos = expr->pos;
851 ad->result_type = ctype;
852 ad->source_type = base_type(ctype);
853 ad->bit_size = ctype->bit_size;
854 ad->alignment = ctype->ctype.alignment;
855 ad->bit_offset = ctype->bit_offset;
856 if (expr->type == EXPR_PREOP && expr->op == '*')
857 return linearize_simple_address(ep, expr->unop, ad);
859 warning(expr->pos, "generating address of non-lvalue (%d)", expr->type);
860 return 0;
863 static pseudo_t add_load(struct entrypoint *ep, struct access_data *ad)
865 struct instruction *insn;
866 pseudo_t new;
868 new = ad->origval;
869 if (0 && new)
870 return new;
872 insn = alloc_typed_instruction(OP_LOAD, ad->source_type);
873 new = alloc_pseudo(insn);
874 ad->origval = new;
876 insn->target = new;
877 insn->offset = ad->offset;
878 use_pseudo(ad->address, &insn->src);
879 add_one_insn(ep, insn);
880 return new;
883 static void add_store(struct entrypoint *ep, struct access_data *ad, pseudo_t value)
885 struct basic_block *bb = ep->active;
887 if (bb_reachable(bb)) {
888 struct instruction *store = alloc_typed_instruction(OP_STORE, ad->source_type);
889 store->offset = ad->offset;
890 use_pseudo(value, &store->target);
891 use_pseudo(ad->address, &store->src);
892 add_one_insn(ep, store);
896 static pseudo_t linearize_store_gen(struct entrypoint *ep,
897 pseudo_t value,
898 struct access_data *ad)
900 pseudo_t store = value;
902 if (type_size(ad->source_type) != type_size(ad->result_type)) {
903 pseudo_t orig = add_load(ep, ad);
904 int shift = ad->bit_offset;
905 unsigned long long mask = (1ULL << ad->bit_size)-1;
907 if (shift) {
908 store = add_binary_op(ep, ad->source_type, OP_SHL, value, value_pseudo(shift));
909 mask <<= shift;
911 orig = add_binary_op(ep, ad->source_type, OP_AND, orig, value_pseudo(~mask));
912 store = add_binary_op(ep, ad->source_type, OP_OR, orig, store);
914 add_store(ep, ad, store);
915 return value;
918 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right)
920 struct instruction *insn = alloc_typed_instruction(op, ctype);
921 pseudo_t target = alloc_pseudo(insn);
922 insn->target = target;
923 use_pseudo(left, &insn->src1);
924 use_pseudo(right, &insn->src2);
925 add_one_insn(ep, insn);
926 return target;
929 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val)
931 struct instruction *insn = alloc_typed_instruction(OP_SETVAL, ctype);
932 pseudo_t target = alloc_pseudo(insn);
933 insn->target = target;
934 insn->val = val;
935 if (!val) {
936 pseudo_t addr = symbol_pseudo(ep, ctype);
937 use_pseudo(addr, &insn->symbol);
938 insn->size = bits_in_pointer;
940 add_one_insn(ep, insn);
941 return target;
944 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct access_data *ad)
946 pseudo_t new = add_load(ep, ad);
948 if (ad->bit_offset) {
949 pseudo_t shift = value_pseudo(ad->bit_offset);
950 pseudo_t newval = add_binary_op(ep, ad->source_type, OP_SHR, new, shift);
951 new = newval;
954 return new;
957 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
959 struct access_data ad = { NULL, };
960 pseudo_t value;
962 if (!linearize_address_gen(ep, expr, &ad))
963 return VOID;
964 value = linearize_load_gen(ep, &ad);
965 finish_address_gen(ep, &ad);
966 return value;
969 /* FIXME: FP */
970 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
972 struct access_data ad = { NULL, };
973 pseudo_t old, new, one;
974 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB;
976 if (!linearize_address_gen(ep, expr->unop, &ad))
977 return VOID;
979 old = linearize_load_gen(ep, &ad);
980 one = value_pseudo(expr->op_value);
981 new = add_binary_op(ep, expr->ctype, op, old, one);
982 linearize_store_gen(ep, new, &ad);
983 finish_address_gen(ep, &ad);
984 return postop ? old : new;
987 static pseudo_t add_uniop(struct entrypoint *ep, struct expression *expr, int op, pseudo_t src)
989 struct instruction *insn = alloc_typed_instruction(op, expr->ctype);
990 pseudo_t new = alloc_pseudo(insn);
992 insn->target = new;
993 use_pseudo(src, &insn->src1);
994 add_one_insn(ep, insn);
995 return new;
998 static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr)
1000 pseudo_t pre = linearize_expression(ep, expr->base);
1001 struct instruction *insn = alloc_typed_instruction(OP_SLICE, expr->ctype);
1002 pseudo_t new = alloc_pseudo(insn);
1004 insn->target = new;
1005 insn->from = expr->r_bitpos;
1006 insn->len = expr->r_nrbits;
1007 use_pseudo(pre, &insn->base);
1008 add_one_insn(ep, insn);
1009 return new;
1012 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
1014 pseudo_t pre = linearize_expression(ep, expr->unop);
1015 switch (expr->op) {
1016 case '+':
1017 return pre;
1018 case '!': {
1019 pseudo_t zero = value_pseudo(0);
1020 return add_binary_op(ep, expr->ctype, OP_SET_EQ, pre, zero);
1022 case '~':
1023 return add_uniop(ep, expr, OP_NOT, pre);
1024 case '-':
1025 return add_uniop(ep, expr, OP_NEG, pre);
1027 return VOID;
1030 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
1033 * '*' is an lvalue access, and is fundamentally different
1034 * from an arithmetic operation. Maybe it should have an
1035 * expression type of its own..
1037 if (expr->op == '*')
1038 return linearize_access(ep, expr);
1039 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
1040 return linearize_inc_dec(ep, expr, 0);
1041 return linearize_regular_preop(ep, expr);
1044 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr)
1046 return linearize_inc_dec(ep, expr, 1);
1049 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
1051 struct access_data ad = { NULL, };
1052 struct expression *target = expr->left;
1053 pseudo_t value;
1055 value = linearize_expression(ep, expr->right);
1056 if (!linearize_address_gen(ep, target, &ad))
1057 return VOID;
1058 if (expr->op != '=') {
1059 pseudo_t oldvalue = linearize_load_gen(ep, &ad);
1060 pseudo_t dst;
1061 static const int op_trans[] = {
1062 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD,
1063 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB,
1064 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MUL,
1065 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIV,
1066 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MOD,
1067 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL,
1068 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_SHR,
1069 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND,
1070 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR,
1071 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR
1073 dst = add_binary_op(ep, expr->ctype, op_trans[expr->op - SPECIAL_BASE], oldvalue, value);
1074 value = dst;
1076 value = linearize_store_gen(ep, value, &ad);
1077 finish_address_gen(ep, &ad);
1078 return value;
1081 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr)
1083 struct expression *arg, *fn;
1084 struct instruction *insn = alloc_typed_instruction(OP_CALL, expr->ctype);
1085 pseudo_t retval, call;
1086 int context_diff;
1088 if (!expr->ctype) {
1089 warning(expr->pos, "call with no type!");
1090 return VOID;
1093 FOR_EACH_PTR(expr->args, arg) {
1094 pseudo_t new = linearize_expression(ep, arg);
1095 use_pseudo(new, add_pseudo(&insn->arguments, new));
1096 } END_FOR_EACH_PTR(arg);
1098 fn = expr->fn;
1100 context_diff = 0;
1101 if (fn->ctype) {
1102 int in = fn->ctype->ctype.in_context;
1103 int out = fn->ctype->ctype.out_context;
1104 if (in < 0 || out < 0)
1105 in = out = 0;
1106 context_diff = out - in;
1109 if (fn->type == EXPR_PREOP) {
1110 if (fn->unop->type == EXPR_SYMBOL) {
1111 struct symbol *sym = fn->unop->symbol;
1112 if (sym->ctype.base_type->type == SYM_FN)
1113 fn = fn->unop;
1116 if (fn->type == EXPR_SYMBOL) {
1117 call = symbol_pseudo(ep, fn->symbol);
1118 } else {
1119 call = linearize_expression(ep, fn);
1121 use_pseudo(call, &insn->func);
1122 retval = VOID;
1123 if (expr->ctype != &void_ctype)
1124 retval = alloc_pseudo(insn);
1125 insn->target = retval;
1126 add_one_insn(ep, insn);
1128 if (context_diff) {
1129 insn = alloc_instruction(OP_CONTEXT, 0);
1130 insn->increment = context_diff;
1131 add_one_insn(ep, insn);
1134 return retval;
1137 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr)
1139 pseudo_t src1, src2, dst;
1140 static const int opcode[] = {
1141 ['+'] = OP_ADD, ['-'] = OP_SUB,
1142 ['*'] = OP_MUL, ['/'] = OP_DIV,
1143 ['%'] = OP_MOD, ['&'] = OP_AND,
1144 ['|'] = OP_OR, ['^'] = OP_XOR,
1145 [SPECIAL_LEFTSHIFT] = OP_SHL,
1146 [SPECIAL_RIGHTSHIFT] = OP_SHR,
1147 [SPECIAL_LOGICAL_AND] = OP_AND_BOOL,
1148 [SPECIAL_LOGICAL_OR] = OP_OR_BOOL,
1151 src1 = linearize_expression(ep, expr->left);
1152 src2 = linearize_expression(ep, expr->right);
1153 dst = add_binary_op(ep, expr->ctype, opcode[expr->op], src1, src2);
1154 return dst;
1157 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1159 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1161 static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr)
1163 pseudo_t cond, true, false, res;
1164 struct instruction *insn;
1166 true = linearize_expression(ep, expr->cond_true);
1167 false = linearize_expression(ep, expr->cond_false);
1168 cond = linearize_expression(ep, expr->conditional);
1170 insn = alloc_typed_instruction(OP_SEL, expr->ctype);
1171 if (!expr->cond_true)
1172 true = cond;
1173 use_pseudo(cond, &insn->src1);
1174 use_pseudo(true, &insn->src2);
1175 use_pseudo(false, &insn->src3);
1177 res = alloc_pseudo(insn);
1178 insn->target = res;
1179 add_one_insn(ep, insn);
1180 return res;
1183 static pseudo_t add_join_conditional(struct entrypoint *ep, struct expression *expr,
1184 pseudo_t phi1, pseudo_t phi2)
1186 pseudo_t target;
1187 struct instruction *phi_node;
1189 if (phi1 == VOID)
1190 return phi2;
1191 if (phi2 == VOID)
1192 return phi1;
1194 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1195 use_pseudo(phi1, add_pseudo(&phi_node->phi_list, phi1));
1196 use_pseudo(phi2, add_pseudo(&phi_node->phi_list, phi2));
1197 phi_node->target = target = alloc_pseudo(phi_node);
1198 add_one_insn(ep, phi_node);
1199 return target;
1202 static pseudo_t linearize_short_conditional(struct entrypoint *ep, struct expression *expr,
1203 struct expression *cond,
1204 struct expression *expr_false)
1206 pseudo_t src1, src2;
1207 struct basic_block *bb_false = alloc_basic_block(ep, expr_false->pos);
1208 struct basic_block *merge = alloc_basic_block(ep, expr->pos);
1209 pseudo_t phi1, phi2;
1210 int size = type_size(expr->ctype);
1212 src1 = linearize_expression(ep, cond);
1213 phi1 = alloc_phi(ep->active, src1, size);
1214 add_branch(ep, expr, src1, merge, bb_false);
1216 set_activeblock(ep, bb_false);
1217 src2 = linearize_expression(ep, expr_false);
1218 phi2 = alloc_phi(ep->active, src2, size);
1219 set_activeblock(ep, merge);
1221 return add_join_conditional(ep, expr, phi1, phi2);
1224 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr,
1225 struct expression *cond,
1226 struct expression *expr_true,
1227 struct expression *expr_false)
1229 pseudo_t src1, src2;
1230 pseudo_t phi1, phi2;
1231 struct basic_block *bb_true = alloc_basic_block(ep, expr_true->pos);
1232 struct basic_block *bb_false = alloc_basic_block(ep, expr_false->pos);
1233 struct basic_block *merge = alloc_basic_block(ep, expr->pos);
1234 int size = type_size(expr->ctype);
1236 linearize_cond_branch(ep, cond, bb_true, bb_false);
1238 set_activeblock(ep, bb_true);
1239 src1 = linearize_expression(ep, expr_true);
1240 phi1 = alloc_phi(ep->active, src1, size);
1241 add_goto(ep, merge);
1243 set_activeblock(ep, bb_false);
1244 src2 = linearize_expression(ep, expr_false);
1245 phi2 = alloc_phi(ep->active, src2, size);
1246 set_activeblock(ep, merge);
1248 return add_join_conditional(ep, expr, phi1, phi2);
1251 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr)
1253 struct expression *shortcut;
1255 shortcut = alloc_const_expression(expr->pos, expr->op == SPECIAL_LOGICAL_OR);
1256 shortcut->ctype = expr->ctype;
1257 return linearize_conditional(ep, expr, expr->left, shortcut, expr->right);
1260 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr)
1262 static const int cmpop[] = {
1263 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT,
1264 [SPECIAL_EQUAL] = OP_SET_EQ,
1265 [SPECIAL_NOTEQUAL] = OP_SET_NE,
1266 [SPECIAL_GTE] = OP_SET_GE,
1267 [SPECIAL_LTE] = OP_SET_LE,
1268 [SPECIAL_UNSIGNED_LT] = OP_SET_B,
1269 [SPECIAL_UNSIGNED_GT] = OP_SET_A,
1270 [SPECIAL_UNSIGNED_LTE] = OP_SET_BE,
1271 [SPECIAL_UNSIGNED_GTE] = OP_SET_AE,
1274 pseudo_t src1 = linearize_expression(ep, expr->left);
1275 pseudo_t src2 = linearize_expression(ep, expr->right);
1276 pseudo_t dst = add_binary_op(ep, expr->ctype, cmpop[expr->op], src1, src2);
1277 return dst;
1281 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1283 pseudo_t cond;
1285 if (!expr || !bb_reachable(ep->active))
1286 return VOID;
1288 switch (expr->type) {
1290 case EXPR_STRING:
1291 case EXPR_VALUE:
1292 add_goto(ep, expr->value ? bb_true : bb_false);
1293 return VOID;
1295 case EXPR_FVALUE:
1296 add_goto(ep, expr->fvalue ? bb_true : bb_false);
1297 return VOID;
1299 case EXPR_LOGICAL:
1300 linearize_logical_branch(ep, expr, bb_true, bb_false);
1301 return VOID;
1303 case EXPR_COMPARE:
1304 cond = linearize_compare(ep, expr);
1305 add_branch(ep, expr, cond, bb_true, bb_false);
1306 break;
1308 case EXPR_PREOP:
1309 if (expr->op == '!')
1310 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true);
1311 /* fall through */
1312 default: {
1313 cond = linearize_expression(ep, expr);
1314 add_branch(ep, expr, cond, bb_true, bb_false);
1316 return VOID;
1319 return VOID;
1324 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1326 struct basic_block *next = alloc_basic_block(ep, expr->pos);
1328 if (expr->op == SPECIAL_LOGICAL_OR)
1329 linearize_cond_branch(ep, expr->left, bb_true, next);
1330 else
1331 linearize_cond_branch(ep, expr->left, next, bb_false);
1332 set_activeblock(ep, next);
1333 linearize_cond_branch(ep, expr->right, bb_true, bb_false);
1334 return VOID;
1338 * Casts to pointers are "less safe" than other casts, since
1339 * they imply type-unsafe accesses. "void *" is a special
1340 * case, since you can't access through it anyway without another
1341 * cast.
1343 static struct instruction *alloc_cast_instruction(struct symbol *ctype)
1345 int opcode = OP_CAST;
1346 struct symbol *base = ctype;
1348 if (base->type == SYM_NODE)
1349 base = base->ctype.base_type;
1350 if (base->type == SYM_PTR) {
1351 base = base->ctype.base_type;
1352 if (base != &void_ctype)
1353 opcode = OP_PTRCAST;
1355 return alloc_typed_instruction(opcode, ctype);
1358 pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr)
1360 pseudo_t src, result;
1361 struct instruction *insn;
1363 src = linearize_expression(ep, expr->cast_expression);
1364 if (src == VOID)
1365 return VOID;
1366 if (!expr->ctype)
1367 return VOID;
1368 if (expr->ctype->bit_size < 0)
1369 return VOID;
1371 insn = alloc_cast_instruction(expr->ctype);
1372 result = alloc_pseudo(insn);
1373 insn->target = result;
1374 insn->orig_type = expr->cast_expression->ctype;
1375 use_pseudo(src, &insn->src);
1376 add_one_insn(ep, insn);
1377 return result;
1380 pseudo_t linearize_position(struct entrypoint *ep, struct expression *pos, struct access_data *ad)
1382 struct expression *init_expr = pos->init_expr;
1383 pseudo_t value = linearize_expression(ep, init_expr);
1385 ad->offset = pos->init_offset;
1386 ad->source_type = base_type(init_expr->ctype);
1387 ad->result_type = init_expr->ctype;
1388 linearize_store_gen(ep, value, ad);
1389 return VOID;
1392 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *ad)
1394 switch (initializer->type) {
1395 case EXPR_INITIALIZER: {
1396 struct expression *expr;
1397 FOR_EACH_PTR(initializer->expr_list, expr) {
1398 linearize_initializer(ep, expr, ad);
1399 } END_FOR_EACH_PTR(expr);
1400 break;
1402 case EXPR_POS:
1403 linearize_position(ep, initializer, ad);
1404 break;
1405 default: {
1406 pseudo_t value = linearize_expression(ep, initializer);
1407 ad->source_type = base_type(initializer->ctype);
1408 ad->result_type = initializer->ctype;
1409 linearize_store_gen(ep, value, ad);
1413 return VOID;
1416 void linearize_argument(struct entrypoint *ep, struct symbol *arg, int nr)
1418 struct access_data ad = { NULL, };
1420 ad.source_type = arg;
1421 ad.result_type = arg;
1422 ad.address = symbol_pseudo(ep, arg);
1423 linearize_store_gen(ep, argument_pseudo(ep, nr), &ad);
1424 finish_address_gen(ep, &ad);
1427 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
1429 if (!expr)
1430 return VOID;
1432 switch (expr->type) {
1433 case EXPR_SYMBOL:
1434 linearize_one_symbol(ep, expr->symbol);
1435 return add_setval(ep, expr->symbol, NULL);
1437 case EXPR_VALUE:
1438 return value_pseudo(expr->value);
1440 case EXPR_STRING: case EXPR_FVALUE: case EXPR_LABEL:
1441 return add_setval(ep, expr->ctype, expr);
1443 case EXPR_STATEMENT:
1444 return linearize_statement(ep, expr->statement);
1446 case EXPR_CALL:
1447 return linearize_call_expression(ep, expr);
1449 case EXPR_BINOP:
1450 return linearize_binop(ep, expr);
1452 case EXPR_LOGICAL:
1453 return linearize_logical(ep, expr);
1455 case EXPR_COMPARE:
1456 return linearize_compare(ep, expr);
1458 case EXPR_SELECT:
1459 return linearize_select(ep, expr);
1461 case EXPR_CONDITIONAL:
1462 if (!expr->cond_true)
1463 return linearize_short_conditional(ep, expr, expr->conditional, expr->cond_false);
1465 return linearize_conditional(ep, expr, expr->conditional,
1466 expr->cond_true, expr->cond_false);
1468 case EXPR_COMMA:
1469 linearize_expression(ep, expr->left);
1470 return linearize_expression(ep, expr->right);
1472 case EXPR_ASSIGNMENT:
1473 return linearize_assignment(ep, expr);
1475 case EXPR_PREOP:
1476 return linearize_preop(ep, expr);
1478 case EXPR_POSTOP:
1479 return linearize_postop(ep, expr);
1481 case EXPR_CAST:
1482 case EXPR_IMPLIED_CAST:
1483 return linearize_cast(ep, expr);
1485 case EXPR_SLICE:
1486 return linearize_slice(ep, expr);
1488 case EXPR_INITIALIZER:
1489 case EXPR_POS:
1490 warning(expr->pos, "unexpected initializer expression (%d %d)", expr->type, expr->op);
1491 return VOID;
1492 default:
1493 warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op);
1494 return VOID;
1496 return VOID;
1499 static void linearize_one_symbol(struct entrypoint *ep, struct symbol *sym)
1501 struct access_data ad = { NULL, };
1503 if (!sym || !sym->initializer || sym->initialized)
1504 return;
1506 /* We need to output these puppies some day too.. */
1507 if (sym->ctype.modifiers & (MOD_STATIC | MOD_TOPLEVEL))
1508 return;
1510 sym->initialized = 1;
1511 ad.address = symbol_pseudo(ep, sym);
1512 linearize_initializer(ep, sym->initializer, &ad);
1513 finish_address_gen(ep, &ad);
1516 static pseudo_t linearize_compound_statement(struct entrypoint *ep, struct statement *stmt)
1518 pseudo_t pseudo;
1519 struct statement *s;
1520 struct symbol *sym;
1521 struct symbol *ret = stmt->ret;
1523 concat_symbol_list(stmt->syms, &ep->syms);
1525 FOR_EACH_PTR(stmt->syms, sym) {
1526 linearize_one_symbol(ep, sym);
1527 } END_FOR_EACH_PTR(sym);
1529 pseudo = VOID;
1530 FOR_EACH_PTR(stmt->stmts, s) {
1531 pseudo = linearize_statement(ep, s);
1532 } END_FOR_EACH_PTR(s);
1534 if (ret) {
1535 struct basic_block *bb = add_label(ep, ret);
1536 struct instruction *phi_node = first_instruction(bb->insns);
1538 if (!phi_node)
1539 return pseudo;
1541 if (pseudo_list_size(phi_node->phi_list)==1) {
1542 pseudo = first_pseudo(phi_node->phi_list);
1543 assert(pseudo->type == PSEUDO_PHI);
1544 return pseudo->def->src1;
1546 return phi_node->target;
1548 return pseudo;
1551 pseudo_t linearize_internal(struct entrypoint *ep, struct statement *stmt)
1553 struct instruction *insn = alloc_instruction(OP_CONTEXT, 0);
1554 struct expression *expr = stmt->expression;
1555 int value = 0;
1557 if (expr->type == EXPR_VALUE)
1558 value = expr->value;
1560 insn->increment = value;
1561 add_one_insn(ep, insn);
1562 return VOID;
1565 static void add_asm_input(struct entrypoint *ep, struct instruction *insn, struct expression *expr)
1567 pseudo_t pseudo = linearize_expression(ep, expr);
1569 use_pseudo(pseudo, add_pseudo(&insn->inputs, pseudo));
1572 static void add_asm_output(struct entrypoint *ep, struct instruction *insn, struct expression *expr)
1574 struct access_data ad = { NULL, };
1575 pseudo_t pseudo = alloc_pseudo(insn);
1577 if (!linearize_address_gen(ep, expr, &ad))
1578 return;
1579 linearize_store_gen(ep, pseudo, &ad);
1580 finish_address_gen(ep, &ad);
1581 add_pseudo(&insn->outputs, pseudo);
1584 pseudo_t linearize_asm_statement(struct entrypoint *ep, struct statement *stmt)
1586 int even_odd;
1587 struct expression *expr;
1588 struct instruction *insn;
1590 insn = alloc_instruction(OP_ASM, 0);
1591 expr = stmt->asm_string;
1592 if (!expr || expr->type != EXPR_STRING) {
1593 warning(stmt->pos, "expected string in inline asm");
1594 return VOID;
1596 insn->string = expr->string->data;
1598 /* Gather the inputs.. */
1599 even_odd = 0;
1600 FOR_EACH_PTR(stmt->asm_inputs, expr) {
1601 even_odd = 1 - even_odd;
1603 /* FIXME! We ignore the constraints for now.. */
1604 if (even_odd)
1605 continue;
1606 add_asm_input(ep, insn, expr);
1607 } END_FOR_EACH_PTR(expr);
1609 add_one_insn(ep, insn);
1611 /* Assign the outputs */
1612 even_odd = 0;
1613 FOR_EACH_PTR(stmt->asm_outputs, expr) {
1614 even_odd = 1 - even_odd;
1616 /* FIXME! We ignore the constraints for now.. */
1617 if (even_odd)
1618 continue;
1619 add_asm_output(ep, insn, expr);
1620 } END_FOR_EACH_PTR(expr);
1622 return VOID;
1625 static int multijmp_cmp(const void *_a, const void *_b)
1627 const struct multijmp *a = _a;
1628 const struct multijmp *b = _b;
1630 // "default" case?
1631 if (a->begin > a->end) {
1632 if (b->begin > b->end)
1633 return 0;
1634 return 1;
1636 if (b->begin > b->end)
1637 return -1;
1638 if (a->begin == b->begin) {
1639 if (a->end == b->end)
1640 return 0;
1641 return (a->end < b->end) ? -1 : 1;
1643 return a->begin < b->begin ? -1 : 1;
1646 static void sort_switch_cases(struct instruction *insn)
1648 sort_list((struct ptr_list **)&insn->multijmp_list, multijmp_cmp);
1651 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
1653 struct basic_block *bb;
1655 if (!stmt)
1656 return VOID;
1658 bb = ep->active;
1659 if (bb && !bb->insns)
1660 bb->pos = stmt->pos;
1662 switch (stmt->type) {
1663 case STMT_NONE:
1664 break;
1666 case STMT_INTERNAL:
1667 return linearize_internal(ep, stmt);
1669 case STMT_EXPRESSION:
1670 return linearize_expression(ep, stmt->expression);
1672 case STMT_ASM:
1673 return linearize_asm_statement(ep, stmt);
1675 case STMT_RETURN: {
1676 struct expression *expr = stmt->expression;
1677 struct basic_block *bb_return = get_bound_block(ep, stmt->ret_target);
1678 struct basic_block *active;
1679 pseudo_t src = linearize_expression(ep, expr);
1680 active = ep->active;
1681 if (active && src != &void_pseudo) {
1682 struct instruction *phi_node = first_instruction(bb_return->insns);
1683 pseudo_t phi;
1684 if (!phi_node) {
1685 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1686 phi_node->target = alloc_pseudo(phi_node);
1687 phi_node->bb = bb_return;
1688 add_instruction(&bb_return->insns, phi_node);
1690 phi = alloc_phi(active, src, type_size(expr->ctype));
1691 phi->ident = &return_ident;
1692 use_pseudo(phi, add_pseudo(&phi_node->phi_list, phi));
1694 add_goto(ep, bb_return);
1695 return VOID;
1698 case STMT_CASE: {
1699 add_label(ep, stmt->case_label);
1700 linearize_statement(ep, stmt->case_statement);
1701 break;
1704 case STMT_LABEL: {
1705 struct symbol *label = stmt->label_identifier;
1707 if (label->used) {
1708 add_label(ep, label);
1709 linearize_statement(ep, stmt->label_statement);
1711 break;
1714 case STMT_GOTO: {
1715 struct symbol *sym;
1716 struct expression *expr;
1717 struct instruction *goto_ins;
1718 struct basic_block *active;
1719 pseudo_t pseudo;
1721 active = ep->active;
1722 if (!bb_reachable(active))
1723 break;
1725 if (stmt->goto_label) {
1726 add_goto(ep, get_bound_block(ep, stmt->goto_label));
1727 break;
1730 expr = stmt->goto_expression;
1731 if (!expr)
1732 break;
1734 /* This can happen as part of simplification */
1735 if (expr->type == EXPR_LABEL) {
1736 add_goto(ep, get_bound_block(ep, expr->label_symbol));
1737 break;
1740 pseudo = linearize_expression(ep, expr);
1741 goto_ins = alloc_instruction(OP_COMPUTEDGOTO, 0);
1742 use_pseudo(pseudo, &goto_ins->target);
1743 add_one_insn(ep, goto_ins);
1745 FOR_EACH_PTR(stmt->target_list, sym) {
1746 struct basic_block *bb_computed = get_bound_block(ep, sym);
1747 struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0);
1748 add_multijmp(&goto_ins->multijmp_list, jmp);
1749 add_bb(&bb_computed->parents, ep->active);
1750 add_bb(&active->children, bb_computed);
1751 } END_FOR_EACH_PTR(sym);
1753 finish_block(ep);
1754 break;
1757 case STMT_COMPOUND:
1758 return linearize_compound_statement(ep, stmt);
1761 * This could take 'likely/unlikely' into account, and
1762 * switch the arms around appropriately..
1764 case STMT_IF: {
1765 struct basic_block *bb_true, *bb_false, *endif;
1766 struct expression *cond = stmt->if_conditional;
1768 bb_true = alloc_basic_block(ep, stmt->pos);
1769 bb_false = endif = alloc_basic_block(ep, stmt->pos);
1771 linearize_cond_branch(ep, cond, bb_true, bb_false);
1773 set_activeblock(ep, bb_true);
1774 linearize_statement(ep, stmt->if_true);
1776 if (stmt->if_false) {
1777 endif = alloc_basic_block(ep, stmt->pos);
1778 add_goto(ep, endif);
1779 set_activeblock(ep, bb_false);
1780 linearize_statement(ep, stmt->if_false);
1782 set_activeblock(ep, endif);
1783 break;
1786 case STMT_SWITCH: {
1787 struct symbol *sym;
1788 struct instruction *switch_ins;
1789 struct basic_block *switch_end = alloc_basic_block(ep, stmt->pos);
1790 struct basic_block *active, *default_case;
1791 struct multijmp *jmp;
1792 pseudo_t pseudo;
1794 pseudo = linearize_expression(ep, stmt->switch_expression);
1796 active = ep->active;
1797 if (!bb_reachable(active))
1798 break;
1800 switch_ins = alloc_instruction(OP_SWITCH, 0);
1801 use_pseudo(pseudo, &switch_ins->cond);
1802 add_one_insn(ep, switch_ins);
1803 finish_block(ep);
1805 default_case = NULL;
1806 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
1807 struct statement *case_stmt = sym->stmt;
1808 struct basic_block *bb_case = get_bound_block(ep, sym);
1810 if (!case_stmt->case_expression) {
1811 default_case = bb_case;
1812 continue;
1813 } else {
1814 int begin, end;
1816 begin = end = case_stmt->case_expression->value;
1817 if (case_stmt->case_to)
1818 end = case_stmt->case_to->value;
1819 if (begin > end)
1820 jmp = alloc_multijmp(bb_case, end, begin);
1821 else
1822 jmp = alloc_multijmp(bb_case, begin, end);
1825 add_multijmp(&switch_ins->multijmp_list, jmp);
1826 add_bb(&bb_case->parents, active);
1827 add_bb(&active->children, bb_case);
1828 } END_FOR_EACH_PTR(sym);
1830 bind_label(stmt->switch_break, switch_end, stmt->pos);
1832 /* And linearize the actual statement */
1833 linearize_statement(ep, stmt->switch_statement);
1834 set_activeblock(ep, switch_end);
1836 if (!default_case)
1837 default_case = switch_end;
1839 jmp = alloc_multijmp(default_case, 1, 0);
1840 add_multijmp(&switch_ins->multijmp_list, jmp);
1841 add_bb(&default_case->parents, active);
1842 add_bb(&active->children, default_case);
1843 sort_switch_cases(switch_ins);
1845 break;
1848 case STMT_ITERATOR: {
1849 struct statement *pre_statement = stmt->iterator_pre_statement;
1850 struct expression *pre_condition = stmt->iterator_pre_condition;
1851 struct statement *statement = stmt->iterator_statement;
1852 struct statement *post_statement = stmt->iterator_post_statement;
1853 struct expression *post_condition = stmt->iterator_post_condition;
1854 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end;
1856 concat_symbol_list(stmt->iterator_syms, &ep->syms);
1857 linearize_statement(ep, pre_statement);
1859 loop_body = loop_top = alloc_basic_block(ep, stmt->pos);
1860 loop_continue = alloc_basic_block(ep, stmt->pos);
1861 loop_end = alloc_basic_block(ep, stmt->pos);
1863 if (pre_condition == post_condition) {
1864 loop_top = alloc_basic_block(ep, stmt->pos);
1865 set_activeblock(ep, loop_top);
1868 if (pre_condition)
1869 linearize_cond_branch(ep, pre_condition, loop_body, loop_end);
1871 bind_label(stmt->iterator_continue, loop_continue, stmt->pos);
1872 bind_label(stmt->iterator_break, loop_end, stmt->pos);
1874 set_activeblock(ep, loop_body);
1875 linearize_statement(ep, statement);
1876 add_goto(ep, loop_continue);
1878 set_activeblock(ep, loop_continue);
1879 linearize_statement(ep, post_statement);
1880 if (!post_condition || pre_condition == post_condition)
1881 add_goto(ep, loop_top);
1882 else
1883 linearize_cond_branch(ep, post_condition, loop_top, loop_end);
1884 set_activeblock(ep, loop_end);
1885 break;
1888 default:
1889 break;
1891 return VOID;
1894 static struct entrypoint *linearize_fn(struct symbol *sym, struct symbol *base_type)
1896 struct entrypoint *ep;
1897 struct basic_block *bb;
1898 struct symbol *arg;
1899 struct instruction *entry;
1900 pseudo_t result;
1901 int i;
1903 if (!base_type->stmt)
1904 return NULL;
1906 ep = alloc_entrypoint();
1907 bb = alloc_basic_block(ep, sym->pos);
1909 ep->name = sym;
1910 set_activeblock(ep, bb);
1912 entry = alloc_instruction(OP_ENTRY, 0);
1913 add_one_insn(ep, entry);
1914 ep->entry = entry;
1916 concat_symbol_list(base_type->arguments, &ep->syms);
1918 /* FIXME!! We should do something else about varargs.. */
1919 i = 0;
1920 FOR_EACH_PTR(base_type->arguments, arg) {
1921 linearize_argument(ep, arg, ++i);
1922 } END_FOR_EACH_PTR(arg);
1924 result = linearize_statement(ep, base_type->stmt);
1925 if (bb_reachable(ep->active) && !bb_terminated(ep->active)) {
1926 struct symbol *ret_type = base_type->ctype.base_type;
1927 struct instruction *insn = alloc_typed_instruction(OP_RET, ret_type);
1929 if (type_size(ret_type) > 0)
1930 use_pseudo(result, &insn->src);
1931 add_one_insn(ep, insn);
1935 * Do trivial flow simplification - branches to
1936 * branches, kill dead basicblocks etc
1938 kill_unreachable_bbs(ep);
1941 * Turn symbols into pseudos
1943 simplify_symbol_usage(ep);
1945 repeat:
1947 * Remove trivial instructions, and try to CSE
1948 * the rest.
1950 do {
1951 cleanup_and_cse(ep);
1952 pack_basic_blocks(ep);
1953 } while (repeat_phase & REPEAT_CSE);
1955 kill_unreachable_bbs(ep);
1956 vrfy_flow(ep);
1958 /* Cleanup */
1959 clear_symbol_pseudos(ep);
1961 /* And track pseudo register usage */
1962 track_pseudo_liveness(ep);
1965 * Some flow optimizations can only effectively
1966 * be done when we've done liveness analysis. But
1967 * if they trigger, we need to start all over
1968 * again
1970 if (simplify_flow(ep)) {
1971 clear_liveness(ep);
1972 goto repeat;
1975 /* Finally, add deathnotes to pseudos now that we have them */
1976 track_pseudo_death(ep);
1978 return ep;
1981 struct entrypoint *linearize_symbol(struct symbol *sym)
1983 struct symbol *base_type;
1985 if (!sym)
1986 return NULL;
1987 base_type = sym->ctype.base_type;
1988 if (!base_type)
1989 return NULL;
1990 if (base_type->type == SYM_FN)
1991 return linearize_fn(sym, base_type);
1992 return NULL;