Get comparison sizes right.
[smatch.git] / linearize.c
blobe13c67041fd59604af10fc9b830025363700cc69
1 /*
2 * Linearize - walk the statement tree (but _not_ the expressions)
3 * to generate a linear version of it and the basic blocks.
5 * NOTE! We're not interested in the actual sub-expressions yet,
6 * even though they can generate conditional branches and
7 * subroutine calls. That's all "local" behaviour.
9 * Copyright (C) 2004 Linus Torvalds
10 * Copyright (C) 2004 Christopher Li
13 #include <string.h>
14 #include <stdarg.h>
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <assert.h>
19 #include "parse.h"
20 #include "expression.h"
21 #include "linearize.h"
22 #include "flow.h"
23 #include "target.h"
25 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
26 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
28 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right);
29 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val);
30 static void linearize_one_symbol(struct entrypoint *ep, struct symbol *sym);
32 struct access_data;
33 static pseudo_t add_load(struct entrypoint *ep, struct access_data *);
34 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *);
36 struct pseudo void_pseudo = {};
38 static struct instruction *alloc_instruction(int opcode, int size)
40 struct instruction * insn = __alloc_instruction(0);
41 insn->opcode = opcode;
42 insn->size = size;
43 return insn;
46 static inline int type_size(struct symbol *type)
48 return type ? type->bit_size > 0 ? type->bit_size : 0 : 0;
51 static struct instruction *alloc_typed_instruction(int opcode, struct symbol *type)
53 return alloc_instruction(opcode, type_size(type));
56 static struct entrypoint *alloc_entrypoint(void)
58 return __alloc_entrypoint(0);
61 static struct basic_block *alloc_basic_block(struct entrypoint *ep, struct position pos)
63 struct basic_block *bb = __alloc_basic_block(0);
64 bb->context = -1;
65 bb->pos = pos;
66 bb->ep = ep;
67 return bb;
70 static struct multijmp* alloc_multijmp(struct basic_block *target, int begin, int end)
72 struct multijmp *multijmp = __alloc_multijmp(0);
73 multijmp->target = target;
74 multijmp->begin = begin;
75 multijmp->end = end;
76 return multijmp;
79 static inline int regno(pseudo_t n)
81 int retval = -1;
82 if (n && n->type == PSEUDO_REG)
83 retval = n->nr;
84 return retval;
87 const char *show_pseudo(pseudo_t pseudo)
89 static int n;
90 static char buffer[4][64];
91 char *buf;
92 int i;
94 if (!pseudo)
95 return "no pseudo";
96 if (pseudo == VOID)
97 return "VOID";
98 buf = buffer[3 & ++n];
99 switch(pseudo->type) {
100 case PSEUDO_SYM: {
101 struct symbol *sym = pseudo->sym;
102 struct expression *expr;
104 if (sym->bb_target) {
105 snprintf(buf, 64, ".L%p", sym->bb_target);
106 break;
108 if (sym->ident) {
109 snprintf(buf, 64, "%s", show_ident(sym->ident));
110 break;
112 expr = sym->initializer;
113 snprintf(buf, 64, "<anon symbol:%p>", sym);
114 switch (expr->type) {
115 case EXPR_VALUE:
116 snprintf(buf, 64, "<symbol value: %lld>", expr->value);
117 break;
118 case EXPR_STRING:
119 return show_string(expr->string);
120 default:
121 break;
123 break;
125 case PSEUDO_REG:
126 i = snprintf(buf, 64, "%%r%d", pseudo->nr);
127 if (pseudo->ident)
128 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
129 break;
130 case PSEUDO_VAL: {
131 long long value = pseudo->value;
132 if (value > 1000 || value < -1000)
133 snprintf(buf, 64, "$%#llx", value);
134 else
135 snprintf(buf, 64, "$%lld", value);
136 break;
138 case PSEUDO_ARG:
139 snprintf(buf, 64, "%%arg%d", pseudo->nr);
140 break;
141 case PSEUDO_PHI:
142 i = snprintf(buf, 64, "%%phi%d", pseudo->nr);
143 if (pseudo->ident)
144 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
145 break;
146 default:
147 snprintf(buf, 64, "<bad pseudo type %d>", pseudo->type);
149 return buf;
152 static const char* opcodes[] = {
153 [OP_BADOP] = "bad_op",
155 /* Fn entrypoint */
156 [OP_ENTRY] = "<entry-point>",
158 /* Terminator */
159 [OP_RET] = "ret",
160 [OP_BR] = "br",
161 [OP_SWITCH] = "switch",
162 [OP_INVOKE] = "invoke",
163 [OP_COMPUTEDGOTO] = "jmp *",
164 [OP_UNWIND] = "unwind",
166 /* Binary */
167 [OP_ADD] = "add",
168 [OP_SUB] = "sub",
169 [OP_MUL] = "mul",
170 [OP_DIV] = "div",
171 [OP_MOD] = "mod",
172 [OP_SHL] = "shl",
173 [OP_SHR] = "shr",
175 /* Logical */
176 [OP_AND] = "and",
177 [OP_OR] = "or",
178 [OP_XOR] = "xor",
179 [OP_AND_BOOL] = "and-bool",
180 [OP_OR_BOOL] = "or-bool",
182 /* Binary comparison */
183 [OP_SET_EQ] = "seteq",
184 [OP_SET_NE] = "setne",
185 [OP_SET_LE] = "setle",
186 [OP_SET_GE] = "setge",
187 [OP_SET_LT] = "setlt",
188 [OP_SET_GT] = "setgt",
189 [OP_SET_B] = "setb",
190 [OP_SET_A] = "seta",
191 [OP_SET_BE] = "setbe",
192 [OP_SET_AE] = "setae",
194 /* Uni */
195 [OP_NOT] = "not",
196 [OP_NEG] = "neg",
198 /* Special three-input */
199 [OP_SEL] = "select",
201 /* Memory */
202 [OP_MALLOC] = "malloc",
203 [OP_FREE] = "free",
204 [OP_ALLOCA] = "alloca",
205 [OP_LOAD] = "load",
206 [OP_STORE] = "store",
207 [OP_SETVAL] = "set",
208 [OP_SYMADDR] = "symaddr",
209 [OP_GET_ELEMENT_PTR] = "getelem",
211 /* Other */
212 [OP_PHI] = "phi",
213 [OP_PHISOURCE] = "phisrc",
214 [OP_CAST] = "cast",
215 [OP_PTRCAST] = "ptrcast",
216 [OP_CALL] = "call",
217 [OP_VANEXT] = "va_next",
218 [OP_VAARG] = "va_arg",
219 [OP_SLICE] = "slice",
220 [OP_SNOP] = "snop",
221 [OP_LNOP] = "lnop",
222 [OP_NOP] = "nop",
223 [OP_DEATHNOTE] = "dead",
224 [OP_ASM] = "asm",
226 /* Sparse tagging (line numbers, context, whatever) */
227 [OP_CONTEXT] = "context",
230 static char *show_asm_constraints(char *buf, const char *sep, struct asm_constraint_list *list)
232 struct asm_constraint *entry;
234 FOR_EACH_PTR(list, entry) {
235 buf += sprintf(buf, "%s\"%s\"", sep, entry->constraint);
236 if (entry->pseudo)
237 buf += sprintf(buf, " (%s)", show_pseudo(entry->pseudo));
238 if (entry->ident)
239 buf += sprintf(buf, " [%s]", show_ident(entry->ident));
240 sep = ", ";
241 } END_FOR_EACH_PTR(entry);
242 return buf;
245 static char *show_asm(char *buf, struct instruction *insn)
247 struct asm_rules *rules = insn->asm_rules;
249 buf += sprintf(buf, "\"%s\"", insn->string);
250 buf = show_asm_constraints(buf, "\n\t\tout: ", rules->outputs);
251 buf = show_asm_constraints(buf, "\n\t\tin: ", rules->inputs);
252 buf = show_asm_constraints(buf, "\n\t\tclobber: ", rules->clobbers);
253 return buf;
256 const char *show_instruction(struct instruction *insn)
258 int opcode = insn->opcode;
259 static char buffer[1024];
260 char *buf;
262 buf = buffer;
263 if (!insn->bb)
264 buf += sprintf(buf, "# ");
266 if (opcode < sizeof(opcodes)/sizeof(char *)) {
267 const char *op = opcodes[opcode];
268 if (!op)
269 buf += sprintf(buf, "opcode:%d", opcode);
270 else
271 buf += sprintf(buf, "%s", op);
272 if (insn->size)
273 buf += sprintf(buf, ".%d", insn->size);
274 memset(buf, ' ', 20);
275 buf++;
278 if (buf < buffer + 12)
279 buf = buffer + 12;
280 switch (opcode) {
281 case OP_RET:
282 if (insn->src && insn->src != VOID)
283 buf += sprintf(buf, "%s", show_pseudo(insn->src));
284 break;
285 case OP_BR:
286 if (insn->bb_true && insn->bb_false) {
287 buf += sprintf(buf, "%s, .L%p, .L%p", show_pseudo(insn->cond), insn->bb_true, insn->bb_false);
288 break;
290 buf += sprintf(buf, ".L%p", insn->bb_true ? insn->bb_true : insn->bb_false);
291 break;
293 case OP_SYMADDR: {
294 struct symbol *sym = insn->symbol->sym;
295 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
297 if (sym->bb_target) {
298 buf += sprintf(buf, ".L%p", sym->bb_target);
299 break;
301 if (sym->ident) {
302 buf += sprintf(buf, "%s", show_ident(sym->ident));
303 break;
305 buf += sprintf(buf, "<anon symbol:%p>", sym);
306 break;
309 case OP_SETVAL: {
310 struct expression *expr = insn->val;
311 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
313 if (!expr) {
314 buf += sprintf(buf, "%s", "<none>");
315 break;
318 switch (expr->type) {
319 case EXPR_VALUE:
320 buf += sprintf(buf, "%lld", expr->value);
321 break;
322 case EXPR_FVALUE:
323 buf += sprintf(buf, "%Lf", expr->fvalue);
324 break;
325 case EXPR_STRING:
326 buf += sprintf(buf, "%.40s", show_string(expr->string));
327 break;
328 case EXPR_SYMBOL:
329 buf += sprintf(buf, "%s", show_ident(expr->symbol->ident));
330 break;
331 case EXPR_LABEL:
332 buf += sprintf(buf, ".L%p", expr->symbol->bb_target);
333 break;
334 default:
335 buf += sprintf(buf, "SETVAL EXPR TYPE %d", expr->type);
337 break;
339 case OP_SWITCH: {
340 struct multijmp *jmp;
341 buf += sprintf(buf, "%s", show_pseudo(insn->target));
342 FOR_EACH_PTR(insn->multijmp_list, jmp) {
343 if (jmp->begin == jmp->end)
344 buf += sprintf(buf, ", %d -> .L%p", jmp->begin, jmp->target);
345 else if (jmp->begin < jmp->end)
346 buf += sprintf(buf, ", %d ... %d -> .L%p", jmp->begin, jmp->end, jmp->target);
347 else
348 buf += sprintf(buf, ", default -> .L%p", jmp->target);
349 } END_FOR_EACH_PTR(jmp);
350 break;
352 case OP_COMPUTEDGOTO: {
353 struct multijmp *jmp;
354 buf += sprintf(buf, "%s", show_pseudo(insn->target));
355 FOR_EACH_PTR(insn->multijmp_list, jmp) {
356 buf += sprintf(buf, ", .L%p", jmp->target);
357 } END_FOR_EACH_PTR(jmp);
358 break;
361 case OP_PHISOURCE: {
362 struct instruction *phi;
363 buf += sprintf(buf, "%s <- %s ", show_pseudo(insn->target), show_pseudo(insn->phi_src));
364 FOR_EACH_PTR(insn->phi_users, phi) {
365 buf += sprintf(buf, " (%s)", show_pseudo(phi->target));
366 } END_FOR_EACH_PTR(phi);
367 break;
370 case OP_PHI: {
371 pseudo_t phi;
372 const char *s = " <-";
373 buf += sprintf(buf, "%s", show_pseudo(insn->target));
374 FOR_EACH_PTR(insn->phi_list, phi) {
375 buf += sprintf(buf, "%s %s", s, show_pseudo(phi));
376 s = ",";
377 } END_FOR_EACH_PTR(phi);
378 break;
380 case OP_LOAD: case OP_LNOP:
381 buf += sprintf(buf, "%s <- %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
382 break;
383 case OP_STORE: case OP_SNOP:
384 buf += sprintf(buf, "%s -> %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
385 break;
386 case OP_CALL: {
387 struct pseudo *arg;
388 if (insn->target && insn->target != VOID)
389 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
390 buf += sprintf(buf, "%s", show_pseudo(insn->func));
391 FOR_EACH_PTR(insn->arguments, arg) {
392 buf += sprintf(buf, ", %s", show_pseudo(arg));
393 } END_FOR_EACH_PTR(arg);
394 break;
396 case OP_CAST:
397 case OP_PTRCAST:
398 buf += sprintf(buf, "%s <- (%d) %s",
399 show_pseudo(insn->target),
400 type_size(insn->orig_type),
401 show_pseudo(insn->src));
402 break;
403 case OP_BINARY ... OP_BINARY_END:
404 case OP_BINCMP ... OP_BINCMP_END:
405 buf += sprintf(buf, "%s <- %s, %s", show_pseudo(insn->target), show_pseudo(insn->src1), show_pseudo(insn->src2));
406 break;
408 case OP_SEL:
409 buf += sprintf(buf, "%s <- %s, %s, %s", show_pseudo(insn->target),
410 show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
411 break;
413 case OP_SLICE:
414 buf += sprintf(buf, "%s <- %s, %d, %d", show_pseudo(insn->target), show_pseudo(insn->base), insn->from, insn->len);
415 break;
417 case OP_NOT: case OP_NEG:
418 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
419 break;
421 case OP_CONTEXT:
422 buf += sprintf(buf, "%d", insn->increment);
423 break;
424 case OP_NOP:
425 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
426 break;
427 case OP_DEATHNOTE:
428 buf += sprintf(buf, "%s", show_pseudo(insn->target));
429 break;
430 case OP_ASM:
431 buf = show_asm(buf, insn);
432 break;
433 default:
434 break;
436 do { --buf; } while (*buf == ' ');
437 *++buf = 0;
438 return buffer;
441 void show_bb(struct basic_block *bb)
443 struct instruction *insn;
445 printf(".L%p:\n", bb);
446 if (verbose) {
447 pseudo_t needs, defines;
448 printf("%s:%d\n", stream_name(bb->pos.stream), bb->pos.line);
450 FOR_EACH_PTR(bb->needs, needs) {
451 struct instruction *def = needs->def;
452 if (def->opcode != OP_PHI) {
453 printf(" **uses %s (from .L%p)**\n", show_pseudo(needs), def->bb);
454 } else {
455 pseudo_t phi;
456 const char *sep = " ";
457 printf(" **uses %s (from", show_pseudo(needs));
458 FOR_EACH_PTR(def->phi_list, phi) {
459 if (phi == VOID)
460 continue;
461 printf("%s(%s:.L%p)", sep, show_pseudo(phi), phi->def->bb);
462 sep = ", ";
463 } END_FOR_EACH_PTR(phi);
464 printf(")**\n");
466 } END_FOR_EACH_PTR(needs);
468 FOR_EACH_PTR(bb->defines, defines) {
469 printf(" **defines %s **\n", show_pseudo(defines));
470 } END_FOR_EACH_PTR(defines);
472 if (bb->parents) {
473 struct basic_block *from;
474 FOR_EACH_PTR(bb->parents, from) {
475 printf(" **from %p (%s:%d:%d)**\n", from,
476 stream_name(from->pos.stream), from->pos.line, from->pos.pos);
477 } END_FOR_EACH_PTR(from);
480 if (bb->children) {
481 struct basic_block *to;
482 FOR_EACH_PTR(bb->children, to) {
483 printf(" **to %p (%s:%d:%d)**\n", to,
484 stream_name(to->pos.stream), to->pos.line, to->pos.pos);
485 } END_FOR_EACH_PTR(to);
489 FOR_EACH_PTR(bb->insns, insn) {
490 if (!insn->bb && verbose < 2)
491 continue;
492 printf("\t%s\n", show_instruction(insn));
493 } END_FOR_EACH_PTR(insn);
494 if (!bb_terminated(bb))
495 printf("\tEND\n");
498 static void show_symbol_usage(pseudo_t pseudo)
500 if (pseudo) {
501 pseudo_t *pp;
502 FOR_EACH_PTR(pseudo->users, pp) {
503 struct instruction *insn = container(pp, struct instruction, src);
504 printf("\t%s\n", show_instruction(insn));
505 } END_FOR_EACH_PTR(pp);
509 void show_entry(struct entrypoint *ep)
511 struct symbol *sym;
512 struct basic_block *bb;
514 printf("%s:\n", show_ident(ep->name->ident));
516 if (verbose) {
517 printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
519 FOR_EACH_PTR(ep->syms, sym) {
520 if (!sym->pseudo)
521 continue;
522 if (!sym->pseudo->users)
523 continue;
524 printf(" sym: %p %s\n", sym, show_ident(sym->ident));
525 if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE))
526 printf("\texternal visibility\n");
527 show_symbol_usage(sym->pseudo);
528 } END_FOR_EACH_PTR(sym);
530 printf("\n");
533 FOR_EACH_PTR(ep->bbs, bb) {
534 if (!bb)
535 continue;
536 if (!bb->parents && !bb->children && !bb->insns && verbose < 2)
537 continue;
538 show_bb(bb);
539 printf("\n");
540 } END_FOR_EACH_PTR(bb);
542 printf("\n");
545 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos)
547 if (label->bb_target)
548 warning(pos, "label '%s' already bound", show_ident(label->ident));
549 label->bb_target = bb;
552 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label)
554 struct basic_block *bb = label->bb_target;
556 if (!bb) {
557 bb = alloc_basic_block(ep, label->pos);
558 label->bb_target = bb;
560 return bb;
563 static void finish_block(struct entrypoint *ep)
565 struct basic_block *src = ep->active;
566 if (bb_reachable(src))
567 ep->active = NULL;
570 static void add_goto(struct entrypoint *ep, struct basic_block *dst)
572 struct basic_block *src = ep->active;
573 if (bb_reachable(src)) {
574 struct instruction *br = alloc_instruction(OP_BR, 0);
575 br->bb_true = dst;
576 add_bb(&dst->parents, src);
577 add_bb(&src->children, dst);
578 br->bb = src;
579 add_instruction(&src->insns, br);
580 ep->active = NULL;
584 static void add_one_insn(struct entrypoint *ep, struct instruction *insn)
586 struct basic_block *bb = ep->active;
588 if (bb_reachable(bb)) {
589 insn->bb = bb;
590 add_instruction(&bb->insns, insn);
594 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb)
596 if (!bb_terminated(ep->active))
597 add_goto(ep, bb);
599 ep->active = bb;
600 if (bb_reachable(bb))
601 add_bb(&ep->bbs, bb);
604 static void remove_parent(struct basic_block *child, struct basic_block *parent)
606 remove_bb_from_list(&child->parents, parent, 1);
607 if (!child->parents)
608 kill_bb(child);
611 /* Change a "switch" into a branch */
612 void insert_branch(struct basic_block *bb, struct instruction *jmp, struct basic_block *target)
614 struct instruction *br, *old;
615 struct basic_block *child;
617 /* Remove the switch */
618 old = delete_last_instruction(&bb->insns);
619 assert(old == jmp);
621 br = alloc_instruction(OP_BR, 0);
622 br->bb = bb;
623 br->bb_true = target;
624 add_instruction(&bb->insns, br);
626 FOR_EACH_PTR(bb->children, child) {
627 if (child == target) {
628 target = NULL; /* Trigger just once */
629 continue;
631 DELETE_CURRENT_PTR(child);
632 remove_parent(child, bb);
633 } END_FOR_EACH_PTR(child);
634 PACK_PTR_LIST(&bb->children);
638 void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi_node, pseudo_t true, pseudo_t false)
640 pseudo_t target;
641 struct instruction *select;
643 /* Remove the 'br' */
644 delete_last_instruction(&bb->insns);
646 select = alloc_instruction(OP_SEL, phi_node->size);
647 select->bb = bb;
649 assert(br->cond);
650 use_pseudo(br->cond, &select->src1);
652 target = phi_node->target;
653 assert(target->def == phi_node);
654 select->target = target;
655 target->def = select;
657 use_pseudo(true, &select->src2);
658 use_pseudo(false, &select->src3);
660 add_instruction(&bb->insns, select);
661 add_instruction(&bb->insns, br);
664 static inline int bb_empty(struct basic_block *bb)
666 return !bb->insns;
669 /* Add a label to the currently active block, return new active block */
670 static struct basic_block * add_label(struct entrypoint *ep, struct symbol *label)
672 struct basic_block *bb = label->bb_target;
674 if (bb) {
675 set_activeblock(ep, bb);
676 return bb;
678 bb = ep->active;
679 if (!bb_reachable(bb) || !bb_empty(bb)) {
680 bb = alloc_basic_block(ep, label->pos);
681 set_activeblock(ep, bb);
683 label->bb_target = bb;
684 return bb;
687 static void add_branch(struct entrypoint *ep, struct expression *expr, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false)
689 struct basic_block *bb = ep->active;
690 struct instruction *br;
692 if (bb_reachable(bb)) {
693 br = alloc_instruction(OP_BR, 0);
694 use_pseudo(cond, &br->cond);
695 br->bb_true = bb_true;
696 br->bb_false = bb_false;
697 add_bb(&bb_true->parents, bb);
698 add_bb(&bb_false->parents, bb);
699 add_bb(&bb->children, bb_true);
700 add_bb(&bb->children, bb_false);
701 add_one_insn(ep, br);
705 /* Dummy pseudo allocator */
706 pseudo_t alloc_pseudo(struct instruction *def)
708 static int nr = 0;
709 struct pseudo * pseudo = __alloc_pseudo(0);
710 pseudo->type = PSEUDO_REG;
711 pseudo->nr = ++nr;
712 pseudo->def = def;
713 return pseudo;
716 static void clear_symbol_pseudos(struct entrypoint *ep)
718 struct symbol *sym;
720 FOR_EACH_PTR(ep->accesses, sym) {
721 sym->pseudo = NULL;
722 } END_FOR_EACH_PTR(sym);
725 static pseudo_t symbol_pseudo(struct entrypoint *ep, struct symbol *sym)
727 pseudo_t pseudo;
729 if (!sym)
730 return VOID;
732 pseudo = sym->pseudo;
733 if (!pseudo) {
734 pseudo = __alloc_pseudo(0);
735 pseudo->type = PSEUDO_SYM;
736 pseudo->sym = sym;
737 pseudo->ident = sym->ident;
738 sym->pseudo = pseudo;
739 add_symbol(&ep->accesses, sym);
741 /* Symbol pseudos have neither nr, usage nor def */
742 return pseudo;
745 pseudo_t value_pseudo(long long val)
747 #define MAX_VAL_HASH 64
748 static struct pseudo_list *prev[MAX_VAL_HASH];
749 int hash = val & (MAX_VAL_HASH-1);
750 struct pseudo_list **list = prev + hash;
751 pseudo_t pseudo;
753 FOR_EACH_PTR(*list, pseudo) {
754 if (pseudo->value == val)
755 return pseudo;
756 } END_FOR_EACH_PTR(pseudo);
758 pseudo = __alloc_pseudo(0);
759 pseudo->type = PSEUDO_VAL;
760 pseudo->value = val;
761 add_pseudo(list, pseudo);
763 /* Value pseudos have neither nr, usage nor def */
764 return pseudo;
767 static pseudo_t argument_pseudo(struct entrypoint *ep, int nr)
769 pseudo_t pseudo = __alloc_pseudo(0);
770 struct instruction *entry = ep->entry;
772 pseudo->type = PSEUDO_ARG;
773 pseudo->nr = nr;
774 pseudo->def = entry;
775 add_pseudo(&entry->arg_list, pseudo);
777 /* Argument pseudos have neither usage nor def */
778 return pseudo;
781 pseudo_t alloc_phi(struct basic_block *source, pseudo_t pseudo, int size)
783 struct instruction *insn = alloc_instruction(OP_PHISOURCE, size);
784 pseudo_t phi = __alloc_pseudo(0);
785 static int nr = 0;
787 phi->type = PSEUDO_PHI;
788 phi->nr = ++nr;
789 phi->def = insn;
791 use_pseudo(pseudo, &insn->phi_src);
792 insn->bb = source;
793 insn->target = phi;
794 add_instruction(&source->insns, insn);
795 return phi;
799 * We carry the "access_data" structure around for any accesses,
800 * which simplifies things a lot. It contains all the access
801 * information in one place.
803 struct access_data {
804 struct symbol *result_type; // result ctype
805 struct symbol *source_type; // source ctype
806 pseudo_t address; // pseudo containing address ..
807 pseudo_t origval; // pseudo for original value ..
808 unsigned int offset, alignment; // byte offset
809 unsigned int bit_size, bit_offset; // which bits
810 struct position pos;
813 static void finish_address_gen(struct entrypoint *ep, struct access_data *ad)
817 static int linearize_simple_address(struct entrypoint *ep,
818 struct expression *addr,
819 struct access_data *ad)
821 if (addr->type == EXPR_SYMBOL) {
822 linearize_one_symbol(ep, addr->symbol);
823 ad->address = symbol_pseudo(ep, addr->symbol);
824 return 1;
826 if (addr->type == EXPR_BINOP) {
827 if (addr->right->type == EXPR_VALUE) {
828 if (addr->op == '+') {
829 ad->offset += get_expression_value(addr->right);
830 return linearize_simple_address(ep, addr->left, ad);
834 ad->address = linearize_expression(ep, addr);
835 return 1;
838 static struct symbol *base_type(struct symbol *sym)
840 struct symbol *base = sym;
842 if (sym) {
843 if (sym->type == SYM_NODE)
844 base = base->ctype.base_type;
845 if (base->type == SYM_BITFIELD)
846 return base->ctype.base_type;
848 return sym;
851 static int linearize_address_gen(struct entrypoint *ep,
852 struct expression *expr,
853 struct access_data *ad)
855 struct symbol *ctype = expr->ctype;
857 if (!ctype)
858 return 0;
859 ad->pos = expr->pos;
860 ad->result_type = ctype;
861 ad->source_type = base_type(ctype);
862 ad->bit_size = ctype->bit_size;
863 ad->alignment = ctype->ctype.alignment;
864 ad->bit_offset = ctype->bit_offset;
865 if (expr->type == EXPR_PREOP && expr->op == '*')
866 return linearize_simple_address(ep, expr->unop, ad);
868 warning(expr->pos, "generating address of non-lvalue (%d)", expr->type);
869 return 0;
872 static pseudo_t add_load(struct entrypoint *ep, struct access_data *ad)
874 struct instruction *insn;
875 pseudo_t new;
877 new = ad->origval;
878 if (0 && new)
879 return new;
881 insn = alloc_typed_instruction(OP_LOAD, ad->source_type);
882 new = alloc_pseudo(insn);
883 ad->origval = new;
885 insn->target = new;
886 insn->offset = ad->offset;
887 use_pseudo(ad->address, &insn->src);
888 add_one_insn(ep, insn);
889 return new;
892 static void add_store(struct entrypoint *ep, struct access_data *ad, pseudo_t value)
894 struct basic_block *bb = ep->active;
896 if (bb_reachable(bb)) {
897 struct instruction *store = alloc_typed_instruction(OP_STORE, ad->source_type);
898 store->offset = ad->offset;
899 use_pseudo(value, &store->target);
900 use_pseudo(ad->address, &store->src);
901 add_one_insn(ep, store);
905 static pseudo_t linearize_store_gen(struct entrypoint *ep,
906 pseudo_t value,
907 struct access_data *ad)
909 pseudo_t store = value;
911 if (type_size(ad->source_type) != type_size(ad->result_type)) {
912 pseudo_t orig = add_load(ep, ad);
913 int shift = ad->bit_offset;
914 unsigned long long mask = (1ULL << ad->bit_size)-1;
916 if (shift) {
917 store = add_binary_op(ep, ad->source_type, OP_SHL, value, value_pseudo(shift));
918 mask <<= shift;
920 orig = add_binary_op(ep, ad->source_type, OP_AND, orig, value_pseudo(~mask));
921 store = add_binary_op(ep, ad->source_type, OP_OR, orig, store);
923 add_store(ep, ad, store);
924 return value;
927 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right)
929 struct instruction *insn = alloc_typed_instruction(op, ctype);
930 pseudo_t target = alloc_pseudo(insn);
931 insn->target = target;
932 use_pseudo(left, &insn->src1);
933 use_pseudo(right, &insn->src2);
934 add_one_insn(ep, insn);
935 return target;
938 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val)
940 struct instruction *insn = alloc_typed_instruction(OP_SETVAL, ctype);
941 pseudo_t target = alloc_pseudo(insn);
942 insn->target = target;
943 insn->val = val;
944 add_one_insn(ep, insn);
945 return target;
948 static pseudo_t add_symbol_address(struct entrypoint *ep, struct symbol *sym)
950 struct instruction *insn = alloc_instruction(OP_SYMADDR, bits_in_pointer);
951 pseudo_t target = alloc_pseudo(insn);
953 insn->target = target;
954 use_pseudo(symbol_pseudo(ep, sym), &insn->symbol);
955 add_one_insn(ep, insn);
956 return target;
959 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct access_data *ad)
961 pseudo_t new = add_load(ep, ad);
963 if (ad->bit_offset) {
964 pseudo_t shift = value_pseudo(ad->bit_offset);
965 pseudo_t newval = add_binary_op(ep, ad->source_type, OP_SHR, new, shift);
966 new = newval;
969 return new;
972 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
974 struct access_data ad = { NULL, };
975 pseudo_t value;
977 if (!linearize_address_gen(ep, expr, &ad))
978 return VOID;
979 value = linearize_load_gen(ep, &ad);
980 finish_address_gen(ep, &ad);
981 return value;
984 /* FIXME: FP */
985 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
987 struct access_data ad = { NULL, };
988 pseudo_t old, new, one;
989 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB;
991 if (!linearize_address_gen(ep, expr->unop, &ad))
992 return VOID;
994 old = linearize_load_gen(ep, &ad);
995 one = value_pseudo(expr->op_value);
996 new = add_binary_op(ep, expr->ctype, op, old, one);
997 linearize_store_gen(ep, new, &ad);
998 finish_address_gen(ep, &ad);
999 return postop ? old : new;
1002 static pseudo_t add_uniop(struct entrypoint *ep, struct expression *expr, int op, pseudo_t src)
1004 struct instruction *insn = alloc_typed_instruction(op, expr->ctype);
1005 pseudo_t new = alloc_pseudo(insn);
1007 insn->target = new;
1008 use_pseudo(src, &insn->src1);
1009 add_one_insn(ep, insn);
1010 return new;
1013 static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr)
1015 pseudo_t pre = linearize_expression(ep, expr->base);
1016 struct instruction *insn = alloc_typed_instruction(OP_SLICE, expr->ctype);
1017 pseudo_t new = alloc_pseudo(insn);
1019 insn->target = new;
1020 insn->from = expr->r_bitpos;
1021 insn->len = expr->r_nrbits;
1022 use_pseudo(pre, &insn->base);
1023 add_one_insn(ep, insn);
1024 return new;
1027 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
1029 pseudo_t pre = linearize_expression(ep, expr->unop);
1030 switch (expr->op) {
1031 case '+':
1032 return pre;
1033 case '!': {
1034 pseudo_t zero = value_pseudo(0);
1035 return add_binary_op(ep, expr->unop->ctype, OP_SET_EQ, pre, zero);
1037 case '~':
1038 return add_uniop(ep, expr, OP_NOT, pre);
1039 case '-':
1040 return add_uniop(ep, expr, OP_NEG, pre);
1042 return VOID;
1045 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
1048 * '*' is an lvalue access, and is fundamentally different
1049 * from an arithmetic operation. Maybe it should have an
1050 * expression type of its own..
1052 if (expr->op == '*')
1053 return linearize_access(ep, expr);
1054 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
1055 return linearize_inc_dec(ep, expr, 0);
1056 return linearize_regular_preop(ep, expr);
1059 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr)
1061 return linearize_inc_dec(ep, expr, 1);
1064 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
1066 struct access_data ad = { NULL, };
1067 struct expression *target = expr->left;
1068 pseudo_t value;
1070 value = linearize_expression(ep, expr->right);
1071 if (!linearize_address_gen(ep, target, &ad))
1072 return VOID;
1073 if (expr->op != '=') {
1074 pseudo_t oldvalue = linearize_load_gen(ep, &ad);
1075 pseudo_t dst;
1076 static const int op_trans[] = {
1077 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD,
1078 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB,
1079 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MUL,
1080 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIV,
1081 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MOD,
1082 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL,
1083 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_SHR,
1084 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND,
1085 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR,
1086 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR
1088 dst = add_binary_op(ep, expr->ctype, op_trans[expr->op - SPECIAL_BASE], oldvalue, value);
1089 value = dst;
1091 value = linearize_store_gen(ep, value, &ad);
1092 finish_address_gen(ep, &ad);
1093 return value;
1096 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr)
1098 struct expression *arg, *fn;
1099 struct instruction *insn = alloc_typed_instruction(OP_CALL, expr->ctype);
1100 pseudo_t retval, call;
1101 int context_diff;
1103 if (!expr->ctype) {
1104 warning(expr->pos, "call with no type!");
1105 return VOID;
1108 FOR_EACH_PTR(expr->args, arg) {
1109 pseudo_t new = linearize_expression(ep, arg);
1110 use_pseudo(new, add_pseudo(&insn->arguments, new));
1111 } END_FOR_EACH_PTR(arg);
1113 fn = expr->fn;
1115 context_diff = 0;
1116 if (fn->ctype) {
1117 int in = fn->ctype->ctype.in_context;
1118 int out = fn->ctype->ctype.out_context;
1119 if (in < 0 || out < 0)
1120 in = out = 0;
1121 context_diff = out - in;
1124 if (fn->type == EXPR_PREOP) {
1125 if (fn->unop->type == EXPR_SYMBOL) {
1126 struct symbol *sym = fn->unop->symbol;
1127 if (sym->ctype.base_type->type == SYM_FN)
1128 fn = fn->unop;
1131 if (fn->type == EXPR_SYMBOL) {
1132 call = symbol_pseudo(ep, fn->symbol);
1133 } else {
1134 call = linearize_expression(ep, fn);
1136 use_pseudo(call, &insn->func);
1137 retval = VOID;
1138 if (expr->ctype != &void_ctype)
1139 retval = alloc_pseudo(insn);
1140 insn->target = retval;
1141 add_one_insn(ep, insn);
1143 if (context_diff) {
1144 insn = alloc_instruction(OP_CONTEXT, 0);
1145 insn->increment = context_diff;
1146 add_one_insn(ep, insn);
1149 return retval;
1152 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr)
1154 pseudo_t src1, src2, dst;
1155 static const int opcode[] = {
1156 ['+'] = OP_ADD, ['-'] = OP_SUB,
1157 ['*'] = OP_MUL, ['/'] = OP_DIV,
1158 ['%'] = OP_MOD, ['&'] = OP_AND,
1159 ['|'] = OP_OR, ['^'] = OP_XOR,
1160 [SPECIAL_LEFTSHIFT] = OP_SHL,
1161 [SPECIAL_RIGHTSHIFT] = OP_SHR,
1162 [SPECIAL_LOGICAL_AND] = OP_AND_BOOL,
1163 [SPECIAL_LOGICAL_OR] = OP_OR_BOOL,
1166 src1 = linearize_expression(ep, expr->left);
1167 src2 = linearize_expression(ep, expr->right);
1168 dst = add_binary_op(ep, expr->ctype, opcode[expr->op], src1, src2);
1169 return dst;
1172 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1174 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1176 static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr)
1178 pseudo_t cond, true, false, res;
1179 struct instruction *insn;
1181 true = linearize_expression(ep, expr->cond_true);
1182 false = linearize_expression(ep, expr->cond_false);
1183 cond = linearize_expression(ep, expr->conditional);
1185 insn = alloc_typed_instruction(OP_SEL, expr->ctype);
1186 if (!expr->cond_true)
1187 true = cond;
1188 use_pseudo(cond, &insn->src1);
1189 use_pseudo(true, &insn->src2);
1190 use_pseudo(false, &insn->src3);
1192 res = alloc_pseudo(insn);
1193 insn->target = res;
1194 add_one_insn(ep, insn);
1195 return res;
1198 static pseudo_t add_join_conditional(struct entrypoint *ep, struct expression *expr,
1199 pseudo_t phi1, pseudo_t phi2)
1201 pseudo_t target;
1202 struct instruction *phi_node;
1204 if (phi1 == VOID)
1205 return phi2;
1206 if (phi2 == VOID)
1207 return phi1;
1209 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1210 use_pseudo(phi1, add_pseudo(&phi_node->phi_list, phi1));
1211 use_pseudo(phi2, add_pseudo(&phi_node->phi_list, phi2));
1212 phi_node->target = target = alloc_pseudo(phi_node);
1213 add_one_insn(ep, phi_node);
1214 return target;
1217 static pseudo_t linearize_short_conditional(struct entrypoint *ep, struct expression *expr,
1218 struct expression *cond,
1219 struct expression *expr_false)
1221 pseudo_t src1, src2;
1222 struct basic_block *bb_false = alloc_basic_block(ep, expr_false->pos);
1223 struct basic_block *merge = alloc_basic_block(ep, expr->pos);
1224 pseudo_t phi1, phi2;
1225 int size = type_size(expr->ctype);
1227 src1 = linearize_expression(ep, cond);
1228 phi1 = alloc_phi(ep->active, src1, size);
1229 add_branch(ep, expr, src1, merge, bb_false);
1231 set_activeblock(ep, bb_false);
1232 src2 = linearize_expression(ep, expr_false);
1233 phi2 = alloc_phi(ep->active, src2, size);
1234 set_activeblock(ep, merge);
1236 return add_join_conditional(ep, expr, phi1, phi2);
1239 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr,
1240 struct expression *cond,
1241 struct expression *expr_true,
1242 struct expression *expr_false)
1244 pseudo_t src1, src2;
1245 pseudo_t phi1, phi2;
1246 struct basic_block *bb_true = alloc_basic_block(ep, expr_true->pos);
1247 struct basic_block *bb_false = alloc_basic_block(ep, expr_false->pos);
1248 struct basic_block *merge = alloc_basic_block(ep, expr->pos);
1249 int size = type_size(expr->ctype);
1251 linearize_cond_branch(ep, cond, bb_true, bb_false);
1253 set_activeblock(ep, bb_true);
1254 src1 = linearize_expression(ep, expr_true);
1255 phi1 = alloc_phi(ep->active, src1, size);
1256 add_goto(ep, merge);
1258 set_activeblock(ep, bb_false);
1259 src2 = linearize_expression(ep, expr_false);
1260 phi2 = alloc_phi(ep->active, src2, size);
1261 set_activeblock(ep, merge);
1263 return add_join_conditional(ep, expr, phi1, phi2);
1266 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr)
1268 struct expression *shortcut;
1270 shortcut = alloc_const_expression(expr->pos, expr->op == SPECIAL_LOGICAL_OR);
1271 shortcut->ctype = expr->ctype;
1272 return linearize_conditional(ep, expr, expr->left, shortcut, expr->right);
1275 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr)
1277 static const int cmpop[] = {
1278 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT,
1279 [SPECIAL_EQUAL] = OP_SET_EQ,
1280 [SPECIAL_NOTEQUAL] = OP_SET_NE,
1281 [SPECIAL_GTE] = OP_SET_GE,
1282 [SPECIAL_LTE] = OP_SET_LE,
1283 [SPECIAL_UNSIGNED_LT] = OP_SET_B,
1284 [SPECIAL_UNSIGNED_GT] = OP_SET_A,
1285 [SPECIAL_UNSIGNED_LTE] = OP_SET_BE,
1286 [SPECIAL_UNSIGNED_GTE] = OP_SET_AE,
1289 pseudo_t src1 = linearize_expression(ep, expr->left);
1290 pseudo_t src2 = linearize_expression(ep, expr->right);
1291 pseudo_t dst = add_binary_op(ep, expr->left->ctype, cmpop[expr->op], src1, src2);
1292 return dst;
1296 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1298 pseudo_t cond;
1300 if (!expr || !bb_reachable(ep->active))
1301 return VOID;
1303 switch (expr->type) {
1305 case EXPR_STRING:
1306 case EXPR_VALUE:
1307 add_goto(ep, expr->value ? bb_true : bb_false);
1308 return VOID;
1310 case EXPR_FVALUE:
1311 add_goto(ep, expr->fvalue ? bb_true : bb_false);
1312 return VOID;
1314 case EXPR_LOGICAL:
1315 linearize_logical_branch(ep, expr, bb_true, bb_false);
1316 return VOID;
1318 case EXPR_COMPARE:
1319 cond = linearize_compare(ep, expr);
1320 add_branch(ep, expr, cond, bb_true, bb_false);
1321 break;
1323 case EXPR_PREOP:
1324 if (expr->op == '!')
1325 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true);
1326 /* fall through */
1327 default: {
1328 cond = linearize_expression(ep, expr);
1329 add_branch(ep, expr, cond, bb_true, bb_false);
1331 return VOID;
1334 return VOID;
1339 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1341 struct basic_block *next = alloc_basic_block(ep, expr->pos);
1343 if (expr->op == SPECIAL_LOGICAL_OR)
1344 linearize_cond_branch(ep, expr->left, bb_true, next);
1345 else
1346 linearize_cond_branch(ep, expr->left, next, bb_false);
1347 set_activeblock(ep, next);
1348 linearize_cond_branch(ep, expr->right, bb_true, bb_false);
1349 return VOID;
1353 * Casts to pointers are "less safe" than other casts, since
1354 * they imply type-unsafe accesses. "void *" is a special
1355 * case, since you can't access through it anyway without another
1356 * cast.
1358 static struct instruction *alloc_cast_instruction(struct symbol *ctype)
1360 int opcode = OP_CAST;
1361 struct symbol *base = ctype;
1363 if (base->type == SYM_NODE)
1364 base = base->ctype.base_type;
1365 if (base->type == SYM_PTR) {
1366 base = base->ctype.base_type;
1367 if (base != &void_ctype)
1368 opcode = OP_PTRCAST;
1370 return alloc_typed_instruction(opcode, ctype);
1373 pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr)
1375 pseudo_t src, result;
1376 struct instruction *insn;
1378 src = linearize_expression(ep, expr->cast_expression);
1379 if (src == VOID)
1380 return VOID;
1381 if (!expr->ctype)
1382 return VOID;
1383 if (expr->ctype->bit_size < 0)
1384 return VOID;
1386 insn = alloc_cast_instruction(expr->ctype);
1387 result = alloc_pseudo(insn);
1388 insn->target = result;
1389 insn->orig_type = expr->cast_expression->ctype;
1390 use_pseudo(src, &insn->src);
1391 add_one_insn(ep, insn);
1392 return result;
1395 pseudo_t linearize_position(struct entrypoint *ep, struct expression *pos, struct access_data *ad)
1397 struct expression *init_expr = pos->init_expr;
1398 pseudo_t value = linearize_expression(ep, init_expr);
1400 ad->offset = pos->init_offset;
1401 ad->source_type = base_type(init_expr->ctype);
1402 ad->result_type = init_expr->ctype;
1403 linearize_store_gen(ep, value, ad);
1404 return VOID;
1407 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *ad)
1409 switch (initializer->type) {
1410 case EXPR_INITIALIZER: {
1411 struct expression *expr;
1412 FOR_EACH_PTR(initializer->expr_list, expr) {
1413 linearize_initializer(ep, expr, ad);
1414 } END_FOR_EACH_PTR(expr);
1415 break;
1417 case EXPR_POS:
1418 linearize_position(ep, initializer, ad);
1419 break;
1420 default: {
1421 pseudo_t value = linearize_expression(ep, initializer);
1422 ad->source_type = base_type(initializer->ctype);
1423 ad->result_type = initializer->ctype;
1424 linearize_store_gen(ep, value, ad);
1428 return VOID;
1431 void linearize_argument(struct entrypoint *ep, struct symbol *arg, int nr)
1433 struct access_data ad = { NULL, };
1435 ad.source_type = arg;
1436 ad.result_type = arg;
1437 ad.address = symbol_pseudo(ep, arg);
1438 linearize_store_gen(ep, argument_pseudo(ep, nr), &ad);
1439 finish_address_gen(ep, &ad);
1442 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
1444 if (!expr)
1445 return VOID;
1447 switch (expr->type) {
1448 case EXPR_SYMBOL:
1449 linearize_one_symbol(ep, expr->symbol);
1450 return add_symbol_address(ep, expr->symbol);
1452 case EXPR_VALUE:
1453 return value_pseudo(expr->value);
1455 case EXPR_STRING: case EXPR_FVALUE: case EXPR_LABEL:
1456 return add_setval(ep, expr->ctype, expr);
1458 case EXPR_STATEMENT:
1459 return linearize_statement(ep, expr->statement);
1461 case EXPR_CALL:
1462 return linearize_call_expression(ep, expr);
1464 case EXPR_BINOP:
1465 return linearize_binop(ep, expr);
1467 case EXPR_LOGICAL:
1468 return linearize_logical(ep, expr);
1470 case EXPR_COMPARE:
1471 return linearize_compare(ep, expr);
1473 case EXPR_SELECT:
1474 return linearize_select(ep, expr);
1476 case EXPR_CONDITIONAL:
1477 if (!expr->cond_true)
1478 return linearize_short_conditional(ep, expr, expr->conditional, expr->cond_false);
1480 return linearize_conditional(ep, expr, expr->conditional,
1481 expr->cond_true, expr->cond_false);
1483 case EXPR_COMMA:
1484 linearize_expression(ep, expr->left);
1485 return linearize_expression(ep, expr->right);
1487 case EXPR_ASSIGNMENT:
1488 return linearize_assignment(ep, expr);
1490 case EXPR_PREOP:
1491 return linearize_preop(ep, expr);
1493 case EXPR_POSTOP:
1494 return linearize_postop(ep, expr);
1496 case EXPR_CAST:
1497 case EXPR_IMPLIED_CAST:
1498 return linearize_cast(ep, expr);
1500 case EXPR_SLICE:
1501 return linearize_slice(ep, expr);
1503 case EXPR_INITIALIZER:
1504 case EXPR_POS:
1505 warning(expr->pos, "unexpected initializer expression (%d %d)", expr->type, expr->op);
1506 return VOID;
1507 default:
1508 warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op);
1509 return VOID;
1511 return VOID;
1514 static void linearize_one_symbol(struct entrypoint *ep, struct symbol *sym)
1516 struct access_data ad = { NULL, };
1518 if (!sym || !sym->initializer || sym->initialized)
1519 return;
1521 /* We need to output these puppies some day too.. */
1522 if (sym->ctype.modifiers & (MOD_STATIC | MOD_TOPLEVEL))
1523 return;
1525 sym->initialized = 1;
1526 ad.address = symbol_pseudo(ep, sym);
1527 linearize_initializer(ep, sym->initializer, &ad);
1528 finish_address_gen(ep, &ad);
1531 static pseudo_t linearize_compound_statement(struct entrypoint *ep, struct statement *stmt)
1533 pseudo_t pseudo;
1534 struct statement *s;
1535 struct symbol *sym;
1536 struct symbol *ret = stmt->ret;
1538 concat_symbol_list(stmt->syms, &ep->syms);
1540 FOR_EACH_PTR(stmt->syms, sym) {
1541 linearize_one_symbol(ep, sym);
1542 } END_FOR_EACH_PTR(sym);
1544 pseudo = VOID;
1545 FOR_EACH_PTR(stmt->stmts, s) {
1546 pseudo = linearize_statement(ep, s);
1547 } END_FOR_EACH_PTR(s);
1549 if (ret) {
1550 struct basic_block *bb = add_label(ep, ret);
1551 struct instruction *phi_node = first_instruction(bb->insns);
1553 if (!phi_node)
1554 return pseudo;
1556 if (pseudo_list_size(phi_node->phi_list)==1) {
1557 pseudo = first_pseudo(phi_node->phi_list);
1558 assert(pseudo->type == PSEUDO_PHI);
1559 return pseudo->def->src1;
1561 return phi_node->target;
1563 return pseudo;
1566 pseudo_t linearize_internal(struct entrypoint *ep, struct statement *stmt)
1568 struct instruction *insn = alloc_instruction(OP_CONTEXT, 0);
1569 struct expression *expr = stmt->expression;
1570 int value = 0;
1572 if (expr->type == EXPR_VALUE)
1573 value = expr->value;
1575 insn->increment = value;
1576 add_one_insn(ep, insn);
1577 return VOID;
1580 ALLOCATOR(asm_rules, "asm rules");
1581 ALLOCATOR(asm_constraint, "asm constraints");
1583 static void add_asm_input(struct entrypoint *ep, struct instruction *insn, struct expression *expr,
1584 const char *constraint, const struct ident *ident)
1586 pseudo_t pseudo = linearize_expression(ep, expr);
1587 struct asm_constraint *rule = __alloc_asm_constraint(0);
1589 rule->ident = ident;
1590 rule->constraint = constraint;
1591 use_pseudo(pseudo, &rule->pseudo);
1592 add_ptr_list(&insn->asm_rules->inputs, rule);
1595 static void add_asm_output(struct entrypoint *ep, struct instruction *insn, struct expression *expr,
1596 const char *constraint, const struct ident *ident)
1598 struct access_data ad = { NULL, };
1599 pseudo_t pseudo = alloc_pseudo(insn);
1600 struct asm_constraint *rule;
1602 if (!linearize_address_gen(ep, expr, &ad))
1603 return;
1604 linearize_store_gen(ep, pseudo, &ad);
1605 finish_address_gen(ep, &ad);
1606 rule = __alloc_asm_constraint(0);
1607 rule->ident = ident;
1608 rule->constraint = constraint;
1609 use_pseudo(pseudo, &rule->pseudo);
1610 add_ptr_list(&insn->asm_rules->outputs, rule);
1613 pseudo_t linearize_asm_statement(struct entrypoint *ep, struct statement *stmt)
1615 int state;
1616 struct expression *expr;
1617 struct instruction *insn;
1618 struct asm_rules *rules;
1619 const char *constraint;
1620 struct ident *ident;
1622 insn = alloc_instruction(OP_ASM, 0);
1623 expr = stmt->asm_string;
1624 if (!expr || expr->type != EXPR_STRING) {
1625 warning(stmt->pos, "expected string in inline asm");
1626 return VOID;
1628 insn->string = expr->string->data;
1630 rules = __alloc_asm_rules(0);
1631 insn->asm_rules = rules;
1633 /* Gather the inputs.. */
1634 state = 0;
1635 ident = NULL;
1636 constraint = NULL;
1637 FOR_EACH_PTR(stmt->asm_inputs, expr) {
1638 switch (state) {
1639 case 0: /* Identifier */
1640 state = 1;
1641 ident = (struct ident *)expr;
1642 continue;
1644 case 1: /* Constraint */
1645 state = 2;
1646 constraint = expr ? expr->string->data : "";
1647 continue;
1649 case 2: /* Expression */
1650 state = 0;
1651 add_asm_input(ep, insn, expr, constraint, ident);
1653 } END_FOR_EACH_PTR(expr);
1655 add_one_insn(ep, insn);
1657 /* Assign the outputs */
1658 state = 0;
1659 ident = NULL;
1660 constraint = NULL;
1661 FOR_EACH_PTR(stmt->asm_outputs, expr) {
1662 switch (state) {
1663 case 0: /* Identifier */
1664 state = 1;
1665 ident = (struct ident *)expr;
1666 continue;
1668 case 1: /* Constraint */
1669 state = 2;
1670 constraint = expr->string->data;
1671 continue;
1673 case 2:
1674 state = 0;
1675 add_asm_output(ep, insn, expr, constraint, ident);
1677 } END_FOR_EACH_PTR(expr);
1679 return VOID;
1682 static int multijmp_cmp(const void *_a, const void *_b)
1684 const struct multijmp *a = _a;
1685 const struct multijmp *b = _b;
1687 // "default" case?
1688 if (a->begin > a->end) {
1689 if (b->begin > b->end)
1690 return 0;
1691 return 1;
1693 if (b->begin > b->end)
1694 return -1;
1695 if (a->begin == b->begin) {
1696 if (a->end == b->end)
1697 return 0;
1698 return (a->end < b->end) ? -1 : 1;
1700 return a->begin < b->begin ? -1 : 1;
1703 static void sort_switch_cases(struct instruction *insn)
1705 sort_list((struct ptr_list **)&insn->multijmp_list, multijmp_cmp);
1708 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
1710 struct basic_block *bb;
1712 if (!stmt)
1713 return VOID;
1715 bb = ep->active;
1716 if (bb && !bb->insns)
1717 bb->pos = stmt->pos;
1719 switch (stmt->type) {
1720 case STMT_NONE:
1721 break;
1723 case STMT_INTERNAL:
1724 return linearize_internal(ep, stmt);
1726 case STMT_EXPRESSION:
1727 return linearize_expression(ep, stmt->expression);
1729 case STMT_ASM:
1730 return linearize_asm_statement(ep, stmt);
1732 case STMT_RETURN: {
1733 struct expression *expr = stmt->expression;
1734 struct basic_block *bb_return = get_bound_block(ep, stmt->ret_target);
1735 struct basic_block *active;
1736 pseudo_t src = linearize_expression(ep, expr);
1737 active = ep->active;
1738 if (active && src != &void_pseudo) {
1739 struct instruction *phi_node = first_instruction(bb_return->insns);
1740 pseudo_t phi;
1741 if (!phi_node) {
1742 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1743 phi_node->target = alloc_pseudo(phi_node);
1744 phi_node->bb = bb_return;
1745 add_instruction(&bb_return->insns, phi_node);
1747 phi = alloc_phi(active, src, type_size(expr->ctype));
1748 phi->ident = &return_ident;
1749 use_pseudo(phi, add_pseudo(&phi_node->phi_list, phi));
1751 add_goto(ep, bb_return);
1752 return VOID;
1755 case STMT_CASE: {
1756 add_label(ep, stmt->case_label);
1757 linearize_statement(ep, stmt->case_statement);
1758 break;
1761 case STMT_LABEL: {
1762 struct symbol *label = stmt->label_identifier;
1764 if (label->used) {
1765 add_label(ep, label);
1766 linearize_statement(ep, stmt->label_statement);
1768 break;
1771 case STMT_GOTO: {
1772 struct symbol *sym;
1773 struct expression *expr;
1774 struct instruction *goto_ins;
1775 struct basic_block *active;
1776 pseudo_t pseudo;
1778 active = ep->active;
1779 if (!bb_reachable(active))
1780 break;
1782 if (stmt->goto_label) {
1783 add_goto(ep, get_bound_block(ep, stmt->goto_label));
1784 break;
1787 expr = stmt->goto_expression;
1788 if (!expr)
1789 break;
1791 /* This can happen as part of simplification */
1792 if (expr->type == EXPR_LABEL) {
1793 add_goto(ep, get_bound_block(ep, expr->label_symbol));
1794 break;
1797 pseudo = linearize_expression(ep, expr);
1798 goto_ins = alloc_instruction(OP_COMPUTEDGOTO, 0);
1799 use_pseudo(pseudo, &goto_ins->target);
1800 add_one_insn(ep, goto_ins);
1802 FOR_EACH_PTR(stmt->target_list, sym) {
1803 struct basic_block *bb_computed = get_bound_block(ep, sym);
1804 struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0);
1805 add_multijmp(&goto_ins->multijmp_list, jmp);
1806 add_bb(&bb_computed->parents, ep->active);
1807 add_bb(&active->children, bb_computed);
1808 } END_FOR_EACH_PTR(sym);
1810 finish_block(ep);
1811 break;
1814 case STMT_COMPOUND:
1815 return linearize_compound_statement(ep, stmt);
1818 * This could take 'likely/unlikely' into account, and
1819 * switch the arms around appropriately..
1821 case STMT_IF: {
1822 struct basic_block *bb_true, *bb_false, *endif;
1823 struct expression *cond = stmt->if_conditional;
1825 bb_true = alloc_basic_block(ep, stmt->pos);
1826 bb_false = endif = alloc_basic_block(ep, stmt->pos);
1828 linearize_cond_branch(ep, cond, bb_true, bb_false);
1830 set_activeblock(ep, bb_true);
1831 linearize_statement(ep, stmt->if_true);
1833 if (stmt->if_false) {
1834 endif = alloc_basic_block(ep, stmt->pos);
1835 add_goto(ep, endif);
1836 set_activeblock(ep, bb_false);
1837 linearize_statement(ep, stmt->if_false);
1839 set_activeblock(ep, endif);
1840 break;
1843 case STMT_SWITCH: {
1844 struct symbol *sym;
1845 struct instruction *switch_ins;
1846 struct basic_block *switch_end = alloc_basic_block(ep, stmt->pos);
1847 struct basic_block *active, *default_case;
1848 struct multijmp *jmp;
1849 pseudo_t pseudo;
1851 pseudo = linearize_expression(ep, stmt->switch_expression);
1853 active = ep->active;
1854 if (!bb_reachable(active))
1855 break;
1857 switch_ins = alloc_instruction(OP_SWITCH, 0);
1858 use_pseudo(pseudo, &switch_ins->cond);
1859 add_one_insn(ep, switch_ins);
1860 finish_block(ep);
1862 default_case = NULL;
1863 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
1864 struct statement *case_stmt = sym->stmt;
1865 struct basic_block *bb_case = get_bound_block(ep, sym);
1867 if (!case_stmt->case_expression) {
1868 default_case = bb_case;
1869 continue;
1870 } else {
1871 int begin, end;
1873 begin = end = case_stmt->case_expression->value;
1874 if (case_stmt->case_to)
1875 end = case_stmt->case_to->value;
1876 if (begin > end)
1877 jmp = alloc_multijmp(bb_case, end, begin);
1878 else
1879 jmp = alloc_multijmp(bb_case, begin, end);
1882 add_multijmp(&switch_ins->multijmp_list, jmp);
1883 add_bb(&bb_case->parents, active);
1884 add_bb(&active->children, bb_case);
1885 } END_FOR_EACH_PTR(sym);
1887 bind_label(stmt->switch_break, switch_end, stmt->pos);
1889 /* And linearize the actual statement */
1890 linearize_statement(ep, stmt->switch_statement);
1891 set_activeblock(ep, switch_end);
1893 if (!default_case)
1894 default_case = switch_end;
1896 jmp = alloc_multijmp(default_case, 1, 0);
1897 add_multijmp(&switch_ins->multijmp_list, jmp);
1898 add_bb(&default_case->parents, active);
1899 add_bb(&active->children, default_case);
1900 sort_switch_cases(switch_ins);
1902 break;
1905 case STMT_ITERATOR: {
1906 struct statement *pre_statement = stmt->iterator_pre_statement;
1907 struct expression *pre_condition = stmt->iterator_pre_condition;
1908 struct statement *statement = stmt->iterator_statement;
1909 struct statement *post_statement = stmt->iterator_post_statement;
1910 struct expression *post_condition = stmt->iterator_post_condition;
1911 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end;
1913 concat_symbol_list(stmt->iterator_syms, &ep->syms);
1914 linearize_statement(ep, pre_statement);
1916 loop_body = loop_top = alloc_basic_block(ep, stmt->pos);
1917 loop_continue = alloc_basic_block(ep, stmt->pos);
1918 loop_end = alloc_basic_block(ep, stmt->pos);
1920 if (pre_condition == post_condition) {
1921 loop_top = alloc_basic_block(ep, stmt->pos);
1922 set_activeblock(ep, loop_top);
1925 if (pre_condition)
1926 linearize_cond_branch(ep, pre_condition, loop_body, loop_end);
1928 bind_label(stmt->iterator_continue, loop_continue, stmt->pos);
1929 bind_label(stmt->iterator_break, loop_end, stmt->pos);
1931 set_activeblock(ep, loop_body);
1932 linearize_statement(ep, statement);
1933 add_goto(ep, loop_continue);
1935 set_activeblock(ep, loop_continue);
1936 linearize_statement(ep, post_statement);
1937 if (!post_condition || pre_condition == post_condition)
1938 add_goto(ep, loop_top);
1939 else
1940 linearize_cond_branch(ep, post_condition, loop_top, loop_end);
1941 set_activeblock(ep, loop_end);
1942 break;
1945 default:
1946 break;
1948 return VOID;
1951 static struct entrypoint *linearize_fn(struct symbol *sym, struct symbol *base_type)
1953 struct entrypoint *ep;
1954 struct basic_block *bb;
1955 struct symbol *arg;
1956 struct instruction *entry;
1957 pseudo_t result;
1958 int i;
1960 if (!base_type->stmt)
1961 return NULL;
1963 ep = alloc_entrypoint();
1964 bb = alloc_basic_block(ep, sym->pos);
1966 ep->name = sym;
1967 set_activeblock(ep, bb);
1969 entry = alloc_instruction(OP_ENTRY, 0);
1970 add_one_insn(ep, entry);
1971 ep->entry = entry;
1973 concat_symbol_list(base_type->arguments, &ep->syms);
1975 /* FIXME!! We should do something else about varargs.. */
1976 i = 0;
1977 FOR_EACH_PTR(base_type->arguments, arg) {
1978 linearize_argument(ep, arg, ++i);
1979 } END_FOR_EACH_PTR(arg);
1981 result = linearize_statement(ep, base_type->stmt);
1982 if (bb_reachable(ep->active) && !bb_terminated(ep->active)) {
1983 struct symbol *ret_type = base_type->ctype.base_type;
1984 struct instruction *insn = alloc_typed_instruction(OP_RET, ret_type);
1986 if (type_size(ret_type) > 0)
1987 use_pseudo(result, &insn->src);
1988 add_one_insn(ep, insn);
1992 * Do trivial flow simplification - branches to
1993 * branches, kill dead basicblocks etc
1995 kill_unreachable_bbs(ep);
1998 * Turn symbols into pseudos
2000 simplify_symbol_usage(ep);
2002 repeat:
2004 * Remove trivial instructions, and try to CSE
2005 * the rest.
2007 do {
2008 cleanup_and_cse(ep);
2009 pack_basic_blocks(ep);
2010 } while (repeat_phase & REPEAT_CSE);
2012 kill_unreachable_bbs(ep);
2013 vrfy_flow(ep);
2015 /* Cleanup */
2016 clear_symbol_pseudos(ep);
2018 /* And track pseudo register usage */
2019 track_pseudo_liveness(ep);
2022 * Some flow optimizations can only effectively
2023 * be done when we've done liveness analysis. But
2024 * if they trigger, we need to start all over
2025 * again
2027 if (simplify_flow(ep)) {
2028 clear_liveness(ep);
2029 goto repeat;
2032 /* Finally, add deathnotes to pseudos now that we have them */
2033 track_pseudo_death(ep);
2035 return ep;
2038 struct entrypoint *linearize_symbol(struct symbol *sym)
2040 struct symbol *base_type;
2042 if (!sym)
2043 return NULL;
2044 base_type = sym->ctype.base_type;
2045 if (!base_type)
2046 return NULL;
2047 if (base_type->type == SYM_FN)
2048 return linearize_fn(sym, base_type);
2049 return NULL;