[PATCH] avoid segfault after parse errors in casts
[smatch.git] / linearize.c
blob6a6b640504bc689fed727026e1e52af8903224bc
1 /*
2 * Linearize - walk the statement tree (but _not_ the expressions)
3 * to generate a linear version of it and the basic blocks.
5 * NOTE! We're not interested in the actual sub-expressions yet,
6 * even though they can generate conditional branches and
7 * subroutine calls. That's all "local" behaviour.
9 * Copyright (C) 2004 Linus Torvalds
10 * Copyright (C) 2004 Christopher Li
13 #include <string.h>
14 #include <stdarg.h>
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <assert.h>
19 #include "parse.h"
20 #include "expression.h"
21 #include "linearize.h"
22 #include "flow.h"
23 #include "target.h"
25 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
26 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
28 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right);
29 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val);
30 static void linearize_one_symbol(struct entrypoint *ep, struct symbol *sym);
32 struct access_data;
33 static pseudo_t add_load(struct entrypoint *ep, struct access_data *);
34 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *);
36 struct pseudo void_pseudo = {};
38 static struct position current_pos;
40 static struct instruction *alloc_instruction(int opcode, int size)
42 struct instruction * insn = __alloc_instruction(0);
43 insn->opcode = opcode;
44 insn->size = size;
45 insn->pos = current_pos;
46 return insn;
49 static inline int type_size(struct symbol *type)
51 return type ? type->bit_size > 0 ? type->bit_size : 0 : 0;
/* Allocate an instruction whose size is taken from the given type. */
static struct instruction *alloc_typed_instruction(int opcode, struct symbol *type)
{
	int bits = type_size(type);

	return alloc_instruction(opcode, bits);
}
/* Allocate a new entrypoint object. */
static struct entrypoint *alloc_entrypoint(void)
{
	struct entrypoint *ep = __alloc_entrypoint(0);

	return ep;
}
64 static struct basic_block *alloc_basic_block(struct entrypoint *ep, struct position pos)
66 struct basic_block *bb = __alloc_basic_block(0);
67 bb->context = -1;
68 bb->pos = pos;
69 bb->ep = ep;
70 return bb;
73 static struct multijmp* alloc_multijmp(struct basic_block *target, int begin, int end)
75 struct multijmp *multijmp = __alloc_multijmp(0);
76 multijmp->target = target;
77 multijmp->begin = begin;
78 multijmp->end = end;
79 return multijmp;
82 static inline int regno(pseudo_t n)
84 int retval = -1;
85 if (n && n->type == PSEUDO_REG)
86 retval = n->nr;
87 return retval;
90 const char *show_pseudo(pseudo_t pseudo)
92 static int n;
93 static char buffer[4][64];
94 char *buf;
95 int i;
97 if (!pseudo)
98 return "no pseudo";
99 if (pseudo == VOID)
100 return "VOID";
101 buf = buffer[3 & ++n];
102 switch(pseudo->type) {
103 case PSEUDO_SYM: {
104 struct symbol *sym = pseudo->sym;
105 struct expression *expr;
107 if (sym->bb_target) {
108 snprintf(buf, 64, ".L%p", sym->bb_target);
109 break;
111 if (sym->ident) {
112 snprintf(buf, 64, "%s", show_ident(sym->ident));
113 break;
115 expr = sym->initializer;
116 snprintf(buf, 64, "<anon symbol:%p>", sym);
117 switch (expr->type) {
118 case EXPR_VALUE:
119 snprintf(buf, 64, "<symbol value: %lld>", expr->value);
120 break;
121 case EXPR_STRING:
122 return show_string(expr->string);
123 default:
124 break;
126 break;
128 case PSEUDO_REG:
129 i = snprintf(buf, 64, "%%r%d", pseudo->nr);
130 if (pseudo->ident)
131 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
132 break;
133 case PSEUDO_VAL: {
134 long long value = pseudo->value;
135 if (value > 1000 || value < -1000)
136 snprintf(buf, 64, "$%#llx", value);
137 else
138 snprintf(buf, 64, "$%lld", value);
139 break;
141 case PSEUDO_ARG:
142 snprintf(buf, 64, "%%arg%d", pseudo->nr);
143 break;
144 case PSEUDO_PHI:
145 i = snprintf(buf, 64, "%%phi%d", pseudo->nr);
146 if (pseudo->ident)
147 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
148 break;
149 default:
150 snprintf(buf, 64, "<bad pseudo type %d>", pseudo->type);
152 return buf;
155 static const char* opcodes[] = {
156 [OP_BADOP] = "bad_op",
158 /* Fn entrypoint */
159 [OP_ENTRY] = "<entry-point>",
161 /* Terminator */
162 [OP_RET] = "ret",
163 [OP_BR] = "br",
164 [OP_SWITCH] = "switch",
165 [OP_INVOKE] = "invoke",
166 [OP_COMPUTEDGOTO] = "jmp *",
167 [OP_UNWIND] = "unwind",
169 /* Binary */
170 [OP_ADD] = "add",
171 [OP_SUB] = "sub",
172 [OP_MULU] = "mulu",
173 [OP_MULS] = "muls",
174 [OP_DIVU] = "divu",
175 [OP_DIVS] = "divs",
176 [OP_MODU] = "modu",
177 [OP_MODS] = "mods",
178 [OP_SHL] = "shl",
179 [OP_LSR] = "lsr",
180 [OP_ASR] = "asr",
182 /* Logical */
183 [OP_AND] = "and",
184 [OP_OR] = "or",
185 [OP_XOR] = "xor",
186 [OP_AND_BOOL] = "and-bool",
187 [OP_OR_BOOL] = "or-bool",
189 /* Binary comparison */
190 [OP_SET_EQ] = "seteq",
191 [OP_SET_NE] = "setne",
192 [OP_SET_LE] = "setle",
193 [OP_SET_GE] = "setge",
194 [OP_SET_LT] = "setlt",
195 [OP_SET_GT] = "setgt",
196 [OP_SET_B] = "setb",
197 [OP_SET_A] = "seta",
198 [OP_SET_BE] = "setbe",
199 [OP_SET_AE] = "setae",
201 /* Uni */
202 [OP_NOT] = "not",
203 [OP_NEG] = "neg",
205 /* Special three-input */
206 [OP_SEL] = "select",
208 /* Memory */
209 [OP_MALLOC] = "malloc",
210 [OP_FREE] = "free",
211 [OP_ALLOCA] = "alloca",
212 [OP_LOAD] = "load",
213 [OP_STORE] = "store",
214 [OP_SETVAL] = "set",
215 [OP_SYMADDR] = "symaddr",
216 [OP_GET_ELEMENT_PTR] = "getelem",
218 /* Other */
219 [OP_PHI] = "phi",
220 [OP_PHISOURCE] = "phisrc",
221 [OP_CAST] = "cast",
222 [OP_SCAST] = "scast",
223 [OP_FPCAST] = "fpcast",
224 [OP_PTRCAST] = "ptrcast",
225 [OP_CALL] = "call",
226 [OP_VANEXT] = "va_next",
227 [OP_VAARG] = "va_arg",
228 [OP_SLICE] = "slice",
229 [OP_SNOP] = "snop",
230 [OP_LNOP] = "lnop",
231 [OP_NOP] = "nop",
232 [OP_DEATHNOTE] = "dead",
233 [OP_ASM] = "asm",
235 /* Sparse tagging (line numbers, context, whatever) */
236 [OP_CONTEXT] = "context",
237 [OP_RANGE] = "range-check",
240 static char *show_asm_constraints(char *buf, const char *sep, struct asm_constraint_list *list)
242 struct asm_constraint *entry;
244 FOR_EACH_PTR(list, entry) {
245 buf += sprintf(buf, "%s\"%s\"", sep, entry->constraint);
246 if (entry->pseudo)
247 buf += sprintf(buf, " (%s)", show_pseudo(entry->pseudo));
248 if (entry->ident)
249 buf += sprintf(buf, " [%s]", show_ident(entry->ident));
250 sep = ", ";
251 } END_FOR_EACH_PTR(entry);
252 return buf;
255 static char *show_asm(char *buf, struct instruction *insn)
257 struct asm_rules *rules = insn->asm_rules;
259 buf += sprintf(buf, "\"%s\"", insn->string);
260 buf = show_asm_constraints(buf, "\n\t\tout: ", rules->outputs);
261 buf = show_asm_constraints(buf, "\n\t\tin: ", rules->inputs);
262 buf = show_asm_constraints(buf, "\n\t\tclobber: ", rules->clobbers);
263 return buf;
266 const char *show_instruction(struct instruction *insn)
268 int opcode = insn->opcode;
269 static char buffer[1024];
270 char *buf;
272 buf = buffer;
273 if (!insn->bb)
274 buf += sprintf(buf, "# ");
276 if (opcode < sizeof(opcodes)/sizeof(char *)) {
277 const char *op = opcodes[opcode];
278 if (!op)
279 buf += sprintf(buf, "opcode:%d", opcode);
280 else
281 buf += sprintf(buf, "%s", op);
282 if (insn->size)
283 buf += sprintf(buf, ".%d", insn->size);
284 memset(buf, ' ', 20);
285 buf++;
288 if (buf < buffer + 12)
289 buf = buffer + 12;
290 switch (opcode) {
291 case OP_RET:
292 if (insn->src && insn->src != VOID)
293 buf += sprintf(buf, "%s", show_pseudo(insn->src));
294 break;
295 case OP_BR:
296 if (insn->bb_true && insn->bb_false) {
297 buf += sprintf(buf, "%s, .L%p, .L%p", show_pseudo(insn->cond), insn->bb_true, insn->bb_false);
298 break;
300 buf += sprintf(buf, ".L%p", insn->bb_true ? insn->bb_true : insn->bb_false);
301 break;
303 case OP_SYMADDR: {
304 struct symbol *sym = insn->symbol->sym;
305 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
307 if (sym->bb_target) {
308 buf += sprintf(buf, ".L%p", sym->bb_target);
309 break;
311 if (sym->ident) {
312 buf += sprintf(buf, "%s", show_ident(sym->ident));
313 break;
315 buf += sprintf(buf, "<anon symbol:%p>", sym);
316 break;
319 case OP_SETVAL: {
320 struct expression *expr = insn->val;
321 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
323 if (!expr) {
324 buf += sprintf(buf, "%s", "<none>");
325 break;
328 switch (expr->type) {
329 case EXPR_VALUE:
330 buf += sprintf(buf, "%lld", expr->value);
331 break;
332 case EXPR_FVALUE:
333 buf += sprintf(buf, "%Lf", expr->fvalue);
334 break;
335 case EXPR_STRING:
336 buf += sprintf(buf, "%.40s", show_string(expr->string));
337 break;
338 case EXPR_SYMBOL:
339 buf += sprintf(buf, "%s", show_ident(expr->symbol->ident));
340 break;
341 case EXPR_LABEL:
342 buf += sprintf(buf, ".L%p", expr->symbol->bb_target);
343 break;
344 default:
345 buf += sprintf(buf, "SETVAL EXPR TYPE %d", expr->type);
347 break;
349 case OP_SWITCH: {
350 struct multijmp *jmp;
351 buf += sprintf(buf, "%s", show_pseudo(insn->target));
352 FOR_EACH_PTR(insn->multijmp_list, jmp) {
353 if (jmp->begin == jmp->end)
354 buf += sprintf(buf, ", %d -> .L%p", jmp->begin, jmp->target);
355 else if (jmp->begin < jmp->end)
356 buf += sprintf(buf, ", %d ... %d -> .L%p", jmp->begin, jmp->end, jmp->target);
357 else
358 buf += sprintf(buf, ", default -> .L%p", jmp->target);
359 } END_FOR_EACH_PTR(jmp);
360 break;
362 case OP_COMPUTEDGOTO: {
363 struct multijmp *jmp;
364 buf += sprintf(buf, "%s", show_pseudo(insn->target));
365 FOR_EACH_PTR(insn->multijmp_list, jmp) {
366 buf += sprintf(buf, ", .L%p", jmp->target);
367 } END_FOR_EACH_PTR(jmp);
368 break;
371 case OP_PHISOURCE: {
372 struct instruction *phi;
373 buf += sprintf(buf, "%s <- %s ", show_pseudo(insn->target), show_pseudo(insn->phi_src));
374 FOR_EACH_PTR(insn->phi_users, phi) {
375 buf += sprintf(buf, " (%s)", show_pseudo(phi->target));
376 } END_FOR_EACH_PTR(phi);
377 break;
380 case OP_PHI: {
381 pseudo_t phi;
382 const char *s = " <-";
383 buf += sprintf(buf, "%s", show_pseudo(insn->target));
384 FOR_EACH_PTR(insn->phi_list, phi) {
385 buf += sprintf(buf, "%s %s", s, show_pseudo(phi));
386 s = ",";
387 } END_FOR_EACH_PTR(phi);
388 break;
390 case OP_LOAD: case OP_LNOP:
391 buf += sprintf(buf, "%s <- %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
392 break;
393 case OP_STORE: case OP_SNOP:
394 buf += sprintf(buf, "%s -> %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
395 break;
396 case OP_CALL: {
397 struct pseudo *arg;
398 if (insn->target && insn->target != VOID)
399 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
400 buf += sprintf(buf, "%s", show_pseudo(insn->func));
401 FOR_EACH_PTR(insn->arguments, arg) {
402 buf += sprintf(buf, ", %s", show_pseudo(arg));
403 } END_FOR_EACH_PTR(arg);
404 break;
406 case OP_CAST:
407 case OP_SCAST:
408 case OP_FPCAST:
409 case OP_PTRCAST:
410 buf += sprintf(buf, "%s <- (%d) %s",
411 show_pseudo(insn->target),
412 type_size(insn->orig_type),
413 show_pseudo(insn->src));
414 break;
415 case OP_BINARY ... OP_BINARY_END:
416 case OP_BINCMP ... OP_BINCMP_END:
417 buf += sprintf(buf, "%s <- %s, %s", show_pseudo(insn->target), show_pseudo(insn->src1), show_pseudo(insn->src2));
418 break;
420 case OP_SEL:
421 buf += sprintf(buf, "%s <- %s, %s, %s", show_pseudo(insn->target),
422 show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
423 break;
425 case OP_SLICE:
426 buf += sprintf(buf, "%s <- %s, %d, %d", show_pseudo(insn->target), show_pseudo(insn->base), insn->from, insn->len);
427 break;
429 case OP_NOT: case OP_NEG:
430 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
431 break;
433 case OP_CONTEXT:
434 buf += sprintf(buf, "%s%d", insn->check ? "check: " : "", insn->increment);
435 break;
436 case OP_RANGE:
437 buf += sprintf(buf, "%s between %s..%s", show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
438 break;
439 case OP_NOP:
440 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
441 break;
442 case OP_DEATHNOTE:
443 buf += sprintf(buf, "%s", show_pseudo(insn->target));
444 break;
445 case OP_ASM:
446 buf = show_asm(buf, insn);
447 break;
448 default:
449 break;
451 do { --buf; } while (*buf == ' ');
452 *++buf = 0;
453 return buffer;
456 void show_bb(struct basic_block *bb)
458 struct instruction *insn;
460 printf(".L%p:\n", bb);
461 if (verbose) {
462 pseudo_t needs, defines;
463 printf("%s:%d\n", stream_name(bb->pos.stream), bb->pos.line);
465 FOR_EACH_PTR(bb->needs, needs) {
466 struct instruction *def = needs->def;
467 if (def->opcode != OP_PHI) {
468 printf(" **uses %s (from .L%p)**\n", show_pseudo(needs), def->bb);
469 } else {
470 pseudo_t phi;
471 const char *sep = " ";
472 printf(" **uses %s (from", show_pseudo(needs));
473 FOR_EACH_PTR(def->phi_list, phi) {
474 if (phi == VOID)
475 continue;
476 printf("%s(%s:.L%p)", sep, show_pseudo(phi), phi->def->bb);
477 sep = ", ";
478 } END_FOR_EACH_PTR(phi);
479 printf(")**\n");
481 } END_FOR_EACH_PTR(needs);
483 FOR_EACH_PTR(bb->defines, defines) {
484 printf(" **defines %s **\n", show_pseudo(defines));
485 } END_FOR_EACH_PTR(defines);
487 if (bb->parents) {
488 struct basic_block *from;
489 FOR_EACH_PTR(bb->parents, from) {
490 printf(" **from %p (%s:%d:%d)**\n", from,
491 stream_name(from->pos.stream), from->pos.line, from->pos.pos);
492 } END_FOR_EACH_PTR(from);
495 if (bb->children) {
496 struct basic_block *to;
497 FOR_EACH_PTR(bb->children, to) {
498 printf(" **to %p (%s:%d:%d)**\n", to,
499 stream_name(to->pos.stream), to->pos.line, to->pos.pos);
500 } END_FOR_EACH_PTR(to);
504 FOR_EACH_PTR(bb->insns, insn) {
505 if (!insn->bb && verbose < 2)
506 continue;
507 printf("\t%s\n", show_instruction(insn));
508 } END_FOR_EACH_PTR(insn);
509 if (!bb_terminated(bb))
510 printf("\tEND\n");
513 static void show_symbol_usage(pseudo_t pseudo)
515 if (pseudo) {
516 pseudo_t *pp;
517 FOR_EACH_PTR(pseudo->users, pp) {
518 struct instruction *insn = container(pp, struct instruction, src);
519 printf("\t%s\n", show_instruction(insn));
520 } END_FOR_EACH_PTR(pp);
524 void show_entry(struct entrypoint *ep)
526 struct symbol *sym;
527 struct basic_block *bb;
529 printf("%s:\n", show_ident(ep->name->ident));
531 if (verbose) {
532 printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
534 FOR_EACH_PTR(ep->syms, sym) {
535 if (!sym->pseudo)
536 continue;
537 if (!sym->pseudo->users)
538 continue;
539 printf(" sym: %p %s\n", sym, show_ident(sym->ident));
540 if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE))
541 printf("\texternal visibility\n");
542 show_symbol_usage(sym->pseudo);
543 } END_FOR_EACH_PTR(sym);
545 printf("\n");
548 FOR_EACH_PTR(ep->bbs, bb) {
549 if (!bb)
550 continue;
551 if (!bb->parents && !bb->children && !bb->insns && verbose < 2)
552 continue;
553 show_bb(bb);
554 printf("\n");
555 } END_FOR_EACH_PTR(bb);
557 printf("\n");
560 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos)
562 if (label->bb_target)
563 warning(pos, "label '%s' already bound", show_ident(label->ident));
564 label->bb_target = bb;
567 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label)
569 struct basic_block *bb = label->bb_target;
571 if (!bb) {
572 bb = alloc_basic_block(ep, label->pos);
573 label->bb_target = bb;
575 return bb;
578 static void finish_block(struct entrypoint *ep)
580 struct basic_block *src = ep->active;
581 if (bb_reachable(src))
582 ep->active = NULL;
585 static void add_goto(struct entrypoint *ep, struct basic_block *dst)
587 struct basic_block *src = ep->active;
588 if (bb_reachable(src)) {
589 struct instruction *br = alloc_instruction(OP_BR, 0);
590 br->bb_true = dst;
591 add_bb(&dst->parents, src);
592 add_bb(&src->children, dst);
593 br->bb = src;
594 add_instruction(&src->insns, br);
595 ep->active = NULL;
599 static void add_one_insn(struct entrypoint *ep, struct instruction *insn)
601 struct basic_block *bb = ep->active;
603 if (bb_reachable(bb)) {
604 insn->bb = bb;
605 add_instruction(&bb->insns, insn);
609 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb)
611 if (!bb_terminated(ep->active))
612 add_goto(ep, bb);
614 ep->active = bb;
615 if (bb_reachable(bb))
616 add_bb(&ep->bbs, bb);
619 static void remove_parent(struct basic_block *child, struct basic_block *parent)
621 remove_bb_from_list(&child->parents, parent, 1);
622 if (!child->parents)
623 kill_bb(child);
626 /* Change a "switch" into a branch */
627 void insert_branch(struct basic_block *bb, struct instruction *jmp, struct basic_block *target)
629 struct instruction *br, *old;
630 struct basic_block *child;
632 /* Remove the switch */
633 old = delete_last_instruction(&bb->insns);
634 assert(old == jmp);
636 br = alloc_instruction(OP_BR, 0);
637 br->bb = bb;
638 br->bb_true = target;
639 add_instruction(&bb->insns, br);
641 FOR_EACH_PTR(bb->children, child) {
642 if (child == target) {
643 target = NULL; /* Trigger just once */
644 continue;
646 DELETE_CURRENT_PTR(child);
647 remove_parent(child, bb);
648 } END_FOR_EACH_PTR(child);
649 PACK_PTR_LIST(&bb->children);
653 void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi_node, pseudo_t true, pseudo_t false)
655 pseudo_t target;
656 struct instruction *select;
658 /* Remove the 'br' */
659 delete_last_instruction(&bb->insns);
661 select = alloc_instruction(OP_SEL, phi_node->size);
662 select->bb = bb;
664 assert(br->cond);
665 use_pseudo(br->cond, &select->src1);
667 target = phi_node->target;
668 assert(target->def == phi_node);
669 select->target = target;
670 target->def = select;
672 use_pseudo(true, &select->src2);
673 use_pseudo(false, &select->src3);
675 add_instruction(&bb->insns, select);
676 add_instruction(&bb->insns, br);
679 static inline int bb_empty(struct basic_block *bb)
681 return !bb->insns;
684 /* Add a label to the currently active block, return new active block */
685 static struct basic_block * add_label(struct entrypoint *ep, struct symbol *label)
687 struct basic_block *bb = label->bb_target;
689 if (bb) {
690 set_activeblock(ep, bb);
691 return bb;
693 bb = ep->active;
694 if (!bb_reachable(bb) || !bb_empty(bb)) {
695 bb = alloc_basic_block(ep, label->pos);
696 set_activeblock(ep, bb);
698 label->bb_target = bb;
699 return bb;
702 static void add_branch(struct entrypoint *ep, struct expression *expr, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false)
704 struct basic_block *bb = ep->active;
705 struct instruction *br;
707 if (bb_reachable(bb)) {
708 br = alloc_instruction(OP_BR, 0);
709 use_pseudo(cond, &br->cond);
710 br->bb_true = bb_true;
711 br->bb_false = bb_false;
712 add_bb(&bb_true->parents, bb);
713 add_bb(&bb_false->parents, bb);
714 add_bb(&bb->children, bb_true);
715 add_bb(&bb->children, bb_false);
716 add_one_insn(ep, br);
720 /* Dummy pseudo allocator */
721 pseudo_t alloc_pseudo(struct instruction *def)
723 static int nr = 0;
724 struct pseudo * pseudo = __alloc_pseudo(0);
725 pseudo->type = PSEUDO_REG;
726 pseudo->nr = ++nr;
727 pseudo->def = def;
728 return pseudo;
731 static void clear_symbol_pseudos(struct entrypoint *ep)
733 struct symbol *sym;
735 FOR_EACH_PTR(ep->accesses, sym) {
736 sym->pseudo = NULL;
737 } END_FOR_EACH_PTR(sym);
740 static pseudo_t symbol_pseudo(struct entrypoint *ep, struct symbol *sym)
742 pseudo_t pseudo;
744 if (!sym)
745 return VOID;
747 pseudo = sym->pseudo;
748 if (!pseudo) {
749 pseudo = __alloc_pseudo(0);
750 pseudo->nr = -1;
751 pseudo->type = PSEUDO_SYM;
752 pseudo->sym = sym;
753 pseudo->ident = sym->ident;
754 sym->pseudo = pseudo;
755 add_symbol(&ep->accesses, sym);
757 /* Symbol pseudos have neither nr, usage nor def */
758 return pseudo;
761 pseudo_t value_pseudo(long long val)
763 #define MAX_VAL_HASH 64
764 static struct pseudo_list *prev[MAX_VAL_HASH];
765 int hash = val & (MAX_VAL_HASH-1);
766 struct pseudo_list **list = prev + hash;
767 pseudo_t pseudo;
769 FOR_EACH_PTR(*list, pseudo) {
770 if (pseudo->value == val)
771 return pseudo;
772 } END_FOR_EACH_PTR(pseudo);
774 pseudo = __alloc_pseudo(0);
775 pseudo->type = PSEUDO_VAL;
776 pseudo->value = val;
777 add_pseudo(list, pseudo);
779 /* Value pseudos have neither nr, usage nor def */
780 return pseudo;
783 static pseudo_t argument_pseudo(struct entrypoint *ep, int nr)
785 pseudo_t pseudo = __alloc_pseudo(0);
786 struct instruction *entry = ep->entry;
788 pseudo->type = PSEUDO_ARG;
789 pseudo->nr = nr;
790 pseudo->def = entry;
791 add_pseudo(&entry->arg_list, pseudo);
793 /* Argument pseudos have neither usage nor def */
794 return pseudo;
797 pseudo_t alloc_phi(struct basic_block *source, pseudo_t pseudo, int size)
799 struct instruction *insn = alloc_instruction(OP_PHISOURCE, size);
800 pseudo_t phi = __alloc_pseudo(0);
801 static int nr = 0;
803 phi->type = PSEUDO_PHI;
804 phi->nr = ++nr;
805 phi->def = insn;
807 use_pseudo(pseudo, &insn->phi_src);
808 insn->bb = source;
809 insn->target = phi;
810 add_instruction(&source->insns, insn);
811 return phi;
815 * We carry the "access_data" structure around for any accesses,
816 * which simplifies things a lot. It contains all the access
817 * information in one place.
819 struct access_data {
820 struct symbol *result_type; // result ctype
821 struct symbol *source_type; // source ctype
822 pseudo_t address; // pseudo containing address ..
823 pseudo_t origval; // pseudo for original value ..
824 unsigned int offset, alignment; // byte offset
825 unsigned int bit_size, bit_offset; // which bits
826 struct position pos;
/* Called once an access is complete; currently has nothing to do. */
static void finish_address_gen(struct entrypoint *ep, struct access_data *ad)
{
}
833 static int linearize_simple_address(struct entrypoint *ep,
834 struct expression *addr,
835 struct access_data *ad)
837 if (addr->type == EXPR_SYMBOL) {
838 linearize_one_symbol(ep, addr->symbol);
839 ad->address = symbol_pseudo(ep, addr->symbol);
840 return 1;
842 if (addr->type == EXPR_BINOP) {
843 if (addr->right->type == EXPR_VALUE) {
844 if (addr->op == '+') {
845 ad->offset += get_expression_value(addr->right);
846 return linearize_simple_address(ep, addr->left, ad);
850 ad->address = linearize_expression(ep, addr);
851 return 1;
854 static struct symbol *base_type(struct symbol *sym)
856 struct symbol *base = sym;
858 if (sym) {
859 if (sym->type == SYM_NODE)
860 base = base->ctype.base_type;
861 if (base->type == SYM_BITFIELD)
862 return base->ctype.base_type;
864 return sym;
867 static int linearize_address_gen(struct entrypoint *ep,
868 struct expression *expr,
869 struct access_data *ad)
871 struct symbol *ctype = expr->ctype;
873 if (!ctype)
874 return 0;
875 ad->pos = expr->pos;
876 ad->result_type = ctype;
877 ad->source_type = base_type(ctype);
878 ad->bit_size = ctype->bit_size;
879 ad->alignment = ctype->ctype.alignment;
880 ad->bit_offset = ctype->bit_offset;
881 if (expr->type == EXPR_PREOP && expr->op == '*')
882 return linearize_simple_address(ep, expr->unop, ad);
884 warning(expr->pos, "generating address of non-lvalue (%d)", expr->type);
885 return 0;
888 static pseudo_t add_load(struct entrypoint *ep, struct access_data *ad)
890 struct instruction *insn;
891 pseudo_t new;
893 new = ad->origval;
894 if (0 && new)
895 return new;
897 insn = alloc_typed_instruction(OP_LOAD, ad->source_type);
898 new = alloc_pseudo(insn);
899 ad->origval = new;
901 insn->target = new;
902 insn->offset = ad->offset;
903 use_pseudo(ad->address, &insn->src);
904 add_one_insn(ep, insn);
905 return new;
908 static void add_store(struct entrypoint *ep, struct access_data *ad, pseudo_t value)
910 struct basic_block *bb = ep->active;
912 if (bb_reachable(bb)) {
913 struct instruction *store = alloc_typed_instruction(OP_STORE, ad->source_type);
914 store->offset = ad->offset;
915 use_pseudo(value, &store->target);
916 use_pseudo(ad->address, &store->src);
917 add_one_insn(ep, store);
921 static pseudo_t linearize_store_gen(struct entrypoint *ep,
922 pseudo_t value,
923 struct access_data *ad)
925 pseudo_t store = value;
927 if (type_size(ad->source_type) != type_size(ad->result_type)) {
928 pseudo_t orig = add_load(ep, ad);
929 int shift = ad->bit_offset;
930 unsigned long long mask = (1ULL << ad->bit_size)-1;
932 if (shift) {
933 store = add_binary_op(ep, ad->source_type, OP_SHL, value, value_pseudo(shift));
934 mask <<= shift;
936 orig = add_binary_op(ep, ad->source_type, OP_AND, orig, value_pseudo(~mask));
937 store = add_binary_op(ep, ad->source_type, OP_OR, orig, store);
939 add_store(ep, ad, store);
940 return value;
943 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right)
945 struct instruction *insn = alloc_typed_instruction(op, ctype);
946 pseudo_t target = alloc_pseudo(insn);
947 insn->target = target;
948 use_pseudo(left, &insn->src1);
949 use_pseudo(right, &insn->src2);
950 add_one_insn(ep, insn);
951 return target;
954 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val)
956 struct instruction *insn = alloc_typed_instruction(OP_SETVAL, ctype);
957 pseudo_t target = alloc_pseudo(insn);
958 insn->target = target;
959 insn->val = val;
960 add_one_insn(ep, insn);
961 return target;
964 static pseudo_t add_symbol_address(struct entrypoint *ep, struct symbol *sym)
966 struct instruction *insn = alloc_instruction(OP_SYMADDR, bits_in_pointer);
967 pseudo_t target = alloc_pseudo(insn);
969 insn->target = target;
970 use_pseudo(symbol_pseudo(ep, sym), &insn->symbol);
971 add_one_insn(ep, insn);
972 return target;
975 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct access_data *ad)
977 pseudo_t new = add_load(ep, ad);
979 if (ad->bit_offset) {
980 pseudo_t shift = value_pseudo(ad->bit_offset);
981 pseudo_t newval = add_binary_op(ep, ad->source_type, OP_LSR, new, shift);
982 new = newval;
985 return new;
988 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
990 struct access_data ad = { NULL, };
991 pseudo_t value;
993 if (!linearize_address_gen(ep, expr, &ad))
994 return VOID;
995 value = linearize_load_gen(ep, &ad);
996 finish_address_gen(ep, &ad);
997 return value;
1000 /* FIXME: FP */
1001 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
1003 struct access_data ad = { NULL, };
1004 pseudo_t old, new, one;
1005 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB;
1007 if (!linearize_address_gen(ep, expr->unop, &ad))
1008 return VOID;
1010 old = linearize_load_gen(ep, &ad);
1011 one = value_pseudo(expr->op_value);
1012 new = add_binary_op(ep, expr->ctype, op, old, one);
1013 linearize_store_gen(ep, new, &ad);
1014 finish_address_gen(ep, &ad);
1015 return postop ? old : new;
1018 static pseudo_t add_uniop(struct entrypoint *ep, struct expression *expr, int op, pseudo_t src)
1020 struct instruction *insn = alloc_typed_instruction(op, expr->ctype);
1021 pseudo_t new = alloc_pseudo(insn);
1023 insn->target = new;
1024 use_pseudo(src, &insn->src1);
1025 add_one_insn(ep, insn);
1026 return new;
1029 static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr)
1031 pseudo_t pre = linearize_expression(ep, expr->base);
1032 struct instruction *insn = alloc_typed_instruction(OP_SLICE, expr->ctype);
1033 pseudo_t new = alloc_pseudo(insn);
1035 insn->target = new;
1036 insn->from = expr->r_bitpos;
1037 insn->len = expr->r_nrbits;
1038 use_pseudo(pre, &insn->base);
1039 add_one_insn(ep, insn);
1040 return new;
1043 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
1045 pseudo_t pre = linearize_expression(ep, expr->unop);
1046 switch (expr->op) {
1047 case '+':
1048 return pre;
1049 case '!': {
1050 pseudo_t zero = value_pseudo(0);
1051 return add_binary_op(ep, expr->unop->ctype, OP_SET_EQ, pre, zero);
1053 case '~':
1054 return add_uniop(ep, expr, OP_NOT, pre);
1055 case '-':
1056 return add_uniop(ep, expr, OP_NEG, pre);
1058 return VOID;
1061 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
1064 * '*' is an lvalue access, and is fundamentally different
1065 * from an arithmetic operation. Maybe it should have an
1066 * expression type of its own..
1068 if (expr->op == '*')
1069 return linearize_access(ep, expr);
1070 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
1071 return linearize_inc_dec(ep, expr, 0);
1072 return linearize_regular_preop(ep, expr);
1075 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr)
1077 return linearize_inc_dec(ep, expr, 1);
1081 * Casts to pointers are "less safe" than other casts, since
1082 * they imply type-unsafe accesses. "void *" is a special
1083 * case, since you can't access through it anyway without another
1084 * cast.
1086 static struct instruction *alloc_cast_instruction(struct symbol *ctype)
1088 int opcode = OP_CAST;
1089 struct symbol *base = ctype;
1091 if (base->ctype.modifiers & MOD_SIGNED)
1092 opcode = OP_SCAST;
1093 if (base->type == SYM_NODE)
1094 base = base->ctype.base_type;
1095 if (base->type == SYM_PTR) {
1096 base = base->ctype.base_type;
1097 if (base != &void_ctype)
1098 opcode = OP_PTRCAST;
1100 if (base->ctype.base_type == &fp_type)
1101 opcode = OP_FPCAST;
1102 return alloc_typed_instruction(opcode, ctype);
1105 static pseudo_t cast_pseudo(struct entrypoint *ep, pseudo_t src, struct symbol *from, struct symbol *to)
1107 pseudo_t result;
1108 struct instruction *insn;
1110 if (src == VOID)
1111 return VOID;
1112 if (!from || !to)
1113 return VOID;
1114 if (from->bit_size < 0 || to->bit_size < 0)
1115 return VOID;
1116 insn = alloc_cast_instruction(to);
1117 result = alloc_pseudo(insn);
1118 insn->target = result;
1119 insn->orig_type = from;
1120 use_pseudo(src, &insn->src);
1121 add_one_insn(ep, insn);
1122 return result;
1125 static int opcode_sign(int opcode, struct symbol *ctype)
1127 if (ctype && (ctype->ctype.modifiers & MOD_SIGNED)) {
1128 switch(opcode) {
1129 case OP_MULU: case OP_DIVU: case OP_MODU: case OP_LSR:
1130 opcode++;
1133 return opcode;
1136 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
1138 struct access_data ad = { NULL, };
1139 struct expression *target = expr->left;
1140 struct expression *src = expr->right;
1141 pseudo_t value;
1143 value = linearize_expression(ep, src);
1144 if (!target || !linearize_address_gen(ep, target, &ad))
1145 return value;
1146 if (expr->op != '=') {
1147 pseudo_t oldvalue = linearize_load_gen(ep, &ad);
1148 pseudo_t dst;
1149 static const int op_trans[] = {
1150 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD,
1151 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB,
1152 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MULU,
1153 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIVU,
1154 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MODU,
1155 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL,
1156 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_LSR,
1157 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND,
1158 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR,
1159 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR
1161 int opcode;
1163 if (!src)
1164 return VOID;
1166 oldvalue = cast_pseudo(ep, oldvalue, src->ctype, expr->ctype);
1167 opcode = opcode_sign(op_trans[expr->op - SPECIAL_BASE], src->ctype);
1168 dst = add_binary_op(ep, src->ctype, opcode, oldvalue, value);
1169 value = cast_pseudo(ep, dst, expr->ctype, src->ctype);
1171 value = linearize_store_gen(ep, value, &ad);
1172 finish_address_gen(ep, &ad);
1173 return value;
1176 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr)
1178 struct expression *arg, *fn;
1179 struct instruction *insn = alloc_typed_instruction(OP_CALL, expr->ctype);
1180 pseudo_t retval, call;
1181 int context_diff, check;
1183 if (!expr->ctype) {
1184 warning(expr->pos, "call with no type!");
1185 return VOID;
1188 FOR_EACH_PTR(expr->args, arg) {
1189 pseudo_t new = linearize_expression(ep, arg);
1190 use_pseudo(new, add_pseudo(&insn->arguments, new));
1191 } END_FOR_EACH_PTR(arg);
1193 fn = expr->fn;
1195 check = 0;
1196 context_diff = 0;
1197 if (fn->ctype) {
1198 int in = fn->ctype->ctype.in_context;
1199 int out = fn->ctype->ctype.out_context;
1200 if (in < 0) {
1201 check = 1;
1202 in = 0;
1204 if (out < 0) {
1205 check = 0;
1206 out = 0;
1208 context_diff = out - in;
1211 if (fn->type == EXPR_PREOP) {
1212 if (fn->unop->type == EXPR_SYMBOL) {
1213 struct symbol *sym = fn->unop->symbol;
1214 if (sym->ctype.base_type->type == SYM_FN)
1215 fn = fn->unop;
1218 if (fn->type == EXPR_SYMBOL) {
1219 call = symbol_pseudo(ep, fn->symbol);
1220 } else {
1221 call = linearize_expression(ep, fn);
1223 use_pseudo(call, &insn->func);
1224 retval = VOID;
1225 if (expr->ctype != &void_ctype)
1226 retval = alloc_pseudo(insn);
1227 insn->target = retval;
1228 add_one_insn(ep, insn);
1230 if (check || context_diff) {
1231 insn = alloc_instruction(OP_CONTEXT, 0);
1232 insn->increment = context_diff;
1233 insn->check = check;
1234 add_one_insn(ep, insn);
1237 return retval;
1240 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr)
1242 pseudo_t src1, src2, dst;
1243 static const int opcode[] = {
1244 ['+'] = OP_ADD, ['-'] = OP_SUB,
1245 ['*'] = OP_MULU, ['/'] = OP_DIVU,
1246 ['%'] = OP_MODU, ['&'] = OP_AND,
1247 ['|'] = OP_OR, ['^'] = OP_XOR,
1248 [SPECIAL_LEFTSHIFT] = OP_SHL,
1249 [SPECIAL_RIGHTSHIFT] = OP_LSR,
1250 [SPECIAL_LOGICAL_AND] = OP_AND_BOOL,
1251 [SPECIAL_LOGICAL_OR] = OP_OR_BOOL,
1253 int op;
1255 src1 = linearize_expression(ep, expr->left);
1256 src2 = linearize_expression(ep, expr->right);
1257 op = opcode_sign(opcode[expr->op], expr->ctype);
1258 dst = add_binary_op(ep, expr->ctype, op, src1, src2);
1259 return dst;
1262 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1264 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1266 static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr)
1268 pseudo_t cond, true, false, res;
1269 struct instruction *insn;
1271 true = linearize_expression(ep, expr->cond_true);
1272 false = linearize_expression(ep, expr->cond_false);
1273 cond = linearize_expression(ep, expr->conditional);
1275 insn = alloc_typed_instruction(OP_SEL, expr->ctype);
1276 if (!expr->cond_true)
1277 true = cond;
1278 use_pseudo(cond, &insn->src1);
1279 use_pseudo(true, &insn->src2);
1280 use_pseudo(false, &insn->src3);
1282 res = alloc_pseudo(insn);
1283 insn->target = res;
1284 add_one_insn(ep, insn);
1285 return res;
1288 static pseudo_t add_join_conditional(struct entrypoint *ep, struct expression *expr,
1289 pseudo_t phi1, pseudo_t phi2)
1291 pseudo_t target;
1292 struct instruction *phi_node;
1294 if (phi1 == VOID)
1295 return phi2;
1296 if (phi2 == VOID)
1297 return phi1;
1299 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1300 use_pseudo(phi1, add_pseudo(&phi_node->phi_list, phi1));
1301 use_pseudo(phi2, add_pseudo(&phi_node->phi_list, phi2));
1302 phi_node->target = target = alloc_pseudo(phi_node);
1303 add_one_insn(ep, phi_node);
1304 return target;
1307 static pseudo_t linearize_short_conditional(struct entrypoint *ep, struct expression *expr,
1308 struct expression *cond,
1309 struct expression *expr_false)
1311 pseudo_t src1, src2;
1312 struct basic_block *bb_false;
1313 struct basic_block *merge = alloc_basic_block(ep, expr->pos);
1314 pseudo_t phi1, phi2;
1315 int size = type_size(expr->ctype);
1317 if (!expr_false || !ep->active)
1318 return VOID;
1320 bb_false = alloc_basic_block(ep, expr_false->pos);
1321 src1 = linearize_expression(ep, cond);
1322 phi1 = alloc_phi(ep->active, src1, size);
1323 add_branch(ep, expr, src1, merge, bb_false);
1325 set_activeblock(ep, bb_false);
1326 src2 = linearize_expression(ep, expr_false);
1327 phi2 = alloc_phi(ep->active, src2, size);
1328 set_activeblock(ep, merge);
1330 return add_join_conditional(ep, expr, phi1, phi2);
1333 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr,
1334 struct expression *cond,
1335 struct expression *expr_true,
1336 struct expression *expr_false)
1338 pseudo_t src1, src2;
1339 pseudo_t phi1, phi2;
1340 struct basic_block *bb_true, *bb_false, *merge;
1341 int size = type_size(expr->ctype);
1343 if (!cond || !expr_true || !expr_false || !ep->active)
1344 return VOID;
1345 bb_true = alloc_basic_block(ep, expr_true->pos);
1346 bb_false = alloc_basic_block(ep, expr_false->pos);
1347 merge = alloc_basic_block(ep, expr->pos);
1349 linearize_cond_branch(ep, cond, bb_true, bb_false);
1351 set_activeblock(ep, bb_true);
1352 src1 = linearize_expression(ep, expr_true);
1353 phi1 = alloc_phi(ep->active, src1, size);
1354 add_goto(ep, merge);
1356 set_activeblock(ep, bb_false);
1357 src2 = linearize_expression(ep, expr_false);
1358 phi2 = alloc_phi(ep->active, src2, size);
1359 set_activeblock(ep, merge);
1361 return add_join_conditional(ep, expr, phi1, phi2);
1364 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr)
1366 struct expression *shortcut;
1368 shortcut = alloc_const_expression(expr->pos, expr->op == SPECIAL_LOGICAL_OR);
1369 shortcut->ctype = expr->ctype;
1370 return linearize_conditional(ep, expr, expr->left, shortcut, expr->right);
1373 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr)
1375 static const int cmpop[] = {
1376 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT,
1377 [SPECIAL_EQUAL] = OP_SET_EQ,
1378 [SPECIAL_NOTEQUAL] = OP_SET_NE,
1379 [SPECIAL_GTE] = OP_SET_GE,
1380 [SPECIAL_LTE] = OP_SET_LE,
1381 [SPECIAL_UNSIGNED_LT] = OP_SET_B,
1382 [SPECIAL_UNSIGNED_GT] = OP_SET_A,
1383 [SPECIAL_UNSIGNED_LTE] = OP_SET_BE,
1384 [SPECIAL_UNSIGNED_GTE] = OP_SET_AE,
1387 pseudo_t src1 = linearize_expression(ep, expr->left);
1388 pseudo_t src2 = linearize_expression(ep, expr->right);
1389 pseudo_t dst = add_binary_op(ep, expr->left->ctype, cmpop[expr->op], src1, src2);
1390 return dst;
1394 pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1396 pseudo_t cond;
1398 if (!expr || !bb_reachable(ep->active))
1399 return VOID;
1401 switch (expr->type) {
1403 case EXPR_STRING:
1404 case EXPR_VALUE:
1405 add_goto(ep, expr->value ? bb_true : bb_false);
1406 return VOID;
1408 case EXPR_FVALUE:
1409 add_goto(ep, expr->fvalue ? bb_true : bb_false);
1410 return VOID;
1412 case EXPR_LOGICAL:
1413 linearize_logical_branch(ep, expr, bb_true, bb_false);
1414 return VOID;
1416 case EXPR_COMPARE:
1417 cond = linearize_compare(ep, expr);
1418 add_branch(ep, expr, cond, bb_true, bb_false);
1419 break;
1421 case EXPR_PREOP:
1422 if (expr->op == '!')
1423 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true);
1424 /* fall through */
1425 default: {
1426 cond = linearize_expression(ep, expr);
1427 add_branch(ep, expr, cond, bb_true, bb_false);
1429 return VOID;
1432 return VOID;
1437 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1439 struct basic_block *next = alloc_basic_block(ep, expr->pos);
1441 if (expr->op == SPECIAL_LOGICAL_OR)
1442 linearize_cond_branch(ep, expr->left, bb_true, next);
1443 else
1444 linearize_cond_branch(ep, expr->left, next, bb_false);
1445 set_activeblock(ep, next);
1446 linearize_cond_branch(ep, expr->right, bb_true, bb_false);
1447 return VOID;
1450 pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr)
1452 pseudo_t src;
1453 struct expression *orig = expr->cast_expression;
1455 if (!orig)
1456 return VOID;
1458 src = linearize_expression(ep, orig);
1459 return cast_pseudo(ep, src, orig->ctype, expr->ctype);
1462 pseudo_t linearize_position(struct entrypoint *ep, struct expression *pos, struct access_data *ad)
1464 struct expression *init_expr = pos->init_expr;
1466 ad->offset = pos->init_offset;
1467 ad->source_type = base_type(init_expr->ctype);
1468 ad->result_type = init_expr->ctype;
1469 return linearize_initializer(ep, init_expr, ad);
1472 pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *ad)
1474 switch (initializer->type) {
1475 case EXPR_INITIALIZER: {
1476 struct expression *expr;
1477 FOR_EACH_PTR(initializer->expr_list, expr) {
1478 linearize_initializer(ep, expr, ad);
1479 } END_FOR_EACH_PTR(expr);
1480 break;
1482 case EXPR_POS:
1483 linearize_position(ep, initializer, ad);
1484 break;
1485 default: {
1486 pseudo_t value = linearize_expression(ep, initializer);
1487 ad->source_type = base_type(initializer->ctype);
1488 ad->result_type = initializer->ctype;
1489 linearize_store_gen(ep, value, ad);
1493 return VOID;
1496 void linearize_argument(struct entrypoint *ep, struct symbol *arg, int nr)
1498 struct access_data ad = { NULL, };
1500 ad.source_type = arg;
1501 ad.result_type = arg;
1502 ad.address = symbol_pseudo(ep, arg);
1503 linearize_store_gen(ep, argument_pseudo(ep, nr), &ad);
1504 finish_address_gen(ep, &ad);
1507 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
1509 if (!expr)
1510 return VOID;
1512 current_pos = expr->pos;
1513 switch (expr->type) {
1514 case EXPR_SYMBOL:
1515 linearize_one_symbol(ep, expr->symbol);
1516 return add_symbol_address(ep, expr->symbol);
1518 case EXPR_VALUE:
1519 return value_pseudo(expr->value);
1521 case EXPR_STRING: case EXPR_FVALUE: case EXPR_LABEL:
1522 return add_setval(ep, expr->ctype, expr);
1524 case EXPR_STATEMENT:
1525 return linearize_statement(ep, expr->statement);
1527 case EXPR_CALL:
1528 return linearize_call_expression(ep, expr);
1530 case EXPR_BINOP:
1531 return linearize_binop(ep, expr);
1533 case EXPR_LOGICAL:
1534 return linearize_logical(ep, expr);
1536 case EXPR_COMPARE:
1537 return linearize_compare(ep, expr);
1539 case EXPR_SELECT:
1540 return linearize_select(ep, expr);
1542 case EXPR_CONDITIONAL:
1543 if (!expr->cond_true)
1544 return linearize_short_conditional(ep, expr, expr->conditional, expr->cond_false);
1546 return linearize_conditional(ep, expr, expr->conditional,
1547 expr->cond_true, expr->cond_false);
1549 case EXPR_COMMA:
1550 linearize_expression(ep, expr->left);
1551 return linearize_expression(ep, expr->right);
1553 case EXPR_ASSIGNMENT:
1554 return linearize_assignment(ep, expr);
1556 case EXPR_PREOP:
1557 return linearize_preop(ep, expr);
1559 case EXPR_POSTOP:
1560 return linearize_postop(ep, expr);
1562 case EXPR_CAST:
1563 case EXPR_IMPLIED_CAST:
1564 return linearize_cast(ep, expr);
1566 case EXPR_SLICE:
1567 return linearize_slice(ep, expr);
1569 case EXPR_INITIALIZER:
1570 case EXPR_POS:
1571 warning(expr->pos, "unexpected initializer expression (%d %d)", expr->type, expr->op);
1572 return VOID;
1573 default:
1574 warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op);
1575 return VOID;
1577 return VOID;
1580 static void linearize_one_symbol(struct entrypoint *ep, struct symbol *sym)
1582 struct access_data ad = { NULL, };
1584 if (!sym || !sym->initializer || sym->initialized)
1585 return;
1587 /* We need to output these puppies some day too.. */
1588 if (sym->ctype.modifiers & (MOD_STATIC | MOD_TOPLEVEL))
1589 return;
1591 sym->initialized = 1;
1592 ad.address = symbol_pseudo(ep, sym);
1593 linearize_initializer(ep, sym->initializer, &ad);
1594 finish_address_gen(ep, &ad);
1597 static pseudo_t linearize_compound_statement(struct entrypoint *ep, struct statement *stmt)
1599 pseudo_t pseudo;
1600 struct statement *s;
1601 struct symbol *sym;
1602 struct symbol *ret = stmt->ret;
1604 concat_symbol_list(stmt->syms, &ep->syms);
1606 FOR_EACH_PTR(stmt->syms, sym) {
1607 linearize_one_symbol(ep, sym);
1608 } END_FOR_EACH_PTR(sym);
1610 pseudo = VOID;
1611 FOR_EACH_PTR(stmt->stmts, s) {
1612 pseudo = linearize_statement(ep, s);
1613 } END_FOR_EACH_PTR(s);
1615 if (ret) {
1616 struct basic_block *bb = add_label(ep, ret);
1617 struct instruction *phi_node = first_instruction(bb->insns);
1619 if (!phi_node)
1620 return pseudo;
1622 if (pseudo_list_size(phi_node->phi_list)==1) {
1623 pseudo = first_pseudo(phi_node->phi_list);
1624 assert(pseudo->type == PSEUDO_PHI);
1625 return pseudo->def->src1;
1627 return phi_node->target;
1629 return pseudo;
1632 pseudo_t linearize_context(struct entrypoint *ep, struct statement *stmt)
1634 struct instruction *insn = alloc_instruction(OP_CONTEXT, 0);
1635 struct expression *expr = stmt->expression;
1636 int value = 0;
1638 if (expr->type == EXPR_VALUE)
1639 value = expr->value;
1641 insn->increment = value;
1642 add_one_insn(ep, insn);
1643 return VOID;
1646 pseudo_t linearize_range(struct entrypoint *ep, struct statement *stmt)
1648 struct instruction *insn = alloc_instruction(OP_RANGE, 0);
1650 use_pseudo(linearize_expression(ep, stmt->range_expression), &insn->src1);
1651 use_pseudo(linearize_expression(ep, stmt->range_low), &insn->src2);
1652 use_pseudo(linearize_expression(ep, stmt->range_high), &insn->src3);
1653 add_one_insn(ep, insn);
1654 return VOID;
1657 ALLOCATOR(asm_rules, "asm rules");
1658 ALLOCATOR(asm_constraint, "asm constraints");
1660 static void add_asm_input(struct entrypoint *ep, struct instruction *insn, struct expression *expr,
1661 const char *constraint, const struct ident *ident)
1663 pseudo_t pseudo = linearize_expression(ep, expr);
1664 struct asm_constraint *rule = __alloc_asm_constraint(0);
1666 rule->ident = ident;
1667 rule->constraint = constraint;
1668 use_pseudo(pseudo, &rule->pseudo);
1669 add_ptr_list(&insn->asm_rules->inputs, rule);
1672 static void add_asm_output(struct entrypoint *ep, struct instruction *insn, struct expression *expr,
1673 const char *constraint, const struct ident *ident)
1675 struct access_data ad = { NULL, };
1676 pseudo_t pseudo = alloc_pseudo(insn);
1677 struct asm_constraint *rule;
1679 if (!linearize_address_gen(ep, expr, &ad))
1680 return;
1681 linearize_store_gen(ep, pseudo, &ad);
1682 finish_address_gen(ep, &ad);
1683 rule = __alloc_asm_constraint(0);
1684 rule->ident = ident;
1685 rule->constraint = constraint;
1686 use_pseudo(pseudo, &rule->pseudo);
1687 add_ptr_list(&insn->asm_rules->outputs, rule);
1690 pseudo_t linearize_asm_statement(struct entrypoint *ep, struct statement *stmt)
1692 int state;
1693 struct expression *expr;
1694 struct instruction *insn;
1695 struct asm_rules *rules;
1696 const char *constraint;
1697 struct ident *ident;
1699 insn = alloc_instruction(OP_ASM, 0);
1700 expr = stmt->asm_string;
1701 if (!expr || expr->type != EXPR_STRING) {
1702 warning(stmt->pos, "expected string in inline asm");
1703 return VOID;
1705 insn->string = expr->string->data;
1707 rules = __alloc_asm_rules(0);
1708 insn->asm_rules = rules;
1710 /* Gather the inputs.. */
1711 state = 0;
1712 ident = NULL;
1713 constraint = NULL;
1714 FOR_EACH_PTR(stmt->asm_inputs, expr) {
1715 switch (state) {
1716 case 0: /* Identifier */
1717 state = 1;
1718 ident = (struct ident *)expr;
1719 continue;
1721 case 1: /* Constraint */
1722 state = 2;
1723 constraint = expr ? expr->string->data : "";
1724 continue;
1726 case 2: /* Expression */
1727 state = 0;
1728 add_asm_input(ep, insn, expr, constraint, ident);
1730 } END_FOR_EACH_PTR(expr);
1732 add_one_insn(ep, insn);
1734 /* Assign the outputs */
1735 state = 0;
1736 ident = NULL;
1737 constraint = NULL;
1738 FOR_EACH_PTR(stmt->asm_outputs, expr) {
1739 switch (state) {
1740 case 0: /* Identifier */
1741 state = 1;
1742 ident = (struct ident *)expr;
1743 continue;
1745 case 1: /* Constraint */
1746 state = 2;
1747 constraint = expr ? expr->string->data : "";
1748 continue;
1750 case 2:
1751 state = 0;
1752 add_asm_output(ep, insn, expr, constraint, ident);
1754 } END_FOR_EACH_PTR(expr);
1756 return VOID;
1759 static int multijmp_cmp(const void *_a, const void *_b)
1761 const struct multijmp *a = _a;
1762 const struct multijmp *b = _b;
1764 // "default" case?
1765 if (a->begin > a->end) {
1766 if (b->begin > b->end)
1767 return 0;
1768 return 1;
1770 if (b->begin > b->end)
1771 return -1;
1772 if (a->begin == b->begin) {
1773 if (a->end == b->end)
1774 return 0;
1775 return (a->end < b->end) ? -1 : 1;
1777 return a->begin < b->begin ? -1 : 1;
1780 static void sort_switch_cases(struct instruction *insn)
1782 sort_list((struct ptr_list **)&insn->multijmp_list, multijmp_cmp);
1785 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
1787 struct basic_block *bb;
1789 if (!stmt)
1790 return VOID;
1792 bb = ep->active;
1793 if (bb && !bb->insns)
1794 bb->pos = stmt->pos;
1795 current_pos = stmt->pos;
1797 switch (stmt->type) {
1798 case STMT_NONE:
1799 break;
1801 case STMT_CONTEXT:
1802 return linearize_context(ep, stmt);
1804 case STMT_RANGE:
1805 return linearize_range(ep, stmt);
1807 case STMT_EXPRESSION:
1808 return linearize_expression(ep, stmt->expression);
1810 case STMT_ASM:
1811 return linearize_asm_statement(ep, stmt);
1813 case STMT_RETURN: {
1814 struct expression *expr = stmt->expression;
1815 struct basic_block *bb_return = get_bound_block(ep, stmt->ret_target);
1816 struct basic_block *active;
1817 pseudo_t src = linearize_expression(ep, expr);
1818 active = ep->active;
1819 if (active && src != &void_pseudo) {
1820 struct instruction *phi_node = first_instruction(bb_return->insns);
1821 pseudo_t phi;
1822 if (!phi_node) {
1823 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1824 phi_node->target = alloc_pseudo(phi_node);
1825 phi_node->bb = bb_return;
1826 add_instruction(&bb_return->insns, phi_node);
1828 phi = alloc_phi(active, src, type_size(expr->ctype));
1829 phi->ident = &return_ident;
1830 use_pseudo(phi, add_pseudo(&phi_node->phi_list, phi));
1832 add_goto(ep, bb_return);
1833 return VOID;
1836 case STMT_CASE: {
1837 add_label(ep, stmt->case_label);
1838 linearize_statement(ep, stmt->case_statement);
1839 break;
1842 case STMT_LABEL: {
1843 struct symbol *label = stmt->label_identifier;
1845 if (label->used) {
1846 add_label(ep, label);
1847 linearize_statement(ep, stmt->label_statement);
1849 break;
1852 case STMT_GOTO: {
1853 struct symbol *sym;
1854 struct expression *expr;
1855 struct instruction *goto_ins;
1856 struct basic_block *active;
1857 pseudo_t pseudo;
1859 active = ep->active;
1860 if (!bb_reachable(active))
1861 break;
1863 if (stmt->goto_label) {
1864 add_goto(ep, get_bound_block(ep, stmt->goto_label));
1865 break;
1868 expr = stmt->goto_expression;
1869 if (!expr)
1870 break;
1872 /* This can happen as part of simplification */
1873 if (expr->type == EXPR_LABEL) {
1874 add_goto(ep, get_bound_block(ep, expr->label_symbol));
1875 break;
1878 pseudo = linearize_expression(ep, expr);
1879 goto_ins = alloc_instruction(OP_COMPUTEDGOTO, 0);
1880 use_pseudo(pseudo, &goto_ins->target);
1881 add_one_insn(ep, goto_ins);
1883 FOR_EACH_PTR(stmt->target_list, sym) {
1884 struct basic_block *bb_computed = get_bound_block(ep, sym);
1885 struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0);
1886 add_multijmp(&goto_ins->multijmp_list, jmp);
1887 add_bb(&bb_computed->parents, ep->active);
1888 add_bb(&active->children, bb_computed);
1889 } END_FOR_EACH_PTR(sym);
1891 finish_block(ep);
1892 break;
1895 case STMT_COMPOUND:
1896 return linearize_compound_statement(ep, stmt);
1899 * This could take 'likely/unlikely' into account, and
1900 * switch the arms around appropriately..
1902 case STMT_IF: {
1903 struct basic_block *bb_true, *bb_false, *endif;
1904 struct expression *cond = stmt->if_conditional;
1906 bb_true = alloc_basic_block(ep, stmt->pos);
1907 bb_false = endif = alloc_basic_block(ep, stmt->pos);
1909 linearize_cond_branch(ep, cond, bb_true, bb_false);
1911 set_activeblock(ep, bb_true);
1912 linearize_statement(ep, stmt->if_true);
1914 if (stmt->if_false) {
1915 endif = alloc_basic_block(ep, stmt->pos);
1916 add_goto(ep, endif);
1917 set_activeblock(ep, bb_false);
1918 linearize_statement(ep, stmt->if_false);
1920 set_activeblock(ep, endif);
1921 break;
1924 case STMT_SWITCH: {
1925 struct symbol *sym;
1926 struct instruction *switch_ins;
1927 struct basic_block *switch_end = alloc_basic_block(ep, stmt->pos);
1928 struct basic_block *active, *default_case;
1929 struct multijmp *jmp;
1930 pseudo_t pseudo;
1932 pseudo = linearize_expression(ep, stmt->switch_expression);
1934 active = ep->active;
1935 if (!bb_reachable(active))
1936 break;
1938 switch_ins = alloc_instruction(OP_SWITCH, 0);
1939 use_pseudo(pseudo, &switch_ins->cond);
1940 add_one_insn(ep, switch_ins);
1941 finish_block(ep);
1943 default_case = NULL;
1944 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
1945 struct statement *case_stmt = sym->stmt;
1946 struct basic_block *bb_case = get_bound_block(ep, sym);
1948 if (!case_stmt->case_expression) {
1949 default_case = bb_case;
1950 continue;
1951 } else {
1952 int begin, end;
1954 begin = end = case_stmt->case_expression->value;
1955 if (case_stmt->case_to)
1956 end = case_stmt->case_to->value;
1957 if (begin > end)
1958 jmp = alloc_multijmp(bb_case, end, begin);
1959 else
1960 jmp = alloc_multijmp(bb_case, begin, end);
1963 add_multijmp(&switch_ins->multijmp_list, jmp);
1964 add_bb(&bb_case->parents, active);
1965 add_bb(&active->children, bb_case);
1966 } END_FOR_EACH_PTR(sym);
1968 bind_label(stmt->switch_break, switch_end, stmt->pos);
1970 /* And linearize the actual statement */
1971 linearize_statement(ep, stmt->switch_statement);
1972 set_activeblock(ep, switch_end);
1974 if (!default_case)
1975 default_case = switch_end;
1977 jmp = alloc_multijmp(default_case, 1, 0);
1978 add_multijmp(&switch_ins->multijmp_list, jmp);
1979 add_bb(&default_case->parents, active);
1980 add_bb(&active->children, default_case);
1981 sort_switch_cases(switch_ins);
1983 break;
1986 case STMT_ITERATOR: {
1987 struct statement *pre_statement = stmt->iterator_pre_statement;
1988 struct expression *pre_condition = stmt->iterator_pre_condition;
1989 struct statement *statement = stmt->iterator_statement;
1990 struct statement *post_statement = stmt->iterator_post_statement;
1991 struct expression *post_condition = stmt->iterator_post_condition;
1992 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end;
1994 concat_symbol_list(stmt->iterator_syms, &ep->syms);
1995 linearize_statement(ep, pre_statement);
1997 loop_body = loop_top = alloc_basic_block(ep, stmt->pos);
1998 loop_continue = alloc_basic_block(ep, stmt->pos);
1999 loop_end = alloc_basic_block(ep, stmt->pos);
2001 if (pre_condition == post_condition) {
2002 loop_top = alloc_basic_block(ep, stmt->pos);
2003 set_activeblock(ep, loop_top);
2006 if (pre_condition)
2007 linearize_cond_branch(ep, pre_condition, loop_body, loop_end);
2009 bind_label(stmt->iterator_continue, loop_continue, stmt->pos);
2010 bind_label(stmt->iterator_break, loop_end, stmt->pos);
2012 set_activeblock(ep, loop_body);
2013 linearize_statement(ep, statement);
2014 add_goto(ep, loop_continue);
2016 set_activeblock(ep, loop_continue);
2017 linearize_statement(ep, post_statement);
2018 if (!post_condition || pre_condition == post_condition)
2019 add_goto(ep, loop_top);
2020 else
2021 linearize_cond_branch(ep, post_condition, loop_top, loop_end);
2022 set_activeblock(ep, loop_end);
2023 break;
2026 default:
2027 break;
2029 return VOID;
2032 static struct entrypoint *linearize_fn(struct symbol *sym, struct symbol *base_type)
2034 struct entrypoint *ep;
2035 struct basic_block *bb;
2036 struct symbol *arg;
2037 struct instruction *entry;
2038 pseudo_t result;
2039 int i;
2041 if (!base_type->stmt)
2042 return NULL;
2044 ep = alloc_entrypoint();
2045 bb = alloc_basic_block(ep, sym->pos);
2047 ep->name = sym;
2048 set_activeblock(ep, bb);
2050 entry = alloc_instruction(OP_ENTRY, 0);
2051 add_one_insn(ep, entry);
2052 ep->entry = entry;
2054 concat_symbol_list(base_type->arguments, &ep->syms);
2056 /* FIXME!! We should do something else about varargs.. */
2057 i = 0;
2058 FOR_EACH_PTR(base_type->arguments, arg) {
2059 linearize_argument(ep, arg, ++i);
2060 } END_FOR_EACH_PTR(arg);
2062 result = linearize_statement(ep, base_type->stmt);
2063 if (bb_reachable(ep->active) && !bb_terminated(ep->active)) {
2064 struct symbol *ret_type = base_type->ctype.base_type;
2065 struct instruction *insn = alloc_typed_instruction(OP_RET, ret_type);
2067 if (type_size(ret_type) > 0)
2068 use_pseudo(result, &insn->src);
2069 add_one_insn(ep, insn);
2073 * Do trivial flow simplification - branches to
2074 * branches, kill dead basicblocks etc
2076 kill_unreachable_bbs(ep);
2079 * Turn symbols into pseudos
2081 simplify_symbol_usage(ep);
2083 repeat:
2085 * Remove trivial instructions, and try to CSE
2086 * the rest.
2088 do {
2089 cleanup_and_cse(ep);
2090 pack_basic_blocks(ep);
2091 } while (repeat_phase & REPEAT_CSE);
2093 kill_unreachable_bbs(ep);
2094 vrfy_flow(ep);
2096 /* Cleanup */
2097 clear_symbol_pseudos(ep);
2099 /* And track pseudo register usage */
2100 track_pseudo_liveness(ep);
2103 * Some flow optimizations can only effectively
2104 * be done when we've done liveness analysis. But
2105 * if they trigger, we need to start all over
2106 * again
2108 if (simplify_flow(ep)) {
2109 clear_liveness(ep);
2110 goto repeat;
2113 /* Finally, add deathnotes to pseudos now that we have them */
2114 track_pseudo_death(ep);
2116 return ep;
2119 struct entrypoint *linearize_symbol(struct symbol *sym)
2121 struct symbol *base_type;
2123 if (!sym)
2124 return NULL;
2125 current_pos = sym->pos;
2126 base_type = sym->ctype.base_type;
2127 if (!base_type)
2128 return NULL;
2129 if (base_type->type == SYM_FN)
2130 return linearize_fn(sym, base_type);
2131 return NULL;