function_ptrs: don't store nonsense fake pointers
[smatch.git] / linearize.c
blob5a8e74970d98b650988032b9b74f8de418a886fe
/*
 * Linearize - walk the statement tree (but _not_ the expressions)
 * to generate a linear version of it and the basic blocks.
 *
 * NOTE! We're not interested in the actual sub-expressions yet,
 * even though they can generate conditional branches and
 * subroutine calls. That's all "local" behaviour.
 *
 * Copyright (C) 2004 Linus Torvalds
 * Copyright (C) 2004 Christopher Li
 */
13 #include <string.h>
14 #include <stdarg.h>
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <assert.h>
19 #include "parse.h"
20 #include "expression.h"
21 #include "linearize.h"
22 #include "optimize.h"
23 #include "flow.h"
24 #include "target.h"
26 static pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
27 static pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
29 static pseudo_t add_cast(struct entrypoint *ep, struct symbol *to, struct symbol *from, int op, pseudo_t src);
30 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right);
31 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val);
32 static pseudo_t linearize_one_symbol(struct entrypoint *ep, struct symbol *sym);
34 struct access_data;
35 static pseudo_t add_load(struct entrypoint *ep, struct access_data *);
36 static pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *);
37 static pseudo_t cast_pseudo(struct entrypoint *ep, pseudo_t src, struct symbol *from, struct symbol *to);
39 struct pseudo void_pseudo = {};
41 static struct position current_pos;
43 ALLOCATOR(pseudo_user, "pseudo_user");
45 static struct instruction *alloc_instruction(int opcode, int size)
47 struct instruction * insn = __alloc_instruction(0);
48 insn->opcode = opcode;
49 insn->size = size;
50 insn->pos = current_pos;
51 return insn;
54 static inline int type_size(struct symbol *type)
56 return type ? type->bit_size > 0 ? type->bit_size : 0 : 0;
59 static struct instruction *alloc_typed_instruction(int opcode, struct symbol *type)
61 struct instruction *insn = alloc_instruction(opcode, type_size(type));
62 insn->type = type;
63 return insn;
/* Allocate a new entrypoint through the project allocator. */
static struct entrypoint *alloc_entrypoint(void)
{
	struct entrypoint *ep = __alloc_entrypoint(0);

	return ep;
}
71 static struct basic_block *alloc_basic_block(struct entrypoint *ep, struct position pos)
73 static int nr;
74 struct basic_block *bb = __alloc_basic_block(0);
75 bb->pos = pos;
76 bb->ep = ep;
77 bb->nr = nr++;
78 return bb;
81 static struct multijmp *alloc_multijmp(struct basic_block *target, long long begin, long long end)
83 struct multijmp *multijmp = __alloc_multijmp(0);
84 multijmp->target = target;
85 multijmp->begin = begin;
86 multijmp->end = end;
87 return multijmp;
90 const char *show_label(struct basic_block *bb)
92 static int n;
93 static char buffer[4][16];
94 char *buf = buffer[3 & ++n];
96 if (!bb)
97 return ".L???";
98 snprintf(buf, 64, ".L%u", bb->nr);
99 return buf;
102 const char *show_pseudo(pseudo_t pseudo)
104 static int n;
105 static char buffer[4][64];
106 char *buf;
107 int i;
109 if (!pseudo)
110 return "no pseudo";
111 if (pseudo == VOID)
112 return "VOID";
113 buf = buffer[3 & ++n];
114 switch(pseudo->type) {
115 case PSEUDO_SYM: {
116 struct symbol *sym = pseudo->sym;
117 struct expression *expr;
119 if (!sym) {
120 snprintf(buf, 64, "<bad symbol>");
121 break;
123 if (sym->bb_target) {
124 snprintf(buf, 64, "%s", show_label(sym->bb_target));
125 break;
127 if (sym->ident) {
128 snprintf(buf, 64, "%s", show_ident(sym->ident));
129 break;
131 expr = sym->initializer;
132 snprintf(buf, 64, "<anon symbol:%p>", verbose ? sym : NULL);
133 if (expr) {
134 switch (expr->type) {
135 case EXPR_VALUE:
136 snprintf(buf, 64, "<symbol value: %lld>", expr->value);
137 break;
138 case EXPR_STRING:
139 return show_string(expr->string);
140 default:
141 break;
144 break;
146 case PSEUDO_REG:
147 i = snprintf(buf, 64, "%%r%d", pseudo->nr);
148 if (pseudo->ident)
149 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
150 break;
151 case PSEUDO_VAL: {
152 long long value = pseudo->value;
153 if (value > 1000 || value < -1000)
154 snprintf(buf, 64, "$%#llx", value);
155 else
156 snprintf(buf, 64, "$%lld", value);
157 break;
159 case PSEUDO_ARG:
160 snprintf(buf, 64, "%%arg%d", pseudo->nr);
161 break;
162 case PSEUDO_PHI:
163 i = snprintf(buf, 64, "%%phi%d", pseudo->nr);
164 if (pseudo->ident)
165 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
166 break;
167 case PSEUDO_UNDEF:
168 return "UNDEF";
169 default:
170 snprintf(buf, 64, "<bad pseudo type %d>", pseudo->type);
172 return buf;
175 static const char *opcodes[] = {
176 [OP_BADOP] = "bad_op",
178 /* Fn entrypoint */
179 [OP_ENTRY] = "<entry-point>",
181 /* Terminator */
182 [OP_RET] = "ret",
183 [OP_BR] = "br",
184 [OP_CBR] = "cbr",
185 [OP_SWITCH] = "switch",
186 [OP_UNREACH] = "unreachable",
187 [OP_COMPUTEDGOTO] = "jmp *",
189 /* Binary */
190 [OP_ADD] = "add",
191 [OP_SUB] = "sub",
192 [OP_MUL] = "mul",
193 [OP_DIVU] = "divu",
194 [OP_DIVS] = "divs",
195 [OP_MODU] = "modu",
196 [OP_MODS] = "mods",
197 [OP_SHL] = "shl",
198 [OP_LSR] = "lsr",
199 [OP_ASR] = "asr",
201 /* Floating-point Binary */
202 [OP_FADD] = "fadd",
203 [OP_FSUB] = "fsub",
204 [OP_FMUL] = "fmul",
205 [OP_FDIV] = "fdiv",
207 /* Logical */
208 [OP_AND] = "and",
209 [OP_OR] = "or",
210 [OP_XOR] = "xor",
212 /* Binary comparison */
213 [OP_SET_EQ] = "seteq",
214 [OP_SET_NE] = "setne",
215 [OP_SET_LE] = "setle",
216 [OP_SET_GE] = "setge",
217 [OP_SET_LT] = "setlt",
218 [OP_SET_GT] = "setgt",
219 [OP_SET_B] = "setb",
220 [OP_SET_A] = "seta",
221 [OP_SET_BE] = "setbe",
222 [OP_SET_AE] = "setae",
224 /* floating-point comparison */
225 [OP_FCMP_ORD] = "fcmpord",
226 [OP_FCMP_OEQ] = "fcmpoeq",
227 [OP_FCMP_ONE] = "fcmpone",
228 [OP_FCMP_OLE] = "fcmpole",
229 [OP_FCMP_OGE] = "fcmpoge",
230 [OP_FCMP_OLT] = "fcmpolt",
231 [OP_FCMP_OGT] = "fcmpogt",
232 [OP_FCMP_UEQ] = "fcmpueq",
233 [OP_FCMP_UNE] = "fcmpune",
234 [OP_FCMP_ULE] = "fcmpule",
235 [OP_FCMP_UGE] = "fcmpuge",
236 [OP_FCMP_ULT] = "fcmpult",
237 [OP_FCMP_UGT] = "fcmpugt",
238 [OP_FCMP_UNO] = "fcmpuno",
240 /* Uni */
241 [OP_NOT] = "not",
242 [OP_NEG] = "neg",
243 [OP_FNEG] = "fneg",
245 /* Special three-input */
246 [OP_SEL] = "select",
248 /* Memory */
249 [OP_LOAD] = "load",
250 [OP_STORE] = "store",
251 [OP_SETVAL] = "set",
252 [OP_SETFVAL] = "setfval",
253 [OP_SYMADDR] = "symaddr",
255 /* Other */
256 [OP_PHI] = "phi",
257 [OP_PHISOURCE] = "phisrc",
258 [OP_SEXT] = "sext",
259 [OP_ZEXT] = "zext",
260 [OP_TRUNC] = "trunc",
261 [OP_FCVTU] = "fcvtu",
262 [OP_FCVTS] = "fcvts",
263 [OP_UCVTF] = "ucvtf",
264 [OP_SCVTF] = "scvtf",
265 [OP_FCVTF] = "fcvtf",
266 [OP_UTPTR] = "utptr",
267 [OP_PTRTU] = "ptrtu",
268 [OP_PTRCAST] = "ptrcast",
269 [OP_INLINED_CALL] = "# call",
270 [OP_CALL] = "call",
271 [OP_SLICE] = "slice",
272 [OP_NOP] = "nop",
273 [OP_DEATHNOTE] = "dead",
274 [OP_ASM] = "asm",
276 /* Sparse tagging (line numbers, context, whatever) */
277 [OP_CONTEXT] = "context",
278 [OP_RANGE] = "range-check",
280 [OP_COPY] = "copy",
283 static char *show_asm_constraints(char *buf, const char *sep, struct asm_constraint_list *list)
285 struct asm_constraint *entry;
287 FOR_EACH_PTR(list, entry) {
288 buf += sprintf(buf, "%s\"%s\"", sep, entry->constraint);
289 if (entry->pseudo)
290 buf += sprintf(buf, " (%s)", show_pseudo(entry->pseudo));
291 if (entry->ident)
292 buf += sprintf(buf, " [%s]", show_ident(entry->ident));
293 sep = ", ";
294 } END_FOR_EACH_PTR(entry);
295 return buf;
298 static char *show_asm(char *buf, struct instruction *insn)
300 struct asm_rules *rules = insn->asm_rules;
302 buf += sprintf(buf, "\"%s\"", insn->string);
303 buf = show_asm_constraints(buf, "\n\t\tout: ", rules->outputs);
304 buf = show_asm_constraints(buf, "\n\t\tin: ", rules->inputs);
305 buf = show_asm_constraints(buf, "\n\t\tclobber: ", rules->clobbers);
306 return buf;
309 const char *show_instruction(struct instruction *insn)
311 int opcode = insn->opcode;
312 static char buffer[4096];
313 char *buf;
315 buf = buffer;
316 if (!insn->bb)
317 buf += sprintf(buf, "# ");
319 if (opcode < ARRAY_SIZE(opcodes)) {
320 const char *op = opcodes[opcode];
321 if (!op)
322 buf += sprintf(buf, "opcode:%d", opcode);
323 else
324 buf += sprintf(buf, "%s", op);
325 if (insn->size)
326 buf += sprintf(buf, ".%d", insn->size);
327 memset(buf, ' ', 20);
328 buf++;
331 if (buf < buffer + 12)
332 buf = buffer + 12;
333 switch (opcode) {
334 case OP_RET:
335 if (insn->src && insn->src != VOID)
336 buf += sprintf(buf, "%s", show_pseudo(insn->src));
337 break;
339 case OP_CBR:
340 buf += sprintf(buf, "%s, %s, %s", show_pseudo(insn->cond), show_label(insn->bb_true), show_label(insn->bb_false));
341 break;
343 case OP_BR:
344 buf += sprintf(buf, "%s", show_label(insn->bb_true));
345 break;
347 case OP_SETVAL: {
348 struct expression *expr = insn->val;
349 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
351 if (!expr) {
352 buf += sprintf(buf, "%s", "<none>");
353 break;
356 switch (expr->type) {
357 case EXPR_VALUE:
358 buf += sprintf(buf, "%lld", expr->value);
359 break;
360 case EXPR_FVALUE:
361 buf += sprintf(buf, "%Le", expr->fvalue);
362 break;
363 case EXPR_STRING:
364 buf += sprintf(buf, "%.40s", show_string(expr->string));
365 break;
366 case EXPR_SYMBOL:
367 buf += sprintf(buf, "%s", show_ident(expr->symbol->ident));
368 break;
369 case EXPR_LABEL:
370 buf += sprintf(buf, "%s", show_label(expr->symbol->bb_target));
371 break;
372 default:
373 buf += sprintf(buf, "SETVAL EXPR TYPE %d", expr->type);
375 break;
377 case OP_SETFVAL:
378 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
379 buf += sprintf(buf, "%Le", insn->fvalue);
380 break;
382 case OP_SWITCH: {
383 struct multijmp *jmp;
384 buf += sprintf(buf, "%s", show_pseudo(insn->cond));
385 FOR_EACH_PTR(insn->multijmp_list, jmp) {
386 if (jmp->begin == jmp->end)
387 buf += sprintf(buf, ", %lld -> %s", jmp->begin, show_label(jmp->target));
388 else if (jmp->begin < jmp->end)
389 buf += sprintf(buf, ", %lld ... %lld -> %s", jmp->begin, jmp->end, show_label(jmp->target));
390 else
391 buf += sprintf(buf, ", default -> %s", show_label(jmp->target));
392 } END_FOR_EACH_PTR(jmp);
393 break;
395 case OP_COMPUTEDGOTO: {
396 struct multijmp *jmp;
397 buf += sprintf(buf, "%s", show_pseudo(insn->src));
398 FOR_EACH_PTR(insn->multijmp_list, jmp) {
399 buf += sprintf(buf, ", %s", show_label(jmp->target));
400 } END_FOR_EACH_PTR(jmp);
401 break;
403 case OP_UNREACH:
404 break;
406 case OP_PHISOURCE: {
407 struct instruction *phi;
408 buf += sprintf(buf, "%s <- %s ", show_pseudo(insn->target), show_pseudo(insn->phi_src));
409 FOR_EACH_PTR(insn->phi_users, phi) {
410 buf += sprintf(buf, " (%s)", show_pseudo(phi->target));
411 } END_FOR_EACH_PTR(phi);
412 break;
415 case OP_PHI: {
416 pseudo_t phi;
417 const char *s = " <-";
418 buf += sprintf(buf, "%s", show_pseudo(insn->target));
419 FOR_EACH_PTR(insn->phi_list, phi) {
420 if (phi == VOID && !verbose)
421 continue;
422 buf += sprintf(buf, "%s %s", s, show_pseudo(phi));
423 s = ",";
424 } END_FOR_EACH_PTR(phi);
425 break;
427 case OP_LOAD:
428 buf += sprintf(buf, "%s <- %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
429 break;
430 case OP_STORE:
431 buf += sprintf(buf, "%s -> %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
432 break;
433 case OP_INLINED_CALL:
434 case OP_CALL: {
435 struct pseudo *arg;
436 if (insn->target && insn->target != VOID)
437 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
438 buf += sprintf(buf, "%s", show_pseudo(insn->func));
439 FOR_EACH_PTR(insn->arguments, arg) {
440 buf += sprintf(buf, ", %s", show_pseudo(arg));
441 } END_FOR_EACH_PTR(arg);
442 break;
444 case OP_SEXT: case OP_ZEXT:
445 case OP_TRUNC:
446 case OP_FCVTU: case OP_FCVTS:
447 case OP_UCVTF: case OP_SCVTF:
448 case OP_FCVTF:
449 case OP_UTPTR:
450 case OP_PTRTU:
451 case OP_PTRCAST:
452 buf += sprintf(buf, "%s <- (%d) %s",
453 show_pseudo(insn->target),
454 type_size(insn->orig_type),
455 show_pseudo(insn->src));
456 break;
457 case OP_BINARY ... OP_BINARY_END:
458 case OP_FPCMP ... OP_FPCMP_END:
459 case OP_BINCMP ... OP_BINCMP_END:
460 buf += sprintf(buf, "%s <- %s, %s", show_pseudo(insn->target), show_pseudo(insn->src1), show_pseudo(insn->src2));
461 break;
463 case OP_SEL:
464 buf += sprintf(buf, "%s <- %s, %s, %s", show_pseudo(insn->target),
465 show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
466 break;
468 case OP_SLICE:
469 buf += sprintf(buf, "%s <- %s, %d, %d", show_pseudo(insn->target), show_pseudo(insn->base), insn->from, insn->len);
470 break;
472 case OP_NOT: case OP_NEG:
473 case OP_FNEG:
474 case OP_SYMADDR:
475 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
476 break;
478 case OP_CONTEXT:
479 buf += sprintf(buf, "%s%d", insn->check ? "check: " : "", insn->increment);
480 break;
481 case OP_RANGE:
482 buf += sprintf(buf, "%s between %s..%s", show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
483 break;
484 case OP_NOP:
485 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
486 break;
487 case OP_DEATHNOTE:
488 buf += sprintf(buf, "%s", show_pseudo(insn->target));
489 break;
490 case OP_ASM:
491 buf = show_asm(buf, insn);
492 break;
493 case OP_COPY:
494 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src));
495 break;
496 default:
497 break;
500 if (buf >= buffer + sizeof(buffer))
501 die("instruction buffer overflowed %td\n", buf - buffer);
502 do { --buf; } while (*buf == ' ');
503 *++buf = 0;
504 return buffer;
507 void show_bb(struct basic_block *bb)
509 struct instruction *insn;
511 printf("%s:\n", show_label(bb));
512 if (verbose) {
513 pseudo_t needs, defines;
514 printf("%s:%d\n", stream_name(bb->pos.stream), bb->pos.line);
516 FOR_EACH_PTR(bb->needs, needs) {
517 struct instruction *def = needs->def;
518 if (def->opcode != OP_PHI) {
519 printf(" **uses %s (from %s)**\n", show_pseudo(needs), show_label(def->bb));
520 } else {
521 pseudo_t phi;
522 const char *sep = " ";
523 printf(" **uses %s (from", show_pseudo(needs));
524 FOR_EACH_PTR(def->phi_list, phi) {
525 if (phi == VOID)
526 continue;
527 printf("%s(%s:%s)", sep, show_pseudo(phi), show_label(phi->def->bb));
528 sep = ", ";
529 } END_FOR_EACH_PTR(phi);
530 printf(")**\n");
532 } END_FOR_EACH_PTR(needs);
534 FOR_EACH_PTR(bb->defines, defines) {
535 printf(" **defines %s **\n", show_pseudo(defines));
536 } END_FOR_EACH_PTR(defines);
538 if (bb->parents) {
539 struct basic_block *from;
540 FOR_EACH_PTR(bb->parents, from) {
541 printf(" **from %s (%s:%d:%d)**\n", show_label(from),
542 stream_name(from->pos.stream), from->pos.line, from->pos.pos);
543 } END_FOR_EACH_PTR(from);
546 if (bb->children) {
547 struct basic_block *to;
548 FOR_EACH_PTR(bb->children, to) {
549 printf(" **to %s (%s:%d:%d)**\n", show_label(to),
550 stream_name(to->pos.stream), to->pos.line, to->pos.pos);
551 } END_FOR_EACH_PTR(to);
555 FOR_EACH_PTR(bb->insns, insn) {
556 if (!insn->bb && verbose < 2)
557 continue;
558 printf("\t%s\n", show_instruction(insn));
559 } END_FOR_EACH_PTR(insn);
560 if (!bb_terminated(bb))
561 printf("\tEND\n");
564 static void show_symbol_usage(pseudo_t pseudo)
566 struct pseudo_user *pu;
568 if (pseudo) {
569 FOR_EACH_PTR(pseudo->users, pu) {
570 printf("\t%s\n", show_instruction(pu->insn));
571 } END_FOR_EACH_PTR(pu);
575 void show_entry(struct entrypoint *ep)
577 struct symbol *sym;
578 struct basic_block *bb;
580 printf("%s:\n", show_ident(ep->name->ident));
582 if (verbose) {
583 printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
585 FOR_EACH_PTR(ep->syms, sym) {
586 if (!sym->pseudo)
587 continue;
588 if (!sym->pseudo->users)
589 continue;
590 printf(" sym: %p %s\n", sym, show_ident(sym->ident));
591 if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE))
592 printf("\texternal visibility\n");
593 show_symbol_usage(sym->pseudo);
594 } END_FOR_EACH_PTR(sym);
596 printf("\n");
599 FOR_EACH_PTR(ep->bbs, bb) {
600 if (!bb)
601 continue;
602 if (!bb->parents && !bb->children && !bb->insns && verbose < 2)
603 continue;
604 show_bb(bb);
605 printf("\n");
606 } END_FOR_EACH_PTR(bb);
608 printf("\n");
611 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos)
613 if (label->bb_target)
614 warning(pos, "label '%s' already bound", show_ident(label->ident));
615 label->bb_target = bb;
618 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label)
620 struct basic_block *bb = label->bb_target;
622 if (!bb) {
623 bb = alloc_basic_block(ep, label->pos);
624 label->bb_target = bb;
626 return bb;
629 static void finish_block(struct entrypoint *ep)
631 struct basic_block *src = ep->active;
632 if (bb_reachable(src))
633 ep->active = NULL;
636 static void add_goto(struct entrypoint *ep, struct basic_block *dst)
638 struct basic_block *src = ep->active;
639 if (bb_reachable(src)) {
640 struct instruction *br = alloc_instruction(OP_BR, 0);
641 br->bb_true = dst;
642 add_bb(&dst->parents, src);
643 add_bb(&src->children, dst);
644 br->bb = src;
645 add_instruction(&src->insns, br);
646 ep->active = NULL;
650 static void add_one_insn(struct entrypoint *ep, struct instruction *insn)
652 struct basic_block *bb = ep->active;
654 if (bb_reachable(bb)) {
655 insn->bb = bb;
656 add_instruction(&bb->insns, insn);
660 static void add_unreachable(struct entrypoint *ep)
662 struct instruction *insn = alloc_instruction(OP_UNREACH, 0);
663 add_one_insn(ep, insn);
664 ep->active = NULL;
667 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb)
669 if (!bb_terminated(ep->active))
670 add_goto(ep, bb);
672 ep->active = bb;
673 if (bb_reachable(bb))
674 add_bb(&ep->bbs, bb);
677 static void remove_parent(struct basic_block *child, struct basic_block *parent)
679 remove_bb_from_list(&child->parents, parent, 1);
680 if (!child->parents)
681 repeat_phase |= REPEAT_CFG_CLEANUP;
684 /* Change a "switch" or a conditional branch into a branch */
685 void insert_branch(struct basic_block *bb, struct instruction *jmp, struct basic_block *target)
687 struct instruction *br, *old;
688 struct basic_block *child;
690 /* Remove the switch */
691 old = delete_last_instruction(&bb->insns);
692 assert(old == jmp);
693 kill_instruction(old);
695 br = alloc_instruction(OP_BR, 0);
696 br->bb = bb;
697 br->bb_true = target;
698 add_instruction(&bb->insns, br);
700 FOR_EACH_PTR(bb->children, child) {
701 if (child == target) {
702 target = NULL; /* Trigger just once */
703 continue;
705 DELETE_CURRENT_PTR(child);
706 remove_parent(child, bb);
707 } END_FOR_EACH_PTR(child);
708 PACK_PTR_LIST(&bb->children);
712 void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi_node, pseudo_t if_true, pseudo_t if_false)
714 pseudo_t target;
715 struct instruction *select;
717 /* Remove the 'br' */
718 delete_last_instruction(&bb->insns);
720 select = alloc_typed_instruction(OP_SEL, phi_node->type);
721 select->bb = bb;
723 assert(br->cond);
724 use_pseudo(select, br->cond, &select->src1);
726 target = phi_node->target;
727 assert(target->def == phi_node);
728 select->target = target;
729 target->def = select;
731 use_pseudo(select, if_true, &select->src2);
732 use_pseudo(select, if_false, &select->src3);
734 add_instruction(&bb->insns, select);
735 add_instruction(&bb->insns, br);
738 static inline int bb_empty(struct basic_block *bb)
740 return !bb->insns;
743 /* Add a label to the currently active block, return new active block */
744 static struct basic_block * add_label(struct entrypoint *ep, struct symbol *label)
746 struct basic_block *bb = label->bb_target;
748 if (bb) {
749 set_activeblock(ep, bb);
750 return bb;
752 bb = ep->active;
753 if (!bb_reachable(bb) || !bb_empty(bb)) {
754 bb = alloc_basic_block(ep, label->pos);
755 set_activeblock(ep, bb);
757 label->bb_target = bb;
758 return bb;
761 static void add_branch(struct entrypoint *ep, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false)
763 struct basic_block *bb = ep->active;
764 struct instruction *br;
766 if (bb_reachable(bb)) {
767 br = alloc_instruction(OP_CBR, 0);
768 use_pseudo(br, cond, &br->cond);
769 br->bb_true = bb_true;
770 br->bb_false = bb_false;
771 add_bb(&bb_true->parents, bb);
772 add_bb(&bb_false->parents, bb);
773 add_bb(&bb->children, bb_true);
774 add_bb(&bb->children, bb_false);
775 add_one_insn(ep, br);
779 pseudo_t alloc_pseudo(struct instruction *def)
781 static int nr = 0;
782 struct pseudo * pseudo = __alloc_pseudo(0);
783 pseudo->type = PSEUDO_REG;
784 pseudo->nr = ++nr;
785 pseudo->def = def;
786 return pseudo;
789 static pseudo_t symbol_pseudo(struct entrypoint *ep, struct symbol *sym)
791 pseudo_t pseudo;
793 if (!sym)
794 return VOID;
796 pseudo = sym->pseudo;
797 if (!pseudo) {
798 pseudo = __alloc_pseudo(0);
799 pseudo->nr = -1;
800 pseudo->type = PSEUDO_SYM;
801 pseudo->sym = sym;
802 pseudo->ident = sym->ident;
803 sym->pseudo = pseudo;
804 add_pseudo(&ep->accesses, pseudo);
806 /* Symbol pseudos have neither nr nor def */
807 return pseudo;
810 pseudo_t value_pseudo(long long val)
812 #define MAX_VAL_HASH 64
813 static struct pseudo_list *prev[MAX_VAL_HASH];
814 int hash = val & (MAX_VAL_HASH-1);
815 struct pseudo_list **list = prev + hash;
816 pseudo_t pseudo;
818 FOR_EACH_PTR(*list, pseudo) {
819 if (pseudo->value == val)
820 return pseudo;
821 } END_FOR_EACH_PTR(pseudo);
823 pseudo = __alloc_pseudo(0);
824 pseudo->type = PSEUDO_VAL;
825 pseudo->value = val;
826 add_pseudo(list, pseudo);
828 /* Value pseudos have neither nr, usage nor def */
829 return pseudo;
832 pseudo_t undef_pseudo(void)
834 pseudo_t pseudo = __alloc_pseudo(0);
835 pseudo->type = PSEUDO_UNDEF;
836 return pseudo;
839 static pseudo_t argument_pseudo(struct entrypoint *ep, int nr)
841 pseudo_t pseudo = __alloc_pseudo(0);
842 struct instruction *entry = ep->entry;
844 pseudo->type = PSEUDO_ARG;
845 pseudo->nr = nr;
846 pseudo->def = entry;
847 add_pseudo(&entry->arg_list, pseudo);
849 /* Argument pseudos have neither usage nor def */
850 return pseudo;
853 struct instruction *alloc_phisrc(pseudo_t pseudo, struct symbol *type)
855 struct instruction *insn = alloc_typed_instruction(OP_PHISOURCE, type);
856 pseudo_t phi = __alloc_pseudo(0);
857 static int nr = 0;
859 phi->type = PSEUDO_PHI;
860 phi->nr = ++nr;
861 phi->def = insn;
863 use_pseudo(insn, pseudo, &insn->phi_src);
864 insn->target = phi;
865 return insn;
868 pseudo_t alloc_phi(struct basic_block *source, pseudo_t pseudo, struct symbol *type)
870 struct instruction *insn;
872 if (!source)
873 return VOID;
875 insn = alloc_phisrc(pseudo, type);
876 insn->bb = source;
877 add_instruction(&source->insns, insn);
878 return insn->target;
881 struct instruction *alloc_phi_node(struct basic_block *bb, struct symbol *type, struct ident *ident)
883 struct instruction *phi_node = alloc_typed_instruction(OP_PHI, type);
884 pseudo_t phi;
886 phi = alloc_pseudo(phi_node);
887 phi->ident = ident;
888 phi->def = phi_node;
889 phi_node->target = phi;
890 phi_node->bb = bb;
891 return phi_node;
894 void add_phi_node(struct basic_block *bb, struct instruction *phi_node)
896 struct instruction *insn;
898 FOR_EACH_PTR(bb->insns, insn) {
899 enum opcode op = insn->opcode;
900 if (op == OP_PHI)
901 continue;
902 INSERT_CURRENT(phi_node, insn);
903 return;
904 } END_FOR_EACH_PTR(insn);
906 // FIXME
907 add_instruction(&bb->insns, phi_node);
910 struct instruction *insert_phi_node(struct basic_block *bb, struct symbol *var)
912 struct instruction *phi_node = alloc_phi_node(bb, var, var->ident);
913 add_phi_node(bb, phi_node);
914 return phi_node;
918 * We carry the "access_data" structure around for any accesses,
919 * which simplifies things a lot. It contains all the access
920 * information in one place.
922 struct access_data {
923 struct symbol *type; // ctype
924 struct symbol *btype; // base type of bitfields
925 pseudo_t address; // pseudo containing address ..
926 unsigned int offset; // byte offset
929 static int linearize_simple_address(struct entrypoint *ep,
930 struct expression *addr,
931 struct access_data *ad)
933 if (addr->type == EXPR_SYMBOL) {
934 linearize_one_symbol(ep, addr->symbol);
935 ad->address = symbol_pseudo(ep, addr->symbol);
936 return 1;
938 if (addr->type == EXPR_BINOP) {
939 if (addr->right->type == EXPR_VALUE) {
940 if (addr->op == '+') {
941 ad->offset += get_expression_value(addr->right);
942 return linearize_simple_address(ep, addr->left, ad);
946 ad->address = linearize_expression(ep, addr);
947 return 1;
950 static struct symbol *bitfield_base_type(struct symbol *sym)
952 struct symbol *base = sym;
954 if (sym) {
955 if (sym->type == SYM_NODE)
956 base = base->ctype.base_type;
957 if (base->type == SYM_BITFIELD)
958 return base->ctype.base_type;
960 return sym;
963 static int linearize_address_gen(struct entrypoint *ep,
964 struct expression *expr,
965 struct access_data *ad)
967 struct symbol *ctype = expr->ctype;
969 if (!ctype)
970 return 0;
971 ad->type = ctype;
972 if (expr->type == EXPR_PREOP && expr->op == '*')
973 return linearize_simple_address(ep, expr->unop, ad);
975 warning(expr->pos, "generating address of non-lvalue (%d)", expr->type);
976 return 0;
979 static pseudo_t add_load(struct entrypoint *ep, struct access_data *ad)
981 struct instruction *insn;
982 pseudo_t new;
984 if (!ep->active)
985 return VOID;
987 insn = alloc_typed_instruction(OP_LOAD, ad->btype);
988 new = alloc_pseudo(insn);
990 insn->target = new;
991 insn->offset = ad->offset;
992 insn->is_volatile = ad->type && (ad->type->ctype.modifiers & MOD_VOLATILE);
993 use_pseudo(insn, ad->address, &insn->src);
994 add_one_insn(ep, insn);
995 return new;
998 static void add_store(struct entrypoint *ep, struct access_data *ad, pseudo_t value)
1000 struct basic_block *bb = ep->active;
1001 struct instruction *store;
1003 if (!bb)
1004 return;
1006 store = alloc_typed_instruction(OP_STORE, ad->btype);
1007 store->offset = ad->offset;
1008 store->is_volatile = ad->type && (ad->type->ctype.modifiers & MOD_VOLATILE);
1009 use_pseudo(store, value, &store->target);
1010 use_pseudo(store, ad->address, &store->src);
1011 add_one_insn(ep, store);
1014 static pseudo_t linearize_bitfield_insert(struct entrypoint *ep,
1015 pseudo_t ori, pseudo_t val, struct symbol *ctype, struct symbol *btype)
1017 unsigned int shift = ctype->bit_offset;
1018 unsigned int size = ctype->bit_size;
1019 unsigned long long mask = ((1ULL << size) - 1);
1020 unsigned long long smask= bits_mask(btype->bit_size);
1022 val = add_cast(ep, btype, ctype, OP_ZEXT, val);
1023 if (shift) {
1024 val = add_binary_op(ep, btype, OP_SHL, val, value_pseudo(shift));
1025 mask <<= shift;
1027 ori = add_binary_op(ep, btype, OP_AND, ori, value_pseudo(~mask & smask));
1028 val = add_binary_op(ep, btype, OP_OR, ori, val);
1030 return val;
1033 static pseudo_t linearize_store_gen(struct entrypoint *ep,
1034 pseudo_t value,
1035 struct access_data *ad)
1037 struct symbol *ctype = ad->type;
1038 struct symbol *btype;
1039 pseudo_t store = value;
1041 if (!ep->active)
1042 return VOID;
1044 btype = ad->btype = bitfield_base_type(ctype);
1045 if (type_size(btype) != type_size(ctype)) {
1046 pseudo_t orig = add_load(ep, ad);
1047 store = linearize_bitfield_insert(ep, orig, value, ctype, btype);
1049 add_store(ep, ad, store);
1050 return value;
1053 static void taint_undefined_behaviour(struct instruction *insn)
1055 pseudo_t src2;
1057 switch (insn->opcode) {
1058 case OP_LSR:
1059 case OP_ASR:
1060 case OP_SHL:
1061 src2 = insn->src2;
1062 if (src2->type != PSEUDO_VAL)
1063 break;
1064 if ((unsigned long long)src2->value >= insn->size)
1065 insn->tainted = 1;
1066 break;
1070 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right)
1072 struct instruction *insn = alloc_typed_instruction(op, ctype);
1073 pseudo_t target = alloc_pseudo(insn);
1074 insn->target = target;
1075 use_pseudo(insn, left, &insn->src1);
1076 use_pseudo(insn, right, &insn->src2);
1077 add_one_insn(ep, insn);
1078 return target;
1081 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val)
1083 struct instruction *insn = alloc_typed_instruction(OP_SETVAL, ctype);
1084 pseudo_t target = alloc_pseudo(insn);
1085 insn->target = target;
1086 insn->val = val;
1087 add_one_insn(ep, insn);
1088 return target;
1091 static pseudo_t add_setfval(struct entrypoint *ep, struct symbol *ctype, long double fval)
1093 struct instruction *insn = alloc_typed_instruction(OP_SETFVAL, ctype);
1094 pseudo_t target = alloc_pseudo(insn);
1095 insn->target = target;
1096 insn->fvalue = fval;
1097 add_one_insn(ep, insn);
1098 return target;
1101 static pseudo_t add_symbol_address(struct entrypoint *ep, struct symbol *sym)
1103 struct instruction *insn = alloc_instruction(OP_SYMADDR, bits_in_pointer);
1104 pseudo_t target = alloc_pseudo(insn);
1106 insn->target = target;
1107 use_pseudo(insn, symbol_pseudo(ep, sym), &insn->src);
1108 add_one_insn(ep, insn);
1109 return target;
1112 static pseudo_t linearize_bitfield_extract(struct entrypoint *ep,
1113 pseudo_t val, struct symbol *ctype, struct symbol *btype)
1115 unsigned int off = ctype->bit_offset;
1117 if (off) {
1118 pseudo_t shift = value_pseudo(off);
1119 val = add_binary_op(ep, btype, OP_LSR, val, shift);
1121 val = cast_pseudo(ep, val, btype, ctype);
1122 return val;
1125 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct access_data *ad)
1127 struct symbol *ctype = ad->type;
1128 struct symbol *btype;
1129 pseudo_t new;
1131 if (!ep->active)
1132 return VOID;
1134 btype = ad->btype = bitfield_base_type(ctype);
1135 new = add_load(ep, ad);
1136 if (ctype->bit_size != type_size(btype))
1137 new = linearize_bitfield_extract(ep, new, ctype, btype);
1138 return new;
1141 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
1143 struct access_data ad = { NULL, };
1144 pseudo_t value;
1146 if (!linearize_address_gen(ep, expr, &ad))
1147 return VOID;
1148 value = linearize_load_gen(ep, &ad);
1149 return value;
1152 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
1154 struct access_data ad = { NULL, };
1155 pseudo_t old, new, one;
1156 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB;
1158 if (!linearize_address_gen(ep, expr->unop, &ad))
1159 return VOID;
1161 old = linearize_load_gen(ep, &ad);
1162 op = opcode_float(op, expr->ctype);
1163 if (is_float_type(expr->ctype))
1164 one = add_setfval(ep, expr->ctype, expr->op_value);
1165 else
1166 one = value_pseudo(expr->op_value);
1167 if (ad.btype != ad.type)
1168 old = cast_pseudo(ep, old, ad.type, ad.btype);
1169 new = add_binary_op(ep, ad.btype, op, old, one);
1170 if (ad.btype != ad.type)
1171 new = cast_pseudo(ep, new, ad.btype, ad.type);
1172 linearize_store_gen(ep, new, &ad);
1173 return postop ? old : new;
1176 static pseudo_t add_unop(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t src)
1178 struct instruction *insn = alloc_typed_instruction(op, ctype);
1179 pseudo_t new = alloc_pseudo(insn);
1181 insn->target = new;
1182 use_pseudo(insn, src, &insn->src1);
1183 add_one_insn(ep, insn);
1184 return new;
1187 static pseudo_t add_cast(struct entrypoint *ep, struct symbol *to,
1188 struct symbol *from, int op, pseudo_t src)
1190 pseudo_t new = add_unop(ep, to, op, src);
1191 new->def->orig_type = from;
1192 return new;
1195 static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr)
1197 pseudo_t pre = linearize_expression(ep, expr->base);
1198 struct instruction *insn = alloc_typed_instruction(OP_SLICE, expr->ctype);
1199 pseudo_t new = alloc_pseudo(insn);
1201 insn->target = new;
1202 insn->from = expr->r_bitpos;
1203 insn->len = expr->r_nrbits;
1204 use_pseudo(insn, pre, &insn->base);
1205 add_one_insn(ep, insn);
1206 return new;
1209 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
1211 pseudo_t pre = linearize_expression(ep, expr->unop);
1212 struct symbol *ctype = expr->ctype;
1213 switch (expr->op) {
1214 case '+':
1215 return pre;
1216 case '!': {
1217 pseudo_t zero = value_pseudo(0);
1218 return add_binary_op(ep, ctype, OP_SET_EQ, pre, zero);
1220 case '~':
1221 return add_unop(ep, ctype, OP_NOT, pre);
1222 case '-':
1223 return add_unop(ep, ctype, opcode_float(OP_NEG, ctype), pre);
1225 return VOID;
1228 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
1231 * '*' is an lvalue access, and is fundamentally different
1232 * from an arithmetic operation. Maybe it should have an
1233 * expression type of its own..
1235 if (expr->op == '*')
1236 return linearize_access(ep, expr);
1237 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
1238 return linearize_inc_dec(ep, expr, 0);
1239 return linearize_regular_preop(ep, expr);
1242 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr)
1244 return linearize_inc_dec(ep, expr, 1);
/*
 * Casts to pointers are "less safe" than other casts, since
 * they imply type-unsafe accesses. "void *" is a special
 * case, since you can't access through it anyway without another
 * cast.
 */
/* Coarse "machine type" classes used to pick a cast opcode. */
enum mtype {
	MTYPE_UINT,	// integer type without MOD_SIGNED
	MTYPE_SINT,	// integer type with MOD_SIGNED
	MTYPE_PTR,	// pointer to anything but void
	MTYPE_VPTR,	// pointer to void -- TODO: must be removed ?
	MTYPE_FLOAT,	// floating-point base type
	MTYPE_BAD,	// everything else
};
1262 static enum mtype get_mtype(struct symbol *s)
1264 int sign = (s->ctype.modifiers & MOD_SIGNED) ? 1 : 0;
1266 retry: switch (s->type) {
1267 case SYM_NODE:
1268 s = s->ctype.base_type;
1269 goto retry;
1270 case SYM_PTR:
1271 if (s->ctype.base_type == &void_ctype)
1272 return MTYPE_VPTR;
1273 return MTYPE_PTR;
1274 case SYM_BITFIELD:
1275 case SYM_RESTRICT:
1276 case SYM_FOULED:
1277 case SYM_ENUM:
1278 s = s->ctype.base_type;
1279 /* fall-through */
1280 case_int:
1281 return sign ? MTYPE_SINT : MTYPE_UINT;
1282 case SYM_BASETYPE:
1283 if (s->ctype.base_type == &fp_type)
1284 return MTYPE_FLOAT;
1285 if (s->ctype.base_type == &int_type)
1286 goto case_int;
1287 /* fall-through */
1288 default:
1289 return MTYPE_BAD;
1293 static int get_cast_opcode(struct symbol *dst, struct symbol *src)
1295 enum mtype stype = get_mtype(src);
1296 enum mtype dtype = get_mtype(dst);
1298 switch (dtype) {
1299 case MTYPE_FLOAT:
1300 switch (stype) {
1301 case MTYPE_FLOAT:
1302 if (dst->bit_size == src->bit_size)
1303 return OP_NOP;
1304 return OP_FCVTF;
1305 case MTYPE_UINT:
1306 return OP_UCVTF;
1307 case MTYPE_SINT:
1308 return OP_SCVTF;
1309 default:
1310 return OP_BADOP;
1312 case MTYPE_PTR:
1313 switch (stype) {
1314 case MTYPE_UINT:
1315 case MTYPE_SINT:
1316 return OP_UTPTR;
1317 case MTYPE_PTR:
1318 case MTYPE_VPTR:
1319 return OP_PTRCAST;
1320 default:
1321 return OP_BADOP;
1323 case MTYPE_VPTR:
1324 switch (stype) {
1325 case MTYPE_PTR:
1326 case MTYPE_VPTR:
1327 case MTYPE_UINT:
1328 stype = MTYPE_UINT;
1329 /* fall through */
1330 case MTYPE_SINT:
1331 break;
1332 default:
1333 return OP_BADOP;
1335 /* fall through */
1336 case MTYPE_UINT:
1337 case MTYPE_SINT:
1338 switch (stype) {
1339 case MTYPE_FLOAT:
1340 return dtype == MTYPE_UINT ? OP_FCVTU : OP_FCVTS;
1341 case MTYPE_PTR:
1342 return OP_PTRTU;
1343 case MTYPE_VPTR:
1344 case MTYPE_UINT:
1345 case MTYPE_SINT:
1346 if (dst->bit_size ==src->bit_size)
1347 return OP_NOP;
1348 if (dst->bit_size < src->bit_size)
1349 return OP_TRUNC;
1350 return stype == MTYPE_SINT ? OP_SEXT : OP_ZEXT;
1351 default:
1352 return OP_BADOP;
1354 /* fall through */
1355 default:
1356 if (src->type == SYM_NODE)
1357 src = src->ctype.base_type;
1358 if (dst->type == SYM_NODE)
1359 dst = dst->ctype.base_type;
1360 if (src == dst)
1361 return OP_NOP;
1362 return OP_BADOP;
1366 static pseudo_t cast_pseudo(struct entrypoint *ep, pseudo_t src, struct symbol *from, struct symbol *to)
1368 const struct position pos = current_pos;
1369 pseudo_t result;
1370 struct instruction *insn;
1371 int opcode;
1373 if (src == VOID)
1374 return VOID;
1375 if (!from || !to)
1376 return VOID;
1377 if (from->bit_size < 0 || to->bit_size < 0)
1378 return VOID;
1379 opcode = get_cast_opcode(to, from);
1380 switch (opcode) {
1381 case OP_NOP:
1382 return src;
1383 case OP_UTPTR:
1384 if (from->bit_size == to->bit_size)
1385 break;
1386 if (src == value_pseudo(0))
1387 break;
1388 if (Wint_to_pointer_cast)
1389 warning(pos, "non size-preserving integer to pointer cast");
1390 src = cast_pseudo(ep, src, from, size_t_ctype);
1391 from = size_t_ctype;
1392 break;
1393 case OP_PTRTU:
1394 if (from->bit_size == to->bit_size)
1395 break;
1396 if (Wpointer_to_int_cast)
1397 warning(pos, "non size-preserving pointer to integer cast");
1398 src = cast_pseudo(ep, src, from, size_t_ctype);
1399 return cast_pseudo(ep, src, size_t_ctype, to);
1400 case OP_BADOP:
1401 return VOID;
1402 default:
1403 break;
1405 insn = alloc_typed_instruction(opcode, to);
1406 result = alloc_pseudo(insn);
1407 insn->target = result;
1408 insn->orig_type = from;
1409 use_pseudo(insn, src, &insn->src);
1410 add_one_insn(ep, insn);
1411 return result;
1414 static int map_opcode(int opcode, struct symbol *ctype)
1416 if (ctype && is_float_type(ctype))
1417 return opcode_table[opcode].to_float;
1418 if (ctype && (ctype->ctype.modifiers & MOD_SIGNED)) {
1419 switch(opcode) {
1420 case OP_DIVU: case OP_MODU: case OP_LSR:
1421 opcode++;
1424 return opcode;
1427 static inline pseudo_t add_convert_to_bool(struct entrypoint *ep, pseudo_t src, struct symbol *type)
1429 pseudo_t zero;
1430 int op;
1432 if (!type || src == VOID)
1433 return VOID;
1434 if (is_bool_type(type))
1435 return src;
1436 if (src->type == PSEUDO_VAL && (src->value == 0 || src->value == 1))
1437 return src;
1438 if (is_float_type(type)) {
1439 zero = add_setfval(ep, type, 0.0);
1440 op = map_opcode(OP_SET_NE, type);
1441 } else {
1442 zero = value_pseudo(0);
1443 op = OP_SET_NE;
1445 return add_binary_op(ep, &bool_ctype, op, src, zero);
1448 static pseudo_t linearize_expression_to_bool(struct entrypoint *ep, struct expression *expr)
1450 pseudo_t dst;
1451 dst = linearize_expression(ep, expr);
1452 dst = add_convert_to_bool(ep, dst, expr->ctype);
1453 return dst;
1456 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
1458 struct access_data ad = { NULL, };
1459 struct expression *target = expr->left;
1460 struct expression *src = expr->right;
1461 struct symbol *ctype;
1462 pseudo_t value;
1464 value = linearize_expression(ep, src);
1465 if (!target || !linearize_address_gen(ep, target, &ad))
1466 return value;
1467 if (expr->op != '=') {
1468 pseudo_t oldvalue = linearize_load_gen(ep, &ad);
1469 pseudo_t dst;
1470 static const int op_trans[] = {
1471 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD,
1472 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB,
1473 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MUL,
1474 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIVU,
1475 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MODU,
1476 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL,
1477 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_LSR,
1478 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND,
1479 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR,
1480 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR
1482 int opcode;
1484 if (!src)
1485 return VOID;
1487 ctype = src->ctype;
1488 oldvalue = cast_pseudo(ep, oldvalue, target->ctype, ctype);
1489 opcode = map_opcode(op_trans[expr->op - SPECIAL_BASE], ctype);
1490 dst = add_binary_op(ep, ctype, opcode, oldvalue, value);
1491 taint_undefined_behaviour(dst->def);
1492 value = cast_pseudo(ep, dst, ctype, expr->ctype);
1494 value = linearize_store_gen(ep, value, &ad);
1495 return value;
1498 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr)
1500 struct expression *arg, *fn;
1501 struct instruction *insn = alloc_typed_instruction(OP_CALL, expr->ctype);
1502 pseudo_t retval, call;
1503 struct ctype *ctype = NULL;
1504 struct symbol *fntype;
1505 struct context *context;
1507 if (!expr->ctype)
1508 return VOID;
1510 fn = expr->fn;
1511 fntype = fn->ctype;
1513 // handle builtins
1514 if (fntype->op && fntype->op->linearize)
1515 return fntype->op->linearize(ep, expr);
1517 ctype = &fntype->ctype;
1518 if (fntype->type == SYM_NODE)
1519 fntype = fntype->ctype.base_type;
1521 add_symbol(&insn->fntypes, fntype);
1522 FOR_EACH_PTR(expr->args, arg) {
1523 pseudo_t new = linearize_expression(ep, arg);
1524 use_pseudo(insn, new, add_pseudo(&insn->arguments, new));
1525 add_symbol(&insn->fntypes, arg->ctype);
1526 } END_FOR_EACH_PTR(arg);
1528 if (fn->type == EXPR_PREOP && fn->op == '*' && is_func_type(fn->ctype))
1529 fn = fn->unop;
1531 if (fn->type == EXPR_SYMBOL) {
1532 call = symbol_pseudo(ep, fn->symbol);
1533 } else {
1534 call = linearize_expression(ep, fn);
1536 use_pseudo(insn, call, &insn->func);
1537 retval = VOID;
1538 if (expr->ctype != &void_ctype)
1539 retval = alloc_pseudo(insn);
1540 insn->target = retval;
1541 add_one_insn(ep, insn);
1543 if (ctype) {
1544 FOR_EACH_PTR(ctype->contexts, context) {
1545 int in = context->in;
1546 int out = context->out;
1547 int check = 0;
1548 int context_diff;
1549 if (in < 0) {
1550 check = 1;
1551 in = 0;
1553 if (out < 0) {
1554 check = 0;
1555 out = 0;
1557 context_diff = out - in;
1558 if (check || context_diff) {
1559 insn = alloc_instruction(OP_CONTEXT, 0);
1560 insn->increment = context_diff;
1561 insn->check = check;
1562 insn->context_expr = context->context;
1563 add_one_insn(ep, insn);
1565 } END_FOR_EACH_PTR(context);
1567 if (ctype->modifiers & MOD_NORETURN)
1568 add_unreachable(ep);
1571 return retval;
1574 static pseudo_t linearize_binop_bool(struct entrypoint *ep, struct expression *expr)
1576 pseudo_t src1, src2, dst;
1577 int op = (expr->op == SPECIAL_LOGICAL_OR) ? OP_OR : OP_AND;
1579 src1 = linearize_expression_to_bool(ep, expr->left);
1580 src2 = linearize_expression_to_bool(ep, expr->right);
1581 dst = add_binary_op(ep, &bool_ctype, op, src1, src2);
1582 if (expr->ctype != &bool_ctype)
1583 dst = cast_pseudo(ep, dst, &bool_ctype, expr->ctype);
1584 return dst;
1587 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr)
1589 pseudo_t src1, src2, dst;
1590 static const int opcode[] = {
1591 ['+'] = OP_ADD, ['-'] = OP_SUB,
1592 ['*'] = OP_MUL, ['/'] = OP_DIVU,
1593 ['%'] = OP_MODU, ['&'] = OP_AND,
1594 ['|'] = OP_OR, ['^'] = OP_XOR,
1595 [SPECIAL_LEFTSHIFT] = OP_SHL,
1596 [SPECIAL_RIGHTSHIFT] = OP_LSR,
1598 int op;
1600 src1 = linearize_expression(ep, expr->left);
1601 src2 = linearize_expression(ep, expr->right);
1602 op = map_opcode(opcode[expr->op], expr->ctype);
1603 dst = add_binary_op(ep, expr->ctype, op, src1, src2);
1604 taint_undefined_behaviour(dst->def);
1605 return dst;
1608 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1610 static pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1612 static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr)
1614 pseudo_t cond, valt, valf, res;
1615 struct instruction *insn;
1617 valt = linearize_expression(ep, expr->cond_true);
1618 valf = linearize_expression(ep, expr->cond_false);
1619 cond = linearize_expression(ep, expr->conditional);
1621 insn = alloc_typed_instruction(OP_SEL, expr->ctype);
1622 if (!expr->cond_true)
1623 valt = cond;
1624 use_pseudo(insn, cond, &insn->src1);
1625 use_pseudo(insn, valt, &insn->src2);
1626 use_pseudo(insn, valf, &insn->src3);
1628 res = alloc_pseudo(insn);
1629 insn->target = res;
1630 add_one_insn(ep, insn);
1631 return res;
1634 static pseudo_t add_join_conditional(struct entrypoint *ep, struct expression *expr,
1635 pseudo_t phi1, pseudo_t phi2)
1637 pseudo_t target;
1638 struct instruction *phi_node;
1640 if (phi1 == VOID)
1641 return phi2;
1642 if (phi2 == VOID)
1643 return phi1;
1645 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1646 use_pseudo(phi_node, phi1, add_pseudo(&phi_node->phi_list, phi1));
1647 use_pseudo(phi_node, phi2, add_pseudo(&phi_node->phi_list, phi2));
1648 phi_node->target = target = alloc_pseudo(phi_node);
1649 add_one_insn(ep, phi_node);
1650 return target;
1653 static pseudo_t linearize_short_conditional(struct entrypoint *ep, struct expression *expr,
1654 struct expression *cond,
1655 struct expression *expr_false)
1657 pseudo_t src1, src2;
1658 struct basic_block *bb_false;
1659 struct basic_block *merge;
1660 pseudo_t phi1, phi2;
1662 if (!expr_false || !ep->active)
1663 return VOID;
1665 bb_false = alloc_basic_block(ep, expr_false->pos);
1666 merge = alloc_basic_block(ep, expr->pos);
1668 src1 = linearize_expression(ep, cond);
1669 phi1 = alloc_phi(ep->active, src1, expr->ctype);
1670 add_branch(ep, src1, merge, bb_false);
1672 set_activeblock(ep, bb_false);
1673 src2 = linearize_expression(ep, expr_false);
1674 phi2 = alloc_phi(ep->active, src2, expr->ctype);
1675 set_activeblock(ep, merge);
1677 return add_join_conditional(ep, expr, phi1, phi2);
1680 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr,
1681 struct expression *cond,
1682 struct expression *expr_true,
1683 struct expression *expr_false)
1685 pseudo_t src1, src2;
1686 pseudo_t phi1, phi2;
1687 struct basic_block *bb_true, *bb_false, *merge;
1689 if (!cond || !expr_true || !expr_false || !ep->active)
1690 return VOID;
1691 bb_true = alloc_basic_block(ep, expr_true->pos);
1692 bb_false = alloc_basic_block(ep, expr_false->pos);
1693 merge = alloc_basic_block(ep, expr->pos);
1695 linearize_cond_branch(ep, cond, bb_true, bb_false);
1697 set_activeblock(ep, bb_true);
1698 src1 = linearize_expression(ep, expr_true);
1699 phi1 = alloc_phi(ep->active, src1, expr->ctype);
1700 add_goto(ep, merge);
1702 set_activeblock(ep, bb_false);
1703 src2 = linearize_expression(ep, expr_false);
1704 phi2 = alloc_phi(ep->active, src2, expr->ctype);
1705 set_activeblock(ep, merge);
1707 return add_join_conditional(ep, expr, phi1, phi2);
1710 static void insert_phis(struct basic_block *bb, pseudo_t src, struct symbol *ctype,
1711 struct instruction *node)
1713 struct basic_block *parent;
1715 FOR_EACH_PTR(bb->parents, parent) {
1716 struct instruction *br = delete_last_instruction(&parent->insns);
1717 pseudo_t phi = alloc_phi(parent, src, ctype);
1718 add_instruction(&parent->insns, br);
1719 use_pseudo(node, phi, add_pseudo(&node->phi_list, phi));
1720 } END_FOR_EACH_PTR(parent);
1723 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr)
1725 struct symbol *ctype = expr->ctype;
1726 struct basic_block *other, *merge;
1727 struct instruction *node;
1728 pseudo_t src1, src2, phi2;
1730 if (!ep->active || !expr->left || !expr->right)
1731 return VOID;
1733 other = alloc_basic_block(ep, expr->right->pos);
1734 merge = alloc_basic_block(ep, expr->pos);
1735 node = alloc_phi_node(merge, ctype, NULL);
1737 // LHS and its shortcut
1738 if (expr->op == SPECIAL_LOGICAL_OR) {
1739 linearize_cond_branch(ep, expr->left, merge, other);
1740 src1 = value_pseudo(1);
1741 } else {
1742 linearize_cond_branch(ep, expr->left, other, merge);
1743 src1 = value_pseudo(0);
1745 insert_phis(merge, src1, ctype, node);
1747 // RHS
1748 set_activeblock(ep, other);
1749 src2 = linearize_expression_to_bool(ep, expr->right);
1750 src2 = cast_pseudo(ep, src2, &bool_ctype, ctype);
1751 phi2 = alloc_phi(ep->active, src2, ctype);
1752 use_pseudo(node, phi2, add_pseudo(&node->phi_list, phi2));
1754 // join
1755 set_activeblock(ep, merge);
1756 add_instruction(&merge->insns, node);
1757 return node->target;
1760 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr)
1762 static const int cmpop[] = {
1763 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT,
1764 [SPECIAL_EQUAL] = OP_SET_EQ,
1765 [SPECIAL_NOTEQUAL] = OP_SET_NE,
1766 [SPECIAL_GTE] = OP_SET_GE,
1767 [SPECIAL_LTE] = OP_SET_LE,
1768 [SPECIAL_UNSIGNED_LT] = OP_SET_B,
1769 [SPECIAL_UNSIGNED_GT] = OP_SET_A,
1770 [SPECIAL_UNSIGNED_LTE] = OP_SET_BE,
1771 [SPECIAL_UNSIGNED_GTE] = OP_SET_AE,
1773 int op = opcode_float(cmpop[expr->op], expr->right->ctype);
1774 pseudo_t src1 = linearize_expression(ep, expr->left);
1775 pseudo_t src2 = linearize_expression(ep, expr->right);
1776 pseudo_t dst = add_binary_op(ep, expr->ctype, op, src1, src2);
1777 return dst;
1781 static pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1783 pseudo_t cond;
1785 if (!expr || !valid_type(expr->ctype) || !bb_reachable(ep->active))
1786 return VOID;
1788 switch (expr->type) {
1790 case EXPR_STRING:
1791 case EXPR_VALUE:
1792 add_goto(ep, expr->value ? bb_true : bb_false);
1793 return VOID;
1795 case EXPR_FVALUE:
1796 add_goto(ep, expr->fvalue ? bb_true : bb_false);
1797 return VOID;
1799 case EXPR_LOGICAL:
1800 linearize_logical_branch(ep, expr, bb_true, bb_false);
1801 return VOID;
1803 case EXPR_COMPARE:
1804 cond = linearize_compare(ep, expr);
1805 add_branch(ep, cond, bb_true, bb_false);
1806 break;
1808 case EXPR_PREOP:
1809 if (expr->op == '!')
1810 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true);
1811 /* fall through */
1812 default: {
1813 cond = linearize_expression_to_bool(ep, expr);
1814 add_branch(ep, cond, bb_true, bb_false);
1816 return VOID;
1819 return VOID;
1824 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1826 struct basic_block *next = alloc_basic_block(ep, expr->pos);
1828 if (expr->op == SPECIAL_LOGICAL_OR)
1829 linearize_cond_branch(ep, expr->left, bb_true, next);
1830 else
1831 linearize_cond_branch(ep, expr->left, next, bb_false);
1832 set_activeblock(ep, next);
1833 linearize_cond_branch(ep, expr->right, bb_true, bb_false);
1834 return VOID;
1837 static pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr)
1839 pseudo_t src;
1840 struct expression *orig = expr->cast_expression;
1842 if (!orig)
1843 return VOID;
1845 src = linearize_expression(ep, orig);
1846 return cast_pseudo(ep, src, orig->ctype, expr->ctype);
1849 static pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *ad)
1851 switch (initializer->type) {
1852 case EXPR_INITIALIZER: {
1853 struct expression *expr;
1854 FOR_EACH_PTR(initializer->expr_list, expr) {
1855 linearize_initializer(ep, expr, ad);
1856 } END_FOR_EACH_PTR(expr);
1857 break;
1859 case EXPR_POS:
1860 ad->offset = initializer->init_offset;
1861 linearize_initializer(ep, initializer->init_expr, ad);
1862 break;
1863 default: {
1864 pseudo_t value = linearize_expression(ep, initializer);
1865 ad->type = initializer->ctype;
1866 linearize_store_gen(ep, value, ad);
1867 return value;
1871 return VOID;
1874 static void linearize_argument(struct entrypoint *ep, struct symbol *arg, int nr)
1876 struct access_data ad = { NULL, };
1878 ad.type = arg;
1879 ad.address = symbol_pseudo(ep, arg);
1880 linearize_store_gen(ep, argument_pseudo(ep, nr), &ad);
1883 static pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
1885 if (!expr || !valid_type(expr->ctype))
1886 return VOID;
1888 current_pos = expr->pos;
1889 switch (expr->type) {
1890 case EXPR_SYMBOL:
1891 linearize_one_symbol(ep, expr->symbol);
1892 return add_symbol_address(ep, expr->symbol);
1894 case EXPR_VALUE:
1895 return value_pseudo(expr->value);
1897 case EXPR_STRING:
1898 case EXPR_LABEL:
1899 return add_setval(ep, expr->ctype, expr);
1901 case EXPR_FVALUE:
1902 return add_setfval(ep, expr->ctype, expr->fvalue);
1904 case EXPR_STATEMENT:
1905 return linearize_statement(ep, expr->statement);
1907 case EXPR_CALL:
1908 return linearize_call_expression(ep, expr);
1910 case EXPR_BINOP:
1911 if (expr->op == SPECIAL_LOGICAL_AND || expr->op == SPECIAL_LOGICAL_OR)
1912 return linearize_binop_bool(ep, expr);
1913 return linearize_binop(ep, expr);
1915 case EXPR_LOGICAL:
1916 return linearize_logical(ep, expr);
1918 case EXPR_COMPARE:
1919 return linearize_compare(ep, expr);
1921 case EXPR_SELECT:
1922 return linearize_select(ep, expr);
1924 case EXPR_CONDITIONAL:
1925 if (!expr->cond_true)
1926 return linearize_short_conditional(ep, expr, expr->conditional, expr->cond_false);
1928 return linearize_conditional(ep, expr, expr->conditional,
1929 expr->cond_true, expr->cond_false);
1931 case EXPR_COMMA:
1932 linearize_expression(ep, expr->left);
1933 return linearize_expression(ep, expr->right);
1935 case EXPR_ASSIGNMENT:
1936 return linearize_assignment(ep, expr);
1938 case EXPR_PREOP:
1939 return linearize_preop(ep, expr);
1941 case EXPR_POSTOP:
1942 return linearize_postop(ep, expr);
1944 case EXPR_CAST:
1945 case EXPR_FORCE_CAST:
1946 case EXPR_IMPLIED_CAST:
1947 return linearize_cast(ep, expr);
1949 case EXPR_SLICE:
1950 return linearize_slice(ep, expr);
1952 case EXPR_INITIALIZER:
1953 case EXPR_POS:
1954 warning(expr->pos, "unexpected initializer expression (%d %d)", expr->type, expr->op);
1955 return VOID;
1956 default:
1957 warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op);
1958 return VOID;
1960 return VOID;
1963 static pseudo_t linearize_one_symbol(struct entrypoint *ep, struct symbol *sym)
1965 struct access_data ad = { NULL, };
1966 pseudo_t value;
1968 if (!sym || !sym->initializer || sym->initialized)
1969 return VOID;
1971 /* We need to output these puppies some day too.. */
1972 if (sym->ctype.modifiers & (MOD_STATIC | MOD_TOPLEVEL))
1973 return VOID;
1975 sym->initialized = 1;
1976 ad.address = symbol_pseudo(ep, sym);
1978 if (sym->initializer && !is_scalar_type(sym)) {
1979 // default zero initialization [6.7.9.21]
1980 // FIXME: this init the whole aggregate while
1981 // only the existing fields need to be initialized.
1982 // FIXME: this init the whole aggregate even if
1983 // all fields arelater explicitely initialized.
1984 ad.type = sym;
1985 ad.address = symbol_pseudo(ep, sym);
1986 linearize_store_gen(ep, value_pseudo(0), &ad);
1989 value = linearize_initializer(ep, sym->initializer, &ad);
1990 return value;
1993 static pseudo_t linearize_compound_statement(struct entrypoint *ep, struct statement *stmt)
1995 pseudo_t pseudo;
1996 struct statement *s;
1998 pseudo = VOID;
1999 FOR_EACH_PTR(stmt->stmts, s) {
2000 pseudo = linearize_statement(ep, s);
2001 } END_FOR_EACH_PTR(s);
2003 return pseudo;
2006 static void add_return(struct entrypoint *ep, struct basic_block *bb, struct symbol *ctype, pseudo_t src)
2008 struct instruction *phi_node = first_instruction(bb->insns);
2009 pseudo_t phi;
2010 if (!phi_node) {
2011 phi_node = alloc_typed_instruction(OP_PHI, ctype);
2012 phi_node->target = alloc_pseudo(phi_node);
2013 phi_node->bb = bb;
2014 add_instruction(&bb->insns, phi_node);
2016 phi = alloc_phi(ep->active, src, ctype);
2017 phi->ident = &return_ident;
2018 use_pseudo(phi_node, phi, add_pseudo(&phi_node->phi_list, phi));
2021 static pseudo_t linearize_fn_statement(struct entrypoint *ep, struct statement *stmt)
2023 struct instruction *phi_node;
2024 struct basic_block *bb;
2025 pseudo_t pseudo;
2027 pseudo = linearize_compound_statement(ep, stmt);
2028 if (!is_void_type(stmt->ret)) { // non-void function
2029 struct basic_block *active = ep->active;
2030 if (active && !bb_terminated(active)) { // missing return
2031 struct basic_block *bb_ret;
2032 bb_ret = get_bound_block(ep, stmt->ret);
2033 add_return(ep, bb_ret, stmt->ret, undef_pseudo());
2036 bb = add_label(ep, stmt->ret);
2037 phi_node = first_instruction(bb->insns);
2038 if (phi_node)
2039 pseudo = phi_node->target;
2040 return pseudo;
2043 static pseudo_t linearize_inlined_call(struct entrypoint *ep, struct statement *stmt)
2045 struct instruction *insn = alloc_instruction(OP_INLINED_CALL, 0);
2046 struct statement *args = stmt->args;
2047 struct basic_block *bb;
2048 pseudo_t pseudo;
2050 if (args) {
2051 struct symbol *sym;
2053 concat_symbol_list(args->declaration, &ep->syms);
2054 FOR_EACH_PTR(args->declaration, sym) {
2055 pseudo_t value = linearize_one_symbol(ep, sym);
2056 add_pseudo(&insn->arguments, value);
2057 } END_FOR_EACH_PTR(sym);
2060 pseudo = linearize_fn_statement(ep, stmt);
2061 insn->target = pseudo;
2063 use_pseudo(insn, symbol_pseudo(ep, stmt->inline_fn), &insn->func);
2064 bb = ep->active;
2065 if (!bb->insns)
2066 bb->pos = stmt->pos;
2067 add_one_insn(ep, insn);
2068 return pseudo;
2071 static pseudo_t linearize_context(struct entrypoint *ep, struct statement *stmt)
2073 struct instruction *insn = alloc_instruction(OP_CONTEXT, 0);
2074 struct expression *expr = stmt->expression;
2076 insn->increment = get_expression_value(expr);
2077 insn->context_expr = stmt->context;
2078 add_one_insn(ep, insn);
2079 return VOID;
2082 static pseudo_t linearize_range(struct entrypoint *ep, struct statement *stmt)
2084 struct instruction *insn = alloc_instruction(OP_RANGE, 0);
2086 use_pseudo(insn, linearize_expression(ep, stmt->range_expression), &insn->src1);
2087 use_pseudo(insn, linearize_expression(ep, stmt->range_low), &insn->src2);
2088 use_pseudo(insn, linearize_expression(ep, stmt->range_high), &insn->src3);
2089 add_one_insn(ep, insn);
2090 return VOID;
2093 ALLOCATOR(asm_rules, "asm rules");
2094 ALLOCATOR(asm_constraint, "asm constraints");
2096 static void add_asm_input(struct entrypoint *ep, struct instruction *insn, struct asm_operand *op)
2098 pseudo_t pseudo = linearize_expression(ep, op->expr);
2099 struct asm_constraint *rule = __alloc_asm_constraint(0);
2101 rule->ident = op->name;
2102 rule->constraint = op->constraint ? op->constraint->string->data : "";
2103 use_pseudo(insn, pseudo, &rule->pseudo);
2104 add_ptr_list(&insn->asm_rules->inputs, rule);
2107 static void add_asm_output(struct entrypoint *ep, struct instruction *insn, struct asm_operand *op)
2109 struct access_data ad = { NULL, };
2110 pseudo_t pseudo;
2111 struct asm_constraint *rule;
2113 if (op->is_memory) {
2114 pseudo = linearize_expression(ep, op->expr);
2115 } else {
2116 if (!linearize_address_gen(ep, op->expr, &ad))
2117 return;
2118 pseudo = alloc_pseudo(insn);
2119 linearize_store_gen(ep, pseudo, &ad);
2121 rule = __alloc_asm_constraint(0);
2122 rule->is_memory = op->is_memory;
2123 rule->ident = op->name;
2124 rule->constraint = op->constraint ? op->constraint->string->data : "";
2125 use_pseudo(insn, pseudo, &rule->pseudo);
2126 add_ptr_list(&insn->asm_rules->outputs, rule);
2129 static pseudo_t linearize_asm_statement(struct entrypoint *ep, struct statement *stmt)
2131 struct instruction *insn;
2132 struct expression *expr;
2133 struct asm_rules *rules;
2134 struct asm_operand *op;
2136 insn = alloc_instruction(OP_ASM, 0);
2137 expr = stmt->asm_string;
2138 if (!expr || expr->type != EXPR_STRING) {
2139 warning(stmt->pos, "expected string in inline asm");
2140 return VOID;
2142 insn->string = expr->string->data;
2144 rules = __alloc_asm_rules(0);
2145 insn->asm_rules = rules;
2147 /* Gather the inputs.. */
2148 FOR_EACH_PTR(stmt->asm_inputs, op) {
2149 add_asm_input(ep, insn, op);
2150 } END_FOR_EACH_PTR(op);
2152 add_one_insn(ep, insn);
2154 /* Assign the outputs */
2155 FOR_EACH_PTR(stmt->asm_outputs, op) {
2156 add_asm_output(ep, insn, op);
2157 } END_FOR_EACH_PTR(op);
2159 return VOID;
2162 static int multijmp_cmp(const void *_a, const void *_b)
2164 const struct multijmp *a = _a;
2165 const struct multijmp *b = _b;
2167 // "default" case?
2168 if (a->begin > a->end) {
2169 if (b->begin > b->end)
2170 return 0;
2171 return 1;
2173 if (b->begin > b->end)
2174 return -1;
2175 if (a->begin == b->begin) {
2176 if (a->end == b->end)
2177 return 0;
2178 return (a->end < b->end) ? -1 : 1;
2180 return a->begin < b->begin ? -1 : 1;
2183 static void sort_switch_cases(struct instruction *insn)
2185 sort_list((struct ptr_list **)&insn->multijmp_list, multijmp_cmp);
2188 static pseudo_t linearize_declaration(struct entrypoint *ep, struct statement *stmt)
2190 struct symbol *sym;
2192 concat_symbol_list(stmt->declaration, &ep->syms);
2194 FOR_EACH_PTR(stmt->declaration, sym) {
2195 linearize_one_symbol(ep, sym);
2196 } END_FOR_EACH_PTR(sym);
2197 return VOID;
2200 static pseudo_t linearize_return(struct entrypoint *ep, struct statement *stmt)
2202 struct expression *expr = stmt->expression;
2203 struct symbol *ret = stmt->ret_target;
2204 struct basic_block *bb_return = get_bound_block(ep, ret);
2205 struct basic_block *active;
2206 pseudo_t src = linearize_expression(ep, expr);
2207 active = ep->active;
2208 if (active && !is_void_type(ret)) {
2209 add_return(ep, bb_return, ret, src);
2211 add_goto(ep, bb_return);
2212 return VOID;
2215 static pseudo_t linearize_switch(struct entrypoint *ep, struct statement *stmt)
2217 struct symbol *sym;
2218 struct instruction *switch_ins;
2219 struct basic_block *switch_end = alloc_basic_block(ep, stmt->pos);
2220 struct basic_block *active, *default_case;
2221 struct expression *expr = stmt->switch_expression;
2222 struct multijmp *jmp;
2223 pseudo_t pseudo;
2225 if (!expr || !expr->ctype)
2226 return VOID;
2227 pseudo = linearize_expression(ep, expr);
2228 active = ep->active;
2229 if (!active) {
2230 active = alloc_basic_block(ep, stmt->pos);
2231 set_activeblock(ep, active);
2234 switch_ins = alloc_typed_instruction(OP_SWITCH, expr->ctype);
2235 use_pseudo(switch_ins, pseudo, &switch_ins->cond);
2236 add_one_insn(ep, switch_ins);
2237 finish_block(ep);
2239 default_case = NULL;
2240 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
2241 struct statement *case_stmt = sym->stmt;
2242 struct basic_block *bb_case = get_bound_block(ep, sym);
2244 if (!case_stmt->case_expression) {
2245 default_case = bb_case;
2246 continue;
2247 } else if (case_stmt->case_expression->type != EXPR_VALUE) {
2248 continue;
2249 } else {
2250 struct expression *case_to = case_stmt->case_to;
2251 long long begin, end;
2253 begin = end = case_stmt->case_expression->value;
2254 if (case_to && case_to->type == EXPR_VALUE)
2255 end = case_to->value;
2256 if (begin > end)
2257 jmp = alloc_multijmp(bb_case, end, begin);
2258 else
2259 jmp = alloc_multijmp(bb_case, begin, end);
2262 add_multijmp(&switch_ins->multijmp_list, jmp);
2263 add_bb(&bb_case->parents, active);
2264 add_bb(&active->children, bb_case);
2265 } END_FOR_EACH_PTR(sym);
2267 bind_label(stmt->switch_break, switch_end, stmt->pos);
2269 /* And linearize the actual statement */
2270 linearize_statement(ep, stmt->switch_statement);
2271 set_activeblock(ep, switch_end);
2273 if (!default_case)
2274 default_case = switch_end;
2276 jmp = alloc_multijmp(default_case, 1, 0);
2277 add_multijmp(&switch_ins->multijmp_list, jmp);
2278 add_bb(&default_case->parents, active);
2279 add_bb(&active->children, default_case);
2280 sort_switch_cases(switch_ins);
2282 return VOID;
2285 static pseudo_t linearize_iterator(struct entrypoint *ep, struct statement *stmt)
2287 struct statement *pre_statement = stmt->iterator_pre_statement;
2288 struct expression *pre_condition = stmt->iterator_pre_condition;
2289 struct statement *statement = stmt->iterator_statement;
2290 struct statement *post_statement = stmt->iterator_post_statement;
2291 struct expression *post_condition = stmt->iterator_post_condition;
2292 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end;
2293 struct symbol *sym;
2295 FOR_EACH_PTR(stmt->iterator_syms, sym) {
2296 linearize_one_symbol(ep, sym);
2297 } END_FOR_EACH_PTR(sym);
2298 concat_symbol_list(stmt->iterator_syms, &ep->syms);
2299 linearize_statement(ep, pre_statement);
2301 loop_body = loop_top = alloc_basic_block(ep, stmt->pos);
2302 loop_continue = alloc_basic_block(ep, stmt->pos);
2303 loop_end = alloc_basic_block(ep, stmt->pos);
2305 /* An empty post-condition means that it's the same as the pre-condition */
2306 if (!post_condition) {
2307 loop_top = alloc_basic_block(ep, stmt->pos);
2308 set_activeblock(ep, loop_top);
2311 if (pre_condition)
2312 linearize_cond_branch(ep, pre_condition, loop_body, loop_end);
2314 bind_label(stmt->iterator_continue, loop_continue, stmt->pos);
2315 bind_label(stmt->iterator_break, loop_end, stmt->pos);
2317 set_activeblock(ep, loop_body);
2318 linearize_statement(ep, statement);
2319 add_goto(ep, loop_continue);
2321 set_activeblock(ep, loop_continue);
2322 linearize_statement(ep, post_statement);
2323 if (!post_condition)
2324 add_goto(ep, loop_top);
2325 else
2326 linearize_cond_branch(ep, post_condition, loop_top, loop_end);
2327 set_activeblock(ep, loop_end);
2329 return VOID;
2332 static pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
2334 struct basic_block *bb;
2336 if (!stmt)
2337 return VOID;
2339 bb = ep->active;
2340 if (bb && !bb->insns)
2341 bb->pos = stmt->pos;
2342 current_pos = stmt->pos;
2344 switch (stmt->type) {
2345 case STMT_NONE:
2346 break;
2348 case STMT_DECLARATION:
2349 return linearize_declaration(ep, stmt);
2351 case STMT_CONTEXT:
2352 return linearize_context(ep, stmt);
2354 case STMT_RANGE:
2355 return linearize_range(ep, stmt);
2357 case STMT_EXPRESSION:
2358 return linearize_expression(ep, stmt->expression);
2360 case STMT_ASM:
2361 return linearize_asm_statement(ep, stmt);
2363 case STMT_RETURN:
2364 return linearize_return(ep, stmt);
2366 case STMT_CASE: {
2367 add_label(ep, stmt->case_label);
2368 linearize_statement(ep, stmt->case_statement);
2369 break;
2372 case STMT_LABEL: {
2373 struct symbol *label = stmt->label_identifier;
2375 if (label->used) {
2376 add_label(ep, label);
2378 return linearize_statement(ep, stmt->label_statement);
2381 case STMT_GOTO: {
2382 struct symbol *sym;
2383 struct expression *expr;
2384 struct instruction *goto_ins;
2385 struct basic_block *active;
2386 pseudo_t pseudo;
2388 active = ep->active;
2389 if (!bb_reachable(active))
2390 break;
2392 if (stmt->goto_label) {
2393 add_goto(ep, get_bound_block(ep, stmt->goto_label));
2394 break;
2397 expr = stmt->goto_expression;
2398 if (!expr)
2399 break;
2401 /* This can happen as part of simplification */
2402 if (expr->type == EXPR_LABEL) {
2403 add_goto(ep, get_bound_block(ep, expr->label_symbol));
2404 break;
2407 pseudo = linearize_expression(ep, expr);
2408 goto_ins = alloc_instruction(OP_COMPUTEDGOTO, 0);
2409 use_pseudo(goto_ins, pseudo, &goto_ins->src);
2410 add_one_insn(ep, goto_ins);
2412 FOR_EACH_PTR(stmt->target_list, sym) {
2413 struct basic_block *bb_computed = get_bound_block(ep, sym);
2414 struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0);
2415 add_multijmp(&goto_ins->multijmp_list, jmp);
2416 add_bb(&bb_computed->parents, ep->active);
2417 add_bb(&active->children, bb_computed);
2418 } END_FOR_EACH_PTR(sym);
2420 finish_block(ep);
2421 break;
2424 case STMT_COMPOUND:
2425 if (stmt->inline_fn)
2426 return linearize_inlined_call(ep, stmt);
2427 return linearize_compound_statement(ep, stmt);
2430 * This could take 'likely/unlikely' into account, and
2431 * switch the arms around appropriately..
2433 case STMT_IF: {
2434 struct basic_block *bb_true, *bb_false, *endif;
2435 struct expression *cond = stmt->if_conditional;
2437 bb_true = alloc_basic_block(ep, stmt->pos);
2438 bb_false = endif = alloc_basic_block(ep, stmt->pos);
2440 // If the condition is invalid, the following
2441 // statement(s) are not evaluated.
2442 if (!cond || !valid_type(cond->ctype))
2443 return VOID;
2444 linearize_cond_branch(ep, cond, bb_true, bb_false);
2446 set_activeblock(ep, bb_true);
2447 linearize_statement(ep, stmt->if_true);
2449 if (stmt->if_false) {
2450 endif = alloc_basic_block(ep, stmt->pos);
2451 add_goto(ep, endif);
2452 set_activeblock(ep, bb_false);
2453 linearize_statement(ep, stmt->if_false);
2455 set_activeblock(ep, endif);
2456 break;
2459 case STMT_SWITCH:
2460 return linearize_switch(ep, stmt);
2462 case STMT_ITERATOR:
2463 return linearize_iterator(ep, stmt);
2465 default:
2466 break;
2468 return VOID;
2471 static void check_tainted_insn(struct instruction *insn)
2473 unsigned long long uval;
2474 long long sval;
2475 pseudo_t src2;
2477 switch (insn->opcode) {
2478 case OP_DIVU: case OP_DIVS:
2479 case OP_MODU: case OP_MODS:
2480 if (insn->src2 == value_pseudo(0))
2481 warning(insn->pos, "divide by zero");
2482 break;
2483 case OP_SHL: case OP_LSR: case OP_ASR:
2484 src2 = insn->src2;
2485 if (src2->type != PSEUDO_VAL)
2486 break;
2487 uval = src2->value;
2488 if (uval < insn->size)
2489 break;
2490 sval = sign_extend(uval, insn->size);
2491 if (Wshift_count_negative && sval < 0)
2492 warning(insn->pos, "shift count is negative (%lld)", sval);
2493 else if (Wshift_count_overflow)
2494 warning(insn->pos, "shift too big (%llu) for type %s", uval, show_typename(insn->type));
2499 // issue warnings after all possible DCE
2500 static void late_warnings(struct entrypoint *ep)
2502 struct basic_block *bb;
2503 FOR_EACH_PTR(ep->bbs, bb) {
2504 struct instruction *insn;
2505 FOR_EACH_PTR(bb->insns, insn) {
2506 if (!insn->bb)
2507 continue;
2508 if (insn->tainted)
2509 check_tainted_insn(insn);
2510 } END_FOR_EACH_PTR(insn);
2511 } END_FOR_EACH_PTR(bb);
2514 static struct entrypoint *linearize_fn(struct symbol *sym, struct symbol *base_type)
2516 struct statement *stmt = base_type->stmt;
2517 struct entrypoint *ep;
2518 struct basic_block *bb;
2519 struct symbol *ret_type;
2520 struct symbol *arg;
2521 struct instruction *entry;
2522 struct instruction *ret;
2523 pseudo_t result;
2524 int i;
2526 if (!stmt || sym->bogus_linear)
2527 return NULL;
2529 ep = alloc_entrypoint();
2530 ep->name = sym;
2531 sym->ep = ep;
2532 bb = alloc_basic_block(ep, sym->pos);
2533 set_activeblock(ep, bb);
2535 if (stmt->type == STMT_ASM) { // top-level asm
2536 linearize_asm_statement(ep, stmt);
2537 return ep;
2540 entry = alloc_instruction(OP_ENTRY, 0);
2541 add_one_insn(ep, entry);
2542 ep->entry = entry;
2544 concat_symbol_list(base_type->arguments, &ep->syms);
2546 /* FIXME!! We should do something else about varargs.. */
2547 i = 0;
2548 FOR_EACH_PTR(base_type->arguments, arg) {
2549 linearize_argument(ep, arg, ++i);
2550 } END_FOR_EACH_PTR(arg);
2552 result = linearize_fn_statement(ep, stmt);
2553 ret_type = base_type->ctype.base_type;
2554 ret = alloc_typed_instruction(OP_RET, ret_type);
2555 if (type_size(ret_type) > 0)
2556 use_pseudo(ret, result, &ret->src);
2557 add_one_insn(ep, ret);
2559 optimize(ep);
2560 late_warnings(ep);
2561 return ep;
2564 struct entrypoint *linearize_symbol(struct symbol *sym)
2566 struct symbol *base_type;
2568 if (!sym)
2569 return NULL;
2570 current_pos = sym->pos;
2571 base_type = sym->ctype.base_type;
2572 if (!base_type)
2573 return NULL;
2574 if (base_type->type == SYM_FN)
2575 return linearize_fn(sym, base_type);
2576 return NULL;
2580 * Builtin functions
2583 static pseudo_t linearize_unreachable(struct entrypoint *ep, struct expression *exp)
2585 add_unreachable(ep);
2586 return VOID;
2589 static struct sym_init {
2590 const char *name;
2591 pseudo_t (*linearize)(struct entrypoint *, struct expression*);
2592 struct symbol_op op;
2593 } builtins_table[] = {
2594 // must be declared in builtin.c:declare_builtins[]
2595 { "__builtin_unreachable", linearize_unreachable },
2599 void init_linearized_builtins(int stream)
2601 struct sym_init *ptr;
2603 for (ptr = builtins_table; ptr->name; ptr++) {
2604 struct symbol *sym;
2605 sym = create_symbol(stream, ptr->name, SYM_NODE, NS_SYMBOL);
2606 if (!sym->op)
2607 sym->op = &ptr->op;
2608 sym->op->type |= KW_BUILTIN;
2609 ptr->op.linearize = ptr->linearize;