comparison: select the caller_info
[smatch.git] / linearize.c
blobe80715ab2458d4c49633d4aaa03ee5e6c05dd68c
1 /*
2 * Linearize - walk the statement tree (but _not_ the expressions)
3 * to generate a linear version of it and the basic blocks.
5 * NOTE! We're not interested in the actual sub-expressions yet,
6 * even though they can generate conditional branches and
7 * subroutine calls. That's all "local" behaviour.
9 * Copyright (C) 2004 Linus Torvalds
10 * Copyright (C) 2004 Christopher Li
13 #include <string.h>
14 #include <stdarg.h>
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <assert.h>
19 #include "parse.h"
20 #include "expression.h"
21 #include "linearize.h"
22 #include "optimize.h"
23 #include "flow.h"
24 #include "target.h"
26 static pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
27 static pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
29 static pseudo_t add_cast(struct entrypoint *ep, struct symbol *to, struct symbol *from, int op, pseudo_t src);
30 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right);
31 static pseudo_t linearize_one_symbol(struct entrypoint *ep, struct symbol *sym);
33 static pseudo_t cast_pseudo(struct entrypoint *ep, pseudo_t src, struct symbol *from, struct symbol *to);
35 struct pseudo void_pseudo = {};
37 static struct position current_pos;
39 ALLOCATOR(pseudo_user, "pseudo_user");
41 static struct instruction *alloc_instruction(int opcode, int size)
43 struct instruction * insn = __alloc_instruction(0);
44 insn->opcode = opcode;
45 insn->size = size;
46 insn->pos = current_pos;
47 return insn;
50 static inline int type_size(struct symbol *type)
52 return type ? type->bit_size > 0 ? type->bit_size : 0 : 0;
55 static struct instruction *alloc_typed_instruction(int opcode, struct symbol *type)
57 struct instruction *insn = alloc_instruction(opcode, type_size(type));
58 insn->type = type;
59 return insn;
62 static struct entrypoint *alloc_entrypoint(void)
64 return __alloc_entrypoint(0);
67 static struct basic_block *alloc_basic_block(struct entrypoint *ep, struct position pos)
69 static int nr;
70 struct basic_block *bb = __alloc_basic_block(0);
71 bb->pos = pos;
72 bb->ep = ep;
73 bb->nr = nr++;
74 return bb;
77 static struct multijmp *alloc_multijmp(struct basic_block *target, long long begin, long long end)
79 struct multijmp *multijmp = __alloc_multijmp(0);
80 multijmp->target = target;
81 multijmp->begin = begin;
82 multijmp->end = end;
83 return multijmp;
86 const char *show_label(struct basic_block *bb)
88 static int n;
89 static char buffer[4][16];
90 char *buf = buffer[3 & ++n];
92 if (!bb)
93 return ".L???";
94 snprintf(buf, 64, ".L%u", bb->nr);
95 return buf;
98 const char *show_pseudo(pseudo_t pseudo)
100 static int n;
101 static char buffer[4][64];
102 char *buf;
103 int i;
105 if (!pseudo)
106 return "no pseudo";
107 if (pseudo == VOID)
108 return "VOID";
109 buf = buffer[3 & ++n];
110 switch(pseudo->type) {
111 case PSEUDO_SYM: {
112 struct symbol *sym = pseudo->sym;
113 struct expression *expr;
115 if (!sym) {
116 snprintf(buf, 64, "<bad symbol>");
117 break;
119 if (sym->bb_target) {
120 snprintf(buf, 64, "%s", show_label(sym->bb_target));
121 break;
123 if (sym->ident) {
124 snprintf(buf, 64, "%s", show_ident(sym->ident));
125 break;
127 expr = sym->initializer;
128 snprintf(buf, 64, "<anon symbol:%p>", verbose ? sym : NULL);
129 if (expr) {
130 switch (expr->type) {
131 case EXPR_VALUE:
132 snprintf(buf, 64, "<symbol value: %lld>", expr->value);
133 break;
134 case EXPR_STRING:
135 return show_string(expr->string);
136 default:
137 break;
140 break;
142 case PSEUDO_REG:
143 i = snprintf(buf, 64, "%%r%d", pseudo->nr);
144 if (pseudo->ident)
145 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
146 break;
147 case PSEUDO_VAL: {
148 long long value = pseudo->value;
149 if (value > 1000 || value < -1000)
150 snprintf(buf, 64, "$%#llx", value);
151 else
152 snprintf(buf, 64, "$%lld", value);
153 break;
155 case PSEUDO_ARG:
156 snprintf(buf, 64, "%%arg%d", pseudo->nr);
157 break;
158 case PSEUDO_PHI:
159 i = snprintf(buf, 64, "%%phi%d", pseudo->nr);
160 if (pseudo->ident)
161 sprintf(buf+i, "(%s)", show_ident(pseudo->ident));
162 break;
163 case PSEUDO_UNDEF:
164 return "UNDEF";
165 default:
166 snprintf(buf, 64, "<bad pseudo type %d>", pseudo->type);
168 return buf;
171 static const char *opcodes[] = {
172 [OP_BADOP] = "bad_op",
174 /* Fn entrypoint */
175 [OP_ENTRY] = "<entry-point>",
177 /* Terminator */
178 [OP_RET] = "ret",
179 [OP_BR] = "br",
180 [OP_CBR] = "cbr",
181 [OP_SWITCH] = "switch",
182 [OP_UNREACH] = "unreachable",
183 [OP_COMPUTEDGOTO] = "jmp *",
185 /* Binary */
186 [OP_ADD] = "add",
187 [OP_SUB] = "sub",
188 [OP_MUL] = "mul",
189 [OP_DIVU] = "divu",
190 [OP_DIVS] = "divs",
191 [OP_MODU] = "modu",
192 [OP_MODS] = "mods",
193 [OP_SHL] = "shl",
194 [OP_LSR] = "lsr",
195 [OP_ASR] = "asr",
197 /* Floating-point Binary */
198 [OP_FADD] = "fadd",
199 [OP_FSUB] = "fsub",
200 [OP_FMUL] = "fmul",
201 [OP_FDIV] = "fdiv",
203 /* Logical */
204 [OP_AND] = "and",
205 [OP_OR] = "or",
206 [OP_XOR] = "xor",
208 /* Binary comparison */
209 [OP_SET_EQ] = "seteq",
210 [OP_SET_NE] = "setne",
211 [OP_SET_LE] = "setle",
212 [OP_SET_GE] = "setge",
213 [OP_SET_LT] = "setlt",
214 [OP_SET_GT] = "setgt",
215 [OP_SET_B] = "setb",
216 [OP_SET_A] = "seta",
217 [OP_SET_BE] = "setbe",
218 [OP_SET_AE] = "setae",
220 /* floating-point comparison */
221 [OP_FCMP_ORD] = "fcmpord",
222 [OP_FCMP_OEQ] = "fcmpoeq",
223 [OP_FCMP_ONE] = "fcmpone",
224 [OP_FCMP_OLE] = "fcmpole",
225 [OP_FCMP_OGE] = "fcmpoge",
226 [OP_FCMP_OLT] = "fcmpolt",
227 [OP_FCMP_OGT] = "fcmpogt",
228 [OP_FCMP_UEQ] = "fcmpueq",
229 [OP_FCMP_UNE] = "fcmpune",
230 [OP_FCMP_ULE] = "fcmpule",
231 [OP_FCMP_UGE] = "fcmpuge",
232 [OP_FCMP_ULT] = "fcmpult",
233 [OP_FCMP_UGT] = "fcmpugt",
234 [OP_FCMP_UNO] = "fcmpuno",
236 /* Uni */
237 [OP_NOT] = "not",
238 [OP_NEG] = "neg",
239 [OP_FNEG] = "fneg",
241 /* Special three-input */
242 [OP_SEL] = "select",
243 [OP_FMADD] = "fmadd",
245 /* Memory */
246 [OP_LOAD] = "load",
247 [OP_STORE] = "store",
248 [OP_LABEL] = "label",
249 [OP_SETVAL] = "set",
250 [OP_SETFVAL] = "setfval",
251 [OP_SYMADDR] = "symaddr",
253 /* Other */
254 [OP_PHI] = "phi",
255 [OP_PHISOURCE] = "phisrc",
256 [OP_SEXT] = "sext",
257 [OP_ZEXT] = "zext",
258 [OP_TRUNC] = "trunc",
259 [OP_FCVTU] = "fcvtu",
260 [OP_FCVTS] = "fcvts",
261 [OP_UCVTF] = "ucvtf",
262 [OP_SCVTF] = "scvtf",
263 [OP_FCVTF] = "fcvtf",
264 [OP_UTPTR] = "utptr",
265 [OP_PTRTU] = "ptrtu",
266 [OP_PTRCAST] = "ptrcast",
267 [OP_INLINED_CALL] = "# call",
268 [OP_CALL] = "call",
269 [OP_SLICE] = "slice",
270 [OP_NOP] = "nop",
271 [OP_DEATHNOTE] = "dead",
272 [OP_ASM] = "asm",
274 /* Sparse tagging (line numbers, context, whatever) */
275 [OP_CONTEXT] = "context",
276 [OP_RANGE] = "range-check",
278 [OP_COPY] = "copy",
281 static char *show_asm_constraints(char *buf, const char *sep, struct asm_constraint_list *list)
283 struct asm_constraint *entry;
285 FOR_EACH_PTR(list, entry) {
286 buf += sprintf(buf, "%s\"%s\"", sep, entry->constraint);
287 if (entry->pseudo)
288 buf += sprintf(buf, " (%s)", show_pseudo(entry->pseudo));
289 if (entry->ident)
290 buf += sprintf(buf, " [%s]", show_ident(entry->ident));
291 sep = ", ";
292 } END_FOR_EACH_PTR(entry);
293 return buf;
296 static char *show_asm(char *buf, struct instruction *insn)
298 struct asm_rules *rules = insn->asm_rules;
300 buf += sprintf(buf, "\"%s\"", insn->string);
301 buf = show_asm_constraints(buf, "\n\t\tout: ", rules->outputs);
302 buf = show_asm_constraints(buf, "\n\t\tin: ", rules->inputs);
303 buf = show_asm_constraints(buf, "\n\t\tclobber: ", rules->clobbers);
304 return buf;
307 const char *show_instruction(struct instruction *insn)
309 int opcode = insn->opcode;
310 static char buffer[4096];
311 char *buf;
313 buf = buffer;
314 if (!insn->bb)
315 buf += sprintf(buf, "# ");
317 if (opcode < ARRAY_SIZE(opcodes)) {
318 const char *op = opcodes[opcode];
319 if (!op)
320 buf += sprintf(buf, "opcode:%d", opcode);
321 else
322 buf += sprintf(buf, "%s", op);
323 if (insn->size)
324 buf += sprintf(buf, ".%d", insn->size);
325 memset(buf, ' ', 20);
326 buf++;
329 if (buf < buffer + 12)
330 buf = buffer + 12;
331 switch (opcode) {
332 case OP_RET:
333 if (insn->src && insn->src != VOID)
334 buf += sprintf(buf, "%s", show_pseudo(insn->src));
335 break;
337 case OP_CBR:
338 buf += sprintf(buf, "%s, %s, %s", show_pseudo(insn->cond), show_label(insn->bb_true), show_label(insn->bb_false));
339 break;
341 case OP_BR:
342 buf += sprintf(buf, "%s", show_label(insn->bb_true));
343 break;
345 case OP_LABEL:
346 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
347 buf += sprintf(buf, "%s", show_label(insn->bb_true));
348 break;
350 case OP_SETVAL: {
351 struct expression *expr = insn->val;
352 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
354 if (!expr) {
355 buf += sprintf(buf, "%s", "<none>");
356 break;
359 switch (expr->type) {
360 case EXPR_VALUE:
361 buf += sprintf(buf, "%lld", expr->value);
362 break;
363 case EXPR_FVALUE:
364 buf += sprintf(buf, "%Le", expr->fvalue);
365 break;
366 case EXPR_STRING:
367 buf += sprintf(buf, "%.40s", show_string(expr->string));
368 break;
369 case EXPR_SYMBOL:
370 buf += sprintf(buf, "%s", show_ident(expr->symbol->ident));
371 break;
372 case EXPR_LABEL:
373 buf += sprintf(buf, "%s", show_label(expr->symbol->bb_target));
374 break;
375 default:
376 buf += sprintf(buf, "SETVAL EXPR TYPE %d", expr->type);
378 break;
380 case OP_SETFVAL:
381 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
382 buf += sprintf(buf, "%Le", insn->fvalue);
383 break;
385 case OP_SWITCH: {
386 struct multijmp *jmp;
387 buf += sprintf(buf, "%s", show_pseudo(insn->cond));
388 FOR_EACH_PTR(insn->multijmp_list, jmp) {
389 if (jmp->begin == jmp->end)
390 buf += sprintf(buf, ", %lld -> %s", jmp->begin, show_label(jmp->target));
391 else if (jmp->begin < jmp->end)
392 buf += sprintf(buf, ", %lld ... %lld -> %s", jmp->begin, jmp->end, show_label(jmp->target));
393 else
394 buf += sprintf(buf, ", default -> %s", show_label(jmp->target));
395 } END_FOR_EACH_PTR(jmp);
396 break;
398 case OP_COMPUTEDGOTO: {
399 struct multijmp *jmp;
400 buf += sprintf(buf, "%s", show_pseudo(insn->src));
401 FOR_EACH_PTR(insn->multijmp_list, jmp) {
402 buf += sprintf(buf, ", %s", show_label(jmp->target));
403 } END_FOR_EACH_PTR(jmp);
404 break;
406 case OP_UNREACH:
407 break;
409 case OP_PHISOURCE: {
410 struct instruction *phi;
411 buf += sprintf(buf, "%s <- %s ", show_pseudo(insn->target), show_pseudo(insn->phi_src));
412 FOR_EACH_PTR(insn->phi_users, phi) {
413 buf += sprintf(buf, " (%s)", show_pseudo(phi->target));
414 } END_FOR_EACH_PTR(phi);
415 break;
418 case OP_PHI: {
419 pseudo_t phi;
420 const char *s = " <-";
421 buf += sprintf(buf, "%s", show_pseudo(insn->target));
422 FOR_EACH_PTR(insn->phi_list, phi) {
423 if (phi == VOID && !verbose)
424 continue;
425 buf += sprintf(buf, "%s %s", s, show_pseudo(phi));
426 s = ",";
427 } END_FOR_EACH_PTR(phi);
428 break;
430 case OP_LOAD:
431 buf += sprintf(buf, "%s <- %lld[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
432 break;
433 case OP_STORE:
434 buf += sprintf(buf, "%s -> %lld[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src));
435 break;
436 case OP_INLINED_CALL:
437 case OP_CALL: {
438 struct pseudo *arg;
439 if (insn->target && insn->target != VOID)
440 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target));
441 buf += sprintf(buf, "%s", show_pseudo(insn->func));
442 FOR_EACH_PTR(insn->arguments, arg) {
443 buf += sprintf(buf, ", %s", show_pseudo(arg));
444 } END_FOR_EACH_PTR(arg);
445 break;
447 case OP_SEXT: case OP_ZEXT:
448 case OP_TRUNC:
449 case OP_FCVTU: case OP_FCVTS:
450 case OP_UCVTF: case OP_SCVTF:
451 case OP_FCVTF:
452 case OP_UTPTR:
453 case OP_PTRTU:
454 case OP_PTRCAST:
455 buf += sprintf(buf, "%s <- (%d) %s",
456 show_pseudo(insn->target),
457 type_size(insn->orig_type),
458 show_pseudo(insn->src));
459 break;
460 case OP_BINARY ... OP_BINARY_END:
461 case OP_FPCMP ... OP_FPCMP_END:
462 case OP_BINCMP ... OP_BINCMP_END:
463 buf += sprintf(buf, "%s <- %s, %s", show_pseudo(insn->target), show_pseudo(insn->src1), show_pseudo(insn->src2));
464 break;
466 case OP_SEL:
467 case OP_FMADD:
468 buf += sprintf(buf, "%s <- %s, %s, %s", show_pseudo(insn->target),
469 show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
470 break;
472 case OP_SLICE:
473 buf += sprintf(buf, "%s <- %s, %d, %d", show_pseudo(insn->target), show_pseudo(insn->base), insn->from, insn->len);
474 break;
476 case OP_NOT: case OP_NEG:
477 case OP_FNEG:
478 case OP_SYMADDR:
479 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
480 break;
482 case OP_CONTEXT:
483 buf += sprintf(buf, "%s%d", insn->check ? "check: " : "", insn->increment);
484 break;
485 case OP_RANGE:
486 buf += sprintf(buf, "%s between %s..%s", show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3));
487 break;
488 case OP_NOP:
489 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1));
490 break;
491 case OP_DEATHNOTE:
492 buf += sprintf(buf, "%s", show_pseudo(insn->target));
493 break;
494 case OP_ASM:
495 buf = show_asm(buf, insn);
496 break;
497 case OP_COPY:
498 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src));
499 break;
500 default:
501 break;
504 if (buf >= buffer + sizeof(buffer))
505 die("instruction buffer overflowed %td\n", buf - buffer);
506 do { --buf; } while (*buf == ' ');
507 *++buf = 0;
508 return buffer;
511 void show_bb(struct basic_block *bb)
513 struct instruction *insn;
515 printf("%s:\n", show_label(bb));
516 if (verbose) {
517 pseudo_t needs, defines;
518 printf("%s:%d\n", stream_name(bb->pos.stream), bb->pos.line);
520 FOR_EACH_PTR(bb->needs, needs) {
521 struct instruction *def = needs->def;
522 if (def->opcode != OP_PHI) {
523 printf(" **uses %s (from %s)**\n", show_pseudo(needs), show_label(def->bb));
524 } else {
525 pseudo_t phi;
526 const char *sep = " ";
527 printf(" **uses %s (from", show_pseudo(needs));
528 FOR_EACH_PTR(def->phi_list, phi) {
529 if (phi == VOID)
530 continue;
531 printf("%s(%s:%s)", sep, show_pseudo(phi), show_label(phi->def->bb));
532 sep = ", ";
533 } END_FOR_EACH_PTR(phi);
534 printf(")**\n");
536 } END_FOR_EACH_PTR(needs);
538 FOR_EACH_PTR(bb->defines, defines) {
539 printf(" **defines %s **\n", show_pseudo(defines));
540 } END_FOR_EACH_PTR(defines);
542 if (bb->parents) {
543 struct basic_block *from;
544 FOR_EACH_PTR(bb->parents, from) {
545 printf(" **from %s (%s:%d:%d)**\n", show_label(from),
546 stream_name(from->pos.stream), from->pos.line, from->pos.pos);
547 } END_FOR_EACH_PTR(from);
550 if (bb->children) {
551 struct basic_block *to;
552 FOR_EACH_PTR(bb->children, to) {
553 printf(" **to %s (%s:%d:%d)**\n", show_label(to),
554 stream_name(to->pos.stream), to->pos.line, to->pos.pos);
555 } END_FOR_EACH_PTR(to);
559 FOR_EACH_PTR(bb->insns, insn) {
560 if (!insn->bb && verbose < 2)
561 continue;
562 printf("\t%s\n", show_instruction(insn));
563 } END_FOR_EACH_PTR(insn);
564 if (!bb_terminated(bb))
565 printf("\tEND\n");
569 // show BB of non-removed instruction
570 void show_insn_bb(struct instruction *insn)
572 if (!insn || !insn->bb)
573 return;
574 show_bb(insn->bb);
577 static void show_symbol_usage(pseudo_t pseudo)
579 struct pseudo_user *pu;
581 if (pseudo) {
582 FOR_EACH_PTR(pseudo->users, pu) {
583 printf("\t%s\n", show_instruction(pu->insn));
584 } END_FOR_EACH_PTR(pu);
588 void show_entry(struct entrypoint *ep)
590 struct symbol *sym;
591 struct basic_block *bb;
593 printf("%s:\n", show_ident(ep->name->ident));
595 if (verbose) {
596 printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
598 FOR_EACH_PTR(ep->syms, sym) {
599 if (!sym->pseudo)
600 continue;
601 if (!sym->pseudo->users)
602 continue;
603 printf(" sym: %p %s\n", sym, show_ident(sym->ident));
604 if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE))
605 printf("\texternal visibility\n");
606 show_symbol_usage(sym->pseudo);
607 } END_FOR_EACH_PTR(sym);
609 printf("\n");
612 FOR_EACH_PTR(ep->bbs, bb) {
613 if (!bb)
614 continue;
615 if (!bb->parents && !bb->children && !bb->insns && verbose < 2)
616 continue;
617 show_bb(bb);
618 printf("\n");
619 } END_FOR_EACH_PTR(bb);
621 printf("\n");
625 // show the function containing the instruction but only if not already removed.
626 void show_insn_entry(struct instruction *insn)
628 if (!insn || !insn->bb || !insn->bb->ep)
629 return;
630 show_entry(insn->bb->ep);
633 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos)
635 if (label->bb_target)
636 warning(pos, "label '%s' already bound", show_ident(label->ident));
637 label->bb_target = bb;
640 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label)
642 struct basic_block *bb = label->bb_target;
644 if (!bb) {
645 bb = alloc_basic_block(ep, label->pos);
646 label->bb_target = bb;
648 return bb;
651 static void finish_block(struct entrypoint *ep)
653 struct basic_block *src = ep->active;
654 if (bb_reachable(src))
655 ep->active = NULL;
658 static void add_goto(struct entrypoint *ep, struct basic_block *dst)
660 struct basic_block *src = ep->active;
661 if (bb_reachable(src)) {
662 struct instruction *br = alloc_instruction(OP_BR, 0);
663 br->bb_true = dst;
664 add_bb(&dst->parents, src);
665 add_bb(&src->children, dst);
666 br->bb = src;
667 add_instruction(&src->insns, br);
668 ep->active = NULL;
672 static void add_one_insn(struct entrypoint *ep, struct instruction *insn)
674 struct basic_block *bb = ep->active;
676 if (bb_reachable(bb)) {
677 insn->bb = bb;
678 add_instruction(&bb->insns, insn);
682 static void add_unreachable(struct entrypoint *ep)
684 struct instruction *insn = alloc_instruction(OP_UNREACH, 0);
685 add_one_insn(ep, insn);
686 ep->active = NULL;
689 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb)
691 if (!bb_terminated(ep->active))
692 add_goto(ep, bb);
694 ep->active = bb;
695 if (bb_reachable(bb))
696 add_bb(&ep->bbs, bb);
699 static void remove_parent(struct basic_block *child, struct basic_block *parent)
701 remove_bb_from_list(&child->parents, parent, 1);
702 if (!child->parents)
703 repeat_phase |= REPEAT_CFG_CLEANUP;
706 /* Change a "switch" or a conditional branch into a branch */
707 void insert_branch(struct basic_block *bb, struct instruction *jmp, struct basic_block *target)
709 struct instruction *br, *old;
710 struct basic_block *child;
712 /* Remove the switch */
713 old = delete_last_instruction(&bb->insns);
714 assert(old == jmp);
715 kill_instruction(old);
717 br = alloc_instruction(OP_BR, 0);
718 br->bb = bb;
719 br->bb_true = target;
720 add_instruction(&bb->insns, br);
722 FOR_EACH_PTR(bb->children, child) {
723 if (child == target) {
724 target = NULL; /* Trigger just once */
725 continue;
727 DELETE_CURRENT_PTR(child);
728 remove_parent(child, bb);
729 } END_FOR_EACH_PTR(child);
730 PACK_PTR_LIST(&bb->children);
731 repeat_phase |= REPEAT_CFG_CLEANUP;
735 void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi_node, pseudo_t if_true, pseudo_t if_false)
737 pseudo_t target;
738 struct instruction *select;
740 /* Remove the 'br' */
741 delete_last_instruction(&bb->insns);
743 select = alloc_typed_instruction(OP_SEL, phi_node->type);
744 select->bb = bb;
746 assert(br->cond);
747 use_pseudo(select, br->cond, &select->src1);
749 target = phi_node->target;
750 assert(target->def == phi_node);
751 select->target = target;
752 target->def = select;
754 use_pseudo(select, if_true, &select->src2);
755 use_pseudo(select, if_false, &select->src3);
757 add_instruction(&bb->insns, select);
758 add_instruction(&bb->insns, br);
761 static inline int bb_empty(struct basic_block *bb)
763 return !bb->insns;
766 /* Add a label to the currently active block, return new active block */
767 static struct basic_block * add_label(struct entrypoint *ep, struct symbol *label)
769 struct basic_block *bb = label->bb_target;
771 if (bb) {
772 set_activeblock(ep, bb);
773 return bb;
775 bb = ep->active;
776 if (!bb_reachable(bb) || !bb_empty(bb)) {
777 bb = alloc_basic_block(ep, label->pos);
778 set_activeblock(ep, bb);
780 label->bb_target = bb;
781 return bb;
784 static void add_branch(struct entrypoint *ep, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false)
786 struct basic_block *bb = ep->active;
787 struct instruction *br;
789 if (bb_reachable(bb)) {
790 br = alloc_instruction(OP_CBR, 0);
791 use_pseudo(br, cond, &br->cond);
792 br->bb_true = bb_true;
793 br->bb_false = bb_false;
794 add_bb(&bb_true->parents, bb);
795 add_bb(&bb_false->parents, bb);
796 add_bb(&bb->children, bb_true);
797 add_bb(&bb->children, bb_false);
798 add_one_insn(ep, br);
802 pseudo_t alloc_pseudo(struct instruction *def)
804 static int nr = 0;
805 struct pseudo * pseudo = __alloc_pseudo(0);
806 pseudo->type = PSEUDO_REG;
807 pseudo->nr = ++nr;
808 pseudo->def = def;
809 return pseudo;
812 static pseudo_t symbol_pseudo(struct entrypoint *ep, struct symbol *sym)
814 pseudo_t pseudo;
816 if (!sym)
817 return VOID;
819 pseudo = sym->pseudo;
820 if (!pseudo) {
821 pseudo = __alloc_pseudo(0);
822 pseudo->nr = -1;
823 pseudo->type = PSEUDO_SYM;
824 pseudo->sym = sym;
825 pseudo->ident = sym->ident;
826 sym->pseudo = pseudo;
827 add_pseudo(&ep->accesses, pseudo);
829 /* Symbol pseudos have neither nr nor def */
830 return pseudo;
833 pseudo_t value_pseudo(long long val)
835 #define MAX_VAL_HASH 64
836 static struct pseudo_list *prev[MAX_VAL_HASH];
837 int hash = val & (MAX_VAL_HASH-1);
838 struct pseudo_list **list = prev + hash;
839 pseudo_t pseudo;
841 FOR_EACH_PTR(*list, pseudo) {
842 if (pseudo->value == val)
843 return pseudo;
844 } END_FOR_EACH_PTR(pseudo);
846 pseudo = __alloc_pseudo(0);
847 pseudo->type = PSEUDO_VAL;
848 pseudo->value = val;
849 add_pseudo(list, pseudo);
851 /* Value pseudos have neither nr, usage nor def */
852 return pseudo;
855 pseudo_t undef_pseudo(void)
857 pseudo_t pseudo = __alloc_pseudo(0);
858 pseudo->type = PSEUDO_UNDEF;
859 return pseudo;
862 static pseudo_t argument_pseudo(struct entrypoint *ep, int nr)
864 pseudo_t pseudo = __alloc_pseudo(0);
865 struct instruction *entry = ep->entry;
867 pseudo->type = PSEUDO_ARG;
868 pseudo->nr = nr;
869 pseudo->def = entry;
870 add_pseudo(&entry->arg_list, pseudo);
872 /* Argument pseudos have neither usage nor def */
873 return pseudo;
876 struct instruction *alloc_phisrc(pseudo_t pseudo, struct symbol *type)
878 struct instruction *insn = alloc_typed_instruction(OP_PHISOURCE, type);
879 pseudo_t phi = __alloc_pseudo(0);
880 static int nr = 0;
882 phi->type = PSEUDO_PHI;
883 phi->nr = ++nr;
884 phi->def = insn;
886 use_pseudo(insn, pseudo, &insn->phi_src);
887 insn->target = phi;
888 return insn;
891 pseudo_t alloc_phi(struct basic_block *source, pseudo_t pseudo, struct symbol *type)
893 struct instruction *insn;
895 if (!source)
896 return VOID;
898 insn = alloc_phisrc(pseudo, type);
899 insn->bb = source;
900 add_instruction(&source->insns, insn);
901 return insn->target;
904 struct instruction *alloc_phi_node(struct basic_block *bb, struct symbol *type, struct ident *ident)
906 struct instruction *phi_node = alloc_typed_instruction(OP_PHI, type);
907 pseudo_t phi;
909 phi = alloc_pseudo(phi_node);
910 phi->ident = ident;
911 phi->def = phi_node;
912 phi_node->target = phi;
913 phi_node->bb = bb;
914 return phi_node;
917 void add_phi_node(struct basic_block *bb, struct instruction *phi_node)
919 struct instruction *insn;
921 FOR_EACH_PTR(bb->insns, insn) {
922 enum opcode op = insn->opcode;
923 if (op == OP_PHI)
924 continue;
925 INSERT_CURRENT(phi_node, insn);
926 return;
927 } END_FOR_EACH_PTR(insn);
929 // FIXME
930 add_instruction(&bb->insns, phi_node);
933 struct instruction *insert_phi_node(struct basic_block *bb, struct symbol *var)
935 struct instruction *phi_node = alloc_phi_node(bb, var, var->ident);
936 add_phi_node(bb, phi_node);
937 return phi_node;
941 * We carry the "access_data" structure around for any accesses,
942 * which simplifies things a lot. It contains all the access
943 * information in one place.
945 struct access_data {
946 struct symbol *type; // ctype
947 struct symbol *btype; // base type of bitfields
948 pseudo_t address; // pseudo containing address ..
949 long long offset; // byte offset
952 static int linearize_simple_address(struct entrypoint *ep,
953 struct expression *addr,
954 struct access_data *ad)
956 if (addr->type == EXPR_SYMBOL) {
957 linearize_one_symbol(ep, addr->symbol);
958 ad->address = symbol_pseudo(ep, addr->symbol);
959 return 1;
961 if (addr->type == EXPR_BINOP) {
962 if (addr->right->type == EXPR_VALUE) {
963 if (addr->op == '+') {
964 ad->offset += get_expression_value(addr->right);
965 return linearize_simple_address(ep, addr->left, ad);
969 ad->address = linearize_expression(ep, addr);
970 return 1;
973 static struct symbol *bitfield_base_type(struct symbol *sym)
975 struct symbol *base = sym;
977 if (sym) {
978 if (sym->type == SYM_NODE)
979 base = base->ctype.base_type;
980 if (base->type == SYM_BITFIELD) {
981 base = base->ctype.base_type;
982 if (sym->packed) {
983 int size = bits_to_bytes(sym->bit_offset + sym->bit_size);
984 sym = __alloc_symbol(0);
985 *sym = *base;
986 sym->bit_size = bytes_to_bits(size);
987 return sym;
989 return base;
992 return sym;
995 static int linearize_address_gen(struct entrypoint *ep,
996 struct expression *expr,
997 struct access_data *ad)
999 struct symbol *ctype = expr->ctype;
1001 if (!ctype)
1002 return 0;
1003 ad->type = ctype;
1004 if (expr->type == EXPR_PREOP && expr->op == '*')
1005 return linearize_simple_address(ep, expr->unop, ad);
1007 warning(expr->pos, "generating address of non-lvalue (%d)", expr->type);
1008 return 0;
1011 static pseudo_t add_load(struct entrypoint *ep, struct access_data *ad)
1013 struct instruction *insn;
1014 pseudo_t new;
1016 if (!ep->active)
1017 return VOID;
1019 insn = alloc_typed_instruction(OP_LOAD, ad->btype);
1020 new = alloc_pseudo(insn);
1022 insn->target = new;
1023 insn->offset = ad->offset;
1024 insn->is_volatile = ad->type && (ad->type->ctype.modifiers & MOD_VOLATILE);
1025 use_pseudo(insn, ad->address, &insn->src);
1026 add_one_insn(ep, insn);
1027 return new;
1030 static void add_store(struct entrypoint *ep, struct access_data *ad, pseudo_t value)
1032 struct basic_block *bb = ep->active;
1033 struct instruction *store;
1035 if (!bb)
1036 return;
1038 store = alloc_typed_instruction(OP_STORE, ad->btype);
1039 store->offset = ad->offset;
1040 store->is_volatile = ad->type && (ad->type->ctype.modifiers & MOD_VOLATILE);
1041 use_pseudo(store, value, &store->target);
1042 use_pseudo(store, ad->address, &store->src);
1043 add_one_insn(ep, store);
1046 static pseudo_t linearize_bitfield_insert(struct entrypoint *ep,
1047 pseudo_t ori, pseudo_t val, struct symbol *ctype, struct symbol *btype)
1049 unsigned int shift = ctype->bit_offset;
1050 unsigned int size = ctype->bit_size;
1051 unsigned long long mask = ((1ULL << size) - 1);
1052 unsigned long long smask= bits_mask(btype->bit_size);
1054 val = add_cast(ep, btype, ctype, OP_ZEXT, val);
1055 if (shift) {
1056 val = add_binary_op(ep, btype, OP_SHL, val, value_pseudo(shift));
1057 mask <<= shift;
1059 ori = add_binary_op(ep, btype, OP_AND, ori, value_pseudo(~mask & smask));
1060 val = add_binary_op(ep, btype, OP_OR, ori, val);
1062 return val;
1065 static pseudo_t linearize_store_gen(struct entrypoint *ep,
1066 pseudo_t value,
1067 struct access_data *ad)
1069 struct symbol *ctype = ad->type;
1070 struct symbol *btype;
1071 pseudo_t store = value;
1073 if (!ep->active)
1074 return VOID;
1076 btype = ad->btype = bitfield_base_type(ctype);
1077 if (type_size(btype) != type_size(ctype)) {
1078 pseudo_t orig = add_load(ep, ad);
1079 store = linearize_bitfield_insert(ep, orig, value, ctype, btype);
1081 add_store(ep, ad, store);
1082 return value;
1085 static void taint_undefined_behaviour(struct instruction *insn)
1087 pseudo_t src2;
1089 switch (insn->opcode) {
1090 case OP_LSR:
1091 case OP_ASR:
1092 case OP_SHL:
1093 src2 = insn->src2;
1094 if (src2->type != PSEUDO_VAL)
1095 break;
1096 if ((unsigned long long)src2->value >= insn->size)
1097 insn->tainted = 1;
1098 break;
1102 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right)
1104 struct instruction *insn = alloc_typed_instruction(op, ctype);
1105 pseudo_t target = alloc_pseudo(insn);
1106 insn->target = target;
1107 use_pseudo(insn, left, &insn->src1);
1108 use_pseudo(insn, right, &insn->src2);
1109 add_one_insn(ep, insn);
1110 return target;
1113 static pseudo_t add_cmp_op(struct entrypoint *ep, struct symbol *ctype, int op, struct symbol *itype, pseudo_t left, pseudo_t right)
1115 pseudo_t target = add_binary_op(ep, ctype, op, left, right);
1116 target->def->itype = itype;
1117 return target;
1120 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val)
1122 struct instruction *insn = alloc_typed_instruction(OP_SETVAL, ctype);
1123 pseudo_t target = alloc_pseudo(insn);
1124 insn->target = target;
1125 insn->val = val;
1126 add_one_insn(ep, insn);
1127 return target;
1130 static pseudo_t add_setfval(struct entrypoint *ep, struct symbol *ctype, long double fval)
1132 struct instruction *insn = alloc_typed_instruction(OP_SETFVAL, ctype);
1133 pseudo_t target = alloc_pseudo(insn);
1134 insn->target = target;
1135 insn->fvalue = fval;
1136 add_one_insn(ep, insn);
1137 return target;
1140 static pseudo_t add_symbol_address(struct entrypoint *ep, struct expression *expr)
1142 struct instruction *insn = alloc_typed_instruction(OP_SYMADDR, expr->ctype);
1143 pseudo_t target = alloc_pseudo(insn);
1145 insn->target = target;
1146 use_pseudo(insn, symbol_pseudo(ep, expr->symbol), &insn->src);
1147 add_one_insn(ep, insn);
1148 return target;
1151 static pseudo_t linearize_bitfield_extract(struct entrypoint *ep,
1152 pseudo_t val, struct symbol *ctype, struct symbol *btype)
1154 unsigned int off = ctype->bit_offset;
1156 if (off) {
1157 pseudo_t shift = value_pseudo(off);
1158 val = add_binary_op(ep, btype, OP_LSR, val, shift);
1160 val = cast_pseudo(ep, val, btype, ctype);
1161 return val;
1164 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct access_data *ad)
1166 struct symbol *ctype = ad->type;
1167 struct symbol *btype;
1168 pseudo_t new;
1170 if (!ep->active)
1171 return VOID;
1173 btype = ad->btype = bitfield_base_type(ctype);
1174 new = add_load(ep, ad);
1175 if (ctype->bit_size != type_size(btype))
1176 new = linearize_bitfield_extract(ep, new, ctype, btype);
1177 return new;
1180 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
1182 struct access_data ad = { NULL, };
1183 pseudo_t value;
1185 if (!linearize_address_gen(ep, expr, &ad))
1186 return VOID;
1187 value = linearize_load_gen(ep, &ad);
1188 return value;
1191 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
1193 struct access_data ad = { NULL, };
1194 pseudo_t old, new, one;
1195 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB;
1197 if (!linearize_address_gen(ep, expr->unop, &ad))
1198 return VOID;
1200 old = linearize_load_gen(ep, &ad);
1201 op = opcode_float(op, expr->ctype);
1202 if (is_float_type(expr->ctype))
1203 one = add_setfval(ep, expr->ctype, expr->op_value);
1204 else
1205 one = value_pseudo(expr->op_value);
1206 if (ad.btype != ad.type)
1207 old = cast_pseudo(ep, old, ad.type, ad.btype);
1208 new = add_binary_op(ep, ad.btype, op, old, one);
1209 if (ad.btype != ad.type)
1210 new = cast_pseudo(ep, new, ad.btype, ad.type);
1211 linearize_store_gen(ep, new, &ad);
1212 return postop ? old : new;
1215 static pseudo_t add_unop(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t src)
1217 struct instruction *insn = alloc_typed_instruction(op, ctype);
1218 pseudo_t new = alloc_pseudo(insn);
1220 insn->target = new;
1221 use_pseudo(insn, src, &insn->src1);
1222 add_one_insn(ep, insn);
1223 return new;
1226 static pseudo_t add_cast(struct entrypoint *ep, struct symbol *to,
1227 struct symbol *from, int op, pseudo_t src)
1229 pseudo_t new = add_unop(ep, to, op, src);
1230 new->def->orig_type = from;
1231 return new;
1234 static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr)
1236 pseudo_t pre = linearize_expression(ep, expr->base);
1237 struct instruction *insn = alloc_typed_instruction(OP_SLICE, expr->ctype);
1238 pseudo_t new = alloc_pseudo(insn);
1240 insn->target = new;
1241 insn->from = expr->r_bitpos;
1242 insn->len = expr->r_nrbits;
1243 use_pseudo(insn, pre, &insn->base);
1244 add_one_insn(ep, insn);
1245 return new;
1248 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
1250 pseudo_t pre = linearize_expression(ep, expr->unop);
1251 struct symbol *ctype = expr->ctype;
1252 switch (expr->op) {
1253 case '+':
1254 return pre;
1255 case '!': {
1256 pseudo_t zero = value_pseudo(0);
1257 return add_cmp_op(ep, ctype, OP_SET_EQ, expr->unop->ctype, pre, zero);
1259 case '~':
1260 return add_unop(ep, ctype, OP_NOT, pre);
1261 case '-':
1262 return add_unop(ep, ctype, opcode_float(OP_NEG, ctype), pre);
1264 return VOID;
1267 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
1270 * '*' is an lvalue access, and is fundamentally different
1271 * from an arithmetic operation. Maybe it should have an
1272 * expression type of its own..
1274 if (expr->op == '*')
1275 return linearize_access(ep, expr);
1276 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
1277 return linearize_inc_dec(ep, expr, 0);
1278 return linearize_regular_preop(ep, expr);
1281 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr)
1283 return linearize_inc_dec(ep, expr, 1);
1287 * Casts to pointers are "less safe" than other casts, since
1288 * they imply type-unsafe accesses. "void *" is a special
1289 * case, since you can't access through it anyway without another
1290 * cast.
1292 enum mtype {
1293 MTYPE_UINT,
1294 MTYPE_SINT,
1295 MTYPE_PTR,
1296 MTYPE_VPTR, // TODO: must be removed ?
1297 MTYPE_FLOAT,
1298 MTYPE_BAD,
1301 static enum mtype get_mtype(struct symbol *s)
1303 int sign = (s->ctype.modifiers & MOD_SIGNED) ? 1 : 0;
1305 retry: switch (s->type) {
1306 case SYM_NODE:
1307 s = s->ctype.base_type;
1308 goto retry;
1309 case SYM_PTR:
1310 if (s->ctype.base_type == &void_ctype)
1311 return MTYPE_VPTR;
1312 return MTYPE_PTR;
1313 case SYM_BITFIELD:
1314 case SYM_RESTRICT:
1315 case SYM_FOULED:
1316 case SYM_ENUM:
1317 s = s->ctype.base_type;
1318 /* fall-through */
1319 case_int:
1320 return sign ? MTYPE_SINT : MTYPE_UINT;
1321 case SYM_BASETYPE:
1322 if (s->ctype.base_type == &fp_type)
1323 return MTYPE_FLOAT;
1324 if (s->ctype.base_type == &int_type)
1325 goto case_int;
1326 /* fall-through */
1327 default:
1328 return MTYPE_BAD;
1332 static int get_cast_opcode(struct symbol *dst, struct symbol *src)
1334 enum mtype stype = get_mtype(src);
1335 enum mtype dtype = get_mtype(dst);
1337 switch (dtype) {
1338 case MTYPE_FLOAT:
1339 switch (stype) {
1340 case MTYPE_FLOAT:
1341 if (dst->bit_size == src->bit_size)
1342 return OP_NOP;
1343 return OP_FCVTF;
1344 case MTYPE_UINT:
1345 return OP_UCVTF;
1346 case MTYPE_SINT:
1347 return OP_SCVTF;
1348 default:
1349 return OP_BADOP;
1351 case MTYPE_PTR:
1352 switch (stype) {
1353 case MTYPE_UINT:
1354 case MTYPE_SINT:
1355 return OP_UTPTR;
1356 case MTYPE_PTR:
1357 case MTYPE_VPTR:
1358 return OP_PTRCAST;
1359 default:
1360 return OP_BADOP;
1362 case MTYPE_VPTR:
1363 switch (stype) {
1364 case MTYPE_PTR:
1365 case MTYPE_VPTR:
1366 case MTYPE_UINT:
1367 stype = MTYPE_UINT;
1368 /* fall through */
1369 case MTYPE_SINT:
1370 break;
1371 default:
1372 return OP_BADOP;
1374 /* fall through */
1375 case MTYPE_UINT:
1376 case MTYPE_SINT:
1377 switch (stype) {
1378 case MTYPE_FLOAT:
1379 return dtype == MTYPE_UINT ? OP_FCVTU : OP_FCVTS;
1380 case MTYPE_PTR:
1381 return OP_PTRTU;
1382 case MTYPE_VPTR:
1383 case MTYPE_UINT:
1384 case MTYPE_SINT:
1385 if (dst->bit_size ==src->bit_size)
1386 return OP_NOP;
1387 if (dst->bit_size < src->bit_size)
1388 return OP_TRUNC;
1389 return stype == MTYPE_SINT ? OP_SEXT : OP_ZEXT;
1390 default:
1391 return OP_BADOP;
1393 /* fall through */
1394 default:
1395 if (src->type == SYM_NODE)
1396 src = src->ctype.base_type;
1397 if (dst->type == SYM_NODE)
1398 dst = dst->ctype.base_type;
1399 if (src == dst)
1400 return OP_NOP;
1401 return OP_BADOP;
1405 static pseudo_t cast_pseudo(struct entrypoint *ep, pseudo_t src, struct symbol *from, struct symbol *to)
1407 const struct position pos = current_pos;
1408 pseudo_t result;
1409 struct instruction *insn;
1410 int opcode;
1412 if (src == VOID)
1413 return VOID;
1414 if (!from || !to)
1415 return VOID;
1416 if (from->bit_size < 0 || to->bit_size < 0)
1417 return VOID;
1418 opcode = get_cast_opcode(to, from);
1419 switch (opcode) {
1420 case OP_NOP:
1421 return src;
1422 case OP_UTPTR:
1423 if (from->bit_size == to->bit_size)
1424 break;
1425 if (src == value_pseudo(0))
1426 break;
1427 if (Wint_to_pointer_cast)
1428 warning(pos, "non size-preserving integer to pointer cast");
1429 src = cast_pseudo(ep, src, from, size_t_ctype);
1430 from = size_t_ctype;
1431 break;
1432 case OP_PTRTU:
1433 if (from->bit_size == to->bit_size)
1434 break;
1435 if (Wpointer_to_int_cast)
1436 warning(pos, "non size-preserving pointer to integer cast");
1437 src = cast_pseudo(ep, src, from, size_t_ctype);
1438 return cast_pseudo(ep, src, size_t_ctype, to);
1439 case OP_BADOP:
1440 return VOID;
1441 default:
1442 break;
1444 insn = alloc_typed_instruction(opcode, to);
1445 result = alloc_pseudo(insn);
1446 insn->target = result;
1447 insn->orig_type = from;
1448 use_pseudo(insn, src, &insn->src);
1449 add_one_insn(ep, insn);
1450 return result;
1453 static int map_opcode(int opcode, struct symbol *ctype)
1455 if (ctype && is_float_type(ctype))
1456 return opcode_table[opcode].to_float;
1457 if (ctype && (ctype->ctype.modifiers & MOD_SIGNED)) {
1458 switch(opcode) {
1459 case OP_DIVU: case OP_MODU: case OP_LSR:
1460 opcode++;
1463 return opcode;
1466 static inline pseudo_t add_convert_to_bool(struct entrypoint *ep, pseudo_t src, struct symbol *type)
1468 pseudo_t zero;
1469 int op;
1471 if (!type || src == VOID)
1472 return VOID;
1473 if (is_bool_type(type))
1474 return src;
1475 if (src->type == PSEUDO_VAL && (src->value == 0 || src->value == 1))
1476 return src;
1477 if (is_float_type(type)) {
1478 zero = add_setfval(ep, type, 0.0);
1479 op = map_opcode(OP_SET_NE, type);
1480 } else {
1481 zero = value_pseudo(0);
1482 op = OP_SET_NE;
1484 return add_cmp_op(ep, &bool_ctype, op, type, src, zero);
1487 static pseudo_t linearize_expression_to_bool(struct entrypoint *ep, struct expression *expr)
1489 pseudo_t dst;
1490 dst = linearize_expression(ep, expr);
1491 dst = add_convert_to_bool(ep, dst, expr->ctype);
1492 return dst;
1495 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
1497 struct access_data ad = { NULL, };
1498 struct expression *target = expr->left;
1499 struct expression *src = expr->right;
1500 struct symbol *ctype;
1501 pseudo_t value;
1503 value = linearize_expression(ep, src);
1504 if (!target || !linearize_address_gen(ep, target, &ad))
1505 return value;
1506 if (expr->op != '=') {
1507 pseudo_t oldvalue = linearize_load_gen(ep, &ad);
1508 pseudo_t dst;
1509 static const int op_trans[] = {
1510 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD,
1511 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB,
1512 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MUL,
1513 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIVU,
1514 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MODU,
1515 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL,
1516 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_LSR,
1517 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND,
1518 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR,
1519 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR
1521 int opcode;
1523 if (!src)
1524 return VOID;
1526 ctype = src->ctype;
1527 oldvalue = cast_pseudo(ep, oldvalue, target->ctype, ctype);
1528 opcode = map_opcode(op_trans[expr->op - SPECIAL_BASE], ctype);
1529 dst = add_binary_op(ep, ctype, opcode, oldvalue, value);
1530 taint_undefined_behaviour(dst->def);
1531 value = cast_pseudo(ep, dst, ctype, expr->ctype);
1533 value = linearize_store_gen(ep, value, &ad);
1534 return value;
1537 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr)
1539 struct expression *arg, *fn;
1540 struct instruction *insn = alloc_typed_instruction(OP_CALL, expr->ctype);
1541 pseudo_t retval, call;
1542 struct ctype *ctype = NULL;
1543 struct symbol *fntype;
1544 struct context *context;
1546 if (!expr->ctype)
1547 return VOID;
1549 fn = expr->fn;
1550 fntype = fn->ctype;
1552 // handle builtins
1553 if (fntype->op && fntype->op->linearize) {
1554 retval = fntype->op->linearize(ep, expr);
1555 if (retval)
1556 return retval;
1559 ctype = &fntype->ctype;
1561 add_symbol(&insn->fntypes, fntype);
1562 FOR_EACH_PTR(expr->args, arg) {
1563 pseudo_t new = linearize_expression(ep, arg);
1564 use_pseudo(insn, new, add_pseudo(&insn->arguments, new));
1565 add_symbol(&insn->fntypes, arg->ctype);
1566 } END_FOR_EACH_PTR(arg);
1568 if (fn->type == EXPR_PREOP && fn->op == '*' && is_func_type(fn->ctype))
1569 fn = fn->unop;
1571 if (fn->type == EXPR_SYMBOL) {
1572 call = symbol_pseudo(ep, fn->symbol);
1573 } else {
1574 call = linearize_expression(ep, fn);
1576 use_pseudo(insn, call, &insn->func);
1577 retval = VOID;
1578 if (expr->ctype != &void_ctype)
1579 retval = alloc_pseudo(insn);
1580 insn->target = retval;
1581 add_one_insn(ep, insn);
1583 if (ctype) {
1584 FOR_EACH_PTR(ctype->contexts, context) {
1585 int in = context->in;
1586 int out = context->out;
1587 int check = 0;
1588 int context_diff;
1589 if (in < 0) {
1590 check = 1;
1591 in = 0;
1593 if (out < 0) {
1594 check = 0;
1595 out = 0;
1597 context_diff = out - in;
1598 if (check || context_diff) {
1599 insn = alloc_instruction(OP_CONTEXT, 0);
1600 insn->increment = context_diff;
1601 insn->check = check;
1602 insn->context_expr = context->context;
1603 add_one_insn(ep, insn);
1605 } END_FOR_EACH_PTR(context);
1607 if (ctype->modifiers & MOD_NORETURN)
1608 add_unreachable(ep);
1611 return retval;
1614 static pseudo_t linearize_binop_bool(struct entrypoint *ep, struct expression *expr)
1616 pseudo_t src1, src2, dst;
1617 int op = (expr->op == SPECIAL_LOGICAL_OR) ? OP_OR : OP_AND;
1619 src1 = linearize_expression_to_bool(ep, expr->left);
1620 src2 = linearize_expression_to_bool(ep, expr->right);
1621 dst = add_binary_op(ep, &bool_ctype, op, src1, src2);
1622 if (expr->ctype != &bool_ctype)
1623 dst = cast_pseudo(ep, dst, &bool_ctype, expr->ctype);
1624 return dst;
1627 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr)
1629 pseudo_t src1, src2, dst;
1630 static const int opcode[] = {
1631 ['+'] = OP_ADD, ['-'] = OP_SUB,
1632 ['*'] = OP_MUL, ['/'] = OP_DIVU,
1633 ['%'] = OP_MODU, ['&'] = OP_AND,
1634 ['|'] = OP_OR, ['^'] = OP_XOR,
1635 [SPECIAL_LEFTSHIFT] = OP_SHL,
1636 [SPECIAL_RIGHTSHIFT] = OP_LSR,
1638 int op;
1640 src1 = linearize_expression(ep, expr->left);
1641 src2 = linearize_expression(ep, expr->right);
1642 op = map_opcode(opcode[expr->op], expr->ctype);
1643 dst = add_binary_op(ep, expr->ctype, op, src1, src2);
1644 taint_undefined_behaviour(dst->def);
1645 return dst;
1648 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1650 static pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false);
1652 static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr)
1654 pseudo_t cond, valt, valf, res;
1655 struct instruction *insn;
1657 valt = linearize_expression(ep, expr->cond_true);
1658 valf = linearize_expression(ep, expr->cond_false);
1659 cond = linearize_expression(ep, expr->conditional);
1661 insn = alloc_typed_instruction(OP_SEL, expr->ctype);
1662 if (!expr->cond_true)
1663 valt = cond;
1664 use_pseudo(insn, cond, &insn->src1);
1665 use_pseudo(insn, valt, &insn->src2);
1666 use_pseudo(insn, valf, &insn->src3);
1668 res = alloc_pseudo(insn);
1669 insn->target = res;
1670 add_one_insn(ep, insn);
1671 return res;
1674 static pseudo_t add_join_conditional(struct entrypoint *ep, struct expression *expr,
1675 pseudo_t phi1, pseudo_t phi2)
1677 pseudo_t target;
1678 struct instruction *phi_node;
1680 if (phi1 == VOID)
1681 return phi2;
1682 if (phi2 == VOID)
1683 return phi1;
1685 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype);
1686 use_pseudo(phi_node, phi1, add_pseudo(&phi_node->phi_list, phi1));
1687 use_pseudo(phi_node, phi2, add_pseudo(&phi_node->phi_list, phi2));
1688 phi_node->target = target = alloc_pseudo(phi_node);
1689 add_one_insn(ep, phi_node);
1690 return target;
1693 static pseudo_t linearize_short_conditional(struct entrypoint *ep, struct expression *expr,
1694 struct expression *cond,
1695 struct expression *expr_false)
1697 pseudo_t src1, src2;
1698 struct basic_block *bb_false;
1699 struct basic_block *merge;
1700 pseudo_t phi1, phi2;
1702 if (!expr_false || !ep->active)
1703 return VOID;
1705 bb_false = alloc_basic_block(ep, expr_false->pos);
1706 merge = alloc_basic_block(ep, expr->pos);
1708 src1 = linearize_expression(ep, cond);
1709 phi1 = alloc_phi(ep->active, src1, expr->ctype);
1710 add_branch(ep, src1, merge, bb_false);
1712 set_activeblock(ep, bb_false);
1713 src2 = linearize_expression(ep, expr_false);
1714 phi2 = alloc_phi(ep->active, src2, expr->ctype);
1715 set_activeblock(ep, merge);
1717 return add_join_conditional(ep, expr, phi1, phi2);
1720 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr,
1721 struct expression *cond,
1722 struct expression *expr_true,
1723 struct expression *expr_false)
1725 pseudo_t src1, src2;
1726 pseudo_t phi1, phi2;
1727 struct basic_block *bb_true, *bb_false, *merge;
1729 if (!cond || !expr_true || !expr_false || !ep->active)
1730 return VOID;
1731 bb_true = alloc_basic_block(ep, expr_true->pos);
1732 bb_false = alloc_basic_block(ep, expr_false->pos);
1733 merge = alloc_basic_block(ep, expr->pos);
1735 linearize_cond_branch(ep, cond, bb_true, bb_false);
1737 set_activeblock(ep, bb_true);
1738 src1 = linearize_expression(ep, expr_true);
1739 phi1 = alloc_phi(ep->active, src1, expr->ctype);
1740 add_goto(ep, merge);
1742 set_activeblock(ep, bb_false);
1743 src2 = linearize_expression(ep, expr_false);
1744 phi2 = alloc_phi(ep->active, src2, expr->ctype);
1745 set_activeblock(ep, merge);
1747 return add_join_conditional(ep, expr, phi1, phi2);
1750 static void insert_phis(struct basic_block *bb, pseudo_t src, struct symbol *ctype,
1751 struct instruction *node)
1753 struct basic_block *parent;
1755 FOR_EACH_PTR(bb->parents, parent) {
1756 struct instruction *br = delete_last_instruction(&parent->insns);
1757 pseudo_t phi = alloc_phi(parent, src, ctype);
1758 add_instruction(&parent->insns, br);
1759 use_pseudo(node, phi, add_pseudo(&node->phi_list, phi));
1760 } END_FOR_EACH_PTR(parent);
1763 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr)
1765 struct symbol *ctype = expr->ctype;
1766 struct basic_block *other, *merge;
1767 struct instruction *node;
1768 pseudo_t src1, src2, phi2;
1770 if (!ep->active || !expr->left || !expr->right)
1771 return VOID;
1773 other = alloc_basic_block(ep, expr->right->pos);
1774 merge = alloc_basic_block(ep, expr->pos);
1775 node = alloc_phi_node(merge, ctype, NULL);
1777 // LHS and its shortcut
1778 if (expr->op == SPECIAL_LOGICAL_OR) {
1779 linearize_cond_branch(ep, expr->left, merge, other);
1780 src1 = value_pseudo(1);
1781 } else {
1782 linearize_cond_branch(ep, expr->left, other, merge);
1783 src1 = value_pseudo(0);
1785 insert_phis(merge, src1, ctype, node);
1787 // RHS
1788 set_activeblock(ep, other);
1789 src2 = linearize_expression_to_bool(ep, expr->right);
1790 src2 = cast_pseudo(ep, src2, &bool_ctype, ctype);
1791 phi2 = alloc_phi(ep->active, src2, ctype);
1792 use_pseudo(node, phi2, add_pseudo(&node->phi_list, phi2));
1794 // join
1795 set_activeblock(ep, merge);
1796 add_instruction(&merge->insns, node);
1797 return node->target;
1800 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr)
1802 static const int cmpop[] = {
1803 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT,
1804 [SPECIAL_EQUAL] = OP_SET_EQ,
1805 [SPECIAL_NOTEQUAL] = OP_SET_NE,
1806 [SPECIAL_GTE] = OP_SET_GE,
1807 [SPECIAL_LTE] = OP_SET_LE,
1808 [SPECIAL_UNSIGNED_LT] = OP_SET_B,
1809 [SPECIAL_UNSIGNED_GT] = OP_SET_A,
1810 [SPECIAL_UNSIGNED_LTE] = OP_SET_BE,
1811 [SPECIAL_UNSIGNED_GTE] = OP_SET_AE,
1813 struct symbol *itype = expr->right->ctype;
1814 int op = opcode_float(cmpop[expr->op], itype);
1815 pseudo_t src1 = linearize_expression(ep, expr->left);
1816 pseudo_t src2 = linearize_expression(ep, expr->right);
1817 pseudo_t dst = add_cmp_op(ep, expr->ctype, op, itype, src1, src2);
1818 return dst;
1822 static pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1824 pseudo_t cond;
1826 if (!expr || !valid_type(expr->ctype) || !bb_reachable(ep->active))
1827 return VOID;
1829 switch (expr->type) {
1831 case EXPR_STRING:
1832 case EXPR_VALUE:
1833 add_goto(ep, expr->value ? bb_true : bb_false);
1834 break;
1835 case EXPR_FVALUE:
1836 add_goto(ep, expr->fvalue ? bb_true : bb_false);
1837 break;
1838 case EXPR_LOGICAL:
1839 linearize_logical_branch(ep, expr, bb_true, bb_false);
1840 break;
1841 case EXPR_COMPARE:
1842 cond = linearize_compare(ep, expr);
1843 add_branch(ep, cond, bb_true, bb_false);
1844 break;
1845 case EXPR_PREOP:
1846 if (expr->op == '!')
1847 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true);
1848 /* fall through */
1849 default:
1850 cond = linearize_expression_to_bool(ep, expr);
1851 add_branch(ep, cond, bb_true, bb_false);
1852 break;
1854 return VOID;
1858 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false)
1860 struct basic_block *next = alloc_basic_block(ep, expr->pos);
1862 if (expr->op == SPECIAL_LOGICAL_OR)
1863 linearize_cond_branch(ep, expr->left, bb_true, next);
1864 else
1865 linearize_cond_branch(ep, expr->left, next, bb_false);
1866 set_activeblock(ep, next);
1867 linearize_cond_branch(ep, expr->right, bb_true, bb_false);
1868 return VOID;
1871 static pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr)
1873 pseudo_t src;
1874 struct expression *orig = expr->cast_expression;
1876 if (!orig)
1877 return VOID;
1879 src = linearize_expression(ep, orig);
1880 return cast_pseudo(ep, src, orig->ctype, expr->ctype);
1883 static pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *ad)
1885 switch (initializer->type) {
1886 case EXPR_INITIALIZER: {
1887 struct expression *expr;
1888 FOR_EACH_PTR(initializer->expr_list, expr) {
1889 linearize_initializer(ep, expr, ad);
1890 } END_FOR_EACH_PTR(expr);
1891 break;
1893 case EXPR_POS:
1894 ad->offset = initializer->init_offset;
1895 linearize_initializer(ep, initializer->init_expr, ad);
1896 break;
1897 default: {
1898 pseudo_t value = linearize_expression(ep, initializer);
1899 ad->type = initializer->ctype;
1900 linearize_store_gen(ep, value, ad);
1901 return value;
1905 return VOID;
1908 static void linearize_argument(struct entrypoint *ep, struct symbol *arg, int nr)
1910 struct access_data ad = { NULL, };
1912 ad.type = arg;
1913 ad.address = symbol_pseudo(ep, arg);
1914 linearize_store_gen(ep, argument_pseudo(ep, nr), &ad);
1917 static pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
1919 if (!expr || !valid_type(expr->ctype))
1920 return VOID;
1922 current_pos = expr->pos;
1923 switch (expr->type) {
1924 case EXPR_SYMBOL:
1925 linearize_one_symbol(ep, expr->symbol);
1926 return add_symbol_address(ep, expr);
1928 case EXPR_VALUE:
1929 return value_pseudo(expr->value);
1931 case EXPR_STRING:
1932 case EXPR_LABEL:
1933 return add_setval(ep, expr->ctype, expr);
1935 case EXPR_FVALUE:
1936 return add_setfval(ep, expr->ctype, expr->fvalue);
1938 case EXPR_STATEMENT:
1939 return linearize_statement(ep, expr->statement);
1941 case EXPR_CALL:
1942 return linearize_call_expression(ep, expr);
1944 case EXPR_BINOP:
1945 if (expr->op == SPECIAL_LOGICAL_AND || expr->op == SPECIAL_LOGICAL_OR)
1946 return linearize_binop_bool(ep, expr);
1947 return linearize_binop(ep, expr);
1949 case EXPR_LOGICAL:
1950 return linearize_logical(ep, expr);
1952 case EXPR_COMPARE:
1953 return linearize_compare(ep, expr);
1955 case EXPR_SELECT:
1956 return linearize_select(ep, expr);
1958 case EXPR_CONDITIONAL:
1959 if (!expr->cond_true)
1960 return linearize_short_conditional(ep, expr, expr->conditional, expr->cond_false);
1962 return linearize_conditional(ep, expr, expr->conditional,
1963 expr->cond_true, expr->cond_false);
1965 case EXPR_COMMA:
1966 linearize_expression(ep, expr->left);
1967 return linearize_expression(ep, expr->right);
1969 case EXPR_ASSIGNMENT:
1970 return linearize_assignment(ep, expr);
1972 case EXPR_PREOP:
1973 return linearize_preop(ep, expr);
1975 case EXPR_POSTOP:
1976 return linearize_postop(ep, expr);
1978 case EXPR_CAST:
1979 case EXPR_FORCE_CAST:
1980 case EXPR_IMPLIED_CAST:
1981 return linearize_cast(ep, expr);
1983 case EXPR_SLICE:
1984 return linearize_slice(ep, expr);
1986 case EXPR_INITIALIZER:
1987 case EXPR_POS:
1988 warning(expr->pos, "unexpected initializer expression (%d %d)", expr->type, expr->op);
1989 return VOID;
1990 default:
1991 warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op);
1992 return VOID;
1994 return VOID;
1997 static pseudo_t linearize_one_symbol(struct entrypoint *ep, struct symbol *sym)
1999 struct access_data ad = { NULL, };
2000 pseudo_t value;
2002 if (!sym || !sym->initializer || sym->initialized)
2003 return VOID;
2005 /* We need to output these puppies some day too.. */
2006 if (sym->ctype.modifiers & (MOD_STATIC | MOD_TOPLEVEL))
2007 return VOID;
2009 sym->initialized = 1;
2010 ad.address = symbol_pseudo(ep, sym);
2012 if (sym->initializer && !is_scalar_type(sym)) {
2013 // default zero initialization [6.7.9.21]
2014 // FIXME: this init the whole aggregate while
2015 // only the existing fields need to be initialized.
2016 // FIXME: this init the whole aggregate even if
2017 // all fields arelater explicitely initialized.
2018 ad.type = sym;
2019 ad.address = symbol_pseudo(ep, sym);
2020 linearize_store_gen(ep, value_pseudo(0), &ad);
2023 value = linearize_initializer(ep, sym->initializer, &ad);
2024 return value;
2027 static pseudo_t linearize_compound_statement(struct entrypoint *ep, struct statement *stmt)
2029 pseudo_t pseudo;
2030 struct statement *s;
2032 pseudo = VOID;
2033 FOR_EACH_PTR(stmt->stmts, s) {
2034 pseudo = linearize_statement(ep, s);
2035 } END_FOR_EACH_PTR(s);
2037 return pseudo;
2040 static void add_return(struct entrypoint *ep, struct basic_block *bb, struct symbol *ctype, pseudo_t src)
2042 struct instruction *phi_node = first_instruction(bb->insns);
2043 pseudo_t phi;
2044 if (!phi_node) {
2045 phi_node = alloc_typed_instruction(OP_PHI, ctype);
2046 phi_node->target = alloc_pseudo(phi_node);
2047 phi_node->bb = bb;
2048 add_instruction(&bb->insns, phi_node);
2050 phi = alloc_phi(ep->active, src, ctype);
2051 phi->ident = &return_ident;
2052 use_pseudo(phi_node, phi, add_pseudo(&phi_node->phi_list, phi));
2055 static pseudo_t linearize_fn_statement(struct entrypoint *ep, struct statement *stmt)
2057 struct instruction *phi_node;
2058 struct basic_block *bb;
2059 pseudo_t pseudo;
2061 pseudo = linearize_compound_statement(ep, stmt);
2062 if (!is_void_type(stmt->ret)) { // non-void function
2063 struct basic_block *active = ep->active;
2064 if (active && !bb_terminated(active)) { // missing return
2065 struct basic_block *bb_ret;
2066 bb_ret = get_bound_block(ep, stmt->ret);
2067 add_return(ep, bb_ret, stmt->ret, undef_pseudo());
2070 bb = add_label(ep, stmt->ret);
2071 phi_node = first_instruction(bb->insns);
2072 if (phi_node)
2073 pseudo = phi_node->target;
2074 return pseudo;
2077 static pseudo_t linearize_inlined_call(struct entrypoint *ep, struct statement *stmt)
2079 struct instruction *insn = alloc_instruction(OP_INLINED_CALL, 0);
2080 struct statement *args = stmt->args;
2081 struct basic_block *bb;
2082 pseudo_t pseudo;
2084 if (args) {
2085 struct symbol *sym;
2087 concat_symbol_list(args->declaration, &ep->syms);
2088 FOR_EACH_PTR(args->declaration, sym) {
2089 pseudo_t value = linearize_one_symbol(ep, sym);
2090 add_pseudo(&insn->arguments, value);
2091 } END_FOR_EACH_PTR(sym);
2094 pseudo = linearize_fn_statement(ep, stmt);
2095 insn->target = pseudo;
2097 insn->func = symbol_pseudo(ep, stmt->inline_fn);
2098 bb = ep->active;
2099 if (!bb->insns)
2100 bb->pos = stmt->pos;
2101 add_one_insn(ep, insn);
2102 return pseudo;
2105 static pseudo_t linearize_context(struct entrypoint *ep, struct statement *stmt)
2107 struct instruction *insn = alloc_instruction(OP_CONTEXT, 0);
2108 struct expression *expr = stmt->expression;
2110 insn->increment = get_expression_value(expr);
2111 insn->context_expr = stmt->context;
2112 add_one_insn(ep, insn);
2113 return VOID;
2116 static pseudo_t linearize_range(struct entrypoint *ep, struct statement *stmt)
2118 struct instruction *insn = alloc_instruction(OP_RANGE, 0);
2120 use_pseudo(insn, linearize_expression(ep, stmt->range_expression), &insn->src1);
2121 use_pseudo(insn, linearize_expression(ep, stmt->range_low), &insn->src2);
2122 use_pseudo(insn, linearize_expression(ep, stmt->range_high), &insn->src3);
2123 add_one_insn(ep, insn);
2124 return VOID;
2127 ALLOCATOR(asm_rules, "asm rules");
2128 ALLOCATOR(asm_constraint, "asm constraints");
2130 static void add_asm_input(struct entrypoint *ep, struct instruction *insn, struct asm_operand *op)
2132 pseudo_t pseudo = linearize_expression(ep, op->expr);
2133 struct asm_constraint *rule = __alloc_asm_constraint(0);
2135 rule->ident = op->name;
2136 rule->constraint = op->constraint ? op->constraint->string->data : "";
2137 use_pseudo(insn, pseudo, &rule->pseudo);
2138 add_ptr_list(&insn->asm_rules->inputs, rule);
2141 static void add_asm_output(struct entrypoint *ep, struct instruction *insn, struct asm_operand *op)
2143 struct access_data ad = { NULL, };
2144 pseudo_t pseudo;
2145 struct asm_constraint *rule;
2147 if (op->is_memory) {
2148 pseudo = linearize_expression(ep, op->expr);
2149 } else {
2150 if (!linearize_address_gen(ep, op->expr, &ad))
2151 return;
2152 pseudo = alloc_pseudo(insn);
2153 linearize_store_gen(ep, pseudo, &ad);
2155 rule = __alloc_asm_constraint(0);
2156 rule->is_memory = op->is_memory;
2157 rule->ident = op->name;
2158 rule->constraint = op->constraint ? op->constraint->string->data : "";
2159 use_pseudo(insn, pseudo, &rule->pseudo);
2160 add_ptr_list(&insn->asm_rules->outputs, rule);
2163 static pseudo_t linearize_asm_statement(struct entrypoint *ep, struct statement *stmt)
2165 struct instruction *insn;
2166 struct expression *expr;
2167 struct asm_rules *rules;
2168 struct asm_operand *op;
2170 insn = alloc_instruction(OP_ASM, 0);
2171 expr = stmt->asm_string;
2172 if (!expr || expr->type != EXPR_STRING) {
2173 warning(stmt->pos, "expected string in inline asm");
2174 return VOID;
2176 insn->string = expr->string->data;
2178 rules = __alloc_asm_rules(0);
2179 insn->asm_rules = rules;
2181 /* Gather the inputs.. */
2182 FOR_EACH_PTR(stmt->asm_inputs, op) {
2183 add_asm_input(ep, insn, op);
2184 } END_FOR_EACH_PTR(op);
2186 add_one_insn(ep, insn);
2188 /* Assign the outputs */
2189 FOR_EACH_PTR(stmt->asm_outputs, op) {
2190 add_asm_output(ep, insn, op);
2191 } END_FOR_EACH_PTR(op);
2193 return VOID;
2196 static int multijmp_cmp(const void *_a, const void *_b)
2198 const struct multijmp *a = _a;
2199 const struct multijmp *b = _b;
2201 // "default" case?
2202 if (a->begin > a->end) {
2203 if (b->begin > b->end)
2204 return 0;
2205 return 1;
2207 if (b->begin > b->end)
2208 return -1;
2209 if (a->begin == b->begin) {
2210 if (a->end == b->end)
2211 return 0;
2212 return (a->end < b->end) ? -1 : 1;
2214 return a->begin < b->begin ? -1 : 1;
2217 static void sort_switch_cases(struct instruction *insn)
2219 sort_list((struct ptr_list **)&insn->multijmp_list, multijmp_cmp);
2222 static pseudo_t linearize_declaration(struct entrypoint *ep, struct statement *stmt)
2224 struct symbol *sym;
2226 concat_symbol_list(stmt->declaration, &ep->syms);
2228 FOR_EACH_PTR(stmt->declaration, sym) {
2229 linearize_one_symbol(ep, sym);
2230 } END_FOR_EACH_PTR(sym);
2231 return VOID;
2234 static pseudo_t linearize_return(struct entrypoint *ep, struct statement *stmt)
2236 struct expression *expr = stmt->expression;
2237 struct symbol *ret = stmt->ret_target;
2238 struct basic_block *bb_return = get_bound_block(ep, ret);
2239 struct basic_block *active;
2240 pseudo_t src = linearize_expression(ep, expr);
2241 active = ep->active;
2242 if (active && !is_void_type(ret)) {
2243 add_return(ep, bb_return, ret, src);
2245 add_goto(ep, bb_return);
2246 return VOID;
2249 static pseudo_t linearize_switch(struct entrypoint *ep, struct statement *stmt)
2251 struct symbol *sym;
2252 struct instruction *switch_ins;
2253 struct basic_block *switch_end = alloc_basic_block(ep, stmt->pos);
2254 struct basic_block *active, *default_case;
2255 struct expression *expr = stmt->switch_expression;
2256 struct multijmp *jmp;
2257 pseudo_t pseudo;
2259 if (!expr || !expr->ctype)
2260 return VOID;
2261 pseudo = linearize_expression(ep, expr);
2262 active = ep->active;
2263 if (!active) {
2264 active = alloc_basic_block(ep, stmt->pos);
2265 set_activeblock(ep, active);
2268 switch_ins = alloc_typed_instruction(OP_SWITCH, expr->ctype);
2269 use_pseudo(switch_ins, pseudo, &switch_ins->cond);
2270 add_one_insn(ep, switch_ins);
2271 finish_block(ep);
2273 default_case = NULL;
2274 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
2275 struct statement *case_stmt = sym->stmt;
2276 struct basic_block *bb_case = get_bound_block(ep, sym);
2278 if (!case_stmt->case_expression) {
2279 default_case = bb_case;
2280 continue;
2281 } else if (case_stmt->case_expression->type != EXPR_VALUE) {
2282 continue;
2283 } else {
2284 struct expression *case_to = case_stmt->case_to;
2285 long long begin, end;
2287 begin = end = case_stmt->case_expression->value;
2288 if (case_to && case_to->type == EXPR_VALUE)
2289 end = case_to->value;
2290 if (begin > end)
2291 jmp = alloc_multijmp(bb_case, end, begin);
2292 else
2293 jmp = alloc_multijmp(bb_case, begin, end);
2296 add_multijmp(&switch_ins->multijmp_list, jmp);
2297 add_bb(&bb_case->parents, active);
2298 add_bb(&active->children, bb_case);
2299 } END_FOR_EACH_PTR(sym);
2301 bind_label(stmt->switch_break, switch_end, stmt->pos);
2303 /* And linearize the actual statement */
2304 linearize_statement(ep, stmt->switch_statement);
2305 set_activeblock(ep, switch_end);
2307 if (!default_case)
2308 default_case = switch_end;
2310 jmp = alloc_multijmp(default_case, 1, 0);
2311 add_multijmp(&switch_ins->multijmp_list, jmp);
2312 add_bb(&default_case->parents, active);
2313 add_bb(&active->children, default_case);
2314 sort_switch_cases(switch_ins);
2316 return VOID;
2319 static pseudo_t linearize_iterator(struct entrypoint *ep, struct statement *stmt)
2321 struct statement *pre_statement = stmt->iterator_pre_statement;
2322 struct expression *pre_condition = stmt->iterator_pre_condition;
2323 struct statement *statement = stmt->iterator_statement;
2324 struct statement *post_statement = stmt->iterator_post_statement;
2325 struct expression *post_condition = stmt->iterator_post_condition;
2326 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end;
2327 struct symbol *sym;
2329 FOR_EACH_PTR(stmt->iterator_syms, sym) {
2330 linearize_one_symbol(ep, sym);
2331 } END_FOR_EACH_PTR(sym);
2332 concat_symbol_list(stmt->iterator_syms, &ep->syms);
2333 linearize_statement(ep, pre_statement);
2335 loop_body = loop_top = alloc_basic_block(ep, stmt->pos);
2336 loop_continue = alloc_basic_block(ep, stmt->pos);
2337 loop_end = alloc_basic_block(ep, stmt->pos);
2339 /* An empty post-condition means that it's the same as the pre-condition */
2340 if (!post_condition) {
2341 loop_top = alloc_basic_block(ep, stmt->pos);
2342 set_activeblock(ep, loop_top);
2345 if (pre_condition)
2346 linearize_cond_branch(ep, pre_condition, loop_body, loop_end);
2348 bind_label(stmt->iterator_continue, loop_continue, stmt->pos);
2349 bind_label(stmt->iterator_break, loop_end, stmt->pos);
2351 set_activeblock(ep, loop_body);
2352 linearize_statement(ep, statement);
2353 add_goto(ep, loop_continue);
2355 set_activeblock(ep, loop_continue);
2356 linearize_statement(ep, post_statement);
2357 if (!post_condition)
2358 add_goto(ep, loop_top);
2359 else
2360 linearize_cond_branch(ep, post_condition, loop_top, loop_end);
2361 set_activeblock(ep, loop_end);
2363 return VOID;
2366 static pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
2368 struct basic_block *bb;
2370 if (!stmt)
2371 return VOID;
2373 bb = ep->active;
2374 if (bb && !bb->insns)
2375 bb->pos = stmt->pos;
2376 current_pos = stmt->pos;
2378 switch (stmt->type) {
2379 case STMT_NONE:
2380 break;
2382 case STMT_DECLARATION:
2383 return linearize_declaration(ep, stmt);
2385 case STMT_CONTEXT:
2386 return linearize_context(ep, stmt);
2388 case STMT_RANGE:
2389 return linearize_range(ep, stmt);
2391 case STMT_EXPRESSION:
2392 return linearize_expression(ep, stmt->expression);
2394 case STMT_ASM:
2395 return linearize_asm_statement(ep, stmt);
2397 case STMT_RETURN:
2398 return linearize_return(ep, stmt);
2400 case STMT_CASE: {
2401 add_label(ep, stmt->case_label);
2402 linearize_statement(ep, stmt->case_statement);
2403 break;
2406 case STMT_LABEL: {
2407 struct symbol *label = stmt->label_identifier;
2409 if (label->used) {
2410 add_label(ep, label);
2412 return linearize_statement(ep, stmt->label_statement);
2415 case STMT_GOTO: {
2416 struct symbol *sym;
2417 struct expression *expr;
2418 struct instruction *goto_ins;
2419 struct basic_block *active;
2420 pseudo_t pseudo;
2422 active = ep->active;
2423 if (!bb_reachable(active))
2424 break;
2426 if (stmt->goto_label) {
2427 add_goto(ep, get_bound_block(ep, stmt->goto_label));
2428 break;
2431 expr = stmt->goto_expression;
2432 if (!expr)
2433 break;
2435 /* This can happen as part of simplification */
2436 if (expr->type == EXPR_LABEL) {
2437 add_goto(ep, get_bound_block(ep, expr->label_symbol));
2438 break;
2441 pseudo = linearize_expression(ep, expr);
2442 goto_ins = alloc_instruction(OP_COMPUTEDGOTO, 0);
2443 use_pseudo(goto_ins, pseudo, &goto_ins->src);
2444 add_one_insn(ep, goto_ins);
2446 FOR_EACH_PTR(stmt->target_list, sym) {
2447 struct basic_block *bb_computed = get_bound_block(ep, sym);
2448 struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0);
2449 add_multijmp(&goto_ins->multijmp_list, jmp);
2450 add_bb(&bb_computed->parents, ep->active);
2451 add_bb(&active->children, bb_computed);
2452 } END_FOR_EACH_PTR(sym);
2454 finish_block(ep);
2455 break;
2458 case STMT_COMPOUND:
2459 if (stmt->inline_fn)
2460 return linearize_inlined_call(ep, stmt);
2461 return linearize_compound_statement(ep, stmt);
2464 * This could take 'likely/unlikely' into account, and
2465 * switch the arms around appropriately..
2467 case STMT_IF: {
2468 struct basic_block *bb_true, *bb_false, *endif;
2469 struct expression *cond = stmt->if_conditional;
2471 bb_true = alloc_basic_block(ep, stmt->pos);
2472 bb_false = endif = alloc_basic_block(ep, stmt->pos);
2474 // If the condition is invalid, the following
2475 // statement(s) are not evaluated.
2476 if (!cond || !valid_type(cond->ctype))
2477 return VOID;
2478 linearize_cond_branch(ep, cond, bb_true, bb_false);
2480 set_activeblock(ep, bb_true);
2481 linearize_statement(ep, stmt->if_true);
2483 if (stmt->if_false) {
2484 endif = alloc_basic_block(ep, stmt->pos);
2485 add_goto(ep, endif);
2486 set_activeblock(ep, bb_false);
2487 linearize_statement(ep, stmt->if_false);
2489 set_activeblock(ep, endif);
2490 break;
2493 case STMT_SWITCH:
2494 return linearize_switch(ep, stmt);
2496 case STMT_ITERATOR:
2497 return linearize_iterator(ep, stmt);
2499 default:
2500 break;
2502 return VOID;
2505 static void check_tainted_insn(struct instruction *insn)
2507 unsigned long long uval;
2508 long long sval;
2509 pseudo_t src2;
2511 switch (insn->opcode) {
2512 case OP_DIVU: case OP_DIVS:
2513 case OP_MODU: case OP_MODS:
2514 if (insn->src2 == value_pseudo(0))
2515 warning(insn->pos, "divide by zero");
2516 break;
2517 case OP_SHL: case OP_LSR: case OP_ASR:
2518 src2 = insn->src2;
2519 if (src2->type != PSEUDO_VAL)
2520 break;
2521 uval = src2->value;
2522 if (uval < insn->size)
2523 break;
2524 sval = sign_extend(uval, insn->size);
2525 if (Wshift_count_negative && sval < 0)
2526 warning(insn->pos, "shift count is negative (%lld)", sval);
2527 else if (Wshift_count_overflow)
2528 warning(insn->pos, "shift too big (%llu) for type %s", uval, show_typename(insn->type));
2533 // issue warnings after all possible DCE
2534 static void late_warnings(struct entrypoint *ep)
2536 struct basic_block *bb;
2537 FOR_EACH_PTR(ep->bbs, bb) {
2538 struct instruction *insn;
2539 FOR_EACH_PTR(bb->insns, insn) {
2540 if (!insn->bb)
2541 continue;
2542 if (insn->tainted)
2543 check_tainted_insn(insn);
2544 switch (insn->opcode) {
2545 case OP_LOAD:
2546 // Check for illegal offsets.
2547 check_access(insn);
2548 break;
2550 } END_FOR_EACH_PTR(insn);
2551 } END_FOR_EACH_PTR(bb);
2554 static struct entrypoint *linearize_fn(struct symbol *sym, struct symbol *base_type)
2556 struct statement *stmt = base_type->stmt;
2557 struct entrypoint *ep;
2558 struct basic_block *bb;
2559 struct symbol *ret_type;
2560 struct symbol *arg;
2561 struct instruction *entry;
2562 struct instruction *ret;
2563 pseudo_t result;
2564 int i;
2566 if (!stmt || sym->bogus_linear)
2567 return NULL;
2569 ep = alloc_entrypoint();
2570 ep->name = sym;
2571 sym->ep = ep;
2572 bb = alloc_basic_block(ep, sym->pos);
2573 set_activeblock(ep, bb);
2575 if (stmt->type == STMT_ASM) { // top-level asm
2576 linearize_asm_statement(ep, stmt);
2577 return ep;
2580 entry = alloc_instruction(OP_ENTRY, 0);
2581 add_one_insn(ep, entry);
2582 ep->entry = entry;
2584 concat_symbol_list(base_type->arguments, &ep->syms);
2586 /* FIXME!! We should do something else about varargs.. */
2587 i = 0;
2588 FOR_EACH_PTR(base_type->arguments, arg) {
2589 linearize_argument(ep, arg, ++i);
2590 } END_FOR_EACH_PTR(arg);
2592 result = linearize_fn_statement(ep, stmt);
2593 ret_type = base_type->ctype.base_type;
2594 ret = alloc_typed_instruction(OP_RET, ret_type);
2595 if (type_size(ret_type) > 0)
2596 use_pseudo(ret, result, &ret->src);
2597 add_one_insn(ep, ret);
2599 optimize(ep);
2600 late_warnings(ep);
2601 return ep;
2604 struct entrypoint *linearize_symbol(struct symbol *sym)
2606 struct symbol *base_type;
2608 if (!sym)
2609 return NULL;
2610 current_pos = sym->pos;
2611 base_type = sym->ctype.base_type;
2612 if (!base_type)
2613 return NULL;
2614 if (base_type->type == SYM_FN)
2615 return linearize_fn(sym, base_type);
2616 return NULL;
2620 * Builtin functions
2623 static pseudo_t linearize_fma(struct entrypoint *ep, struct expression *expr)
2625 struct instruction *insn = alloc_typed_instruction(OP_FMADD, expr->ctype);
2626 struct expression *arg;
2628 PREPARE_PTR_LIST(expr->args, arg);
2629 use_pseudo(insn, linearize_expression(ep, arg), &insn->src1);
2630 NEXT_PTR_LIST(arg)
2631 use_pseudo(insn, linearize_expression(ep, arg), &insn->src2);
2632 NEXT_PTR_LIST(arg)
2633 use_pseudo(insn, linearize_expression(ep, arg), &insn->src3);
2634 FINISH_PTR_LIST(arg);
2636 add_one_insn(ep, insn);
2637 return insn->target = alloc_pseudo(insn);
2640 static pseudo_t linearize_isdigit(struct entrypoint *ep, struct expression *expr)
2642 struct instruction *insn;
2643 pseudo_t src;
2645 insn = alloc_typed_instruction(OP_SUB, &int_ctype);
2646 src = linearize_expression(ep, first_expression(expr->args));
2647 use_pseudo(insn, src, &insn->src1);
2648 insn->src2 = value_pseudo('0');
2649 src = insn->target = alloc_pseudo(insn);
2650 add_one_insn(ep, insn);
2652 insn = alloc_typed_instruction(OP_SET_BE, &int_ctype);
2653 use_pseudo(insn, src, &insn->src1);
2654 insn->src2 = value_pseudo(9);
2655 insn->target = alloc_pseudo(insn);
2656 insn->itype = &int_ctype;
2657 add_one_insn(ep, insn);
2659 return insn->target;
2662 static pseudo_t linearize_unreachable(struct entrypoint *ep, struct expression *exp)
2664 add_unreachable(ep);
2665 return VOID;
2668 static struct sym_init {
2669 const char *name;
2670 pseudo_t (*linearize)(struct entrypoint *, struct expression*);
2671 struct symbol_op op;
2672 } builtins_table[] = {
2673 // must be declared in builtin.c:declare_builtins[]
2674 { "__builtin_fma", linearize_fma },
2675 { "__builtin_fmaf", linearize_fma },
2676 { "__builtin_fmal", linearize_fma },
2677 { "__builtin_isdigit", linearize_isdigit },
2678 { "__builtin_unreachable", linearize_unreachable },
2682 void init_linearized_builtins(int stream)
2684 struct sym_init *ptr;
2686 for (ptr = builtins_table; ptr->name; ptr++) {
2687 struct symbol *sym;
2688 sym = create_symbol(stream, ptr->name, SYM_NODE, NS_SYMBOL);
2689 if (!sym->op)
2690 sym->op = &ptr->op;
2691 sym->op->type |= KW_BUILTIN;
2692 sym->op->linearize = ptr->linearize;