Add new IL for expression linearization.
[smatch.git] / linearize.c
blob6ed2de2f52bbfc9cdf47a5233cfb06bcc4cb21b5
1 /*
2 * Linearize - walk the statement tree (but _not_ the expressions)
3 * to generate a linear version of it and the basic blocks.
5 * NOTE! We're not interested in the actual sub-expressions yet,
6 * even though they can generate conditional branches and
7 * subroutine calls. That's all "local" behaviour.
9 * Copyright (C) 2004 Linus Torvalds
12 #include <string.h>
13 #include <stdarg.h>
14 #include <stdlib.h>
15 #include <stdio.h>
17 #include "parse.h"
18 #include "expression.h"
19 #include "linearize.h"
21 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
22 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
24 static struct instruction *alloc_instruction(int opcode, struct symbol *type)
26 struct instruction * insn = __alloc_instruction(0);
27 insn->type = type;
28 insn->opcode = opcode;
29 return insn;
32 static struct entrypoint *alloc_entrypoint(void)
34 return __alloc_entrypoint(0);
37 static struct basic_block *alloc_basic_block(void)
39 return __alloc_basic_block(0);
42 static void show_instruction(struct instruction *insn)
44 int op = insn->opcode;
46 switch (op) {
47 case OP_CONDTRUE: case OP_CONDFALSE:
48 printf("\t%s %%r%d,%p\n",
49 op == OP_CONDTRUE ? "jne" : "jz",
50 insn->target.nr, insn->address);
51 break;
52 case OP_SETVAL: {
53 struct expression *expr = insn->val;
54 switch (expr->type) {
55 case EXPR_VALUE:
56 printf("\t%%r%d <- %lld\n",
57 insn->target.nr, expr->value);
58 break;
59 case EXPR_STRING:
60 printf("\t%%r%d <- %s\n",
61 insn->target.nr, show_string(expr->string));
62 break;
63 case EXPR_SYMBOL:
64 printf("\t%%r%d <- %s\n",
65 insn->target.nr, show_ident(expr->symbol->ident));
66 break;
67 default:
68 printf("\t SETVAL ?? ");
70 break;
72 case OP_MULTIVALUE:
73 printf("\tswitch %%r%d\n", insn->target.nr);
74 break;
75 case OP_MULTIJUMP:
76 printf("\tcase %d ... %d -> %p\n", insn->begin, insn->end, insn->type);
77 break;
78 case OP_LOAD:
79 printf("\tload %%r%d <- [%%r%d]\n", insn->target.nr, insn->src.nr);
80 break;
81 case OP_STORE:
82 printf("\tstore %%r%d -> [%%r%d]\n", insn->target.nr, insn->src.nr);
83 break;
84 case OP_UNOP ... OP_LASTUNOP:
85 printf("\t%%r%d <- %c %%r%d\n",
86 insn->target.nr,
87 op - OP_UNOP, insn->src.nr);
88 break;
89 case OP_BINOP ... OP_LASTBINOP:
90 printf("\t%%r%d <- %%r%d %c %%r%d\n",
91 insn->target.nr,
92 insn->src1.nr, op - OP_UNOP, insn->src2.nr);
93 break;
94 default:
95 printf("\top %d ???\n", op);
99 static void show_bb(struct basic_block *bb)
101 struct instruction *insn;
102 struct symbol *owner = bb->this;
104 printf("bb: %p%s\n", bb, owner ? "" : " UNREACHABLE!!");
105 if (owner) {
106 struct basic_block *from;
107 FOR_EACH_PTR(owner->bb_parents, from) {
108 printf(" **from %p**\n", from);
109 } END_FOR_EACH_PTR;
111 FOR_EACH_PTR(bb->insns, insn) {
112 show_instruction(insn);
113 } END_FOR_EACH_PTR;
115 if (bb->next) {
116 printf("\tgoto\t\t.L%p\n", bb->next->bb_target);
117 } else {
118 printf("\tEND\n");
120 printf("\n");
123 static void show_entry(struct entrypoint *ep)
125 struct symbol *sym;
126 struct basic_block *bb;
128 printf("ep %p: %s\n", ep, show_ident(ep->name->ident));
130 FOR_EACH_PTR(ep->syms, sym) {
131 printf(" sym: %p %s\n", sym, show_ident(sym->ident));
132 } END_FOR_EACH_PTR;
134 printf("\n");
136 FOR_EACH_PTR(ep->bbs, bb) {
137 show_bb(bb);
138 } END_FOR_EACH_PTR;
140 printf("\n");
143 #define bb_reachable(bb) ((bb)->this != NULL)
145 static struct basic_block * new_basic_block(struct entrypoint *ep, struct symbol *owner)
147 struct basic_block *bb;
149 if (!owner) {
150 static struct basic_block unreachable;
151 return &unreachable;
154 bb = alloc_basic_block();
155 add_bb(&ep->bbs, bb);
156 bb->this = owner;
157 if (owner->bb_target)
158 warn(owner->pos, "Symbol already has a basic block %p", owner->bb_target);
159 owner->bb_target = bb;
160 return bb;
163 static void add_goto(struct basic_block *bb, struct symbol *sym)
165 if (bb_reachable(bb)) {
166 bb->next = sym;
167 add_bb(&sym->bb_parents, bb);
171 static void add_label(struct entrypoint *ep, struct symbol *sym)
173 struct basic_block *new_bb = new_basic_block(ep, sym);
174 struct basic_block *bb = ep->active;
176 add_goto(bb, sym);
177 ep->active = new_bb;
181 * Add a anonymous label, return the symbol for it..
183 * If we already have a label for the top of the active
184 * context, we can just re-use it.
186 static struct symbol *create_label(struct entrypoint *ep, struct position pos)
188 struct basic_block *bb = ep->active;
189 struct symbol *label = bb->this;
191 if (!bb_reachable(bb) || !ptr_list_empty(bb->insns)) {
192 label = alloc_symbol(pos, SYM_LABEL);
193 add_label(ep, label);
195 return label;
198 static void add_one_insn(struct entrypoint *ep, struct position pos, struct instruction *insn)
200 struct basic_block *bb = ep->active;
202 if (bb_reachable(bb)) {
203 if (bb->flags & BB_HASBRANCH) {
204 add_label(ep, alloc_symbol(pos, SYM_LABEL));
205 bb = ep->active;
207 add_instruction(&bb->insns, insn);
211 static void set_unreachable(struct entrypoint *ep)
213 ep->active = new_basic_block(ep, NULL);
216 static void add_branch(struct entrypoint *ep, struct statement *stmt, int opcode, struct expression *cond, struct symbol *target)
218 struct basic_block *bb = ep->active;
220 if (bb_reachable(bb)) {
221 struct instruction *jump = alloc_instruction(opcode, target);
222 jump->address = target;
223 bb->flags |= BB_HASBRANCH;
224 add_instruction(&bb->insns, jump);
225 add_bb(&target->bb_parents, bb);
229 /* Dummy pseudo allocator */
230 static pseudo_t alloc_pseudo(void)
232 static int nr = 0;
233 return to_pseudo(++nr);
237 * FIXME! Not all accesses are memory loads. We should
238 * check what kind of symbol is behind the dereference.
240 static pseudo_t linearize_address_gen(struct entrypoint *ep, struct expression *expr)
242 if (expr->type == EXPR_PREOP)
243 return linearize_expression(ep, expr->unop);
244 return linearize_expression(ep, expr->address);
247 static void linearize_store_gen(struct entrypoint *ep, pseudo_t value, struct expression *expr, pseudo_t addr)
249 struct instruction *store = alloc_instruction(OP_STORE, expr->ctype);
250 store->target = value;
251 store->src = addr;
252 add_one_insn(ep, expr->pos, store);
255 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct expression *expr, pseudo_t addr)
257 pseudo_t new = alloc_pseudo();
258 struct instruction *insn = alloc_instruction(OP_LOAD, expr->ctype);
260 insn->target = new;
261 insn->src = addr;
262 add_one_insn(ep, expr->pos, insn);
263 if (expr->type == EXPR_PREOP)
264 return new;
266 /* bitfield load */
267 /* FIXME! Add shift and mask!!! */
268 return new;
271 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr)
273 pseudo_t addr = linearize_address_gen(ep, expr);
274 return linearize_load_gen(ep, expr, addr);
277 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop)
279 pseudo_t addr = linearize_address_gen(ep, expr->unop);
280 pseudo_t retval, new;
281 struct instruction *insn;
283 retval = linearize_load_gen(ep, expr->unop, addr);
284 new = retval;
285 if (postop)
286 new = alloc_pseudo();
287 insn = alloc_instruction(OP_UNOP + expr->op, expr->ctype);
288 insn->target = new;
289 insn->src = retval;
290 linearize_store_gen(ep, new, expr->unop, addr);
291 return retval;
294 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr)
296 pseudo_t target = linearize_expression(ep, expr->unop);
297 pseudo_t new = alloc_pseudo();
298 struct instruction *insn = alloc_instruction(OP_UNOP + expr->op, expr->ctype);
299 insn->target = new;
300 insn->src1 = target;
301 return new;
304 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr)
307 * '*' is an lvalue access, and is fundamentally different
308 * from an arithmetic operation. Maybe it should have an
309 * expression type of its own..
311 if (expr->op == '*')
312 return linearize_access(ep, expr);
313 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT)
314 return linearize_inc_dec(ep, expr, 0);
315 return linearize_regular_preop(ep, expr);
318 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr)
320 struct expression *target = expr->left;
321 pseudo_t value, address;
323 value = linearize_expression(ep, expr->right);
324 address = linearize_address_gen(ep, target);
325 linearize_store_gen(ep, value, target, address);
326 return value;
329 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr)
331 if (!expr)
332 return VOID;
334 switch (expr->type) {
335 case EXPR_VALUE: case EXPR_STRING: case EXPR_SYMBOL: {
336 pseudo_t pseudo;
337 struct instruction *insn = alloc_instruction(OP_SETVAL, expr->ctype);
338 insn->target = pseudo = alloc_pseudo();
339 insn->val = expr;
340 add_one_insn(ep, expr->pos,insn);
341 return pseudo;
344 case EXPR_STATEMENT:
345 return linearize_statement(ep, expr->statement);
347 case EXPR_BINOP: {
348 pseudo_t src1, src2, result;
349 struct instruction *insn = alloc_instruction(OP_BINOP + expr->op, expr->ctype);
350 src1 = linearize_expression(ep, expr->left);
351 src2 = linearize_expression(ep, expr->right);
352 result = alloc_pseudo();
353 insn->target = result;
354 insn->src1 = src1;
355 insn->src2 = src2;
356 add_one_insn(ep, expr->pos, insn);
357 return result;
360 case EXPR_COMMA: {
361 linearize_expression(ep, expr->left);
362 return linearize_expression(ep, expr->right);
365 case EXPR_ASSIGNMENT:
366 return linearize_assignment(ep, expr);
368 case EXPR_PREOP:
369 return linearize_preop(ep, expr);
371 default:
372 return VOID;
374 return VOID;
377 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt)
379 if (!stmt)
380 return VOID;
382 switch (stmt->type) {
383 case STMT_NONE:
384 break;
386 case STMT_EXPRESSION:
387 return linearize_expression(ep, stmt->expression);
389 case STMT_ASM:
390 /* FIXME */
391 break;
393 case STMT_RETURN: {
394 pseudo_t pseudo = linearize_expression(ep, stmt->expression);
395 set_unreachable(ep);
396 return pseudo;
399 case STMT_CASE: {
400 add_label(ep, stmt->case_label);
401 linearize_statement(ep, stmt->case_statement);
402 break;
405 case STMT_LABEL: {
406 add_label(ep, stmt->label_identifier);
407 linearize_statement(ep, stmt->label_statement);
408 break;
411 case STMT_GOTO: {
412 add_goto(ep->active, stmt->goto_label);
413 set_unreachable(ep);
414 break;
417 case STMT_COMPOUND: {
418 pseudo_t pseudo;
419 struct statement *s;
420 concat_symbol_list(stmt->syms, &ep->syms);
421 FOR_EACH_PTR(stmt->stmts, s) {
422 pseudo = linearize_statement(ep, s);
423 } END_FOR_EACH_PTR;
424 return pseudo;
428 * This could take 'likely/unlikely' into account, and
429 * switch the arms around appropriately..
431 case STMT_IF: {
432 struct symbol *target;
433 struct basic_block *if_block;
434 struct expression *cond = stmt->if_conditional;
436 if (cond->type == EXPR_VALUE) {
437 struct statement *always = stmt->if_true;
438 struct statement *never = stmt->if_false;
440 if (!cond->value) {
441 never = always;
442 always = stmt->if_false;
444 if (always)
445 linearize_statement(ep, always);
446 if (never) {
447 struct basic_block *bb = ep->active;
448 set_unreachable(ep);
449 linearize_statement(ep, never);
452 * If the "never case" is reachable some other
453 * way, we need to merge the old always case
454 * with the fallthrough of the never case.
456 if (bb_reachable(ep->active)) {
457 add_goto(bb, create_label(ep, never->pos));
458 break;
461 /* Otherwise we just continue with the old always case.. */
462 ep->active = bb;
464 break;
468 target = alloc_symbol(stmt->pos, SYM_LABEL);
469 add_branch(ep, stmt, OP_CONDFALSE, cond, target);
471 linearize_statement(ep, stmt->if_true);
473 if_block = ep->active;
474 add_label(ep, target);
476 if (stmt->if_false) {
477 struct symbol *else_target = alloc_symbol(stmt->pos, SYM_LABEL);
478 add_goto(if_block, else_target);
479 linearize_statement(ep, stmt->if_false);
480 add_label(ep, else_target);
482 break;
485 case STMT_SWITCH: {
486 int default_seen;
487 struct symbol *sym;
488 struct instruction *switch_value;
489 pseudo_t pseudo;
491 /* Create the "head node" */
492 if (!bb_reachable(ep->active))
493 break;
495 pseudo = linearize_expression(ep, stmt->switch_expression);
496 switch_value = alloc_instruction(OP_MULTIVALUE, NULL);
497 switch_value->target = pseudo;
498 add_one_insn(ep, stmt->pos, switch_value);
500 /* Create all the sub-jumps */
501 default_seen = 0;
502 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) {
503 struct statement *case_stmt = sym->stmt;
504 struct instruction *sw_bb = alloc_instruction(OP_MULTIJUMP, sym);
505 if (!case_stmt->case_expression)
506 default_seen = 1;
507 if (case_stmt->case_expression)
508 sw_bb->begin = case_stmt->case_expression->value;
509 if (case_stmt->case_to)
510 sw_bb->end = case_stmt->case_to->value;
511 add_one_insn(ep, stmt->pos, sw_bb);
512 add_bb(&sym->bb_parents, ep->active);
513 } END_FOR_EACH_PTR;
515 /* Default fall-through case */
516 if (!default_seen)
517 add_goto(ep->active, stmt->switch_break);
518 set_unreachable(ep);
520 /* And linearize the actual statement */
521 linearize_statement(ep, stmt->switch_statement);
523 /* ..then tie it all together at the end.. */
524 add_label(ep, stmt->switch_break);
525 break;
528 case STMT_ITERATOR: {
529 struct statement *pre_statement = stmt->iterator_pre_statement;
530 struct expression *pre_condition = stmt->iterator_pre_condition;
531 struct statement *statement = stmt->iterator_statement;
532 struct statement *post_statement = stmt->iterator_post_statement;
533 struct expression *post_condition = stmt->iterator_post_condition;
534 struct symbol *loop_top = NULL, *loop_bottom = NULL;
536 concat_symbol_list(stmt->iterator_syms, &ep->syms);
537 linearize_statement(ep, pre_statement);
538 if (pre_condition) {
539 if (pre_condition->type == EXPR_VALUE) {
540 if (!pre_condition->value) {
541 loop_bottom = alloc_symbol(stmt->pos, SYM_LABEL);
542 add_goto(ep->active, loop_bottom);
543 set_unreachable(ep);
545 } else {
546 loop_bottom = alloc_symbol(stmt->pos, SYM_LABEL);
547 add_branch(ep, stmt, OP_CONDFALSE, pre_condition, loop_bottom);
551 if (!post_condition || post_condition->type != EXPR_VALUE || post_condition->value)
552 loop_top = create_label(ep, stmt->pos);
554 linearize_statement(ep, statement);
556 if (stmt->iterator_continue->used)
557 add_label(ep, stmt->iterator_continue);
559 linearize_statement(ep, post_statement);
561 if (!post_condition) {
562 add_goto(ep->active, loop_top);
563 set_unreachable(ep);
564 } else {
565 if (post_condition->type != EXPR_VALUE || post_condition->value)
566 add_branch(ep, stmt, OP_CONDTRUE, post_condition, loop_top);
569 if (stmt->iterator_break->used)
570 add_label(ep, stmt->iterator_break);
571 if (loop_bottom)
572 add_label(ep, loop_bottom);
573 break;
576 default:
577 break;
579 return VOID;
582 void linearize_symbol(struct symbol *sym)
584 struct symbol *base_type;
586 if (!sym)
587 return;
588 base_type = sym->ctype.base_type;
589 if (!base_type)
590 return;
591 if (base_type->type == SYM_FN) {
592 if (base_type->stmt) {
593 struct entrypoint *ep = alloc_entrypoint();
595 ep->name = sym;
596 ep->active = new_basic_block(ep, sym);
597 concat_symbol_list(base_type->arguments, &ep->syms);
598 linearize_statement(ep, base_type->stmt);
599 show_entry(ep);