2 * Linearize - walk the statement tree (but _not_ the expressions)
3 * to generate a linear version of it and the basic blocks.
5 * NOTE! We're not interested in the actual sub-expressions yet,
6 * even though they can generate conditional branches and
7 * subroutine calls. That's all "local" behaviour.
9 * Copyright (C) 2004 Linus Torvalds
18 #include "expression.h"
19 #include "linearize.h"
21 pseudo_t
linearize_statement(struct entrypoint
*ep
, struct statement
*stmt
);
22 pseudo_t
linearize_expression(struct entrypoint
*ep
, struct expression
*expr
);
24 static struct instruction
*alloc_instruction(int opcode
, struct symbol
*type
)
26 struct instruction
* insn
= __alloc_instruction(0);
28 insn
->opcode
= opcode
;
32 static struct entrypoint
*alloc_entrypoint(void)
34 return __alloc_entrypoint(0);
37 static struct basic_block
*alloc_basic_block(void)
39 return __alloc_basic_block(0);
42 static void show_instruction(struct instruction
*insn
)
44 int op
= insn
->opcode
;
47 case OP_CONDTRUE
: case OP_CONDFALSE
:
48 printf("\t%s %%r%d,%p\n",
49 op
== OP_CONDTRUE
? "jne" : "jz",
50 insn
->target
.nr
, insn
->address
);
53 struct expression
*expr
= insn
->val
;
56 printf("\t%%r%d <- %lld\n",
57 insn
->target
.nr
, expr
->value
);
60 printf("\t%%r%d <- %s\n",
61 insn
->target
.nr
, show_string(expr
->string
));
64 printf("\t%%r%d <- %s\n",
65 insn
->target
.nr
, show_ident(expr
->symbol
->ident
));
68 printf("\t SETVAL ?? ");
73 printf("\tswitch %%r%d\n", insn
->target
.nr
);
76 printf("\tcase %d ... %d -> %p\n", insn
->begin
, insn
->end
, insn
->type
);
79 printf("\tload %%r%d <- [%%r%d]\n", insn
->target
.nr
, insn
->src
.nr
);
82 printf("\tstore %%r%d -> [%%r%d]\n", insn
->target
.nr
, insn
->src
.nr
);
84 case OP_UNOP
... OP_LASTUNOP
:
85 printf("\t%%r%d <- %c %%r%d\n",
87 op
- OP_UNOP
, insn
->src
.nr
);
89 case OP_BINOP
... OP_LASTBINOP
:
90 printf("\t%%r%d <- %%r%d %c %%r%d\n",
92 insn
->src1
.nr
, op
- OP_UNOP
, insn
->src2
.nr
);
95 printf("\top %d ???\n", op
);
99 static void show_bb(struct basic_block
*bb
)
101 struct instruction
*insn
;
102 struct symbol
*owner
= bb
->this;
104 printf("bb: %p%s\n", bb
, owner
? "" : " UNREACHABLE!!");
106 struct basic_block
*from
;
107 FOR_EACH_PTR(owner
->bb_parents
, from
) {
108 printf(" **from %p**\n", from
);
111 FOR_EACH_PTR(bb
->insns
, insn
) {
112 show_instruction(insn
);
116 printf("\tgoto\t\t.L%p\n", bb
->next
->bb_target
);
123 static void show_entry(struct entrypoint
*ep
)
126 struct basic_block
*bb
;
128 printf("ep %p: %s\n", ep
, show_ident(ep
->name
->ident
));
130 FOR_EACH_PTR(ep
->syms
, sym
) {
131 printf(" sym: %p %s\n", sym
, show_ident(sym
->ident
));
136 FOR_EACH_PTR(ep
->bbs
, bb
) {
143 #define bb_reachable(bb) ((bb)->this != NULL)
145 static struct basic_block
* new_basic_block(struct entrypoint
*ep
, struct symbol
*owner
)
147 struct basic_block
*bb
;
150 static struct basic_block unreachable
;
154 bb
= alloc_basic_block();
155 add_bb(&ep
->bbs
, bb
);
157 if (owner
->bb_target
)
158 warn(owner
->pos
, "Symbol already has a basic block %p", owner
->bb_target
);
159 owner
->bb_target
= bb
;
163 static void add_goto(struct basic_block
*bb
, struct symbol
*sym
)
165 if (bb_reachable(bb
)) {
167 add_bb(&sym
->bb_parents
, bb
);
171 static void add_label(struct entrypoint
*ep
, struct symbol
*sym
)
173 struct basic_block
*new_bb
= new_basic_block(ep
, sym
);
174 struct basic_block
*bb
= ep
->active
;
181 * Add a anonymous label, return the symbol for it..
183 * If we already have a label for the top of the active
184 * context, we can just re-use it.
186 static struct symbol
*create_label(struct entrypoint
*ep
, struct position pos
)
188 struct basic_block
*bb
= ep
->active
;
189 struct symbol
*label
= bb
->this;
191 if (!bb_reachable(bb
) || !ptr_list_empty(bb
->insns
)) {
192 label
= alloc_symbol(pos
, SYM_LABEL
);
193 add_label(ep
, label
);
198 static void add_one_insn(struct entrypoint
*ep
, struct position pos
, struct instruction
*insn
)
200 struct basic_block
*bb
= ep
->active
;
202 if (bb_reachable(bb
)) {
203 if (bb
->flags
& BB_HASBRANCH
) {
204 add_label(ep
, alloc_symbol(pos
, SYM_LABEL
));
207 add_instruction(&bb
->insns
, insn
);
211 static void set_unreachable(struct entrypoint
*ep
)
213 ep
->active
= new_basic_block(ep
, NULL
);
216 static void add_branch(struct entrypoint
*ep
, struct statement
*stmt
, int opcode
, struct expression
*cond
, struct symbol
*target
)
218 struct basic_block
*bb
= ep
->active
;
220 if (bb_reachable(bb
)) {
221 struct instruction
*jump
= alloc_instruction(opcode
, target
);
222 jump
->address
= target
;
223 bb
->flags
|= BB_HASBRANCH
;
224 add_instruction(&bb
->insns
, jump
);
225 add_bb(&target
->bb_parents
, bb
);
229 /* Dummy pseudo allocator */
230 static pseudo_t
alloc_pseudo(void)
233 return to_pseudo(++nr
);
237 * FIXME! Not all accesses are memory loads. We should
238 * check what kind of symbol is behind the dereference.
240 static pseudo_t
linearize_address_gen(struct entrypoint
*ep
, struct expression
*expr
)
242 if (expr
->type
== EXPR_PREOP
)
243 return linearize_expression(ep
, expr
->unop
);
244 return linearize_expression(ep
, expr
->address
);
247 static void linearize_store_gen(struct entrypoint
*ep
, pseudo_t value
, struct expression
*expr
, pseudo_t addr
)
249 struct instruction
*store
= alloc_instruction(OP_STORE
, expr
->ctype
);
250 store
->target
= value
;
252 add_one_insn(ep
, expr
->pos
, store
);
255 static pseudo_t
linearize_load_gen(struct entrypoint
*ep
, struct expression
*expr
, pseudo_t addr
)
257 pseudo_t
new = alloc_pseudo();
258 struct instruction
*insn
= alloc_instruction(OP_LOAD
, expr
->ctype
);
262 add_one_insn(ep
, expr
->pos
, insn
);
263 if (expr
->type
== EXPR_PREOP
)
267 /* FIXME! Add shift and mask!!! */
271 static pseudo_t
linearize_access(struct entrypoint
*ep
, struct expression
*expr
)
273 pseudo_t addr
= linearize_address_gen(ep
, expr
);
274 return linearize_load_gen(ep
, expr
, addr
);
277 static pseudo_t
linearize_inc_dec(struct entrypoint
*ep
, struct expression
*expr
, int postop
)
279 pseudo_t addr
= linearize_address_gen(ep
, expr
->unop
);
280 pseudo_t retval
, new;
281 struct instruction
*insn
;
283 retval
= linearize_load_gen(ep
, expr
->unop
, addr
);
286 new = alloc_pseudo();
287 insn
= alloc_instruction(OP_UNOP
+ expr
->op
, expr
->ctype
);
290 linearize_store_gen(ep
, new, expr
->unop
, addr
);
294 static pseudo_t
linearize_regular_preop(struct entrypoint
*ep
, struct expression
*expr
)
296 pseudo_t target
= linearize_expression(ep
, expr
->unop
);
297 pseudo_t
new = alloc_pseudo();
298 struct instruction
*insn
= alloc_instruction(OP_UNOP
+ expr
->op
, expr
->ctype
);
304 static pseudo_t
linearize_preop(struct entrypoint
*ep
, struct expression
*expr
)
307 * '*' is an lvalue access, and is fundamentally different
308 * from an arithmetic operation. Maybe it should have an
309 * expression type of its own..
312 return linearize_access(ep
, expr
);
313 if (expr
->op
== SPECIAL_INCREMENT
|| expr
->op
== SPECIAL_DECREMENT
)
314 return linearize_inc_dec(ep
, expr
, 0);
315 return linearize_regular_preop(ep
, expr
);
318 static pseudo_t
linearize_assignment(struct entrypoint
*ep
, struct expression
*expr
)
320 struct expression
*target
= expr
->left
;
321 pseudo_t value
, address
;
323 value
= linearize_expression(ep
, expr
->right
);
324 address
= linearize_address_gen(ep
, target
);
325 linearize_store_gen(ep
, value
, target
, address
);
329 pseudo_t
linearize_expression(struct entrypoint
*ep
, struct expression
*expr
)
334 switch (expr
->type
) {
335 case EXPR_VALUE
: case EXPR_STRING
: case EXPR_SYMBOL
: {
337 struct instruction
*insn
= alloc_instruction(OP_SETVAL
, expr
->ctype
);
338 insn
->target
= pseudo
= alloc_pseudo();
340 add_one_insn(ep
, expr
->pos
,insn
);
345 return linearize_statement(ep
, expr
->statement
);
348 pseudo_t src1
, src2
, result
;
349 struct instruction
*insn
= alloc_instruction(OP_BINOP
+ expr
->op
, expr
->ctype
);
350 src1
= linearize_expression(ep
, expr
->left
);
351 src2
= linearize_expression(ep
, expr
->right
);
352 result
= alloc_pseudo();
353 insn
->target
= result
;
356 add_one_insn(ep
, expr
->pos
, insn
);
361 linearize_expression(ep
, expr
->left
);
362 return linearize_expression(ep
, expr
->right
);
365 case EXPR_ASSIGNMENT
:
366 return linearize_assignment(ep
, expr
);
369 return linearize_preop(ep
, expr
);
377 pseudo_t
linearize_statement(struct entrypoint
*ep
, struct statement
*stmt
)
382 switch (stmt
->type
) {
386 case STMT_EXPRESSION
:
387 return linearize_expression(ep
, stmt
->expression
);
394 pseudo_t pseudo
= linearize_expression(ep
, stmt
->expression
);
400 add_label(ep
, stmt
->case_label
);
401 linearize_statement(ep
, stmt
->case_statement
);
406 add_label(ep
, stmt
->label_identifier
);
407 linearize_statement(ep
, stmt
->label_statement
);
412 add_goto(ep
->active
, stmt
->goto_label
);
417 case STMT_COMPOUND
: {
420 concat_symbol_list(stmt
->syms
, &ep
->syms
);
421 FOR_EACH_PTR(stmt
->stmts
, s
) {
422 pseudo
= linearize_statement(ep
, s
);
428 * This could take 'likely/unlikely' into account, and
429 * switch the arms around appropriately..
432 struct symbol
*target
;
433 struct basic_block
*if_block
;
434 struct expression
*cond
= stmt
->if_conditional
;
436 if (cond
->type
== EXPR_VALUE
) {
437 struct statement
*always
= stmt
->if_true
;
438 struct statement
*never
= stmt
->if_false
;
442 always
= stmt
->if_false
;
445 linearize_statement(ep
, always
);
447 struct basic_block
*bb
= ep
->active
;
449 linearize_statement(ep
, never
);
452 * If the "never case" is reachable some other
453 * way, we need to merge the old always case
454 * with the fallthrough of the never case.
456 if (bb_reachable(ep
->active
)) {
457 add_goto(bb
, create_label(ep
, never
->pos
));
461 /* Otherwise we just continue with the old always case.. */
468 target
= alloc_symbol(stmt
->pos
, SYM_LABEL
);
469 add_branch(ep
, stmt
, OP_CONDFALSE
, cond
, target
);
471 linearize_statement(ep
, stmt
->if_true
);
473 if_block
= ep
->active
;
474 add_label(ep
, target
);
476 if (stmt
->if_false
) {
477 struct symbol
*else_target
= alloc_symbol(stmt
->pos
, SYM_LABEL
);
478 add_goto(if_block
, else_target
);
479 linearize_statement(ep
, stmt
->if_false
);
480 add_label(ep
, else_target
);
488 struct instruction
*switch_value
;
491 /* Create the "head node" */
492 if (!bb_reachable(ep
->active
))
495 pseudo
= linearize_expression(ep
, stmt
->switch_expression
);
496 switch_value
= alloc_instruction(OP_MULTIVALUE
, NULL
);
497 switch_value
->target
= pseudo
;
498 add_one_insn(ep
, stmt
->pos
, switch_value
);
500 /* Create all the sub-jumps */
502 FOR_EACH_PTR(stmt
->switch_case
->symbol_list
, sym
) {
503 struct statement
*case_stmt
= sym
->stmt
;
504 struct instruction
*sw_bb
= alloc_instruction(OP_MULTIJUMP
, sym
);
505 if (!case_stmt
->case_expression
)
507 if (case_stmt
->case_expression
)
508 sw_bb
->begin
= case_stmt
->case_expression
->value
;
509 if (case_stmt
->case_to
)
510 sw_bb
->end
= case_stmt
->case_to
->value
;
511 add_one_insn(ep
, stmt
->pos
, sw_bb
);
512 add_bb(&sym
->bb_parents
, ep
->active
);
515 /* Default fall-through case */
517 add_goto(ep
->active
, stmt
->switch_break
);
520 /* And linearize the actual statement */
521 linearize_statement(ep
, stmt
->switch_statement
);
523 /* ..then tie it all together at the end.. */
524 add_label(ep
, stmt
->switch_break
);
528 case STMT_ITERATOR
: {
529 struct statement
*pre_statement
= stmt
->iterator_pre_statement
;
530 struct expression
*pre_condition
= stmt
->iterator_pre_condition
;
531 struct statement
*statement
= stmt
->iterator_statement
;
532 struct statement
*post_statement
= stmt
->iterator_post_statement
;
533 struct expression
*post_condition
= stmt
->iterator_post_condition
;
534 struct symbol
*loop_top
= NULL
, *loop_bottom
= NULL
;
536 concat_symbol_list(stmt
->iterator_syms
, &ep
->syms
);
537 linearize_statement(ep
, pre_statement
);
539 if (pre_condition
->type
== EXPR_VALUE
) {
540 if (!pre_condition
->value
) {
541 loop_bottom
= alloc_symbol(stmt
->pos
, SYM_LABEL
);
542 add_goto(ep
->active
, loop_bottom
);
546 loop_bottom
= alloc_symbol(stmt
->pos
, SYM_LABEL
);
547 add_branch(ep
, stmt
, OP_CONDFALSE
, pre_condition
, loop_bottom
);
551 if (!post_condition
|| post_condition
->type
!= EXPR_VALUE
|| post_condition
->value
)
552 loop_top
= create_label(ep
, stmt
->pos
);
554 linearize_statement(ep
, statement
);
556 if (stmt
->iterator_continue
->used
)
557 add_label(ep
, stmt
->iterator_continue
);
559 linearize_statement(ep
, post_statement
);
561 if (!post_condition
) {
562 add_goto(ep
->active
, loop_top
);
565 if (post_condition
->type
!= EXPR_VALUE
|| post_condition
->value
)
566 add_branch(ep
, stmt
, OP_CONDTRUE
, post_condition
, loop_top
);
569 if (stmt
->iterator_break
->used
)
570 add_label(ep
, stmt
->iterator_break
);
572 add_label(ep
, loop_bottom
);
582 void linearize_symbol(struct symbol
*sym
)
584 struct symbol
*base_type
;
588 base_type
= sym
->ctype
.base_type
;
591 if (base_type
->type
== SYM_FN
) {
592 if (base_type
->stmt
) {
593 struct entrypoint
*ep
= alloc_entrypoint();
596 ep
->active
= new_basic_block(ep
, sym
);
597 concat_symbol_list(base_type
->arguments
, &ep
->syms
);
598 linearize_statement(ep
, base_type
->stmt
);