* opt-functions.awk (var_type): New function.
[official-gcc.git] / gcc / tree-ssa-loop-im.c
blob26244ce2aeed074a11212bda992e668b08e33bcf
1 /* Loop invariant motion.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "hard-reg-set.h"
29 #include "basic-block.h"
30 #include "output.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "domwalk.h"
37 #include "params.h"
38 #include "tree-pass.h"
39 #include "flags.h"
40 #include "real.h"
41 #include "hashtab.h"
43 /* TODO: Support for predicated code motion. I.e.
45 while (1)
47 if (cond)
49 a = inv;
50 something;
54 Where COND and INV are is invariants, but evaluating INV may trap or be
55 invalid from some other reason if !COND. This may be transformed to
57 if (cond)
58 a = inv;
59 while (1)
61 if (cond)
62 something;
63 } */
65 /* A type for the list of statements that have to be moved in order to be able
66 to hoist an invariant computation. */
68 struct depend
70 tree stmt;
71 struct depend *next;
74 /* The auxiliary data kept for each statement. */
76 struct lim_aux_data
78 struct loop *max_loop; /* The outermost loop in that the statement
79 is invariant. */
81 struct loop *tgt_loop; /* The loop out of that we want to move the
82 invariant. */
84 struct loop *always_executed_in;
85 /* The outermost loop for that we are sure
86 the statement is executed if the loop
87 is entered. */
89 bool sm_done; /* True iff the store motion for a memory
90 reference in the statement has already
91 been executed. */
93 unsigned cost; /* Cost of the computation performed by the
94 statement. */
96 struct depend *depends; /* List of statements that must be also hoisted
97 out of the loop when this statement is
98 hoisted; i.e. those that define the operands
99 of the statement and are inside of the
100 MAX_LOOP loop. */
103 #define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
104 ? NULL \
105 : (struct lim_aux_data *) (stmt_ann (STMT)->aux))
107 /* Description of a memory reference location for store motion. */
109 struct mem_ref_loc
111 tree *ref; /* The reference itself. */
112 tree stmt; /* The statement in that it occurs. */
113 struct mem_ref_loc *next; /* Next use in the chain. */
116 /* Description of a memory reference for store motion. */
118 struct mem_ref
120 tree mem; /* The memory itself. */
121 hashval_t hash; /* Its hash value. */
122 bool is_stored; /* True if there is a store to the location
123 in the loop. */
124 struct mem_ref_loc *locs; /* The locations where it is found. */
125 bitmap vops; /* Vops corresponding to this memory
126 location. */
127 struct mem_ref *next; /* Next memory reference in the list.
128 Memory references are stored in a hash
129 table, but the hash function depends
130 on values of pointers. Thus we cannot use
131 htab_traverse, since then we would get
132 misscompares during bootstrap (although the
133 produced code would be correct). */
136 /* Minimum cost of an expensive expression. */
137 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
139 /* The outermost loop for that execution of the header guarantees that the
140 block will be executed. */
141 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
143 /* Calls CBCK for each index in memory reference ADDR_P. There are two
144 kinds situations handled; in each of these cases, the memory reference
145 and DATA are passed to the callback:
147 Access to an array: ARRAY_{RANGE_}REF (base, index). In this case we also
148 pass the pointer to the index to the callback.
150 Pointer dereference: INDIRECT_REF (addr). In this case we also pass the
151 pointer to addr to the callback.
153 If the callback returns false, the whole search stops and false is returned.
154 Otherwise the function returns true after traversing through the whole
155 reference *ADDR_P. */
157 bool
158 for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
160 tree *nxt, *idx;
162 for (; ; addr_p = nxt)
164 switch (TREE_CODE (*addr_p))
166 case SSA_NAME:
167 return cbck (*addr_p, addr_p, data);
169 case MISALIGNED_INDIRECT_REF:
170 case ALIGN_INDIRECT_REF:
171 case INDIRECT_REF:
172 nxt = &TREE_OPERAND (*addr_p, 0);
173 return cbck (*addr_p, nxt, data);
175 case BIT_FIELD_REF:
176 case VIEW_CONVERT_EXPR:
177 case ARRAY_RANGE_REF:
178 case REALPART_EXPR:
179 case IMAGPART_EXPR:
180 nxt = &TREE_OPERAND (*addr_p, 0);
181 break;
183 case COMPONENT_REF:
184 /* If the component has varying offset, it behaves like index
185 as well. */
186 idx = &TREE_OPERAND (*addr_p, 2);
187 if (*idx
188 && !cbck (*addr_p, idx, data))
189 return false;
191 nxt = &TREE_OPERAND (*addr_p, 0);
192 break;
194 case ARRAY_REF:
195 nxt = &TREE_OPERAND (*addr_p, 0);
196 if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
197 return false;
198 break;
200 case VAR_DECL:
201 case PARM_DECL:
202 case STRING_CST:
203 case RESULT_DECL:
204 return true;
206 default:
207 gcc_unreachable ();
212 /* If it is possible to hoist the statement STMT unconditionally,
213 returns MOVE_POSSIBLE.
214 If it is possible to hoist the statement STMT, but we must avoid making
215 it executed if it would not be executed in the original program (e.g.
216 because it may trap), return MOVE_PRESERVE_EXECUTION.
217 Otherwise return MOVE_IMPOSSIBLE. */
219 enum move_pos
220 movement_possibility (tree stmt)
222 tree lhs, rhs;
224 if (flag_unswitch_loops
225 && TREE_CODE (stmt) == COND_EXPR)
227 /* If we perform unswitching, force the operands of the invariant
228 condition to be moved out of the loop. */
229 return MOVE_POSSIBLE;
232 if (TREE_CODE (stmt) != MODIFY_EXPR)
233 return MOVE_IMPOSSIBLE;
235 if (stmt_ends_bb_p (stmt))
236 return MOVE_IMPOSSIBLE;
238 if (stmt_ann (stmt)->has_volatile_ops)
239 return MOVE_IMPOSSIBLE;
241 lhs = TREE_OPERAND (stmt, 0);
242 if (TREE_CODE (lhs) == SSA_NAME
243 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
244 return MOVE_IMPOSSIBLE;
246 rhs = TREE_OPERAND (stmt, 1);
248 if (TREE_SIDE_EFFECTS (rhs))
249 return MOVE_IMPOSSIBLE;
251 if (TREE_CODE (lhs) != SSA_NAME
252 || tree_could_trap_p (rhs))
253 return MOVE_PRESERVE_EXECUTION;
255 if (get_call_expr_in (stmt))
257 /* While pure or const call is guaranteed to have no side effects, we
258 cannot move it arbitrarily. Consider code like
260 char *s = something ();
262 while (1)
264 if (s)
265 t = strlen (s);
266 else
267 t = 0;
270 Here the strlen call cannot be moved out of the loop, even though
271 s is invariant. In addition to possibly creating a call with
272 invalid arguments, moving out a function call that is not executed
273 may cause performance regressions in case the call is costly and
274 not executed at all. */
275 return MOVE_PRESERVE_EXECUTION;
277 return MOVE_POSSIBLE;
280 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
281 loop to that we could move the expression using DEF if it did not have
282 other operands, i.e. the outermost loop enclosing LOOP in that the value
283 of DEF is invariant. */
285 static struct loop *
286 outermost_invariant_loop (tree def, struct loop *loop)
288 tree def_stmt;
289 basic_block def_bb;
290 struct loop *max_loop;
292 if (TREE_CODE (def) != SSA_NAME)
293 return superloop_at_depth (loop, 1);
295 def_stmt = SSA_NAME_DEF_STMT (def);
296 def_bb = bb_for_stmt (def_stmt);
297 if (!def_bb)
298 return superloop_at_depth (loop, 1);
300 max_loop = find_common_loop (loop, def_bb->loop_father);
302 if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop)
303 max_loop = find_common_loop (max_loop,
304 LIM_DATA (def_stmt)->max_loop->outer);
305 if (max_loop == loop)
306 return NULL;
307 max_loop = superloop_at_depth (loop, max_loop->depth + 1);
309 return max_loop;
312 /* Returns the outermost superloop of LOOP in that the expression EXPR is
313 invariant. */
315 static struct loop *
316 outermost_invariant_loop_expr (tree expr, struct loop *loop)
318 enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
319 unsigned i, nops;
320 struct loop *max_loop = superloop_at_depth (loop, 1), *aloop;
322 if (TREE_CODE (expr) == SSA_NAME
323 || TREE_CODE (expr) == INTEGER_CST
324 || is_gimple_min_invariant (expr))
325 return outermost_invariant_loop (expr, loop);
327 if (class != tcc_unary
328 && class != tcc_binary
329 && class != tcc_expression
330 && class != tcc_comparison)
331 return NULL;
333 nops = TREE_CODE_LENGTH (TREE_CODE (expr));
334 for (i = 0; i < nops; i++)
336 aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop);
337 if (!aloop)
338 return NULL;
340 if (flow_loop_nested_p (max_loop, aloop))
341 max_loop = aloop;
344 return max_loop;
347 /* DATA is a structure containing information associated with a statement
348 inside LOOP. DEF is one of the operands of this statement.
350 Find the outermost loop enclosing LOOP in that value of DEF is invariant
351 and record this in DATA->max_loop field. If DEF itself is defined inside
352 this loop as well (i.e. we need to hoist it out of the loop if we want
353 to hoist the statement represented by DATA), record the statement in that
354 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
355 add the cost of the computation of DEF to the DATA->cost.
357 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
359 static bool
360 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
361 bool add_cost)
363 tree def_stmt = SSA_NAME_DEF_STMT (def);
364 basic_block def_bb = bb_for_stmt (def_stmt);
365 struct loop *max_loop;
366 struct depend *dep;
368 if (!def_bb)
369 return true;
371 max_loop = outermost_invariant_loop (def, loop);
372 if (!max_loop)
373 return false;
375 if (flow_loop_nested_p (data->max_loop, max_loop))
376 data->max_loop = max_loop;
378 if (!LIM_DATA (def_stmt))
379 return true;
381 if (add_cost
382 /* Only add the cost if the statement defining DEF is inside LOOP,
383 i.e. if it is likely that by moving the invariants dependent
384 on it, we will be able to avoid creating a new register for
385 it (since it will be only used in these dependent invariants). */
386 && def_bb->loop_father == loop)
387 data->cost += LIM_DATA (def_stmt)->cost;
389 dep = xmalloc (sizeof (struct depend));
390 dep->stmt = def_stmt;
391 dep->next = data->depends;
392 data->depends = dep;
394 return true;
397 /* Returns an estimate for a cost of statement STMT. TODO -- the values here
398 are just ad-hoc constants. The estimates should be based on target-specific
399 values. */
401 static unsigned
402 stmt_cost (tree stmt)
404 tree rhs;
405 unsigned cost = 1;
407 /* Always try to create possibilities for unswitching. */
408 if (TREE_CODE (stmt) == COND_EXPR)
409 return LIM_EXPENSIVE;
411 rhs = TREE_OPERAND (stmt, 1);
413 /* Hoisting memory references out should almost surely be a win. */
414 if (stmt_references_memory_p (stmt))
415 cost += 20;
417 switch (TREE_CODE (rhs))
419 case CALL_EXPR:
420 /* We should be hoisting calls if possible. */
422 /* Unless the call is a builtin_constant_p; this always folds to a
423 constant, so moving it is useless. */
424 rhs = get_callee_fndecl (rhs);
425 if (DECL_BUILT_IN_CLASS (rhs) == BUILT_IN_NORMAL
426 && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P)
427 return 0;
429 cost += 20;
430 break;
432 case MULT_EXPR:
433 case TRUNC_DIV_EXPR:
434 case CEIL_DIV_EXPR:
435 case FLOOR_DIV_EXPR:
436 case ROUND_DIV_EXPR:
437 case EXACT_DIV_EXPR:
438 case CEIL_MOD_EXPR:
439 case FLOOR_MOD_EXPR:
440 case ROUND_MOD_EXPR:
441 case TRUNC_MOD_EXPR:
442 case RDIV_EXPR:
443 /* Division and multiplication are usually expensive. */
444 cost += 20;
445 break;
447 default:
448 break;
451 return cost;
454 /* Determine the outermost loop to that it is possible to hoist a statement
455 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
456 the outermost loop in that the value computed by STMT is invariant.
457 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
458 we preserve the fact whether STMT is executed. It also fills other related
459 information to LIM_DATA (STMT).
461 The function returns false if STMT cannot be hoisted outside of the loop it
462 is defined in, and true otherwise. */
464 static bool
465 determine_max_movement (tree stmt, bool must_preserve_exec)
467 basic_block bb = bb_for_stmt (stmt);
468 struct loop *loop = bb->loop_father;
469 struct loop *level;
470 struct lim_aux_data *lim_data = LIM_DATA (stmt);
471 tree val;
472 ssa_op_iter iter;
474 if (must_preserve_exec)
475 level = ALWAYS_EXECUTED_IN (bb);
476 else
477 level = superloop_at_depth (loop, 1);
478 lim_data->max_loop = level;
480 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
481 if (!add_dependency (val, lim_data, loop, true))
482 return false;
484 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
485 if (!add_dependency (val, lim_data, loop, false))
486 return false;
488 lim_data->cost += stmt_cost (stmt);
490 return true;
493 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
494 and that one of the operands of this statement is computed by STMT.
495 Ensure that STMT (together with all the statements that define its
496 operands) is hoisted at least out of the loop LEVEL. */
498 static void
499 set_level (tree stmt, struct loop *orig_loop, struct loop *level)
501 struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father;
502 struct depend *dep;
504 stmt_loop = find_common_loop (orig_loop, stmt_loop);
505 if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop)
506 stmt_loop = find_common_loop (stmt_loop,
507 LIM_DATA (stmt)->tgt_loop->outer);
508 if (flow_loop_nested_p (stmt_loop, level))
509 return;
511 gcc_assert (LIM_DATA (stmt));
512 gcc_assert (level == LIM_DATA (stmt)->max_loop
513 || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level));
515 LIM_DATA (stmt)->tgt_loop = level;
516 for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next)
517 set_level (dep->stmt, orig_loop, level);
520 /* Determines an outermost loop from that we want to hoist the statement STMT.
521 For now we chose the outermost possible loop. TODO -- use profiling
522 information to set it more sanely. */
524 static void
525 set_profitable_level (tree stmt)
527 set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop);
530 /* Returns true if STMT is not a pure call. */
532 static bool
533 nonpure_call_p (tree stmt)
535 tree call = get_call_expr_in (stmt);
537 if (!call)
538 return false;
540 return TREE_SIDE_EFFECTS (call) != 0;
543 /* Releases the memory occupied by DATA. */
545 static void
546 free_lim_aux_data (struct lim_aux_data *data)
548 struct depend *dep, *next;
550 for (dep = data->depends; dep; dep = next)
552 next = dep->next;
553 free (dep);
555 free (data);
558 /* Determine the outermost loops in that statements in basic block BB are
559 invariant, and record them to the LIM_DATA associated with the statements.
560 Callback for walk_dominator_tree. */
562 static void
563 determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
564 basic_block bb)
566 enum move_pos pos;
567 block_stmt_iterator bsi;
568 tree stmt, rhs;
569 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
570 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
572 if (!bb->loop_father->outer)
573 return;
575 if (dump_file && (dump_flags & TDF_DETAILS))
576 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
577 bb->index, bb->loop_father->num, bb->loop_father->depth);
579 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
581 stmt = bsi_stmt (bsi);
583 pos = movement_possibility (stmt);
584 if (pos == MOVE_IMPOSSIBLE)
586 if (nonpure_call_p (stmt))
588 maybe_never = true;
589 outermost = NULL;
591 continue;
594 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
595 to be hoisted out of loop, saving expensive divide. */
596 if (pos == MOVE_POSSIBLE
597 && (rhs = TREE_OPERAND (stmt, 1)) != NULL
598 && TREE_CODE (rhs) == RDIV_EXPR
599 && flag_unsafe_math_optimizations
600 && outermost_invariant_loop_expr (TREE_OPERAND (rhs, 1),
601 loop_containing_stmt (stmt)) != NULL
602 && outermost_invariant_loop_expr (rhs,
603 loop_containing_stmt (stmt)) == NULL)
605 tree lhs, stmt1, stmt2, var, name;
607 lhs = TREE_OPERAND (stmt, 0);
609 /* stmt must be MODIFY_EXPR. */
610 var = create_tmp_var (TREE_TYPE (rhs), "reciptmp");
611 add_referenced_tmp_var (var);
613 stmt1 = build2 (MODIFY_EXPR, void_type_node, var,
614 build2 (RDIV_EXPR, TREE_TYPE (rhs),
615 build_real (TREE_TYPE (rhs), dconst1),
616 TREE_OPERAND (rhs, 1)));
617 name = make_ssa_name (var, stmt1);
618 TREE_OPERAND (stmt1, 0) = name;
619 stmt2 = build2 (MODIFY_EXPR, void_type_node, lhs,
620 build2 (MULT_EXPR, TREE_TYPE (rhs),
621 name, TREE_OPERAND (rhs, 0)));
623 /* Replace division stmt with reciprocal and multiply stmts.
624 The multiply stmt is not invariant, so update iterator
625 and avoid rescanning. */
626 bsi_replace (&bsi, stmt1, true);
627 bsi_insert_after (&bsi, stmt2, BSI_NEW_STMT);
628 SSA_NAME_DEF_STMT (lhs) = stmt2;
630 /* Continue processing with invariant reciprocal statment. */
631 stmt = stmt1;
634 stmt_ann (stmt)->aux = xcalloc (1, sizeof (struct lim_aux_data));
635 LIM_DATA (stmt)->always_executed_in = outermost;
637 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
638 continue;
640 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
642 LIM_DATA (stmt)->max_loop = NULL;
643 continue;
646 if (dump_file && (dump_flags & TDF_DETAILS))
648 print_generic_stmt_indented (dump_file, stmt, 0, 2);
649 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
650 LIM_DATA (stmt)->max_loop->depth,
651 LIM_DATA (stmt)->cost);
654 if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE)
655 set_profitable_level (stmt);
659 /* For each statement determines the outermost loop in that it is invariant,
660 statements on whose motion it depends and the cost of the computation.
661 This information is stored to the LIM_DATA structure associated with
662 each statement. */
664 static void
665 determine_invariantness (void)
667 struct dom_walk_data walk_data;
669 memset (&walk_data, 0, sizeof (struct dom_walk_data));
670 walk_data.before_dom_children_before_stmts = determine_invariantness_stmt;
672 init_walk_dominator_tree (&walk_data);
673 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
674 fini_walk_dominator_tree (&walk_data);
677 /* Commits edge insertions and updates loop structures. */
679 void
680 loop_commit_inserts (void)
682 unsigned old_last_basic_block, i;
683 basic_block bb;
685 old_last_basic_block = last_basic_block;
686 bsi_commit_edge_inserts ();
687 for (i = old_last_basic_block; i < (unsigned) last_basic_block; i++)
689 bb = BASIC_BLOCK (i);
690 add_bb_to_loop (bb,
691 find_common_loop (single_pred (bb)->loop_father,
692 single_succ (bb)->loop_father));
696 /* Hoist the statements in basic block BB out of the loops prescribed by
697 data stored in LIM_DATA structures associated with each statement. Callback
698 for walk_dominator_tree. */
700 static void
701 move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
702 basic_block bb)
704 struct loop *level;
705 block_stmt_iterator bsi;
706 tree stmt;
707 unsigned cost = 0;
709 if (!bb->loop_father->outer)
710 return;
712 for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
714 stmt = bsi_stmt (bsi);
716 if (!LIM_DATA (stmt))
718 bsi_next (&bsi);
719 continue;
722 cost = LIM_DATA (stmt)->cost;
723 level = LIM_DATA (stmt)->tgt_loop;
724 free_lim_aux_data (LIM_DATA (stmt));
725 stmt_ann (stmt)->aux = NULL;
727 if (!level)
729 bsi_next (&bsi);
730 continue;
733 /* We do not really want to move conditionals out of the loop; we just
734 placed it here to force its operands to be moved if necessary. */
735 if (TREE_CODE (stmt) == COND_EXPR)
736 continue;
738 if (dump_file && (dump_flags & TDF_DETAILS))
740 fprintf (dump_file, "Moving statement\n");
741 print_generic_stmt (dump_file, stmt, 0);
742 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
743 cost, level->num);
745 bsi_insert_on_edge (loop_preheader_edge (level), stmt);
746 bsi_remove (&bsi);
750 /* Hoist the statements out of the loops prescribed by data stored in
751 LIM_DATA structures associated with each statement.*/
753 static void
754 move_computations (void)
756 struct dom_walk_data walk_data;
758 memset (&walk_data, 0, sizeof (struct dom_walk_data));
759 walk_data.before_dom_children_before_stmts = move_computations_stmt;
761 init_walk_dominator_tree (&walk_data);
762 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
763 fini_walk_dominator_tree (&walk_data);
765 loop_commit_inserts ();
766 if (need_ssa_update_p ())
767 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
770 /* Checks whether the statement defining variable *INDEX can be hoisted
771 out of the loop passed in DATA. Callback for for_each_index. */
773 static bool
774 may_move_till (tree ref, tree *index, void *data)
776 struct loop *loop = data, *max_loop;
778 /* If REF is an array reference, check also that the step and the lower
779 bound is invariant in LOOP. */
780 if (TREE_CODE (ref) == ARRAY_REF)
782 tree step = array_ref_element_size (ref);
783 tree lbound = array_ref_low_bound (ref);
785 max_loop = outermost_invariant_loop_expr (step, loop);
786 if (!max_loop)
787 return false;
789 max_loop = outermost_invariant_loop_expr (lbound, loop);
790 if (!max_loop)
791 return false;
794 max_loop = outermost_invariant_loop (*index, loop);
795 if (!max_loop)
796 return false;
798 return true;
801 /* Forces statements defining (invariant) SSA names in expression EXPR to be
802 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
804 static void
805 force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop)
807 enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
808 unsigned i, nops;
810 if (TREE_CODE (expr) == SSA_NAME)
812 tree stmt = SSA_NAME_DEF_STMT (expr);
813 if (IS_EMPTY_STMT (stmt))
814 return;
816 set_level (stmt, orig_loop, loop);
817 return;
820 if (class != tcc_unary
821 && class != tcc_binary
822 && class != tcc_expression
823 && class != tcc_comparison)
824 return;
826 nops = TREE_CODE_LENGTH (TREE_CODE (expr));
827 for (i = 0; i < nops; i++)
828 force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop);
831 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
832 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
833 for_each_index. */
835 struct fmt_data
837 struct loop *loop;
838 struct loop *orig_loop;
841 static bool
842 force_move_till (tree ref, tree *index, void *data)
844 tree stmt;
845 struct fmt_data *fmt_data = data;
847 if (TREE_CODE (ref) == ARRAY_REF)
849 tree step = array_ref_element_size (ref);
850 tree lbound = array_ref_low_bound (ref);
852 force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop);
853 force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop);
856 if (TREE_CODE (*index) != SSA_NAME)
857 return true;
859 stmt = SSA_NAME_DEF_STMT (*index);
860 if (IS_EMPTY_STMT (stmt))
861 return true;
863 set_level (stmt, fmt_data->orig_loop, fmt_data->loop);
865 return true;
868 /* Records memory reference location *REF to the list MEM_REFS. The reference
869 occurs in statement STMT. */
871 static void
872 record_mem_ref_loc (struct mem_ref_loc **mem_refs, tree stmt, tree *ref)
874 struct mem_ref_loc *aref = xmalloc (sizeof (struct mem_ref_loc));
876 aref->stmt = stmt;
877 aref->ref = ref;
879 aref->next = *mem_refs;
880 *mem_refs = aref;
883 /* Releases list of memory reference locations MEM_REFS. */
885 static void
886 free_mem_ref_locs (struct mem_ref_loc *mem_refs)
888 struct mem_ref_loc *act;
890 while (mem_refs)
892 act = mem_refs;
893 mem_refs = mem_refs->next;
894 free (act);
898 /* Rewrites memory references in list MEM_REFS by variable TMP_VAR. */
900 static void
901 rewrite_mem_refs (tree tmp_var, struct mem_ref_loc *mem_refs)
903 tree var;
904 ssa_op_iter iter;
906 for (; mem_refs; mem_refs = mem_refs->next)
908 FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter, SSA_OP_ALL_VIRTUALS)
909 mark_sym_for_renaming (SSA_NAME_VAR (var));
911 *mem_refs->ref = tmp_var;
912 update_stmt (mem_refs->stmt);
916 /* Records request for store motion of memory reference REF from LOOP.
917 MEM_REFS is the list of occurrences of the reference REF inside LOOP;
918 these references are rewritten by a new temporary variable.
919 Exits from the LOOP are stored in EXITS, there are N_EXITS of them.
920 The initialization of the temporary variable is put to the preheader
921 of the loop, and assignments to the reference from the temporary variable
922 are emitted to exits. */
924 static void
925 schedule_sm (struct loop *loop, edge *exits, unsigned n_exits, tree ref,
926 struct mem_ref_loc *mem_refs)
928 struct mem_ref_loc *aref;
929 tree tmp_var;
930 unsigned i;
931 tree load, store;
932 struct fmt_data fmt_data;
934 if (dump_file && (dump_flags & TDF_DETAILS))
936 fprintf (dump_file, "Executing store motion of ");
937 print_generic_expr (dump_file, ref, 0);
938 fprintf (dump_file, " from loop %d\n", loop->num);
941 tmp_var = make_rename_temp (TREE_TYPE (ref), "lsm_tmp");
943 fmt_data.loop = loop;
944 fmt_data.orig_loop = loop;
945 for_each_index (&ref, force_move_till, &fmt_data);
947 rewrite_mem_refs (tmp_var, mem_refs);
948 for (aref = mem_refs; aref; aref = aref->next)
949 if (LIM_DATA (aref->stmt))
950 LIM_DATA (aref->stmt)->sm_done = true;
952 /* Emit the load & stores. */
953 load = build (MODIFY_EXPR, void_type_node, tmp_var, ref);
954 get_stmt_ann (load)->aux = xcalloc (1, sizeof (struct lim_aux_data));
955 LIM_DATA (load)->max_loop = loop;
956 LIM_DATA (load)->tgt_loop = loop;
958 /* Put this into the latch, so that we are sure it will be processed after
959 all dependencies. */
960 bsi_insert_on_edge (loop_latch_edge (loop), load);
962 for (i = 0; i < n_exits; i++)
964 store = build (MODIFY_EXPR, void_type_node,
965 unshare_expr (ref), tmp_var);
966 bsi_insert_on_edge (exits[i], store);
970 /* Check whether memory reference REF can be hoisted out of the LOOP. If this
971 is true, prepare the statements that load the value of the memory reference
972 to a temporary variable in the loop preheader, store it back on the loop
973 exits, and replace all the references inside LOOP by this temporary variable.
974 LOOP has N_EXITS stored in EXITS. CLOBBERED_VOPS is the bitmap of virtual
975 operands that are clobbered by a call or accessed through multiple references
976 in loop. */
978 static void
979 determine_lsm_ref (struct loop *loop, edge *exits, unsigned n_exits,
980 bitmap clobbered_vops, struct mem_ref *ref)
982 struct mem_ref_loc *aref;
983 struct loop *must_exec;
985 /* In case the memory is not stored to, there is nothing for SM to do. */
986 if (!ref->is_stored)
987 return;
989 /* If the reference is aliased with any different ref, or killed by call
990 in function, then fail. */
991 if (bitmap_intersect_p (ref->vops, clobbered_vops))
992 return;
994 if (tree_could_trap_p (ref->mem))
996 /* If the memory access is unsafe (i.e. it might trap), ensure that some
997 of the statements in that it occurs is always executed when the loop
998 is entered. This way we know that by moving the load from the
999 reference out of the loop we will not cause the error that would not
1000 occur otherwise.
1002 TODO -- in fact we would like to check for anticipability of the
1003 reference, i.e. that on each path from loop entry to loop exit at
1004 least one of the statements containing the memory reference is
1005 executed. */
1007 for (aref = ref->locs; aref; aref = aref->next)
1009 if (!LIM_DATA (aref->stmt))
1010 continue;
1012 must_exec = LIM_DATA (aref->stmt)->always_executed_in;
1013 if (!must_exec)
1014 continue;
1016 if (must_exec == loop
1017 || flow_loop_nested_p (must_exec, loop))
1018 break;
1021 if (!aref)
1022 return;
1025 schedule_sm (loop, exits, n_exits, ref->mem, ref->locs);
1028 /* Hoists memory references MEM_REFS out of LOOP. CLOBBERED_VOPS is the list
1029 of vops clobbered by call in loop or accessed by multiple memory references.
1030 EXITS is the list of N_EXITS exit edges of the LOOP. */
1032 static void
1033 hoist_memory_references (struct loop *loop, struct mem_ref *mem_refs,
1034 bitmap clobbered_vops, edge *exits, unsigned n_exits)
1036 struct mem_ref *ref;
1038 for (ref = mem_refs; ref; ref = ref->next)
1039 determine_lsm_ref (loop, exits, n_exits, clobbered_vops, ref);
1042 /* Checks whether LOOP (with N_EXITS exits stored in EXITS array) is suitable
1043 for a store motion optimization (i.e. whether we can insert statement
1044 on its exits). */
1046 static bool
1047 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED, edge *exits,
1048 unsigned n_exits)
1050 unsigned i;
1052 for (i = 0; i < n_exits; i++)
1053 if (exits[i]->flags & EDGE_ABNORMAL)
1054 return false;
1056 return true;
1059 /* A hash function for struct mem_ref object OBJ. */
1061 static hashval_t
1062 memref_hash (const void *obj)
1064 const struct mem_ref *mem = obj;
1066 return mem->hash;
1069 /* An equality function for struct mem_ref object OBJ1 with
1070 memory reference OBJ2. */
1072 static int
1073 memref_eq (const void *obj1, const void *obj2)
1075 const struct mem_ref *mem1 = obj1;
1077 return operand_equal_p (mem1->mem, (tree) obj2, 0);
1080 /* Gathers memory references in statement STMT in LOOP, storing the
1081 information about them in MEM_REFS hash table. Note vops accessed through
1082 unrecognized statements in CLOBBERED_VOPS. The newly created references
1083 are also stored to MEM_REF_LIST. */
1085 static void
1086 gather_mem_refs_stmt (struct loop *loop, htab_t mem_refs,
1087 bitmap clobbered_vops, tree stmt,
1088 struct mem_ref **mem_ref_list)
1090 tree *lhs, *rhs, *mem = NULL;
1091 hashval_t hash;
1092 PTR *slot;
1093 struct mem_ref *ref = NULL;
1094 ssa_op_iter oi;
1095 tree vname;
1096 bool is_stored;
1098 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
1099 return;
1101 /* Recognize MEM = (SSA_NAME | invariant) and SSA_NAME = MEM patterns. */
1102 if (TREE_CODE (stmt) != MODIFY_EXPR)
1103 goto fail;
1105 lhs = &TREE_OPERAND (stmt, 0);
1106 rhs = &TREE_OPERAND (stmt, 1);
1108 if (TREE_CODE (*lhs) == SSA_NAME)
1110 if (!is_gimple_addressable (*rhs))
1111 goto fail;
1113 mem = rhs;
1114 is_stored = false;
1116 else if (TREE_CODE (*rhs) == SSA_NAME
1117 || is_gimple_min_invariant (*rhs))
1119 mem = lhs;
1120 is_stored = true;
1122 else
1123 goto fail;
1125 /* If we cannot create an SSA name for the result, give up. */
1126 if (!is_gimple_reg_type (TREE_TYPE (*mem))
1127 || TREE_THIS_VOLATILE (*mem))
1128 goto fail;
1130 /* If we cannot move the reference out of the loop, fail. */
1131 if (!for_each_index (mem, may_move_till, loop))
1132 goto fail;
1134 hash = iterative_hash_expr (*mem, 0);
1135 slot = htab_find_slot_with_hash (mem_refs, *mem, hash, INSERT);
1137 if (*slot)
1138 ref = *slot;
1139 else
1141 ref = xmalloc (sizeof (struct mem_ref));
1142 ref->mem = *mem;
1143 ref->hash = hash;
1144 ref->locs = NULL;
1145 ref->is_stored = false;
1146 ref->vops = BITMAP_ALLOC (NULL);
1147 ref->next = *mem_ref_list;
1148 *mem_ref_list = ref;
1149 *slot = ref;
1151 ref->is_stored |= is_stored;
1153 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1154 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1156 bitmap_set_bit (ref->vops,
1157 var_ann (SSA_NAME_VAR (vname))->uid);
1159 record_mem_ref_loc (&ref->locs, stmt, mem);
1160 return;
1162 fail:
1163 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1164 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1166 bitmap_set_bit (clobbered_vops,
1167 var_ann (SSA_NAME_VAR (vname))->uid);
1171 /* Gathers memory references in LOOP. Notes vops accessed through unrecognized
1172 statements in CLOBBERED_VOPS. The list of the references found by
1173 the function is returned. */
1175 static struct mem_ref *
1176 gather_mem_refs (struct loop *loop, bitmap clobbered_vops)
1178 basic_block *body = get_loop_body (loop);
1179 block_stmt_iterator bsi;
1180 unsigned i;
1181 struct mem_ref *mem_ref_list = NULL;
1182 htab_t mem_refs = htab_create (100, memref_hash, memref_eq, NULL);
1184 for (i = 0; i < loop->num_nodes; i++)
1186 for (bsi = bsi_start (body[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1187 gather_mem_refs_stmt (loop, mem_refs, clobbered_vops, bsi_stmt (bsi),
1188 &mem_ref_list);
1191 free (body);
1193 htab_delete (mem_refs);
1194 return mem_ref_list;
1197 /* Finds the vops accessed by more than one of the memory references described
1198 in MEM_REFS and marks them in CLOBBERED_VOPS. */
1200 static void
1201 find_more_ref_vops (struct mem_ref *mem_refs, bitmap clobbered_vops)
1203 bitmap_head tmp, all_vops;
1204 struct mem_ref *ref;
1206 bitmap_initialize (&tmp, &bitmap_default_obstack);
1207 bitmap_initialize (&all_vops, &bitmap_default_obstack);
1209 for (ref = mem_refs; ref; ref = ref->next)
1211 /* The vops that are already in all_vops are accessed by more than
1212 one memory reference. */
1213 bitmap_and (&tmp, &all_vops, ref->vops);
1214 bitmap_ior_into (clobbered_vops, &tmp);
1215 bitmap_clear (&tmp);
1217 bitmap_ior_into (&all_vops, ref->vops);
1220 bitmap_clear (&all_vops);
1223 /* Releases the memory occupied by REF. */
1225 static void
1226 free_mem_ref (struct mem_ref *ref)
1228 free_mem_ref_locs (ref->locs);
1229 BITMAP_FREE (ref->vops);
1230 free (ref);
1233 /* Releases the memory occupied by REFS. */
1235 static void
1236 free_mem_refs (struct mem_ref *refs)
1238 struct mem_ref *ref, *next;
1240 for (ref = refs; ref; ref = next)
1242 next = ref->next;
1243 free_mem_ref (ref);
1247 /* Try to perform store motion for all memory references modified inside
1248 LOOP. */
1250 static void
1251 determine_lsm_loop (struct loop *loop)
1253 unsigned n_exits;
1254 edge *exits = get_loop_exit_edges (loop, &n_exits);
1255 bitmap clobbered_vops;
1256 struct mem_ref *mem_refs;
1258 if (!loop_suitable_for_sm (loop, exits, n_exits))
1260 free (exits);
1261 return;
1264 /* Find the memory references in LOOP. */
1265 clobbered_vops = BITMAP_ALLOC (NULL);
1266 mem_refs = gather_mem_refs (loop, clobbered_vops);
1268 /* Find the vops that are used for more than one reference. */
1269 find_more_ref_vops (mem_refs, clobbered_vops);
1271 /* Hoist all suitable memory references. */
1272 hoist_memory_references (loop, mem_refs, clobbered_vops, exits, n_exits);
1274 free_mem_refs (mem_refs);
1275 free (exits);
1276 BITMAP_FREE (clobbered_vops);
1279 /* Try to perform store motion for all memory references modified inside
1280 any of LOOPS. */
1282 static void
1283 determine_lsm (struct loops *loops)
1285 struct loop *loop;
1287 if (!loops->tree_root->inner)
1288 return;
1290 /* Pass the loops from the outermost and perform the store motion as
1291 suitable. */
1293 loop = loops->tree_root->inner;
1294 while (1)
1296 determine_lsm_loop (loop);
1298 if (loop->inner)
1300 loop = loop->inner;
1301 continue;
1303 while (!loop->next)
1305 loop = loop->outer;
1306 if (loop == loops->tree_root)
1308 loop_commit_inserts ();
1309 return;
1312 loop = loop->next;
1316 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
1317 for each such basic block bb records the outermost loop for that execution
1318 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
1319 blocks that contain a nonpure call. */
1321 static void
1322 fill_always_executed_in (struct loop *loop, sbitmap contains_call)
1324 basic_block bb = NULL, *bbs, last = NULL;
1325 unsigned i;
1326 edge e;
1327 struct loop *inn_loop = loop;
1329 if (!loop->header->aux)
1331 bbs = get_loop_body_in_dom_order (loop);
1333 for (i = 0; i < loop->num_nodes; i++)
1335 edge_iterator ei;
1336 bb = bbs[i];
1338 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1339 last = bb;
1341 if (TEST_BIT (contains_call, bb->index))
1342 break;
1344 FOR_EACH_EDGE (e, ei, bb->succs)
1345 if (!flow_bb_inside_loop_p (loop, e->dest))
1346 break;
1347 if (e)
1348 break;
1350 /* A loop might be infinite (TODO use simple loop analysis
1351 to disprove this if possible). */
1352 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1353 break;
1355 if (!flow_bb_inside_loop_p (inn_loop, bb))
1356 break;
1358 if (bb->loop_father->header == bb)
1360 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1361 break;
1363 /* In a loop that is always entered we may proceed anyway.
1364 But record that we entered it and stop once we leave it. */
1365 inn_loop = bb->loop_father;
1369 while (1)
1371 last->aux = loop;
1372 if (last == loop->header)
1373 break;
1374 last = get_immediate_dominator (CDI_DOMINATORS, last);
1377 free (bbs);
1380 for (loop = loop->inner; loop; loop = loop->next)
1381 fill_always_executed_in (loop, contains_call);
1384 /* Compute the global information needed by the loop invariant motion pass.
1385 LOOPS is the loop tree. */
1387 static void
1388 tree_ssa_lim_initialize (struct loops *loops)
1390 sbitmap contains_call = sbitmap_alloc (last_basic_block);
1391 block_stmt_iterator bsi;
1392 struct loop *loop;
1393 basic_block bb;
1395 sbitmap_zero (contains_call);
1396 FOR_EACH_BB (bb)
1398 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1400 if (nonpure_call_p (bsi_stmt (bsi)))
1401 break;
1404 if (!bsi_end_p (bsi))
1405 SET_BIT (contains_call, bb->index);
1408 for (loop = loops->tree_root->inner; loop; loop = loop->next)
1409 fill_always_executed_in (loop, contains_call);
1411 sbitmap_free (contains_call);
1414 /* Cleans up after the invariant motion pass. */
1416 static void
1417 tree_ssa_lim_finalize (void)
1419 basic_block bb;
1421 FOR_EACH_BB (bb)
1423 bb->aux = NULL;
1427 /* Moves invariants from LOOPS. Only "expensive" invariants are moved out --
1428 i.e. those that are likely to be win regardless of the register pressure. */
1430 void
1431 tree_ssa_lim (struct loops *loops)
1433 tree_ssa_lim_initialize (loops);
1435 /* For each statement determine the outermost loop in that it is
1436 invariant and cost for computing the invariant. */
1437 determine_invariantness ();
1439 /* For each memory reference determine whether it is possible to hoist it
1440 out of the loop. Force the necessary invariants to be moved out of the
1441 loops as well. */
1442 determine_lsm (loops);
1444 /* Move the expressions that are expensive enough. */
1445 move_computations ();
1447 tree_ssa_lim_finalize ();