* gcc.dg/intmax_t-1.c: Extend dg-error to cover sh*-*-elf targets.
[official-gcc.git] / gcc / tree-ssa-loop-im.c
blob9281079b7fd9618101e403bfad70b784b6f23f37
1 /* Loop invariant motion.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "hard-reg-set.h"
29 #include "basic-block.h"
30 #include "output.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "domwalk.h"
37 #include "params.h"
38 #include "tree-pass.h"
39 #include "flags.h"
40 #include "real.h"
41 #include "hashtab.h"
43 /* TODO: Support for predicated code motion. I.e.
45 while (1)
47 if (cond)
49 a = inv;
50 something;
54 Where COND and INV are is invariants, but evaluating INV may trap or be
55 invalid from some other reason if !COND. This may be transformed to
57 if (cond)
58 a = inv;
59 while (1)
61 if (cond)
62 something;
63 } */
65 /* A type for the list of statements that have to be moved in order to be able
66 to hoist an invariant computation. */
68 struct depend
70 tree stmt;
71 struct depend *next;
74 /* The auxiliary data kept for each statement. */
76 struct lim_aux_data
78 struct loop *max_loop; /* The outermost loop in that the statement
79 is invariant. */
81 struct loop *tgt_loop; /* The loop out of that we want to move the
82 invariant. */
84 struct loop *always_executed_in;
85 /* The outermost loop for that we are sure
86 the statement is executed if the loop
87 is entered. */
89 bool sm_done; /* True iff the store motion for a memory
90 reference in the statement has already
91 been executed. */
93 unsigned cost; /* Cost of the computation performed by the
94 statement. */
96 struct depend *depends; /* List of statements that must be also hoisted
97 out of the loop when this statement is
98 hoisted; i.e. those that define the operands
99 of the statement and are inside of the
100 MAX_LOOP loop. */
103 #define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
104 ? NULL \
105 : (struct lim_aux_data *) (stmt_ann (STMT)->common.aux))
107 /* Description of a memory reference location for store motion. */
109 struct mem_ref_loc
111 tree *ref; /* The reference itself. */
112 tree stmt; /* The statement in that it occurs. */
113 struct mem_ref_loc *next; /* Next use in the chain. */
116 /* Description of a memory reference for store motion. */
118 struct mem_ref
120 tree mem; /* The memory itself. */
121 hashval_t hash; /* Its hash value. */
122 bool is_stored; /* True if there is a store to the location
123 in the loop. */
124 struct mem_ref_loc *locs; /* The locations where it is found. */
125 bitmap vops; /* Vops corresponding to this memory
126 location. */
127 struct mem_ref *next; /* Next memory reference in the list.
128 Memory references are stored in a hash
129 table, but the hash function depends
130 on values of pointers. Thus we cannot use
131 htab_traverse, since then we would get
132 miscompares during bootstrap (although the
133 produced code would be correct). */
136 /* Minimum cost of an expensive expression. */
137 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
139 /* The outermost loop for that execution of the header guarantees that the
140 block will be executed. */
141 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
143 /* Calls CBCK for each index in memory reference ADDR_P. There are two
144 kinds situations handled; in each of these cases, the memory reference
145 and DATA are passed to the callback:
147 Access to an array: ARRAY_{RANGE_}REF (base, index). In this case we also
148 pass the pointer to the index to the callback.
150 Pointer dereference: INDIRECT_REF (addr). In this case we also pass the
151 pointer to addr to the callback.
153 If the callback returns false, the whole search stops and false is returned.
154 Otherwise the function returns true after traversing through the whole
155 reference *ADDR_P. */
157 bool
158 for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
160 tree *nxt, *idx;
162 for (; ; addr_p = nxt)
164 switch (TREE_CODE (*addr_p))
166 case SSA_NAME:
167 return cbck (*addr_p, addr_p, data);
169 case MISALIGNED_INDIRECT_REF:
170 case ALIGN_INDIRECT_REF:
171 case INDIRECT_REF:
172 nxt = &TREE_OPERAND (*addr_p, 0);
173 return cbck (*addr_p, nxt, data);
175 case BIT_FIELD_REF:
176 case VIEW_CONVERT_EXPR:
177 case ARRAY_RANGE_REF:
178 case REALPART_EXPR:
179 case IMAGPART_EXPR:
180 nxt = &TREE_OPERAND (*addr_p, 0);
181 break;
183 case COMPONENT_REF:
184 /* If the component has varying offset, it behaves like index
185 as well. */
186 idx = &TREE_OPERAND (*addr_p, 2);
187 if (*idx
188 && !cbck (*addr_p, idx, data))
189 return false;
191 nxt = &TREE_OPERAND (*addr_p, 0);
192 break;
194 case ARRAY_REF:
195 nxt = &TREE_OPERAND (*addr_p, 0);
196 if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
197 return false;
198 break;
200 case VAR_DECL:
201 case PARM_DECL:
202 case STRING_CST:
203 case RESULT_DECL:
204 case VECTOR_CST:
205 case COMPLEX_CST:
206 return true;
208 case TARGET_MEM_REF:
209 idx = &TMR_BASE (*addr_p);
210 if (*idx
211 && !cbck (*addr_p, idx, data))
212 return false;
213 idx = &TMR_INDEX (*addr_p);
214 if (*idx
215 && !cbck (*addr_p, idx, data))
216 return false;
217 return true;
219 default:
220 gcc_unreachable ();
225 /* If it is possible to hoist the statement STMT unconditionally,
226 returns MOVE_POSSIBLE.
227 If it is possible to hoist the statement STMT, but we must avoid making
228 it executed if it would not be executed in the original program (e.g.
229 because it may trap), return MOVE_PRESERVE_EXECUTION.
230 Otherwise return MOVE_IMPOSSIBLE. */
232 enum move_pos
233 movement_possibility (tree stmt)
235 tree lhs, rhs;
237 if (flag_unswitch_loops
238 && TREE_CODE (stmt) == COND_EXPR)
240 /* If we perform unswitching, force the operands of the invariant
241 condition to be moved out of the loop. */
242 return MOVE_POSSIBLE;
245 if (TREE_CODE (stmt) != MODIFY_EXPR)
246 return MOVE_IMPOSSIBLE;
248 if (stmt_ends_bb_p (stmt))
249 return MOVE_IMPOSSIBLE;
251 if (stmt_ann (stmt)->has_volatile_ops)
252 return MOVE_IMPOSSIBLE;
254 lhs = TREE_OPERAND (stmt, 0);
255 if (TREE_CODE (lhs) == SSA_NAME
256 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
257 return MOVE_IMPOSSIBLE;
259 rhs = TREE_OPERAND (stmt, 1);
261 if (TREE_SIDE_EFFECTS (rhs))
262 return MOVE_IMPOSSIBLE;
264 if (TREE_CODE (lhs) != SSA_NAME
265 || tree_could_trap_p (rhs))
266 return MOVE_PRESERVE_EXECUTION;
268 if (get_call_expr_in (stmt))
270 /* While pure or const call is guaranteed to have no side effects, we
271 cannot move it arbitrarily. Consider code like
273 char *s = something ();
275 while (1)
277 if (s)
278 t = strlen (s);
279 else
280 t = 0;
283 Here the strlen call cannot be moved out of the loop, even though
284 s is invariant. In addition to possibly creating a call with
285 invalid arguments, moving out a function call that is not executed
286 may cause performance regressions in case the call is costly and
287 not executed at all. */
288 return MOVE_PRESERVE_EXECUTION;
290 return MOVE_POSSIBLE;
293 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
294 loop to that we could move the expression using DEF if it did not have
295 other operands, i.e. the outermost loop enclosing LOOP in that the value
296 of DEF is invariant. */
298 static struct loop *
299 outermost_invariant_loop (tree def, struct loop *loop)
301 tree def_stmt;
302 basic_block def_bb;
303 struct loop *max_loop;
305 if (TREE_CODE (def) != SSA_NAME)
306 return superloop_at_depth (loop, 1);
308 def_stmt = SSA_NAME_DEF_STMT (def);
309 def_bb = bb_for_stmt (def_stmt);
310 if (!def_bb)
311 return superloop_at_depth (loop, 1);
313 max_loop = find_common_loop (loop, def_bb->loop_father);
315 if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop)
316 max_loop = find_common_loop (max_loop,
317 LIM_DATA (def_stmt)->max_loop->outer);
318 if (max_loop == loop)
319 return NULL;
320 max_loop = superloop_at_depth (loop, max_loop->depth + 1);
322 return max_loop;
325 /* Returns the outermost superloop of LOOP in that the expression EXPR is
326 invariant. */
328 static struct loop *
329 outermost_invariant_loop_expr (tree expr, struct loop *loop)
331 enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
332 unsigned i, nops;
333 struct loop *max_loop = superloop_at_depth (loop, 1), *aloop;
335 if (TREE_CODE (expr) == SSA_NAME
336 || TREE_CODE (expr) == INTEGER_CST
337 || is_gimple_min_invariant (expr))
338 return outermost_invariant_loop (expr, loop);
340 if (class != tcc_unary
341 && class != tcc_binary
342 && class != tcc_expression
343 && class != tcc_comparison)
344 return NULL;
346 nops = TREE_CODE_LENGTH (TREE_CODE (expr));
347 for (i = 0; i < nops; i++)
349 aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop);
350 if (!aloop)
351 return NULL;
353 if (flow_loop_nested_p (max_loop, aloop))
354 max_loop = aloop;
357 return max_loop;
360 /* DATA is a structure containing information associated with a statement
361 inside LOOP. DEF is one of the operands of this statement.
363 Find the outermost loop enclosing LOOP in that value of DEF is invariant
364 and record this in DATA->max_loop field. If DEF itself is defined inside
365 this loop as well (i.e. we need to hoist it out of the loop if we want
366 to hoist the statement represented by DATA), record the statement in that
367 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
368 add the cost of the computation of DEF to the DATA->cost.
370 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
372 static bool
373 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
374 bool add_cost)
376 tree def_stmt = SSA_NAME_DEF_STMT (def);
377 basic_block def_bb = bb_for_stmt (def_stmt);
378 struct loop *max_loop;
379 struct depend *dep;
381 if (!def_bb)
382 return true;
384 max_loop = outermost_invariant_loop (def, loop);
385 if (!max_loop)
386 return false;
388 if (flow_loop_nested_p (data->max_loop, max_loop))
389 data->max_loop = max_loop;
391 if (!LIM_DATA (def_stmt))
392 return true;
394 if (add_cost
395 /* Only add the cost if the statement defining DEF is inside LOOP,
396 i.e. if it is likely that by moving the invariants dependent
397 on it, we will be able to avoid creating a new register for
398 it (since it will be only used in these dependent invariants). */
399 && def_bb->loop_father == loop)
400 data->cost += LIM_DATA (def_stmt)->cost;
402 dep = xmalloc (sizeof (struct depend));
403 dep->stmt = def_stmt;
404 dep->next = data->depends;
405 data->depends = dep;
407 return true;
410 /* Returns an estimate for a cost of statement STMT. TODO -- the values here
411 are just ad-hoc constants. The estimates should be based on target-specific
412 values. */
414 static unsigned
415 stmt_cost (tree stmt)
417 tree rhs;
418 unsigned cost = 1;
420 /* Always try to create possibilities for unswitching. */
421 if (TREE_CODE (stmt) == COND_EXPR)
422 return LIM_EXPENSIVE;
424 rhs = TREE_OPERAND (stmt, 1);
426 /* Hoisting memory references out should almost surely be a win. */
427 if (stmt_references_memory_p (stmt))
428 cost += 20;
430 switch (TREE_CODE (rhs))
432 case CALL_EXPR:
433 /* We should be hoisting calls if possible. */
435 /* Unless the call is a builtin_constant_p; this always folds to a
436 constant, so moving it is useless. */
437 rhs = get_callee_fndecl (rhs);
438 if (DECL_BUILT_IN_CLASS (rhs) == BUILT_IN_NORMAL
439 && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P)
440 return 0;
442 cost += 20;
443 break;
445 case MULT_EXPR:
446 case TRUNC_DIV_EXPR:
447 case CEIL_DIV_EXPR:
448 case FLOOR_DIV_EXPR:
449 case ROUND_DIV_EXPR:
450 case EXACT_DIV_EXPR:
451 case CEIL_MOD_EXPR:
452 case FLOOR_MOD_EXPR:
453 case ROUND_MOD_EXPR:
454 case TRUNC_MOD_EXPR:
455 case RDIV_EXPR:
456 /* Division and multiplication are usually expensive. */
457 cost += 20;
458 break;
460 default:
461 break;
464 return cost;
467 /* Determine the outermost loop to that it is possible to hoist a statement
468 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
469 the outermost loop in that the value computed by STMT is invariant.
470 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
471 we preserve the fact whether STMT is executed. It also fills other related
472 information to LIM_DATA (STMT).
474 The function returns false if STMT cannot be hoisted outside of the loop it
475 is defined in, and true otherwise. */
477 static bool
478 determine_max_movement (tree stmt, bool must_preserve_exec)
480 basic_block bb = bb_for_stmt (stmt);
481 struct loop *loop = bb->loop_father;
482 struct loop *level;
483 struct lim_aux_data *lim_data = LIM_DATA (stmt);
484 tree val;
485 ssa_op_iter iter;
487 if (must_preserve_exec)
488 level = ALWAYS_EXECUTED_IN (bb);
489 else
490 level = superloop_at_depth (loop, 1);
491 lim_data->max_loop = level;
493 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
494 if (!add_dependency (val, lim_data, loop, true))
495 return false;
497 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
498 if (!add_dependency (val, lim_data, loop, false))
499 return false;
501 lim_data->cost += stmt_cost (stmt);
503 return true;
506 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
507 and that one of the operands of this statement is computed by STMT.
508 Ensure that STMT (together with all the statements that define its
509 operands) is hoisted at least out of the loop LEVEL. */
511 static void
512 set_level (tree stmt, struct loop *orig_loop, struct loop *level)
514 struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father;
515 struct depend *dep;
517 stmt_loop = find_common_loop (orig_loop, stmt_loop);
518 if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop)
519 stmt_loop = find_common_loop (stmt_loop,
520 LIM_DATA (stmt)->tgt_loop->outer);
521 if (flow_loop_nested_p (stmt_loop, level))
522 return;
524 gcc_assert (LIM_DATA (stmt));
525 gcc_assert (level == LIM_DATA (stmt)->max_loop
526 || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level));
528 LIM_DATA (stmt)->tgt_loop = level;
529 for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next)
530 set_level (dep->stmt, orig_loop, level);
533 /* Determines an outermost loop from that we want to hoist the statement STMT.
534 For now we chose the outermost possible loop. TODO -- use profiling
535 information to set it more sanely. */
537 static void
538 set_profitable_level (tree stmt)
540 set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop);
543 /* Returns true if STMT is not a pure call. */
545 static bool
546 nonpure_call_p (tree stmt)
548 tree call = get_call_expr_in (stmt);
550 if (!call)
551 return false;
553 return TREE_SIDE_EFFECTS (call) != 0;
556 /* Releases the memory occupied by DATA. */
558 static void
559 free_lim_aux_data (struct lim_aux_data *data)
561 struct depend *dep, *next;
563 for (dep = data->depends; dep; dep = next)
565 next = dep->next;
566 free (dep);
568 free (data);
571 /* Determine the outermost loops in that statements in basic block BB are
572 invariant, and record them to the LIM_DATA associated with the statements.
573 Callback for walk_dominator_tree. */
575 static void
576 determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
577 basic_block bb)
579 enum move_pos pos;
580 block_stmt_iterator bsi;
581 tree stmt, rhs;
582 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
583 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
585 if (!bb->loop_father->outer)
586 return;
588 if (dump_file && (dump_flags & TDF_DETAILS))
589 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
590 bb->index, bb->loop_father->num, bb->loop_father->depth);
592 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
594 stmt = bsi_stmt (bsi);
596 pos = movement_possibility (stmt);
597 if (pos == MOVE_IMPOSSIBLE)
599 if (nonpure_call_p (stmt))
601 maybe_never = true;
602 outermost = NULL;
604 continue;
607 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
608 to be hoisted out of loop, saving expensive divide. */
609 if (pos == MOVE_POSSIBLE
610 && (rhs = TREE_OPERAND (stmt, 1)) != NULL
611 && TREE_CODE (rhs) == RDIV_EXPR
612 && flag_unsafe_math_optimizations
613 && !flag_trapping_math
614 && outermost_invariant_loop_expr (TREE_OPERAND (rhs, 1),
615 loop_containing_stmt (stmt)) != NULL
616 && outermost_invariant_loop_expr (rhs,
617 loop_containing_stmt (stmt)) == NULL)
619 tree lhs, stmt1, stmt2, var, name;
621 lhs = TREE_OPERAND (stmt, 0);
623 /* stmt must be MODIFY_EXPR. */
624 var = create_tmp_var (TREE_TYPE (rhs), "reciptmp");
625 add_referenced_tmp_var (var);
627 stmt1 = build2 (MODIFY_EXPR, void_type_node, var,
628 build2 (RDIV_EXPR, TREE_TYPE (rhs),
629 build_real (TREE_TYPE (rhs), dconst1),
630 TREE_OPERAND (rhs, 1)));
631 name = make_ssa_name (var, stmt1);
632 TREE_OPERAND (stmt1, 0) = name;
633 stmt2 = build2 (MODIFY_EXPR, void_type_node, lhs,
634 build2 (MULT_EXPR, TREE_TYPE (rhs),
635 name, TREE_OPERAND (rhs, 0)));
637 /* Replace division stmt with reciprocal and multiply stmts.
638 The multiply stmt is not invariant, so update iterator
639 and avoid rescanning. */
640 bsi_replace (&bsi, stmt1, true);
641 bsi_insert_after (&bsi, stmt2, BSI_NEW_STMT);
642 SSA_NAME_DEF_STMT (lhs) = stmt2;
644 /* Continue processing with invariant reciprocal statement. */
645 stmt = stmt1;
648 stmt_ann (stmt)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
649 LIM_DATA (stmt)->always_executed_in = outermost;
651 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
652 continue;
654 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
656 LIM_DATA (stmt)->max_loop = NULL;
657 continue;
660 if (dump_file && (dump_flags & TDF_DETAILS))
662 print_generic_stmt_indented (dump_file, stmt, 0, 2);
663 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
664 LIM_DATA (stmt)->max_loop->depth,
665 LIM_DATA (stmt)->cost);
668 if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE)
669 set_profitable_level (stmt);
673 /* For each statement determines the outermost loop in that it is invariant,
674 statements on whose motion it depends and the cost of the computation.
675 This information is stored to the LIM_DATA structure associated with
676 each statement. */
678 static void
679 determine_invariantness (void)
681 struct dom_walk_data walk_data;
683 memset (&walk_data, 0, sizeof (struct dom_walk_data));
684 walk_data.before_dom_children_before_stmts = determine_invariantness_stmt;
686 init_walk_dominator_tree (&walk_data);
687 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
688 fini_walk_dominator_tree (&walk_data);
691 /* Commits edge insertions and updates loop structures. */
693 void
694 loop_commit_inserts (void)
696 unsigned old_last_basic_block, i;
697 basic_block bb;
699 old_last_basic_block = last_basic_block;
700 bsi_commit_edge_inserts ();
701 for (i = old_last_basic_block; i < (unsigned) last_basic_block; i++)
703 bb = BASIC_BLOCK (i);
704 add_bb_to_loop (bb,
705 find_common_loop (single_pred (bb)->loop_father,
706 single_succ (bb)->loop_father));
710 /* Hoist the statements in basic block BB out of the loops prescribed by
711 data stored in LIM_DATA structures associated with each statement. Callback
712 for walk_dominator_tree. */
714 static void
715 move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
716 basic_block bb)
718 struct loop *level;
719 block_stmt_iterator bsi;
720 tree stmt;
721 unsigned cost = 0;
723 if (!bb->loop_father->outer)
724 return;
726 for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
728 stmt = bsi_stmt (bsi);
730 if (!LIM_DATA (stmt))
732 bsi_next (&bsi);
733 continue;
736 cost = LIM_DATA (stmt)->cost;
737 level = LIM_DATA (stmt)->tgt_loop;
738 free_lim_aux_data (LIM_DATA (stmt));
739 stmt_ann (stmt)->common.aux = NULL;
741 if (!level)
743 bsi_next (&bsi);
744 continue;
747 /* We do not really want to move conditionals out of the loop; we just
748 placed it here to force its operands to be moved if necessary. */
749 if (TREE_CODE (stmt) == COND_EXPR)
750 continue;
752 if (dump_file && (dump_flags & TDF_DETAILS))
754 fprintf (dump_file, "Moving statement\n");
755 print_generic_stmt (dump_file, stmt, 0);
756 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
757 cost, level->num);
759 bsi_insert_on_edge (loop_preheader_edge (level), stmt);
760 bsi_remove (&bsi);
764 /* Hoist the statements out of the loops prescribed by data stored in
765 LIM_DATA structures associated with each statement.*/
767 static void
768 move_computations (void)
770 struct dom_walk_data walk_data;
772 memset (&walk_data, 0, sizeof (struct dom_walk_data));
773 walk_data.before_dom_children_before_stmts = move_computations_stmt;
775 init_walk_dominator_tree (&walk_data);
776 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
777 fini_walk_dominator_tree (&walk_data);
779 loop_commit_inserts ();
780 if (need_ssa_update_p ())
781 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
784 /* Checks whether the statement defining variable *INDEX can be hoisted
785 out of the loop passed in DATA. Callback for for_each_index. */
787 static bool
788 may_move_till (tree ref, tree *index, void *data)
790 struct loop *loop = data, *max_loop;
792 /* If REF is an array reference, check also that the step and the lower
793 bound is invariant in LOOP. */
794 if (TREE_CODE (ref) == ARRAY_REF)
796 tree step = array_ref_element_size (ref);
797 tree lbound = array_ref_low_bound (ref);
799 max_loop = outermost_invariant_loop_expr (step, loop);
800 if (!max_loop)
801 return false;
803 max_loop = outermost_invariant_loop_expr (lbound, loop);
804 if (!max_loop)
805 return false;
808 max_loop = outermost_invariant_loop (*index, loop);
809 if (!max_loop)
810 return false;
812 return true;
815 /* Forces statements defining (invariant) SSA names in expression EXPR to be
816 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
818 static void
819 force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop)
821 enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
822 unsigned i, nops;
824 if (TREE_CODE (expr) == SSA_NAME)
826 tree stmt = SSA_NAME_DEF_STMT (expr);
827 if (IS_EMPTY_STMT (stmt))
828 return;
830 set_level (stmt, orig_loop, loop);
831 return;
834 if (class != tcc_unary
835 && class != tcc_binary
836 && class != tcc_expression
837 && class != tcc_comparison)
838 return;
840 nops = TREE_CODE_LENGTH (TREE_CODE (expr));
841 for (i = 0; i < nops; i++)
842 force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop);
845 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
846 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
847 for_each_index. */
849 struct fmt_data
851 struct loop *loop;
852 struct loop *orig_loop;
855 static bool
856 force_move_till (tree ref, tree *index, void *data)
858 tree stmt;
859 struct fmt_data *fmt_data = data;
861 if (TREE_CODE (ref) == ARRAY_REF)
863 tree step = array_ref_element_size (ref);
864 tree lbound = array_ref_low_bound (ref);
866 force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop);
867 force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop);
870 if (TREE_CODE (*index) != SSA_NAME)
871 return true;
873 stmt = SSA_NAME_DEF_STMT (*index);
874 if (IS_EMPTY_STMT (stmt))
875 return true;
877 set_level (stmt, fmt_data->orig_loop, fmt_data->loop);
879 return true;
882 /* Records memory reference location *REF to the list MEM_REFS. The reference
883 occurs in statement STMT. */
885 static void
886 record_mem_ref_loc (struct mem_ref_loc **mem_refs, tree stmt, tree *ref)
888 struct mem_ref_loc *aref = xmalloc (sizeof (struct mem_ref_loc));
890 aref->stmt = stmt;
891 aref->ref = ref;
893 aref->next = *mem_refs;
894 *mem_refs = aref;
897 /* Releases list of memory reference locations MEM_REFS. */
899 static void
900 free_mem_ref_locs (struct mem_ref_loc *mem_refs)
902 struct mem_ref_loc *act;
904 while (mem_refs)
906 act = mem_refs;
907 mem_refs = mem_refs->next;
908 free (act);
912 /* Rewrites memory references in list MEM_REFS by variable TMP_VAR. */
914 static void
915 rewrite_mem_refs (tree tmp_var, struct mem_ref_loc *mem_refs)
917 tree var;
918 ssa_op_iter iter;
920 for (; mem_refs; mem_refs = mem_refs->next)
922 FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter, SSA_OP_ALL_VIRTUALS)
923 mark_sym_for_renaming (SSA_NAME_VAR (var));
925 *mem_refs->ref = tmp_var;
926 update_stmt (mem_refs->stmt);
930 /* Records request for store motion of memory reference REF from LOOP.
931 MEM_REFS is the list of occurrences of the reference REF inside LOOP;
932 these references are rewritten by a new temporary variable.
933 Exits from the LOOP are stored in EXITS, there are N_EXITS of them.
934 The initialization of the temporary variable is put to the preheader
935 of the loop, and assignments to the reference from the temporary variable
936 are emitted to exits. */
938 static void
939 schedule_sm (struct loop *loop, edge *exits, unsigned n_exits, tree ref,
940 struct mem_ref_loc *mem_refs)
942 struct mem_ref_loc *aref;
943 tree tmp_var;
944 unsigned i;
945 tree load, store;
946 struct fmt_data fmt_data;
948 if (dump_file && (dump_flags & TDF_DETAILS))
950 fprintf (dump_file, "Executing store motion of ");
951 print_generic_expr (dump_file, ref, 0);
952 fprintf (dump_file, " from loop %d\n", loop->num);
955 tmp_var = make_rename_temp (TREE_TYPE (ref), "lsm_tmp");
957 fmt_data.loop = loop;
958 fmt_data.orig_loop = loop;
959 for_each_index (&ref, force_move_till, &fmt_data);
961 rewrite_mem_refs (tmp_var, mem_refs);
962 for (aref = mem_refs; aref; aref = aref->next)
963 if (LIM_DATA (aref->stmt))
964 LIM_DATA (aref->stmt)->sm_done = true;
966 /* Emit the load & stores. */
967 load = build (MODIFY_EXPR, void_type_node, tmp_var, ref);
968 get_stmt_ann (load)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
969 LIM_DATA (load)->max_loop = loop;
970 LIM_DATA (load)->tgt_loop = loop;
972 /* Put this into the latch, so that we are sure it will be processed after
973 all dependencies. */
974 bsi_insert_on_edge (loop_latch_edge (loop), load);
976 for (i = 0; i < n_exits; i++)
978 store = build (MODIFY_EXPR, void_type_node,
979 unshare_expr (ref), tmp_var);
980 bsi_insert_on_edge (exits[i], store);
984 /* Check whether memory reference REF can be hoisted out of the LOOP. If this
985 is true, prepare the statements that load the value of the memory reference
986 to a temporary variable in the loop preheader, store it back on the loop
987 exits, and replace all the references inside LOOP by this temporary variable.
988 LOOP has N_EXITS stored in EXITS. CLOBBERED_VOPS is the bitmap of virtual
989 operands that are clobbered by a call or accessed through multiple references
990 in loop. */
992 static void
993 determine_lsm_ref (struct loop *loop, edge *exits, unsigned n_exits,
994 bitmap clobbered_vops, struct mem_ref *ref)
996 struct mem_ref_loc *aref;
997 struct loop *must_exec;
999 /* In case the memory is not stored to, there is nothing for SM to do. */
1000 if (!ref->is_stored)
1001 return;
1003 /* If the reference is aliased with any different ref, or killed by call
1004 in function, then fail. */
1005 if (bitmap_intersect_p (ref->vops, clobbered_vops))
1006 return;
1008 if (tree_could_trap_p (ref->mem))
1010 /* If the memory access is unsafe (i.e. it might trap), ensure that some
1011 of the statements in that it occurs is always executed when the loop
1012 is entered. This way we know that by moving the load from the
1013 reference out of the loop we will not cause the error that would not
1014 occur otherwise.
1016 TODO -- in fact we would like to check for anticipability of the
1017 reference, i.e. that on each path from loop entry to loop exit at
1018 least one of the statements containing the memory reference is
1019 executed. */
1021 for (aref = ref->locs; aref; aref = aref->next)
1023 if (!LIM_DATA (aref->stmt))
1024 continue;
1026 must_exec = LIM_DATA (aref->stmt)->always_executed_in;
1027 if (!must_exec)
1028 continue;
1030 if (must_exec == loop
1031 || flow_loop_nested_p (must_exec, loop))
1032 break;
1035 if (!aref)
1036 return;
1039 schedule_sm (loop, exits, n_exits, ref->mem, ref->locs);
1042 /* Hoists memory references MEM_REFS out of LOOP. CLOBBERED_VOPS is the list
1043 of vops clobbered by call in loop or accessed by multiple memory references.
1044 EXITS is the list of N_EXITS exit edges of the LOOP. */
1046 static void
1047 hoist_memory_references (struct loop *loop, struct mem_ref *mem_refs,
1048 bitmap clobbered_vops, edge *exits, unsigned n_exits)
1050 struct mem_ref *ref;
1052 for (ref = mem_refs; ref; ref = ref->next)
1053 determine_lsm_ref (loop, exits, n_exits, clobbered_vops, ref);
1056 /* Checks whether LOOP (with N_EXITS exits stored in EXITS array) is suitable
1057 for a store motion optimization (i.e. whether we can insert statement
1058 on its exits). */
1060 static bool
1061 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED, edge *exits,
1062 unsigned n_exits)
1064 unsigned i;
1066 for (i = 0; i < n_exits; i++)
1067 if (exits[i]->flags & EDGE_ABNORMAL)
1068 return false;
1070 return true;
1073 /* A hash function for struct mem_ref object OBJ. */
1075 static hashval_t
1076 memref_hash (const void *obj)
1078 const struct mem_ref *mem = obj;
1080 return mem->hash;
1083 /* An equality function for struct mem_ref object OBJ1 with
1084 memory reference OBJ2. */
1086 static int
1087 memref_eq (const void *obj1, const void *obj2)
1089 const struct mem_ref *mem1 = obj1;
1091 return operand_equal_p (mem1->mem, (tree) obj2, 0);
1094 /* Gathers memory references in statement STMT in LOOP, storing the
1095 information about them in MEM_REFS hash table. Note vops accessed through
1096 unrecognized statements in CLOBBERED_VOPS. The newly created references
1097 are also stored to MEM_REF_LIST. */
1099 static void
1100 gather_mem_refs_stmt (struct loop *loop, htab_t mem_refs,
1101 bitmap clobbered_vops, tree stmt,
1102 struct mem_ref **mem_ref_list)
1104 tree *lhs, *rhs, *mem = NULL;
1105 hashval_t hash;
1106 PTR *slot;
1107 struct mem_ref *ref = NULL;
1108 ssa_op_iter oi;
1109 tree vname;
1110 bool is_stored;
1112 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
1113 return;
1115 /* Recognize MEM = (SSA_NAME | invariant) and SSA_NAME = MEM patterns. */
1116 if (TREE_CODE (stmt) != MODIFY_EXPR)
1117 goto fail;
1119 lhs = &TREE_OPERAND (stmt, 0);
1120 rhs = &TREE_OPERAND (stmt, 1);
1122 if (TREE_CODE (*lhs) == SSA_NAME)
1124 if (!is_gimple_addressable (*rhs))
1125 goto fail;
1127 mem = rhs;
1128 is_stored = false;
1130 else if (TREE_CODE (*rhs) == SSA_NAME
1131 || is_gimple_min_invariant (*rhs))
1133 mem = lhs;
1134 is_stored = true;
1136 else
1137 goto fail;
1139 /* If we cannot create an SSA name for the result, give up. */
1140 if (!is_gimple_reg_type (TREE_TYPE (*mem))
1141 || TREE_THIS_VOLATILE (*mem))
1142 goto fail;
1144 /* If we cannot move the reference out of the loop, fail. */
1145 if (!for_each_index (mem, may_move_till, loop))
1146 goto fail;
1148 hash = iterative_hash_expr (*mem, 0);
1149 slot = htab_find_slot_with_hash (mem_refs, *mem, hash, INSERT);
1151 if (*slot)
1152 ref = *slot;
1153 else
1155 ref = xmalloc (sizeof (struct mem_ref));
1156 ref->mem = *mem;
1157 ref->hash = hash;
1158 ref->locs = NULL;
1159 ref->is_stored = false;
1160 ref->vops = BITMAP_ALLOC (NULL);
1161 ref->next = *mem_ref_list;
1162 *mem_ref_list = ref;
1163 *slot = ref;
1165 ref->is_stored |= is_stored;
1167 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1168 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1169 bitmap_set_bit (ref->vops, DECL_UID (SSA_NAME_VAR (vname)));
1170 record_mem_ref_loc (&ref->locs, stmt, mem);
1171 return;
1173 fail:
1174 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1175 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1176 bitmap_set_bit (clobbered_vops, DECL_UID (SSA_NAME_VAR (vname)));
1179 /* Gathers memory references in LOOP. Notes vops accessed through unrecognized
1180 statements in CLOBBERED_VOPS. The list of the references found by
1181 the function is returned. */
1183 static struct mem_ref *
1184 gather_mem_refs (struct loop *loop, bitmap clobbered_vops)
1186 basic_block *body = get_loop_body (loop);
1187 block_stmt_iterator bsi;
1188 unsigned i;
1189 struct mem_ref *mem_ref_list = NULL;
1190 htab_t mem_refs = htab_create (100, memref_hash, memref_eq, NULL);
1192 for (i = 0; i < loop->num_nodes; i++)
1194 for (bsi = bsi_start (body[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1195 gather_mem_refs_stmt (loop, mem_refs, clobbered_vops, bsi_stmt (bsi),
1196 &mem_ref_list);
1199 free (body);
1201 htab_delete (mem_refs);
1202 return mem_ref_list;
1205 /* Finds the vops accessed by more than one of the memory references described
1206 in MEM_REFS and marks them in CLOBBERED_VOPS. */
1208 static void
1209 find_more_ref_vops (struct mem_ref *mem_refs, bitmap clobbered_vops)
1211 bitmap_head tmp, all_vops;
1212 struct mem_ref *ref;
1214 bitmap_initialize (&tmp, &bitmap_default_obstack);
1215 bitmap_initialize (&all_vops, &bitmap_default_obstack);
1217 for (ref = mem_refs; ref; ref = ref->next)
1219 /* The vops that are already in all_vops are accessed by more than
1220 one memory reference. */
1221 bitmap_and (&tmp, &all_vops, ref->vops);
1222 bitmap_ior_into (clobbered_vops, &tmp);
1223 bitmap_clear (&tmp);
1225 bitmap_ior_into (&all_vops, ref->vops);
1228 bitmap_clear (&all_vops);
1231 /* Releases the memory occupied by REF. */
1233 static void
1234 free_mem_ref (struct mem_ref *ref)
1236 free_mem_ref_locs (ref->locs);
1237 BITMAP_FREE (ref->vops);
1238 free (ref);
1241 /* Releases the memory occupied by REFS. */
1243 static void
1244 free_mem_refs (struct mem_ref *refs)
1246 struct mem_ref *ref, *next;
1248 for (ref = refs; ref; ref = next)
1250 next = ref->next;
1251 free_mem_ref (ref);
1255 /* Try to perform store motion for all memory references modified inside
1256 LOOP. */
1258 static void
1259 determine_lsm_loop (struct loop *loop)
1261 unsigned n_exits;
1262 edge *exits = get_loop_exit_edges (loop, &n_exits);
1263 bitmap clobbered_vops;
1264 struct mem_ref *mem_refs;
1266 if (!loop_suitable_for_sm (loop, exits, n_exits))
1268 free (exits);
1269 return;
1272 /* Find the memory references in LOOP. */
1273 clobbered_vops = BITMAP_ALLOC (NULL);
1274 mem_refs = gather_mem_refs (loop, clobbered_vops);
1276 /* Find the vops that are used for more than one reference. */
1277 find_more_ref_vops (mem_refs, clobbered_vops);
1279 /* Hoist all suitable memory references. */
1280 hoist_memory_references (loop, mem_refs, clobbered_vops, exits, n_exits);
1282 free_mem_refs (mem_refs);
1283 free (exits);
1284 BITMAP_FREE (clobbered_vops);
1287 /* Try to perform store motion for all memory references modified inside
1288 any of LOOPS. */
1290 static void
1291 determine_lsm (struct loops *loops)
1293 struct loop *loop;
1295 if (!loops->tree_root->inner)
1296 return;
1298 /* Pass the loops from the outermost and perform the store motion as
1299 suitable. */
1301 loop = loops->tree_root->inner;
1302 while (1)
1304 determine_lsm_loop (loop);
1306 if (loop->inner)
1308 loop = loop->inner;
1309 continue;
1311 while (!loop->next)
1313 loop = loop->outer;
1314 if (loop == loops->tree_root)
1316 loop_commit_inserts ();
1317 return;
1320 loop = loop->next;
1324 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
1325 for each such basic block bb records the outermost loop for that execution
1326 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
1327 blocks that contain a nonpure call. */
1329 static void
1330 fill_always_executed_in (struct loop *loop, sbitmap contains_call)
1332 basic_block bb = NULL, *bbs, last = NULL;
1333 unsigned i;
1334 edge e;
1335 struct loop *inn_loop = loop;
1337 if (!loop->header->aux)
1339 bbs = get_loop_body_in_dom_order (loop);
1341 for (i = 0; i < loop->num_nodes; i++)
1343 edge_iterator ei;
1344 bb = bbs[i];
1346 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1347 last = bb;
1349 if (TEST_BIT (contains_call, bb->index))
1350 break;
1352 FOR_EACH_EDGE (e, ei, bb->succs)
1353 if (!flow_bb_inside_loop_p (loop, e->dest))
1354 break;
1355 if (e)
1356 break;
1358 /* A loop might be infinite (TODO use simple loop analysis
1359 to disprove this if possible). */
1360 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1361 break;
1363 if (!flow_bb_inside_loop_p (inn_loop, bb))
1364 break;
1366 if (bb->loop_father->header == bb)
1368 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1369 break;
1371 /* In a loop that is always entered we may proceed anyway.
1372 But record that we entered it and stop once we leave it. */
1373 inn_loop = bb->loop_father;
1377 while (1)
1379 last->aux = loop;
1380 if (last == loop->header)
1381 break;
1382 last = get_immediate_dominator (CDI_DOMINATORS, last);
1385 free (bbs);
1388 for (loop = loop->inner; loop; loop = loop->next)
1389 fill_always_executed_in (loop, contains_call);
1392 /* Compute the global information needed by the loop invariant motion pass.
1393 LOOPS is the loop tree. */
1395 static void
1396 tree_ssa_lim_initialize (struct loops *loops)
1398 sbitmap contains_call = sbitmap_alloc (last_basic_block);
1399 block_stmt_iterator bsi;
1400 struct loop *loop;
1401 basic_block bb;
1403 sbitmap_zero (contains_call);
1404 FOR_EACH_BB (bb)
1406 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1408 if (nonpure_call_p (bsi_stmt (bsi)))
1409 break;
1412 if (!bsi_end_p (bsi))
1413 SET_BIT (contains_call, bb->index);
1416 for (loop = loops->tree_root->inner; loop; loop = loop->next)
1417 fill_always_executed_in (loop, contains_call);
1419 sbitmap_free (contains_call);
1422 /* Cleans up after the invariant motion pass. */
1424 static void
1425 tree_ssa_lim_finalize (void)
1427 basic_block bb;
1429 FOR_EACH_BB (bb)
1431 bb->aux = NULL;
1435 /* Moves invariants from LOOPS. Only "expensive" invariants are moved out --
1436 i.e. those that are likely to be win regardless of the register pressure. */
1438 void
1439 tree_ssa_lim (struct loops *loops)
1441 tree_ssa_lim_initialize (loops);
1443 /* For each statement determine the outermost loop in that it is
1444 invariant and cost for computing the invariant. */
1445 determine_invariantness ();
1447 /* For each memory reference determine whether it is possible to hoist it
1448 out of the loop. Force the necessary invariants to be moved out of the
1449 loops as well. */
1450 determine_lsm (loops);
1452 /* Move the expressions that are expensive enough. */
1453 move_computations ();
1455 tree_ssa_lim_finalize ();