2005-06-07 Adrian Straetling <straetling@de.ibm.com>
[official-gcc.git] / gcc / tree-ssa-loop-im.c
blobb7a57f61ede659c21bd85ff9566659635cc835b3
1 /* Loop invariant motion.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "hard-reg-set.h"
29 #include "basic-block.h"
30 #include "output.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "domwalk.h"
37 #include "params.h"
38 #include "tree-pass.h"
39 #include "flags.h"
40 #include "real.h"
41 #include "hashtab.h"
43 /* TODO: Support for predicated code motion. I.e.
45 while (1)
47 if (cond)
49 a = inv;
50 something;
54 Where COND and INV are is invariants, but evaluating INV may trap or be
55 invalid from some other reason if !COND. This may be transformed to
57 if (cond)
58 a = inv;
59 while (1)
61 if (cond)
62 something;
63 } */
65 /* A type for the list of statements that have to be moved in order to be able
66 to hoist an invariant computation. */
68 struct depend
70 tree stmt;
71 struct depend *next;
74 /* The auxiliary data kept for each statement. */
76 struct lim_aux_data
78 struct loop *max_loop; /* The outermost loop in that the statement
79 is invariant. */
81 struct loop *tgt_loop; /* The loop out of that we want to move the
82 invariant. */
84 struct loop *always_executed_in;
85 /* The outermost loop for that we are sure
86 the statement is executed if the loop
87 is entered. */
89 bool sm_done; /* True iff the store motion for a memory
90 reference in the statement has already
91 been executed. */
93 unsigned cost; /* Cost of the computation performed by the
94 statement. */
96 struct depend *depends; /* List of statements that must be also hoisted
97 out of the loop when this statement is
98 hoisted; i.e. those that define the operands
99 of the statement and are inside of the
100 MAX_LOOP loop. */
103 #define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
104 ? NULL \
105 : (struct lim_aux_data *) (stmt_ann (STMT)->common.aux))
107 /* Description of a memory reference location for store motion. */
109 struct mem_ref_loc
111 tree *ref; /* The reference itself. */
112 tree stmt; /* The statement in that it occurs. */
113 struct mem_ref_loc *next; /* Next use in the chain. */
116 /* Description of a memory reference for store motion. */
118 struct mem_ref
120 tree mem; /* The memory itself. */
121 hashval_t hash; /* Its hash value. */
122 bool is_stored; /* True if there is a store to the location
123 in the loop. */
124 struct mem_ref_loc *locs; /* The locations where it is found. */
125 bitmap vops; /* Vops corresponding to this memory
126 location. */
127 struct mem_ref *next; /* Next memory reference in the list.
128 Memory references are stored in a hash
129 table, but the hash function depends
130 on values of pointers. Thus we cannot use
131 htab_traverse, since then we would get
132 miscompares during bootstrap (although the
133 produced code would be correct). */
136 /* Minimum cost of an expensive expression. */
137 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
139 /* The outermost loop for that execution of the header guarantees that the
140 block will be executed. */
141 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
143 /* Calls CBCK for each index in memory reference ADDR_P. There are two
144 kinds situations handled; in each of these cases, the memory reference
145 and DATA are passed to the callback:
147 Access to an array: ARRAY_{RANGE_}REF (base, index). In this case we also
148 pass the pointer to the index to the callback.
150 Pointer dereference: INDIRECT_REF (addr). In this case we also pass the
151 pointer to addr to the callback.
153 If the callback returns false, the whole search stops and false is returned.
154 Otherwise the function returns true after traversing through the whole
155 reference *ADDR_P. */
157 bool
158 for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
160 tree *nxt, *idx;
162 for (; ; addr_p = nxt)
164 switch (TREE_CODE (*addr_p))
166 case SSA_NAME:
167 return cbck (*addr_p, addr_p, data);
169 case MISALIGNED_INDIRECT_REF:
170 case ALIGN_INDIRECT_REF:
171 case INDIRECT_REF:
172 nxt = &TREE_OPERAND (*addr_p, 0);
173 return cbck (*addr_p, nxt, data);
175 case BIT_FIELD_REF:
176 case VIEW_CONVERT_EXPR:
177 case ARRAY_RANGE_REF:
178 case REALPART_EXPR:
179 case IMAGPART_EXPR:
180 nxt = &TREE_OPERAND (*addr_p, 0);
181 break;
183 case COMPONENT_REF:
184 /* If the component has varying offset, it behaves like index
185 as well. */
186 idx = &TREE_OPERAND (*addr_p, 2);
187 if (*idx
188 && !cbck (*addr_p, idx, data))
189 return false;
191 nxt = &TREE_OPERAND (*addr_p, 0);
192 break;
194 case ARRAY_REF:
195 nxt = &TREE_OPERAND (*addr_p, 0);
196 if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
197 return false;
198 break;
200 case VAR_DECL:
201 case PARM_DECL:
202 case STRING_CST:
203 case RESULT_DECL:
204 case VECTOR_CST:
205 return true;
207 case TARGET_MEM_REF:
208 idx = &TMR_BASE (*addr_p);
209 if (*idx
210 && !cbck (*addr_p, idx, data))
211 return false;
212 idx = &TMR_INDEX (*addr_p);
213 if (*idx
214 && !cbck (*addr_p, idx, data))
215 return false;
216 return true;
218 default:
219 gcc_unreachable ();
224 /* If it is possible to hoist the statement STMT unconditionally,
225 returns MOVE_POSSIBLE.
226 If it is possible to hoist the statement STMT, but we must avoid making
227 it executed if it would not be executed in the original program (e.g.
228 because it may trap), return MOVE_PRESERVE_EXECUTION.
229 Otherwise return MOVE_IMPOSSIBLE. */
231 enum move_pos
232 movement_possibility (tree stmt)
234 tree lhs, rhs;
236 if (flag_unswitch_loops
237 && TREE_CODE (stmt) == COND_EXPR)
239 /* If we perform unswitching, force the operands of the invariant
240 condition to be moved out of the loop. */
241 return MOVE_POSSIBLE;
244 if (TREE_CODE (stmt) != MODIFY_EXPR)
245 return MOVE_IMPOSSIBLE;
247 if (stmt_ends_bb_p (stmt))
248 return MOVE_IMPOSSIBLE;
250 if (stmt_ann (stmt)->has_volatile_ops)
251 return MOVE_IMPOSSIBLE;
253 lhs = TREE_OPERAND (stmt, 0);
254 if (TREE_CODE (lhs) == SSA_NAME
255 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
256 return MOVE_IMPOSSIBLE;
258 rhs = TREE_OPERAND (stmt, 1);
260 if (TREE_SIDE_EFFECTS (rhs))
261 return MOVE_IMPOSSIBLE;
263 if (TREE_CODE (lhs) != SSA_NAME
264 || tree_could_trap_p (rhs))
265 return MOVE_PRESERVE_EXECUTION;
267 if (get_call_expr_in (stmt))
269 /* While pure or const call is guaranteed to have no side effects, we
270 cannot move it arbitrarily. Consider code like
272 char *s = something ();
274 while (1)
276 if (s)
277 t = strlen (s);
278 else
279 t = 0;
282 Here the strlen call cannot be moved out of the loop, even though
283 s is invariant. In addition to possibly creating a call with
284 invalid arguments, moving out a function call that is not executed
285 may cause performance regressions in case the call is costly and
286 not executed at all. */
287 return MOVE_PRESERVE_EXECUTION;
289 return MOVE_POSSIBLE;
292 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
293 loop to that we could move the expression using DEF if it did not have
294 other operands, i.e. the outermost loop enclosing LOOP in that the value
295 of DEF is invariant. */
297 static struct loop *
298 outermost_invariant_loop (tree def, struct loop *loop)
300 tree def_stmt;
301 basic_block def_bb;
302 struct loop *max_loop;
304 if (TREE_CODE (def) != SSA_NAME)
305 return superloop_at_depth (loop, 1);
307 def_stmt = SSA_NAME_DEF_STMT (def);
308 def_bb = bb_for_stmt (def_stmt);
309 if (!def_bb)
310 return superloop_at_depth (loop, 1);
312 max_loop = find_common_loop (loop, def_bb->loop_father);
314 if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop)
315 max_loop = find_common_loop (max_loop,
316 LIM_DATA (def_stmt)->max_loop->outer);
317 if (max_loop == loop)
318 return NULL;
319 max_loop = superloop_at_depth (loop, max_loop->depth + 1);
321 return max_loop;
324 /* Returns the outermost superloop of LOOP in that the expression EXPR is
325 invariant. */
327 static struct loop *
328 outermost_invariant_loop_expr (tree expr, struct loop *loop)
330 enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
331 unsigned i, nops;
332 struct loop *max_loop = superloop_at_depth (loop, 1), *aloop;
334 if (TREE_CODE (expr) == SSA_NAME
335 || TREE_CODE (expr) == INTEGER_CST
336 || is_gimple_min_invariant (expr))
337 return outermost_invariant_loop (expr, loop);
339 if (class != tcc_unary
340 && class != tcc_binary
341 && class != tcc_expression
342 && class != tcc_comparison)
343 return NULL;
345 nops = TREE_CODE_LENGTH (TREE_CODE (expr));
346 for (i = 0; i < nops; i++)
348 aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop);
349 if (!aloop)
350 return NULL;
352 if (flow_loop_nested_p (max_loop, aloop))
353 max_loop = aloop;
356 return max_loop;
359 /* DATA is a structure containing information associated with a statement
360 inside LOOP. DEF is one of the operands of this statement.
362 Find the outermost loop enclosing LOOP in that value of DEF is invariant
363 and record this in DATA->max_loop field. If DEF itself is defined inside
364 this loop as well (i.e. we need to hoist it out of the loop if we want
365 to hoist the statement represented by DATA), record the statement in that
366 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
367 add the cost of the computation of DEF to the DATA->cost.
369 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
371 static bool
372 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
373 bool add_cost)
375 tree def_stmt = SSA_NAME_DEF_STMT (def);
376 basic_block def_bb = bb_for_stmt (def_stmt);
377 struct loop *max_loop;
378 struct depend *dep;
380 if (!def_bb)
381 return true;
383 max_loop = outermost_invariant_loop (def, loop);
384 if (!max_loop)
385 return false;
387 if (flow_loop_nested_p (data->max_loop, max_loop))
388 data->max_loop = max_loop;
390 if (!LIM_DATA (def_stmt))
391 return true;
393 if (add_cost
394 /* Only add the cost if the statement defining DEF is inside LOOP,
395 i.e. if it is likely that by moving the invariants dependent
396 on it, we will be able to avoid creating a new register for
397 it (since it will be only used in these dependent invariants). */
398 && def_bb->loop_father == loop)
399 data->cost += LIM_DATA (def_stmt)->cost;
401 dep = xmalloc (sizeof (struct depend));
402 dep->stmt = def_stmt;
403 dep->next = data->depends;
404 data->depends = dep;
406 return true;
409 /* Returns an estimate for a cost of statement STMT. TODO -- the values here
410 are just ad-hoc constants. The estimates should be based on target-specific
411 values. */
413 static unsigned
414 stmt_cost (tree stmt)
416 tree rhs;
417 unsigned cost = 1;
419 /* Always try to create possibilities for unswitching. */
420 if (TREE_CODE (stmt) == COND_EXPR)
421 return LIM_EXPENSIVE;
423 rhs = TREE_OPERAND (stmt, 1);
425 /* Hoisting memory references out should almost surely be a win. */
426 if (stmt_references_memory_p (stmt))
427 cost += 20;
429 switch (TREE_CODE (rhs))
431 case CALL_EXPR:
432 /* We should be hoisting calls if possible. */
434 /* Unless the call is a builtin_constant_p; this always folds to a
435 constant, so moving it is useless. */
436 rhs = get_callee_fndecl (rhs);
437 if (DECL_BUILT_IN_CLASS (rhs) == BUILT_IN_NORMAL
438 && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P)
439 return 0;
441 cost += 20;
442 break;
444 case MULT_EXPR:
445 case TRUNC_DIV_EXPR:
446 case CEIL_DIV_EXPR:
447 case FLOOR_DIV_EXPR:
448 case ROUND_DIV_EXPR:
449 case EXACT_DIV_EXPR:
450 case CEIL_MOD_EXPR:
451 case FLOOR_MOD_EXPR:
452 case ROUND_MOD_EXPR:
453 case TRUNC_MOD_EXPR:
454 case RDIV_EXPR:
455 /* Division and multiplication are usually expensive. */
456 cost += 20;
457 break;
459 default:
460 break;
463 return cost;
466 /* Determine the outermost loop to that it is possible to hoist a statement
467 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
468 the outermost loop in that the value computed by STMT is invariant.
469 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
470 we preserve the fact whether STMT is executed. It also fills other related
471 information to LIM_DATA (STMT).
473 The function returns false if STMT cannot be hoisted outside of the loop it
474 is defined in, and true otherwise. */
476 static bool
477 determine_max_movement (tree stmt, bool must_preserve_exec)
479 basic_block bb = bb_for_stmt (stmt);
480 struct loop *loop = bb->loop_father;
481 struct loop *level;
482 struct lim_aux_data *lim_data = LIM_DATA (stmt);
483 tree val;
484 ssa_op_iter iter;
486 if (must_preserve_exec)
487 level = ALWAYS_EXECUTED_IN (bb);
488 else
489 level = superloop_at_depth (loop, 1);
490 lim_data->max_loop = level;
492 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
493 if (!add_dependency (val, lim_data, loop, true))
494 return false;
496 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
497 if (!add_dependency (val, lim_data, loop, false))
498 return false;
500 lim_data->cost += stmt_cost (stmt);
502 return true;
505 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
506 and that one of the operands of this statement is computed by STMT.
507 Ensure that STMT (together with all the statements that define its
508 operands) is hoisted at least out of the loop LEVEL. */
510 static void
511 set_level (tree stmt, struct loop *orig_loop, struct loop *level)
513 struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father;
514 struct depend *dep;
516 stmt_loop = find_common_loop (orig_loop, stmt_loop);
517 if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop)
518 stmt_loop = find_common_loop (stmt_loop,
519 LIM_DATA (stmt)->tgt_loop->outer);
520 if (flow_loop_nested_p (stmt_loop, level))
521 return;
523 gcc_assert (LIM_DATA (stmt));
524 gcc_assert (level == LIM_DATA (stmt)->max_loop
525 || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level));
527 LIM_DATA (stmt)->tgt_loop = level;
528 for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next)
529 set_level (dep->stmt, orig_loop, level);
532 /* Determines an outermost loop from that we want to hoist the statement STMT.
533 For now we chose the outermost possible loop. TODO -- use profiling
534 information to set it more sanely. */
536 static void
537 set_profitable_level (tree stmt)
539 set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop);
542 /* Returns true if STMT is not a pure call. */
544 static bool
545 nonpure_call_p (tree stmt)
547 tree call = get_call_expr_in (stmt);
549 if (!call)
550 return false;
552 return TREE_SIDE_EFFECTS (call) != 0;
555 /* Releases the memory occupied by DATA. */
557 static void
558 free_lim_aux_data (struct lim_aux_data *data)
560 struct depend *dep, *next;
562 for (dep = data->depends; dep; dep = next)
564 next = dep->next;
565 free (dep);
567 free (data);
570 /* Determine the outermost loops in that statements in basic block BB are
571 invariant, and record them to the LIM_DATA associated with the statements.
572 Callback for walk_dominator_tree. */
574 static void
575 determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
576 basic_block bb)
578 enum move_pos pos;
579 block_stmt_iterator bsi;
580 tree stmt, rhs;
581 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
582 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
584 if (!bb->loop_father->outer)
585 return;
587 if (dump_file && (dump_flags & TDF_DETAILS))
588 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
589 bb->index, bb->loop_father->num, bb->loop_father->depth);
591 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
593 stmt = bsi_stmt (bsi);
595 pos = movement_possibility (stmt);
596 if (pos == MOVE_IMPOSSIBLE)
598 if (nonpure_call_p (stmt))
600 maybe_never = true;
601 outermost = NULL;
603 continue;
606 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
607 to be hoisted out of loop, saving expensive divide. */
608 if (pos == MOVE_POSSIBLE
609 && (rhs = TREE_OPERAND (stmt, 1)) != NULL
610 && TREE_CODE (rhs) == RDIV_EXPR
611 && flag_unsafe_math_optimizations
612 && outermost_invariant_loop_expr (TREE_OPERAND (rhs, 1),
613 loop_containing_stmt (stmt)) != NULL
614 && outermost_invariant_loop_expr (rhs,
615 loop_containing_stmt (stmt)) == NULL)
617 tree lhs, stmt1, stmt2, var, name;
619 lhs = TREE_OPERAND (stmt, 0);
621 /* stmt must be MODIFY_EXPR. */
622 var = create_tmp_var (TREE_TYPE (rhs), "reciptmp");
623 add_referenced_tmp_var (var);
625 stmt1 = build2 (MODIFY_EXPR, void_type_node, var,
626 build2 (RDIV_EXPR, TREE_TYPE (rhs),
627 build_real (TREE_TYPE (rhs), dconst1),
628 TREE_OPERAND (rhs, 1)));
629 name = make_ssa_name (var, stmt1);
630 TREE_OPERAND (stmt1, 0) = name;
631 stmt2 = build2 (MODIFY_EXPR, void_type_node, lhs,
632 build2 (MULT_EXPR, TREE_TYPE (rhs),
633 name, TREE_OPERAND (rhs, 0)));
635 /* Replace division stmt with reciprocal and multiply stmts.
636 The multiply stmt is not invariant, so update iterator
637 and avoid rescanning. */
638 bsi_replace (&bsi, stmt1, true);
639 bsi_insert_after (&bsi, stmt2, BSI_NEW_STMT);
640 SSA_NAME_DEF_STMT (lhs) = stmt2;
642 /* Continue processing with invariant reciprocal statement. */
643 stmt = stmt1;
646 stmt_ann (stmt)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
647 LIM_DATA (stmt)->always_executed_in = outermost;
649 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
650 continue;
652 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
654 LIM_DATA (stmt)->max_loop = NULL;
655 continue;
658 if (dump_file && (dump_flags & TDF_DETAILS))
660 print_generic_stmt_indented (dump_file, stmt, 0, 2);
661 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
662 LIM_DATA (stmt)->max_loop->depth,
663 LIM_DATA (stmt)->cost);
666 if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE)
667 set_profitable_level (stmt);
671 /* For each statement determines the outermost loop in that it is invariant,
672 statements on whose motion it depends and the cost of the computation.
673 This information is stored to the LIM_DATA structure associated with
674 each statement. */
676 static void
677 determine_invariantness (void)
679 struct dom_walk_data walk_data;
681 memset (&walk_data, 0, sizeof (struct dom_walk_data));
682 walk_data.before_dom_children_before_stmts = determine_invariantness_stmt;
684 init_walk_dominator_tree (&walk_data);
685 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
686 fini_walk_dominator_tree (&walk_data);
689 /* Commits edge insertions and updates loop structures. */
691 void
692 loop_commit_inserts (void)
694 unsigned old_last_basic_block, i;
695 basic_block bb;
697 old_last_basic_block = last_basic_block;
698 bsi_commit_edge_inserts ();
699 for (i = old_last_basic_block; i < (unsigned) last_basic_block; i++)
701 bb = BASIC_BLOCK (i);
702 add_bb_to_loop (bb,
703 find_common_loop (single_pred (bb)->loop_father,
704 single_succ (bb)->loop_father));
708 /* Hoist the statements in basic block BB out of the loops prescribed by
709 data stored in LIM_DATA structures associated with each statement. Callback
710 for walk_dominator_tree. */
712 static void
713 move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
714 basic_block bb)
716 struct loop *level;
717 block_stmt_iterator bsi;
718 tree stmt;
719 unsigned cost = 0;
721 if (!bb->loop_father->outer)
722 return;
724 for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
726 stmt = bsi_stmt (bsi);
728 if (!LIM_DATA (stmt))
730 bsi_next (&bsi);
731 continue;
734 cost = LIM_DATA (stmt)->cost;
735 level = LIM_DATA (stmt)->tgt_loop;
736 free_lim_aux_data (LIM_DATA (stmt));
737 stmt_ann (stmt)->common.aux = NULL;
739 if (!level)
741 bsi_next (&bsi);
742 continue;
745 /* We do not really want to move conditionals out of the loop; we just
746 placed it here to force its operands to be moved if necessary. */
747 if (TREE_CODE (stmt) == COND_EXPR)
748 continue;
750 if (dump_file && (dump_flags & TDF_DETAILS))
752 fprintf (dump_file, "Moving statement\n");
753 print_generic_stmt (dump_file, stmt, 0);
754 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
755 cost, level->num);
757 bsi_insert_on_edge (loop_preheader_edge (level), stmt);
758 bsi_remove (&bsi);
762 /* Hoist the statements out of the loops prescribed by data stored in
763 LIM_DATA structures associated with each statement.*/
765 static void
766 move_computations (void)
768 struct dom_walk_data walk_data;
770 memset (&walk_data, 0, sizeof (struct dom_walk_data));
771 walk_data.before_dom_children_before_stmts = move_computations_stmt;
773 init_walk_dominator_tree (&walk_data);
774 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
775 fini_walk_dominator_tree (&walk_data);
777 loop_commit_inserts ();
778 if (need_ssa_update_p ())
779 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
782 /* Checks whether the statement defining variable *INDEX can be hoisted
783 out of the loop passed in DATA. Callback for for_each_index. */
785 static bool
786 may_move_till (tree ref, tree *index, void *data)
788 struct loop *loop = data, *max_loop;
790 /* If REF is an array reference, check also that the step and the lower
791 bound is invariant in LOOP. */
792 if (TREE_CODE (ref) == ARRAY_REF)
794 tree step = array_ref_element_size (ref);
795 tree lbound = array_ref_low_bound (ref);
797 max_loop = outermost_invariant_loop_expr (step, loop);
798 if (!max_loop)
799 return false;
801 max_loop = outermost_invariant_loop_expr (lbound, loop);
802 if (!max_loop)
803 return false;
806 max_loop = outermost_invariant_loop (*index, loop);
807 if (!max_loop)
808 return false;
810 return true;
813 /* Forces statements defining (invariant) SSA names in expression EXPR to be
814 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
816 static void
817 force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop)
819 enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
820 unsigned i, nops;
822 if (TREE_CODE (expr) == SSA_NAME)
824 tree stmt = SSA_NAME_DEF_STMT (expr);
825 if (IS_EMPTY_STMT (stmt))
826 return;
828 set_level (stmt, orig_loop, loop);
829 return;
832 if (class != tcc_unary
833 && class != tcc_binary
834 && class != tcc_expression
835 && class != tcc_comparison)
836 return;
838 nops = TREE_CODE_LENGTH (TREE_CODE (expr));
839 for (i = 0; i < nops; i++)
840 force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop);
843 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
844 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
845 for_each_index. */
847 struct fmt_data
849 struct loop *loop;
850 struct loop *orig_loop;
853 static bool
854 force_move_till (tree ref, tree *index, void *data)
856 tree stmt;
857 struct fmt_data *fmt_data = data;
859 if (TREE_CODE (ref) == ARRAY_REF)
861 tree step = array_ref_element_size (ref);
862 tree lbound = array_ref_low_bound (ref);
864 force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop);
865 force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop);
868 if (TREE_CODE (*index) != SSA_NAME)
869 return true;
871 stmt = SSA_NAME_DEF_STMT (*index);
872 if (IS_EMPTY_STMT (stmt))
873 return true;
875 set_level (stmt, fmt_data->orig_loop, fmt_data->loop);
877 return true;
880 /* Records memory reference location *REF to the list MEM_REFS. The reference
881 occurs in statement STMT. */
883 static void
884 record_mem_ref_loc (struct mem_ref_loc **mem_refs, tree stmt, tree *ref)
886 struct mem_ref_loc *aref = xmalloc (sizeof (struct mem_ref_loc));
888 aref->stmt = stmt;
889 aref->ref = ref;
891 aref->next = *mem_refs;
892 *mem_refs = aref;
895 /* Releases list of memory reference locations MEM_REFS. */
897 static void
898 free_mem_ref_locs (struct mem_ref_loc *mem_refs)
900 struct mem_ref_loc *act;
902 while (mem_refs)
904 act = mem_refs;
905 mem_refs = mem_refs->next;
906 free (act);
910 /* Rewrites memory references in list MEM_REFS by variable TMP_VAR. */
912 static void
913 rewrite_mem_refs (tree tmp_var, struct mem_ref_loc *mem_refs)
915 tree var;
916 ssa_op_iter iter;
918 for (; mem_refs; mem_refs = mem_refs->next)
920 FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter, SSA_OP_ALL_VIRTUALS)
921 mark_sym_for_renaming (SSA_NAME_VAR (var));
923 *mem_refs->ref = tmp_var;
924 update_stmt (mem_refs->stmt);
928 /* Records request for store motion of memory reference REF from LOOP.
929 MEM_REFS is the list of occurrences of the reference REF inside LOOP;
930 these references are rewritten by a new temporary variable.
931 Exits from the LOOP are stored in EXITS, there are N_EXITS of them.
932 The initialization of the temporary variable is put to the preheader
933 of the loop, and assignments to the reference from the temporary variable
934 are emitted to exits. */
936 static void
937 schedule_sm (struct loop *loop, edge *exits, unsigned n_exits, tree ref,
938 struct mem_ref_loc *mem_refs)
940 struct mem_ref_loc *aref;
941 tree tmp_var;
942 unsigned i;
943 tree load, store;
944 struct fmt_data fmt_data;
946 if (dump_file && (dump_flags & TDF_DETAILS))
948 fprintf (dump_file, "Executing store motion of ");
949 print_generic_expr (dump_file, ref, 0);
950 fprintf (dump_file, " from loop %d\n", loop->num);
953 tmp_var = make_rename_temp (TREE_TYPE (ref), "lsm_tmp");
955 fmt_data.loop = loop;
956 fmt_data.orig_loop = loop;
957 for_each_index (&ref, force_move_till, &fmt_data);
959 rewrite_mem_refs (tmp_var, mem_refs);
960 for (aref = mem_refs; aref; aref = aref->next)
961 if (LIM_DATA (aref->stmt))
962 LIM_DATA (aref->stmt)->sm_done = true;
964 /* Emit the load & stores. */
965 load = build (MODIFY_EXPR, void_type_node, tmp_var, ref);
966 get_stmt_ann (load)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
967 LIM_DATA (load)->max_loop = loop;
968 LIM_DATA (load)->tgt_loop = loop;
970 /* Put this into the latch, so that we are sure it will be processed after
971 all dependencies. */
972 bsi_insert_on_edge (loop_latch_edge (loop), load);
974 for (i = 0; i < n_exits; i++)
976 store = build (MODIFY_EXPR, void_type_node,
977 unshare_expr (ref), tmp_var);
978 bsi_insert_on_edge (exits[i], store);
982 /* Check whether memory reference REF can be hoisted out of the LOOP. If this
983 is true, prepare the statements that load the value of the memory reference
984 to a temporary variable in the loop preheader, store it back on the loop
985 exits, and replace all the references inside LOOP by this temporary variable.
986 LOOP has N_EXITS stored in EXITS. CLOBBERED_VOPS is the bitmap of virtual
987 operands that are clobbered by a call or accessed through multiple references
988 in loop. */
990 static void
991 determine_lsm_ref (struct loop *loop, edge *exits, unsigned n_exits,
992 bitmap clobbered_vops, struct mem_ref *ref)
994 struct mem_ref_loc *aref;
995 struct loop *must_exec;
997 /* In case the memory is not stored to, there is nothing for SM to do. */
998 if (!ref->is_stored)
999 return;
1001 /* If the reference is aliased with any different ref, or killed by call
1002 in function, then fail. */
1003 if (bitmap_intersect_p (ref->vops, clobbered_vops))
1004 return;
1006 if (tree_could_trap_p (ref->mem))
1008 /* If the memory access is unsafe (i.e. it might trap), ensure that some
1009 of the statements in that it occurs is always executed when the loop
1010 is entered. This way we know that by moving the load from the
1011 reference out of the loop we will not cause the error that would not
1012 occur otherwise.
1014 TODO -- in fact we would like to check for anticipability of the
1015 reference, i.e. that on each path from loop entry to loop exit at
1016 least one of the statements containing the memory reference is
1017 executed. */
1019 for (aref = ref->locs; aref; aref = aref->next)
1021 if (!LIM_DATA (aref->stmt))
1022 continue;
1024 must_exec = LIM_DATA (aref->stmt)->always_executed_in;
1025 if (!must_exec)
1026 continue;
1028 if (must_exec == loop
1029 || flow_loop_nested_p (must_exec, loop))
1030 break;
1033 if (!aref)
1034 return;
1037 schedule_sm (loop, exits, n_exits, ref->mem, ref->locs);
1040 /* Hoists memory references MEM_REFS out of LOOP. CLOBBERED_VOPS is the list
1041 of vops clobbered by call in loop or accessed by multiple memory references.
1042 EXITS is the list of N_EXITS exit edges of the LOOP. */
1044 static void
1045 hoist_memory_references (struct loop *loop, struct mem_ref *mem_refs,
1046 bitmap clobbered_vops, edge *exits, unsigned n_exits)
1048 struct mem_ref *ref;
1050 for (ref = mem_refs; ref; ref = ref->next)
1051 determine_lsm_ref (loop, exits, n_exits, clobbered_vops, ref);
1054 /* Checks whether LOOP (with N_EXITS exits stored in EXITS array) is suitable
1055 for a store motion optimization (i.e. whether we can insert statement
1056 on its exits). */
1058 static bool
1059 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED, edge *exits,
1060 unsigned n_exits)
1062 unsigned i;
1064 for (i = 0; i < n_exits; i++)
1065 if (exits[i]->flags & EDGE_ABNORMAL)
1066 return false;
1068 return true;
1071 /* A hash function for struct mem_ref object OBJ. */
1073 static hashval_t
1074 memref_hash (const void *obj)
1076 const struct mem_ref *mem = obj;
1078 return mem->hash;
1081 /* An equality function for struct mem_ref object OBJ1 with
1082 memory reference OBJ2. */
1084 static int
1085 memref_eq (const void *obj1, const void *obj2)
1087 const struct mem_ref *mem1 = obj1;
1089 return operand_equal_p (mem1->mem, (tree) obj2, 0);
1092 /* Gathers memory references in statement STMT in LOOP, storing the
1093 information about them in MEM_REFS hash table. Note vops accessed through
1094 unrecognized statements in CLOBBERED_VOPS. The newly created references
1095 are also stored to MEM_REF_LIST. */
1097 static void
1098 gather_mem_refs_stmt (struct loop *loop, htab_t mem_refs,
1099 bitmap clobbered_vops, tree stmt,
1100 struct mem_ref **mem_ref_list)
1102 tree *lhs, *rhs, *mem = NULL;
1103 hashval_t hash;
1104 PTR *slot;
1105 struct mem_ref *ref = NULL;
1106 ssa_op_iter oi;
1107 tree vname;
1108 bool is_stored;
1110 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
1111 return;
1113 /* Recognize MEM = (SSA_NAME | invariant) and SSA_NAME = MEM patterns. */
1114 if (TREE_CODE (stmt) != MODIFY_EXPR)
1115 goto fail;
1117 lhs = &TREE_OPERAND (stmt, 0);
1118 rhs = &TREE_OPERAND (stmt, 1);
1120 if (TREE_CODE (*lhs) == SSA_NAME)
1122 if (!is_gimple_addressable (*rhs))
1123 goto fail;
1125 mem = rhs;
1126 is_stored = false;
1128 else if (TREE_CODE (*rhs) == SSA_NAME
1129 || is_gimple_min_invariant (*rhs))
1131 mem = lhs;
1132 is_stored = true;
1134 else
1135 goto fail;
1137 /* If we cannot create an SSA name for the result, give up. */
1138 if (!is_gimple_reg_type (TREE_TYPE (*mem))
1139 || TREE_THIS_VOLATILE (*mem))
1140 goto fail;
1142 /* If we cannot move the reference out of the loop, fail. */
1143 if (!for_each_index (mem, may_move_till, loop))
1144 goto fail;
1146 hash = iterative_hash_expr (*mem, 0);
1147 slot = htab_find_slot_with_hash (mem_refs, *mem, hash, INSERT);
1149 if (*slot)
1150 ref = *slot;
1151 else
1153 ref = xmalloc (sizeof (struct mem_ref));
1154 ref->mem = *mem;
1155 ref->hash = hash;
1156 ref->locs = NULL;
1157 ref->is_stored = false;
1158 ref->vops = BITMAP_ALLOC (NULL);
1159 ref->next = *mem_ref_list;
1160 *mem_ref_list = ref;
1161 *slot = ref;
1163 ref->is_stored |= is_stored;
1165 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1166 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1168 bitmap_set_bit (ref->vops,
1169 var_ann (SSA_NAME_VAR (vname))->uid);
1171 record_mem_ref_loc (&ref->locs, stmt, mem);
1172 return;
1174 fail:
1175 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1176 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1178 bitmap_set_bit (clobbered_vops,
1179 var_ann (SSA_NAME_VAR (vname))->uid);
1183 /* Gathers memory references in LOOP. Notes vops accessed through unrecognized
1184 statements in CLOBBERED_VOPS. The list of the references found by
1185 the function is returned. */
1187 static struct mem_ref *
1188 gather_mem_refs (struct loop *loop, bitmap clobbered_vops)
1190 basic_block *body = get_loop_body (loop);
1191 block_stmt_iterator bsi;
1192 unsigned i;
1193 struct mem_ref *mem_ref_list = NULL;
1194 htab_t mem_refs = htab_create (100, memref_hash, memref_eq, NULL);
1196 for (i = 0; i < loop->num_nodes; i++)
1198 for (bsi = bsi_start (body[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1199 gather_mem_refs_stmt (loop, mem_refs, clobbered_vops, bsi_stmt (bsi),
1200 &mem_ref_list);
1203 free (body);
1205 htab_delete (mem_refs);
1206 return mem_ref_list;
1209 /* Finds the vops accessed by more than one of the memory references described
1210 in MEM_REFS and marks them in CLOBBERED_VOPS. */
1212 static void
1213 find_more_ref_vops (struct mem_ref *mem_refs, bitmap clobbered_vops)
1215 bitmap_head tmp, all_vops;
1216 struct mem_ref *ref;
1218 bitmap_initialize (&tmp, &bitmap_default_obstack);
1219 bitmap_initialize (&all_vops, &bitmap_default_obstack);
1221 for (ref = mem_refs; ref; ref = ref->next)
1223 /* The vops that are already in all_vops are accessed by more than
1224 one memory reference. */
1225 bitmap_and (&tmp, &all_vops, ref->vops);
1226 bitmap_ior_into (clobbered_vops, &tmp);
1227 bitmap_clear (&tmp);
1229 bitmap_ior_into (&all_vops, ref->vops);
1232 bitmap_clear (&all_vops);
1235 /* Releases the memory occupied by REF. */
1237 static void
1238 free_mem_ref (struct mem_ref *ref)
1240 free_mem_ref_locs (ref->locs);
1241 BITMAP_FREE (ref->vops);
1242 free (ref);
1245 /* Releases the memory occupied by REFS. */
1247 static void
1248 free_mem_refs (struct mem_ref *refs)
1250 struct mem_ref *ref, *next;
1252 for (ref = refs; ref; ref = next)
1254 next = ref->next;
1255 free_mem_ref (ref);
1259 /* Try to perform store motion for all memory references modified inside
1260 LOOP. */
1262 static void
1263 determine_lsm_loop (struct loop *loop)
1265 unsigned n_exits;
1266 edge *exits = get_loop_exit_edges (loop, &n_exits);
1267 bitmap clobbered_vops;
1268 struct mem_ref *mem_refs;
1270 if (!loop_suitable_for_sm (loop, exits, n_exits))
1272 free (exits);
1273 return;
1276 /* Find the memory references in LOOP. */
1277 clobbered_vops = BITMAP_ALLOC (NULL);
1278 mem_refs = gather_mem_refs (loop, clobbered_vops);
1280 /* Find the vops that are used for more than one reference. */
1281 find_more_ref_vops (mem_refs, clobbered_vops);
1283 /* Hoist all suitable memory references. */
1284 hoist_memory_references (loop, mem_refs, clobbered_vops, exits, n_exits);
1286 free_mem_refs (mem_refs);
1287 free (exits);
1288 BITMAP_FREE (clobbered_vops);
1291 /* Try to perform store motion for all memory references modified inside
1292 any of LOOPS. */
1294 static void
1295 determine_lsm (struct loops *loops)
1297 struct loop *loop;
1299 if (!loops->tree_root->inner)
1300 return;
1302 /* Pass the loops from the outermost and perform the store motion as
1303 suitable. */
1305 loop = loops->tree_root->inner;
1306 while (1)
1308 determine_lsm_loop (loop);
1310 if (loop->inner)
1312 loop = loop->inner;
1313 continue;
1315 while (!loop->next)
1317 loop = loop->outer;
1318 if (loop == loops->tree_root)
1320 loop_commit_inserts ();
1321 return;
1324 loop = loop->next;
1328 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
1329 for each such basic block bb records the outermost loop for that execution
1330 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
1331 blocks that contain a nonpure call. */
1333 static void
1334 fill_always_executed_in (struct loop *loop, sbitmap contains_call)
1336 basic_block bb = NULL, *bbs, last = NULL;
1337 unsigned i;
1338 edge e;
1339 struct loop *inn_loop = loop;
1341 if (!loop->header->aux)
1343 bbs = get_loop_body_in_dom_order (loop);
1345 for (i = 0; i < loop->num_nodes; i++)
1347 edge_iterator ei;
1348 bb = bbs[i];
1350 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1351 last = bb;
1353 if (TEST_BIT (contains_call, bb->index))
1354 break;
1356 FOR_EACH_EDGE (e, ei, bb->succs)
1357 if (!flow_bb_inside_loop_p (loop, e->dest))
1358 break;
1359 if (e)
1360 break;
1362 /* A loop might be infinite (TODO use simple loop analysis
1363 to disprove this if possible). */
1364 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1365 break;
1367 if (!flow_bb_inside_loop_p (inn_loop, bb))
1368 break;
1370 if (bb->loop_father->header == bb)
1372 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1373 break;
1375 /* In a loop that is always entered we may proceed anyway.
1376 But record that we entered it and stop once we leave it. */
1377 inn_loop = bb->loop_father;
1381 while (1)
1383 last->aux = loop;
1384 if (last == loop->header)
1385 break;
1386 last = get_immediate_dominator (CDI_DOMINATORS, last);
1389 free (bbs);
1392 for (loop = loop->inner; loop; loop = loop->next)
1393 fill_always_executed_in (loop, contains_call);
1396 /* Compute the global information needed by the loop invariant motion pass.
1397 LOOPS is the loop tree. */
1399 static void
1400 tree_ssa_lim_initialize (struct loops *loops)
1402 sbitmap contains_call = sbitmap_alloc (last_basic_block);
1403 block_stmt_iterator bsi;
1404 struct loop *loop;
1405 basic_block bb;
1407 sbitmap_zero (contains_call);
1408 FOR_EACH_BB (bb)
1410 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1412 if (nonpure_call_p (bsi_stmt (bsi)))
1413 break;
1416 if (!bsi_end_p (bsi))
1417 SET_BIT (contains_call, bb->index);
1420 for (loop = loops->tree_root->inner; loop; loop = loop->next)
1421 fill_always_executed_in (loop, contains_call);
1423 sbitmap_free (contains_call);
1426 /* Cleans up after the invariant motion pass. */
1428 static void
1429 tree_ssa_lim_finalize (void)
1431 basic_block bb;
1433 FOR_EACH_BB (bb)
1435 bb->aux = NULL;
1439 /* Moves invariants from LOOPS. Only "expensive" invariants are moved out --
1440 i.e. those that are likely to be win regardless of the register pressure. */
1442 void
1443 tree_ssa_lim (struct loops *loops)
1445 tree_ssa_lim_initialize (loops);
1447 /* For each statement determine the outermost loop in that it is
1448 invariant and cost for computing the invariant. */
1449 determine_invariantness ();
1451 /* For each memory reference determine whether it is possible to hoist it
1452 out of the loop. Force the necessary invariants to be moved out of the
1453 loops as well. */
1454 determine_lsm (loops);
1456 /* Move the expressions that are expensive enough. */
1457 move_computations ();
1459 tree_ssa_lim_finalize ();