1 /* Loop invariant motion.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 #include "coretypes.h"
28 #include "hard-reg-set.h"
29 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
38 #include "tree-pass.h"
43 /* TODO: Support for predicated code motion. I.e.
54 Where COND and INV are is invariants, but evaluating INV may trap or be
55 invalid from some other reason if !COND. This may be transformed to
65 /* A type for the list of statements that have to be moved in order to be able
66 to hoist an invariant computation. */
74 /* The auxiliary data kept for each statement. */
78 struct loop
*max_loop
; /* The outermost loop in that the statement
81 struct loop
*tgt_loop
; /* The loop out of that we want to move the
84 struct loop
*always_executed_in
;
85 /* The outermost loop for that we are sure
86 the statement is executed if the loop
89 bool sm_done
; /* True iff the store motion for a memory
90 reference in the statement has already
93 unsigned cost
; /* Cost of the computation performed by the
96 struct depend
*depends
; /* List of statements that must be also hoisted
97 out of the loop when this statement is
98 hoisted; i.e. those that define the operands
99 of the statement and are inside of the
103 #define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
105 : (struct lim_aux_data *) (stmt_ann (STMT)->common.aux))
107 /* Description of a memory reference location for store motion. */
111 tree
*ref
; /* The reference itself. */
112 tree stmt
; /* The statement in that it occurs. */
113 struct mem_ref_loc
*next
; /* Next use in the chain. */
116 /* Description of a memory reference for store motion. */
120 tree mem
; /* The memory itself. */
121 hashval_t hash
; /* Its hash value. */
122 bool is_stored
; /* True if there is a store to the location
124 struct mem_ref_loc
*locs
; /* The locations where it is found. */
125 bitmap vops
; /* Vops corresponding to this memory
127 struct mem_ref
*next
; /* Next memory reference in the list.
128 Memory references are stored in a hash
129 table, but the hash function depends
130 on values of pointers. Thus we cannot use
131 htab_traverse, since then we would get
132 miscompares during bootstrap (although the
133 produced code would be correct). */
136 /* Minimum cost of an expensive expression. */
137 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
139 /* The outermost loop for that execution of the header guarantees that the
140 block will be executed. */
141 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
143 /* Calls CBCK for each index in memory reference ADDR_P. There are two
144 kinds situations handled; in each of these cases, the memory reference
145 and DATA are passed to the callback:
147 Access to an array: ARRAY_{RANGE_}REF (base, index). In this case we also
148 pass the pointer to the index to the callback.
150 Pointer dereference: INDIRECT_REF (addr). In this case we also pass the
151 pointer to addr to the callback.
153 If the callback returns false, the whole search stops and false is returned.
154 Otherwise the function returns true after traversing through the whole
155 reference *ADDR_P. */
158 for_each_index (tree
*addr_p
, bool (*cbck
) (tree
, tree
*, void *), void *data
)
162 for (; ; addr_p
= nxt
)
164 switch (TREE_CODE (*addr_p
))
167 return cbck (*addr_p
, addr_p
, data
);
169 case MISALIGNED_INDIRECT_REF
:
170 case ALIGN_INDIRECT_REF
:
172 nxt
= &TREE_OPERAND (*addr_p
, 0);
173 return cbck (*addr_p
, nxt
, data
);
176 case VIEW_CONVERT_EXPR
:
177 case ARRAY_RANGE_REF
:
180 nxt
= &TREE_OPERAND (*addr_p
, 0);
184 /* If the component has varying offset, it behaves like index
186 idx
= &TREE_OPERAND (*addr_p
, 2);
188 && !cbck (*addr_p
, idx
, data
))
191 nxt
= &TREE_OPERAND (*addr_p
, 0);
195 nxt
= &TREE_OPERAND (*addr_p
, 0);
196 if (!cbck (*addr_p
, &TREE_OPERAND (*addr_p
, 1), data
))
211 idx
= &TMR_BASE (*addr_p
);
213 && !cbck (*addr_p
, idx
, data
))
215 idx
= &TMR_INDEX (*addr_p
);
217 && !cbck (*addr_p
, idx
, data
))
227 /* If it is possible to hoist the statement STMT unconditionally,
228 returns MOVE_POSSIBLE.
229 If it is possible to hoist the statement STMT, but we must avoid making
230 it executed if it would not be executed in the original program (e.g.
231 because it may trap), return MOVE_PRESERVE_EXECUTION.
232 Otherwise return MOVE_IMPOSSIBLE. */
235 movement_possibility (tree stmt
)
239 if (flag_unswitch_loops
240 && TREE_CODE (stmt
) == COND_EXPR
)
242 /* If we perform unswitching, force the operands of the invariant
243 condition to be moved out of the loop. */
244 return MOVE_POSSIBLE
;
247 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
248 return MOVE_IMPOSSIBLE
;
250 if (stmt_ends_bb_p (stmt
))
251 return MOVE_IMPOSSIBLE
;
253 if (stmt_ann (stmt
)->has_volatile_ops
)
254 return MOVE_IMPOSSIBLE
;
256 lhs
= TREE_OPERAND (stmt
, 0);
257 if (TREE_CODE (lhs
) == SSA_NAME
258 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
))
259 return MOVE_IMPOSSIBLE
;
261 rhs
= TREE_OPERAND (stmt
, 1);
263 if (TREE_SIDE_EFFECTS (rhs
))
264 return MOVE_IMPOSSIBLE
;
266 if (TREE_CODE (lhs
) != SSA_NAME
267 || tree_could_trap_p (rhs
))
268 return MOVE_PRESERVE_EXECUTION
;
270 if (get_call_expr_in (stmt
))
272 /* While pure or const call is guaranteed to have no side effects, we
273 cannot move it arbitrarily. Consider code like
275 char *s = something ();
285 Here the strlen call cannot be moved out of the loop, even though
286 s is invariant. In addition to possibly creating a call with
287 invalid arguments, moving out a function call that is not executed
288 may cause performance regressions in case the call is costly and
289 not executed at all. */
290 return MOVE_PRESERVE_EXECUTION
;
292 return MOVE_POSSIBLE
;
295 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
296 loop to that we could move the expression using DEF if it did not have
297 other operands, i.e. the outermost loop enclosing LOOP in that the value
298 of DEF is invariant. */
301 outermost_invariant_loop (tree def
, struct loop
*loop
)
305 struct loop
*max_loop
;
307 if (TREE_CODE (def
) != SSA_NAME
)
308 return superloop_at_depth (loop
, 1);
310 def_stmt
= SSA_NAME_DEF_STMT (def
);
311 def_bb
= bb_for_stmt (def_stmt
);
313 return superloop_at_depth (loop
, 1);
315 max_loop
= find_common_loop (loop
, def_bb
->loop_father
);
317 if (LIM_DATA (def_stmt
) && LIM_DATA (def_stmt
)->max_loop
)
318 max_loop
= find_common_loop (max_loop
,
319 LIM_DATA (def_stmt
)->max_loop
->outer
);
320 if (max_loop
== loop
)
322 max_loop
= superloop_at_depth (loop
, max_loop
->depth
+ 1);
327 /* Returns the outermost superloop of LOOP in that the expression EXPR is
331 outermost_invariant_loop_expr (tree expr
, struct loop
*loop
)
333 enum tree_code_class
class = TREE_CODE_CLASS (TREE_CODE (expr
));
335 struct loop
*max_loop
= superloop_at_depth (loop
, 1), *aloop
;
337 if (TREE_CODE (expr
) == SSA_NAME
338 || TREE_CODE (expr
) == INTEGER_CST
339 || is_gimple_min_invariant (expr
))
340 return outermost_invariant_loop (expr
, loop
);
342 if (class != tcc_unary
343 && class != tcc_binary
344 && class != tcc_expression
345 && class != tcc_comparison
)
348 nops
= TREE_CODE_LENGTH (TREE_CODE (expr
));
349 for (i
= 0; i
< nops
; i
++)
351 aloop
= outermost_invariant_loop_expr (TREE_OPERAND (expr
, i
), loop
);
355 if (flow_loop_nested_p (max_loop
, aloop
))
362 /* DATA is a structure containing information associated with a statement
363 inside LOOP. DEF is one of the operands of this statement.
365 Find the outermost loop enclosing LOOP in that value of DEF is invariant
366 and record this in DATA->max_loop field. If DEF itself is defined inside
367 this loop as well (i.e. we need to hoist it out of the loop if we want
368 to hoist the statement represented by DATA), record the statement in that
369 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
370 add the cost of the computation of DEF to the DATA->cost.
372 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
375 add_dependency (tree def
, struct lim_aux_data
*data
, struct loop
*loop
,
378 tree def_stmt
= SSA_NAME_DEF_STMT (def
);
379 basic_block def_bb
= bb_for_stmt (def_stmt
);
380 struct loop
*max_loop
;
386 max_loop
= outermost_invariant_loop (def
, loop
);
390 if (flow_loop_nested_p (data
->max_loop
, max_loop
))
391 data
->max_loop
= max_loop
;
393 if (!LIM_DATA (def_stmt
))
397 /* Only add the cost if the statement defining DEF is inside LOOP,
398 i.e. if it is likely that by moving the invariants dependent
399 on it, we will be able to avoid creating a new register for
400 it (since it will be only used in these dependent invariants). */
401 && def_bb
->loop_father
== loop
)
402 data
->cost
+= LIM_DATA (def_stmt
)->cost
;
404 dep
= xmalloc (sizeof (struct depend
));
405 dep
->stmt
= def_stmt
;
406 dep
->next
= data
->depends
;
412 /* Returns an estimate for a cost of statement STMT. TODO -- the values here
413 are just ad-hoc constants. The estimates should be based on target-specific
417 stmt_cost (tree stmt
)
422 /* Always try to create possibilities for unswitching. */
423 if (TREE_CODE (stmt
) == COND_EXPR
)
424 return LIM_EXPENSIVE
;
426 rhs
= TREE_OPERAND (stmt
, 1);
428 /* Hoisting memory references out should almost surely be a win. */
429 if (stmt_references_memory_p (stmt
))
432 switch (TREE_CODE (rhs
))
435 /* We should be hoisting calls if possible. */
437 /* Unless the call is a builtin_constant_p; this always folds to a
438 constant, so moving it is useless. */
439 rhs
= get_callee_fndecl (rhs
);
440 if (DECL_BUILT_IN_CLASS (rhs
) == BUILT_IN_NORMAL
441 && DECL_FUNCTION_CODE (rhs
) == BUILT_IN_CONSTANT_P
)
458 /* Division and multiplication are usually expensive. */
469 /* Determine the outermost loop to that it is possible to hoist a statement
470 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
471 the outermost loop in that the value computed by STMT is invariant.
472 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
473 we preserve the fact whether STMT is executed. It also fills other related
474 information to LIM_DATA (STMT).
476 The function returns false if STMT cannot be hoisted outside of the loop it
477 is defined in, and true otherwise. */
480 determine_max_movement (tree stmt
, bool must_preserve_exec
)
482 basic_block bb
= bb_for_stmt (stmt
);
483 struct loop
*loop
= bb
->loop_father
;
485 struct lim_aux_data
*lim_data
= LIM_DATA (stmt
);
489 if (must_preserve_exec
)
490 level
= ALWAYS_EXECUTED_IN (bb
);
492 level
= superloop_at_depth (loop
, 1);
493 lim_data
->max_loop
= level
;
495 FOR_EACH_SSA_TREE_OPERAND (val
, stmt
, iter
, SSA_OP_USE
)
496 if (!add_dependency (val
, lim_data
, loop
, true))
499 FOR_EACH_SSA_TREE_OPERAND (val
, stmt
, iter
, SSA_OP_VIRTUAL_USES
| SSA_OP_VIRTUAL_KILLS
)
500 if (!add_dependency (val
, lim_data
, loop
, false))
503 lim_data
->cost
+= stmt_cost (stmt
);
508 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
509 and that one of the operands of this statement is computed by STMT.
510 Ensure that STMT (together with all the statements that define its
511 operands) is hoisted at least out of the loop LEVEL. */
514 set_level (tree stmt
, struct loop
*orig_loop
, struct loop
*level
)
516 struct loop
*stmt_loop
= bb_for_stmt (stmt
)->loop_father
;
519 stmt_loop
= find_common_loop (orig_loop
, stmt_loop
);
520 if (LIM_DATA (stmt
) && LIM_DATA (stmt
)->tgt_loop
)
521 stmt_loop
= find_common_loop (stmt_loop
,
522 LIM_DATA (stmt
)->tgt_loop
->outer
);
523 if (flow_loop_nested_p (stmt_loop
, level
))
526 gcc_assert (LIM_DATA (stmt
));
527 gcc_assert (level
== LIM_DATA (stmt
)->max_loop
528 || flow_loop_nested_p (LIM_DATA (stmt
)->max_loop
, level
));
530 LIM_DATA (stmt
)->tgt_loop
= level
;
531 for (dep
= LIM_DATA (stmt
)->depends
; dep
; dep
= dep
->next
)
532 set_level (dep
->stmt
, orig_loop
, level
);
535 /* Determines an outermost loop from that we want to hoist the statement STMT.
536 For now we chose the outermost possible loop. TODO -- use profiling
537 information to set it more sanely. */
540 set_profitable_level (tree stmt
)
542 set_level (stmt
, bb_for_stmt (stmt
)->loop_father
, LIM_DATA (stmt
)->max_loop
);
545 /* Returns true if STMT is not a pure call. */
548 nonpure_call_p (tree stmt
)
550 tree call
= get_call_expr_in (stmt
);
555 return TREE_SIDE_EFFECTS (call
) != 0;
558 /* Releases the memory occupied by DATA. */
561 free_lim_aux_data (struct lim_aux_data
*data
)
563 struct depend
*dep
, *next
;
565 for (dep
= data
->depends
; dep
; dep
= next
)
573 /* Determine the outermost loops in that statements in basic block BB are
574 invariant, and record them to the LIM_DATA associated with the statements.
575 Callback for walk_dominator_tree. */
578 determine_invariantness_stmt (struct dom_walk_data
*dw_data ATTRIBUTE_UNUSED
,
582 block_stmt_iterator bsi
;
584 bool maybe_never
= ALWAYS_EXECUTED_IN (bb
) == NULL
;
585 struct loop
*outermost
= ALWAYS_EXECUTED_IN (bb
);
587 if (!bb
->loop_father
->outer
)
590 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
591 fprintf (dump_file
, "Basic block %d (loop %d -- depth %d):\n\n",
592 bb
->index
, bb
->loop_father
->num
, bb
->loop_father
->depth
);
594 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
596 stmt
= bsi_stmt (bsi
);
598 pos
= movement_possibility (stmt
);
599 if (pos
== MOVE_IMPOSSIBLE
)
601 if (nonpure_call_p (stmt
))
609 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
610 to be hoisted out of loop, saving expensive divide. */
611 if (pos
== MOVE_POSSIBLE
612 && (rhs
= TREE_OPERAND (stmt
, 1)) != NULL
613 && TREE_CODE (rhs
) == RDIV_EXPR
614 && flag_unsafe_math_optimizations
615 && !flag_trapping_math
616 && outermost_invariant_loop_expr (TREE_OPERAND (rhs
, 1),
617 loop_containing_stmt (stmt
)) != NULL
618 && outermost_invariant_loop_expr (rhs
,
619 loop_containing_stmt (stmt
)) == NULL
)
621 tree lhs
, stmt1
, stmt2
, var
, name
;
623 lhs
= TREE_OPERAND (stmt
, 0);
625 /* stmt must be MODIFY_EXPR. */
626 var
= create_tmp_var (TREE_TYPE (rhs
), "reciptmp");
627 add_referenced_tmp_var (var
);
629 stmt1
= build2 (MODIFY_EXPR
, void_type_node
, var
,
630 build2 (RDIV_EXPR
, TREE_TYPE (rhs
),
631 build_real (TREE_TYPE (rhs
), dconst1
),
632 TREE_OPERAND (rhs
, 1)));
633 name
= make_ssa_name (var
, stmt1
);
634 TREE_OPERAND (stmt1
, 0) = name
;
635 stmt2
= build2 (MODIFY_EXPR
, void_type_node
, lhs
,
636 build2 (MULT_EXPR
, TREE_TYPE (rhs
),
637 name
, TREE_OPERAND (rhs
, 0)));
639 /* Replace division stmt with reciprocal and multiply stmts.
640 The multiply stmt is not invariant, so update iterator
641 and avoid rescanning. */
642 bsi_replace (&bsi
, stmt1
, true);
643 bsi_insert_after (&bsi
, stmt2
, BSI_NEW_STMT
);
644 SSA_NAME_DEF_STMT (lhs
) = stmt2
;
646 /* Continue processing with invariant reciprocal statement. */
650 stmt_ann (stmt
)->common
.aux
= xcalloc (1, sizeof (struct lim_aux_data
));
651 LIM_DATA (stmt
)->always_executed_in
= outermost
;
653 if (maybe_never
&& pos
== MOVE_PRESERVE_EXECUTION
)
656 if (!determine_max_movement (stmt
, pos
== MOVE_PRESERVE_EXECUTION
))
658 LIM_DATA (stmt
)->max_loop
= NULL
;
662 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
664 print_generic_stmt_indented (dump_file
, stmt
, 0, 2);
665 fprintf (dump_file
, " invariant up to level %d, cost %d.\n\n",
666 LIM_DATA (stmt
)->max_loop
->depth
,
667 LIM_DATA (stmt
)->cost
);
670 if (LIM_DATA (stmt
)->cost
>= LIM_EXPENSIVE
)
671 set_profitable_level (stmt
);
675 /* For each statement determines the outermost loop in that it is invariant,
676 statements on whose motion it depends and the cost of the computation.
677 This information is stored to the LIM_DATA structure associated with
681 determine_invariantness (void)
683 struct dom_walk_data walk_data
;
685 memset (&walk_data
, 0, sizeof (struct dom_walk_data
));
686 walk_data
.before_dom_children_before_stmts
= determine_invariantness_stmt
;
688 init_walk_dominator_tree (&walk_data
);
689 walk_dominator_tree (&walk_data
, ENTRY_BLOCK_PTR
);
690 fini_walk_dominator_tree (&walk_data
);
693 /* Commits edge insertions and updates loop structures. */
696 loop_commit_inserts (void)
698 unsigned old_last_basic_block
, i
;
701 old_last_basic_block
= last_basic_block
;
702 bsi_commit_edge_inserts ();
703 for (i
= old_last_basic_block
; i
< (unsigned) last_basic_block
; i
++)
705 bb
= BASIC_BLOCK (i
);
707 find_common_loop (single_pred (bb
)->loop_father
,
708 single_succ (bb
)->loop_father
));
712 /* Hoist the statements in basic block BB out of the loops prescribed by
713 data stored in LIM_DATA structures associated with each statement. Callback
714 for walk_dominator_tree. */
717 move_computations_stmt (struct dom_walk_data
*dw_data ATTRIBUTE_UNUSED
,
721 block_stmt_iterator bsi
;
725 if (!bb
->loop_father
->outer
)
728 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); )
730 stmt
= bsi_stmt (bsi
);
732 if (!LIM_DATA (stmt
))
738 cost
= LIM_DATA (stmt
)->cost
;
739 level
= LIM_DATA (stmt
)->tgt_loop
;
740 free_lim_aux_data (LIM_DATA (stmt
));
741 stmt_ann (stmt
)->common
.aux
= NULL
;
749 /* We do not really want to move conditionals out of the loop; we just
750 placed it here to force its operands to be moved if necessary. */
751 if (TREE_CODE (stmt
) == COND_EXPR
)
754 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
756 fprintf (dump_file
, "Moving statement\n");
757 print_generic_stmt (dump_file
, stmt
, 0);
758 fprintf (dump_file
, "(cost %u) out of loop %d.\n\n",
761 bsi_insert_on_edge (loop_preheader_edge (level
), stmt
);
762 bsi_remove (&bsi
, false);
766 /* Hoist the statements out of the loops prescribed by data stored in
767 LIM_DATA structures associated with each statement.*/
770 move_computations (void)
772 struct dom_walk_data walk_data
;
774 memset (&walk_data
, 0, sizeof (struct dom_walk_data
));
775 walk_data
.before_dom_children_before_stmts
= move_computations_stmt
;
777 init_walk_dominator_tree (&walk_data
);
778 walk_dominator_tree (&walk_data
, ENTRY_BLOCK_PTR
);
779 fini_walk_dominator_tree (&walk_data
);
781 loop_commit_inserts ();
782 if (need_ssa_update_p ())
783 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
786 /* Checks whether the statement defining variable *INDEX can be hoisted
787 out of the loop passed in DATA. Callback for for_each_index. */
790 may_move_till (tree ref
, tree
*index
, void *data
)
792 struct loop
*loop
= data
, *max_loop
;
794 /* If REF is an array reference, check also that the step and the lower
795 bound is invariant in LOOP. */
796 if (TREE_CODE (ref
) == ARRAY_REF
)
798 tree step
= array_ref_element_size (ref
);
799 tree lbound
= array_ref_low_bound (ref
);
801 max_loop
= outermost_invariant_loop_expr (step
, loop
);
805 max_loop
= outermost_invariant_loop_expr (lbound
, loop
);
810 max_loop
= outermost_invariant_loop (*index
, loop
);
817 /* Forces statements defining (invariant) SSA names in expression EXPR to be
818 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
821 force_move_till_expr (tree expr
, struct loop
*orig_loop
, struct loop
*loop
)
823 enum tree_code_class
class = TREE_CODE_CLASS (TREE_CODE (expr
));
826 if (TREE_CODE (expr
) == SSA_NAME
)
828 tree stmt
= SSA_NAME_DEF_STMT (expr
);
829 if (IS_EMPTY_STMT (stmt
))
832 set_level (stmt
, orig_loop
, loop
);
836 if (class != tcc_unary
837 && class != tcc_binary
838 && class != tcc_expression
839 && class != tcc_comparison
)
842 nops
= TREE_CODE_LENGTH (TREE_CODE (expr
));
843 for (i
= 0; i
< nops
; i
++)
844 force_move_till_expr (TREE_OPERAND (expr
, i
), orig_loop
, loop
);
847 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
848 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
854 struct loop
*orig_loop
;
858 force_move_till (tree ref
, tree
*index
, void *data
)
861 struct fmt_data
*fmt_data
= data
;
863 if (TREE_CODE (ref
) == ARRAY_REF
)
865 tree step
= array_ref_element_size (ref
);
866 tree lbound
= array_ref_low_bound (ref
);
868 force_move_till_expr (step
, fmt_data
->orig_loop
, fmt_data
->loop
);
869 force_move_till_expr (lbound
, fmt_data
->orig_loop
, fmt_data
->loop
);
872 if (TREE_CODE (*index
) != SSA_NAME
)
875 stmt
= SSA_NAME_DEF_STMT (*index
);
876 if (IS_EMPTY_STMT (stmt
))
879 set_level (stmt
, fmt_data
->orig_loop
, fmt_data
->loop
);
884 /* Records memory reference location *REF to the list MEM_REFS. The reference
885 occurs in statement STMT. */
888 record_mem_ref_loc (struct mem_ref_loc
**mem_refs
, tree stmt
, tree
*ref
)
890 struct mem_ref_loc
*aref
= xmalloc (sizeof (struct mem_ref_loc
));
895 aref
->next
= *mem_refs
;
899 /* Releases list of memory reference locations MEM_REFS. */
902 free_mem_ref_locs (struct mem_ref_loc
*mem_refs
)
904 struct mem_ref_loc
*act
;
909 mem_refs
= mem_refs
->next
;
914 /* Rewrites memory references in list MEM_REFS by variable TMP_VAR. */
917 rewrite_mem_refs (tree tmp_var
, struct mem_ref_loc
*mem_refs
)
922 for (; mem_refs
; mem_refs
= mem_refs
->next
)
924 FOR_EACH_SSA_TREE_OPERAND (var
, mem_refs
->stmt
, iter
, SSA_OP_ALL_VIRTUALS
)
925 mark_sym_for_renaming (SSA_NAME_VAR (var
));
927 *mem_refs
->ref
= tmp_var
;
928 update_stmt (mem_refs
->stmt
);
932 /* The name and the length of the currently generated variable
934 #define MAX_LSM_NAME_LENGTH 40
935 static char lsm_tmp_name
[MAX_LSM_NAME_LENGTH
+ 1];
936 static int lsm_tmp_name_length
;
938 /* Adds S to lsm_tmp_name. */
941 lsm_tmp_name_add (const char *s
)
943 int l
= strlen (s
) + lsm_tmp_name_length
;
944 if (l
> MAX_LSM_NAME_LENGTH
)
947 strcpy (lsm_tmp_name
+ lsm_tmp_name_length
, s
);
948 lsm_tmp_name_length
= l
;
951 /* Stores the name for temporary variable that replaces REF to
955 gen_lsm_tmp_name (tree ref
)
959 switch (TREE_CODE (ref
))
961 case MISALIGNED_INDIRECT_REF
:
962 case ALIGN_INDIRECT_REF
:
964 gen_lsm_tmp_name (TREE_OPERAND (ref
, 0));
965 lsm_tmp_name_add ("_");
969 case VIEW_CONVERT_EXPR
:
970 case ARRAY_RANGE_REF
:
971 gen_lsm_tmp_name (TREE_OPERAND (ref
, 0));
975 gen_lsm_tmp_name (TREE_OPERAND (ref
, 0));
976 lsm_tmp_name_add ("_RE");
980 gen_lsm_tmp_name (TREE_OPERAND (ref
, 0));
981 lsm_tmp_name_add ("_IM");
985 gen_lsm_tmp_name (TREE_OPERAND (ref
, 0));
986 lsm_tmp_name_add ("_");
987 name
= get_name (TREE_OPERAND (ref
, 1));
990 lsm_tmp_name_add ("_");
991 lsm_tmp_name_add (name
);
994 gen_lsm_tmp_name (TREE_OPERAND (ref
, 0));
995 lsm_tmp_name_add ("_I");
999 ref
= SSA_NAME_VAR (ref
);
1004 name
= get_name (ref
);
1007 lsm_tmp_name_add (name
);
1011 lsm_tmp_name_add ("S");
1015 lsm_tmp_name_add ("R");
1023 /* Determines name for temporary variable that replaces REF.
1024 The name is accumulated into the lsm_tmp_name variable. */
1027 get_lsm_tmp_name (tree ref
)
1029 lsm_tmp_name_length
= 0;
1030 gen_lsm_tmp_name (ref
);
1031 lsm_tmp_name_add ("_lsm");
1032 return lsm_tmp_name
;
1035 /* Records request for store motion of memory reference REF from LOOP.
1036 MEM_REFS is the list of occurrences of the reference REF inside LOOP;
1037 these references are rewritten by a new temporary variable.
1038 Exits from the LOOP are stored in EXITS, there are N_EXITS of them.
1039 The initialization of the temporary variable is put to the preheader
1040 of the loop, and assignments to the reference from the temporary variable
1041 are emitted to exits. */
1044 schedule_sm (struct loop
*loop
, edge
*exits
, unsigned n_exits
, tree ref
,
1045 struct mem_ref_loc
*mem_refs
)
1047 struct mem_ref_loc
*aref
;
1051 struct fmt_data fmt_data
;
1053 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1055 fprintf (dump_file
, "Executing store motion of ");
1056 print_generic_expr (dump_file
, ref
, 0);
1057 fprintf (dump_file
, " from loop %d\n", loop
->num
);
1060 tmp_var
= make_rename_temp (TREE_TYPE (ref
),
1061 get_lsm_tmp_name (ref
));
1063 fmt_data
.loop
= loop
;
1064 fmt_data
.orig_loop
= loop
;
1065 for_each_index (&ref
, force_move_till
, &fmt_data
);
1067 rewrite_mem_refs (tmp_var
, mem_refs
);
1068 for (aref
= mem_refs
; aref
; aref
= aref
->next
)
1069 if (LIM_DATA (aref
->stmt
))
1070 LIM_DATA (aref
->stmt
)->sm_done
= true;
1072 /* Emit the load & stores. */
1073 load
= build2 (MODIFY_EXPR
, void_type_node
, tmp_var
, ref
);
1074 get_stmt_ann (load
)->common
.aux
= xcalloc (1, sizeof (struct lim_aux_data
));
1075 LIM_DATA (load
)->max_loop
= loop
;
1076 LIM_DATA (load
)->tgt_loop
= loop
;
1078 /* Put this into the latch, so that we are sure it will be processed after
1079 all dependencies. */
1080 bsi_insert_on_edge (loop_latch_edge (loop
), load
);
1082 for (i
= 0; i
< n_exits
; i
++)
1084 store
= build2 (MODIFY_EXPR
, void_type_node
,
1085 unshare_expr (ref
), tmp_var
);
1086 bsi_insert_on_edge (exits
[i
], store
);
1090 /* Check whether memory reference REF can be hoisted out of the LOOP. If this
1091 is true, prepare the statements that load the value of the memory reference
1092 to a temporary variable in the loop preheader, store it back on the loop
1093 exits, and replace all the references inside LOOP by this temporary variable.
1094 LOOP has N_EXITS stored in EXITS. CLOBBERED_VOPS is the bitmap of virtual
1095 operands that are clobbered by a call or accessed through multiple references
1099 determine_lsm_ref (struct loop
*loop
, edge
*exits
, unsigned n_exits
,
1100 bitmap clobbered_vops
, struct mem_ref
*ref
)
1102 struct mem_ref_loc
*aref
;
1103 struct loop
*must_exec
;
1105 /* In case the memory is not stored to, there is nothing for SM to do. */
1106 if (!ref
->is_stored
)
1109 /* If the reference is aliased with any different ref, or killed by call
1110 in function, then fail. */
1111 if (bitmap_intersect_p (ref
->vops
, clobbered_vops
))
1114 if (tree_could_trap_p (ref
->mem
))
1116 /* If the memory access is unsafe (i.e. it might trap), ensure that some
1117 of the statements in that it occurs is always executed when the loop
1118 is entered. This way we know that by moving the load from the
1119 reference out of the loop we will not cause the error that would not
1122 TODO -- in fact we would like to check for anticipability of the
1123 reference, i.e. that on each path from loop entry to loop exit at
1124 least one of the statements containing the memory reference is
1127 for (aref
= ref
->locs
; aref
; aref
= aref
->next
)
1129 if (!LIM_DATA (aref
->stmt
))
1132 must_exec
= LIM_DATA (aref
->stmt
)->always_executed_in
;
1136 if (must_exec
== loop
1137 || flow_loop_nested_p (must_exec
, loop
))
1145 schedule_sm (loop
, exits
, n_exits
, ref
->mem
, ref
->locs
);
1148 /* Hoists memory references MEM_REFS out of LOOP. CLOBBERED_VOPS is the list
1149 of vops clobbered by call in loop or accessed by multiple memory references.
1150 EXITS is the list of N_EXITS exit edges of the LOOP. */
1153 hoist_memory_references (struct loop
*loop
, struct mem_ref
*mem_refs
,
1154 bitmap clobbered_vops
, edge
*exits
, unsigned n_exits
)
1156 struct mem_ref
*ref
;
1158 for (ref
= mem_refs
; ref
; ref
= ref
->next
)
1159 determine_lsm_ref (loop
, exits
, n_exits
, clobbered_vops
, ref
);
1162 /* Checks whether LOOP (with N_EXITS exits stored in EXITS array) is suitable
1163 for a store motion optimization (i.e. whether we can insert statement
1167 loop_suitable_for_sm (struct loop
*loop ATTRIBUTE_UNUSED
, edge
*exits
,
1172 for (i
= 0; i
< n_exits
; i
++)
1173 if (exits
[i
]->flags
& EDGE_ABNORMAL
)
1179 /* A hash function for struct mem_ref object OBJ. */
1182 memref_hash (const void *obj
)
1184 const struct mem_ref
*mem
= obj
;
1189 /* An equality function for struct mem_ref object OBJ1 with
1190 memory reference OBJ2. */
1193 memref_eq (const void *obj1
, const void *obj2
)
1195 const struct mem_ref
*mem1
= obj1
;
1197 return operand_equal_p (mem1
->mem
, (tree
) obj2
, 0);
1200 /* Gathers memory references in statement STMT in LOOP, storing the
1201 information about them in MEM_REFS hash table. Note vops accessed through
1202 unrecognized statements in CLOBBERED_VOPS. The newly created references
1203 are also stored to MEM_REF_LIST. */
1206 gather_mem_refs_stmt (struct loop
*loop
, htab_t mem_refs
,
1207 bitmap clobbered_vops
, tree stmt
,
1208 struct mem_ref
**mem_ref_list
)
1210 tree
*lhs
, *rhs
, *mem
= NULL
;
1213 struct mem_ref
*ref
= NULL
;
1218 if (ZERO_SSA_OPERANDS (stmt
, SSA_OP_ALL_VIRTUALS
))
1221 /* Recognize MEM = (SSA_NAME | invariant) and SSA_NAME = MEM patterns. */
1222 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1225 lhs
= &TREE_OPERAND (stmt
, 0);
1226 rhs
= &TREE_OPERAND (stmt
, 1);
1228 if (TREE_CODE (*lhs
) == SSA_NAME
)
1230 if (!is_gimple_addressable (*rhs
))
1236 else if (TREE_CODE (*rhs
) == SSA_NAME
1237 || is_gimple_min_invariant (*rhs
))
1245 /* If we cannot create an SSA name for the result, give up. */
1246 if (!is_gimple_reg_type (TREE_TYPE (*mem
))
1247 || TREE_THIS_VOLATILE (*mem
))
1250 /* If we cannot move the reference out of the loop, fail. */
1251 if (!for_each_index (mem
, may_move_till
, loop
))
1254 hash
= iterative_hash_expr (*mem
, 0);
1255 slot
= htab_find_slot_with_hash (mem_refs
, *mem
, hash
, INSERT
);
1261 ref
= xmalloc (sizeof (struct mem_ref
));
1265 ref
->is_stored
= false;
1266 ref
->vops
= BITMAP_ALLOC (NULL
);
1267 ref
->next
= *mem_ref_list
;
1268 *mem_ref_list
= ref
;
1271 ref
->is_stored
|= is_stored
;
1273 FOR_EACH_SSA_TREE_OPERAND (vname
, stmt
, oi
,
1274 SSA_OP_VIRTUAL_USES
| SSA_OP_VIRTUAL_KILLS
)
1275 bitmap_set_bit (ref
->vops
, DECL_UID (SSA_NAME_VAR (vname
)));
1276 record_mem_ref_loc (&ref
->locs
, stmt
, mem
);
1280 FOR_EACH_SSA_TREE_OPERAND (vname
, stmt
, oi
,
1281 SSA_OP_VIRTUAL_USES
| SSA_OP_VIRTUAL_KILLS
)
1282 bitmap_set_bit (clobbered_vops
, DECL_UID (SSA_NAME_VAR (vname
)));
1285 /* Gathers memory references in LOOP. Notes vops accessed through unrecognized
1286 statements in CLOBBERED_VOPS. The list of the references found by
1287 the function is returned. */
1289 static struct mem_ref
*
1290 gather_mem_refs (struct loop
*loop
, bitmap clobbered_vops
)
1292 basic_block
*body
= get_loop_body (loop
);
1293 block_stmt_iterator bsi
;
1295 struct mem_ref
*mem_ref_list
= NULL
;
1296 htab_t mem_refs
= htab_create (100, memref_hash
, memref_eq
, NULL
);
1298 for (i
= 0; i
< loop
->num_nodes
; i
++)
1300 for (bsi
= bsi_start (body
[i
]); !bsi_end_p (bsi
); bsi_next (&bsi
))
1301 gather_mem_refs_stmt (loop
, mem_refs
, clobbered_vops
, bsi_stmt (bsi
),
1307 htab_delete (mem_refs
);
1308 return mem_ref_list
;
1311 /* Finds the vops accessed by more than one of the memory references described
1312 in MEM_REFS and marks them in CLOBBERED_VOPS. */
1315 find_more_ref_vops (struct mem_ref
*mem_refs
, bitmap clobbered_vops
)
1317 bitmap_head tmp
, all_vops
;
1318 struct mem_ref
*ref
;
1320 bitmap_initialize (&tmp
, &bitmap_default_obstack
);
1321 bitmap_initialize (&all_vops
, &bitmap_default_obstack
);
1323 for (ref
= mem_refs
; ref
; ref
= ref
->next
)
1325 /* The vops that are already in all_vops are accessed by more than
1326 one memory reference. */
1327 bitmap_and (&tmp
, &all_vops
, ref
->vops
);
1328 bitmap_ior_into (clobbered_vops
, &tmp
);
1329 bitmap_clear (&tmp
);
1331 bitmap_ior_into (&all_vops
, ref
->vops
);
1334 bitmap_clear (&all_vops
);
1337 /* Releases the memory occupied by REF. */
1340 free_mem_ref (struct mem_ref
*ref
)
1342 free_mem_ref_locs (ref
->locs
);
1343 BITMAP_FREE (ref
->vops
);
1347 /* Releases the memory occupied by REFS. */
1350 free_mem_refs (struct mem_ref
*refs
)
1352 struct mem_ref
*ref
, *next
;
1354 for (ref
= refs
; ref
; ref
= next
)
1361 /* Try to perform store motion for all memory references modified inside
1365 determine_lsm_loop (struct loop
*loop
)
1368 edge
*exits
= get_loop_exit_edges (loop
, &n_exits
);
1369 bitmap clobbered_vops
;
1370 struct mem_ref
*mem_refs
;
1372 if (!loop_suitable_for_sm (loop
, exits
, n_exits
))
1378 /* Find the memory references in LOOP. */
1379 clobbered_vops
= BITMAP_ALLOC (NULL
);
1380 mem_refs
= gather_mem_refs (loop
, clobbered_vops
);
1382 /* Find the vops that are used for more than one reference. */
1383 find_more_ref_vops (mem_refs
, clobbered_vops
);
1385 /* Hoist all suitable memory references. */
1386 hoist_memory_references (loop
, mem_refs
, clobbered_vops
, exits
, n_exits
);
1388 free_mem_refs (mem_refs
);
1390 BITMAP_FREE (clobbered_vops
);
1393 /* Try to perform store motion for all memory references modified inside
1397 determine_lsm (struct loops
*loops
)
1401 if (!loops
->tree_root
->inner
)
1404 /* Pass the loops from the outermost and perform the store motion as
1407 loop
= loops
->tree_root
->inner
;
1410 determine_lsm_loop (loop
);
1420 if (loop
== loops
->tree_root
)
1422 loop_commit_inserts ();
1430 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
1431 for each such basic block bb records the outermost loop for that execution
1432 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
1433 blocks that contain a nonpure call. */
1436 fill_always_executed_in (struct loop
*loop
, sbitmap contains_call
)
1438 basic_block bb
= NULL
, *bbs
, last
= NULL
;
1441 struct loop
*inn_loop
= loop
;
1443 if (!loop
->header
->aux
)
1445 bbs
= get_loop_body_in_dom_order (loop
);
1447 for (i
= 0; i
< loop
->num_nodes
; i
++)
1452 if (dominated_by_p (CDI_DOMINATORS
, loop
->latch
, bb
))
1455 if (TEST_BIT (contains_call
, bb
->index
))
1458 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1459 if (!flow_bb_inside_loop_p (loop
, e
->dest
))
1464 /* A loop might be infinite (TODO use simple loop analysis
1465 to disprove this if possible). */
1466 if (bb
->flags
& BB_IRREDUCIBLE_LOOP
)
1469 if (!flow_bb_inside_loop_p (inn_loop
, bb
))
1472 if (bb
->loop_father
->header
== bb
)
1474 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, bb
))
1477 /* In a loop that is always entered we may proceed anyway.
1478 But record that we entered it and stop once we leave it. */
1479 inn_loop
= bb
->loop_father
;
1486 if (last
== loop
->header
)
1488 last
= get_immediate_dominator (CDI_DOMINATORS
, last
);
1494 for (loop
= loop
->inner
; loop
; loop
= loop
->next
)
1495 fill_always_executed_in (loop
, contains_call
);
1498 /* Compute the global information needed by the loop invariant motion pass.
1499 LOOPS is the loop tree. */
1502 tree_ssa_lim_initialize (struct loops
*loops
)
1504 sbitmap contains_call
= sbitmap_alloc (last_basic_block
);
1505 block_stmt_iterator bsi
;
1509 sbitmap_zero (contains_call
);
1512 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1514 if (nonpure_call_p (bsi_stmt (bsi
)))
1518 if (!bsi_end_p (bsi
))
1519 SET_BIT (contains_call
, bb
->index
);
1522 for (loop
= loops
->tree_root
->inner
; loop
; loop
= loop
->next
)
1523 fill_always_executed_in (loop
, contains_call
);
1525 sbitmap_free (contains_call
);
1528 /* Cleans up after the invariant motion pass. */
1531 tree_ssa_lim_finalize (void)
1541 /* Moves invariants from LOOPS. Only "expensive" invariants are moved out --
1542 i.e. those that are likely to be win regardless of the register pressure. */
1545 tree_ssa_lim (struct loops
*loops
)
1547 tree_ssa_lim_initialize (loops
);
1549 /* For each statement determine the outermost loop in that it is
1550 invariant and cost for computing the invariant. */
1551 determine_invariantness ();
1553 /* For each memory reference determine whether it is possible to hoist it
1554 out of the loop. Force the necessary invariants to be moved out of the
1556 determine_lsm (loops
);
1558 /* Move the expressions that are expensive enough. */
1559 move_computations ();
1561 tree_ssa_lim_finalize ();