/* Loop invariant motion.
   Copyright (C) 2003-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "trans-mem.h"
#include "gimple-fold.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-loop-niter.h"
/* TODO:  Support for predicated code motion, i.e. hoisting a computation
   A = INV that is only executed under a condition COND inside the loop,
   where COND and INV are invariants, but evaluating INV may trap or be
   invalid for some other reason if !COND.  This may be transformed so
   that INV is evaluated outside the loop, but only under COND.  */
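
/* A hedged illustration of that TODO (the loop shape and the names COND and
   INV are made up for this sketch; the original comment shows the same idea
   on a concrete example):

     while (1)                      if (cond)
       {                              inv_tmp = inv;
         if (cond)            =>    while (1)
           a = inv;                   {
         ...                            if (cond)
       }                                  a = inv_tmp;
                                        ...
                                      }

   INV is evaluated once before the loop, but only when COND holds, so no
   new traps can be introduced.  */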
/* The auxiliary data kept for each statement.  */

struct lim_aux_data
{
  struct loop *max_loop;	/* The outermost loop in that the statement
				   is invariant.  */

  struct loop *tgt_loop;	/* The loop out of that we want to move the
				   invariant.  */

  struct loop *always_executed_in;
				/* The outermost loop for that we are sure
				   the statement is executed if the loop
				   is entered.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */

  vec<gimple *> depends;	/* Vector of statements that must be also
				   hoisted out of the loop when this statement
				   is hoisted; i.e. those that define the
				   operands of the statement and are inside of
				   the MAX_LOOP loop.  */
};

/* Maps statements to their lim_aux_data.  */

static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
/* Description of a memory reference location.  */

struct mem_ref_loc
{
  tree *ref;			/* The reference itself.  */
  gimple *stmt;			/* The statement in that it occurs.  */
};
/* Description of a memory reference.  */

struct im_mem_ref
{
  unsigned id : 31;		/* ID assigned to the memory reference
				   (its index in memory_accesses.refs_list)  */
  unsigned ref_canonical : 1;   /* Whether mem.ref was canonicalized.  */
  hashval_t hash;		/* Its hash value.  */

  /* The memory access itself and associated caching of alias-oracle
     query meta-data.  */
  ao_ref mem;

  bitmap stored;		/* The set of loops in that this memory location
				   is stored to.  */
  vec<mem_ref_loc> accesses_in_loop;
				/* The locations of the accesses.  Vector
				   indexed by the loop number.  */

  /* The following sets are computed on demand.  We keep both set and
     its complement, so that we know whether the information was
     already computed or not.  */
  bitmap_head indep_loop;	/* The set of loops in that the memory
				   reference is independent, meaning:
				   If it is stored in the loop, this store
				     is independent on all other loads and
				     stores.
				   If it is only loaded, then it is independent
				     on all stores in the loop.  */
  bitmap_head dep_loop;		/* The complement of INDEP_LOOP.  */
};
/* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
   to record (in)dependence against stores in the loop and its subloops, the
   second to record (in)dependence against all references in the loop
   and its subloops.  */
#define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
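
/* Illustrative only (the loop number is made up): for a loop with num == 3,

     LOOP_DEP_BIT (3, false) == 6   -- (in)dependence against stores only
     LOOP_DEP_BIT (3, true)  == 7   -- (in)dependence against all references

   so each loop owns an adjacent pair of bits in indep_loop/dep_loop.  */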
/* Mem_ref hashtable helpers.  */

struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
{
  typedef ao_ref *compare_type;
  static inline hashval_t hash (const im_mem_ref *);
  static inline bool equal (const im_mem_ref *, const ao_ref *);
};
/* A hash function for struct im_mem_ref object OBJ.  */

hashval_t
mem_ref_hasher::hash (const im_mem_ref *mem)
{
  return mem->hash;
}
/* An equality function for struct im_mem_ref object MEM1 with
   memory reference OBJ2.  */

bool
mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
{
  if (obj2->max_size_known_p ())
    return (operand_equal_p (mem1->mem.base, obj2->base, 0)
	    && known_eq (mem1->mem.offset, obj2->offset)
	    && known_eq (mem1->mem.size, obj2->size)
	    && known_eq (mem1->mem.max_size, obj2->max_size)
	    && mem1->mem.volatile_p == obj2->volatile_p
	    && (mem1->mem.ref_alias_set == obj2->ref_alias_set
		/* We are not canonicalizing alias-sets but for the
		   special-case we didn't canonicalize yet and the
		   incoming ref is an alias-set zero MEM we pick
		   the correct one already.  */
		|| (!mem1->ref_canonical
		    && (TREE_CODE (obj2->ref) == MEM_REF
			|| TREE_CODE (obj2->ref) == TARGET_MEM_REF)
		    && obj2->ref_alias_set == 0)
		/* Likewise if there's a canonical ref with alias-set zero.  */
		|| (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
	    && types_compatible_p (TREE_TYPE (mem1->mem.ref),
				   TREE_TYPE (obj2->ref)));
  else
    return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
}
/* Description of memory accesses in loops.  */

static struct
{
  /* The hash table of memory references accessed in loops.  */
  hash_table<mem_ref_hasher> *refs;

  /* The list of memory references.  */
  vec<im_mem_ref *> refs_list;

  /* The set of memory references accessed in each loop.  */
  vec<bitmap_head> refs_in_loop;

  /* The set of memory references stored in each loop.  */
  vec<bitmap_head> refs_stored_in_loop;

  /* The set of memory references stored in each loop, including subloops.  */
  vec<bitmap_head> all_refs_stored_in_loop;

  /* Cache for expanding memory addresses.  */
  hash_map<tree, name_expansion *> *ttae_cache;
} memory_accesses;

/* Obstack for the bitmaps in the above data structures.  */
static bitmap_obstack lim_bitmap_obstack;
static obstack mem_ref_obstack;
static bool ref_indep_loop_p (struct loop *, im_mem_ref *);
static bool ref_always_accessed_p (struct loop *, im_mem_ref *, bool);

/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  */
#define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))

/* ID of the shared unanalyzable mem.  */
#define UNANALYZABLE_MEM_ID 0

/* Whether the reference was analyzable.  */
#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
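
/* Illustrative only: the shared unanalyzable reference is kept at index
   UNANALYZABLE_MEM_ID (0) of memory_accesses.refs_list, so MEM_ANALYZABLE
   is false exactly for that entry and true for every reference allocated
   later with a nonzero id.  */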
static struct lim_aux_data *
init_lim_data (gimple *stmt)
{
  lim_aux_data *p = XCNEW (struct lim_aux_data);
  lim_aux_data_map->put (stmt, p);

  return p;
}

static struct lim_aux_data *
get_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return NULL;

  return *p;
}

/* Releases the memory occupied by DATA.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  data->depends.release ();
  free (data);
}

static void
clear_lim_data (gimple *stmt)
{
  lim_aux_data **p = lim_aux_data_map->get (stmt);
  if (!p)
    return;

  free_lim_aux_data (*p);
  lim_aux_data_map->remove (stmt);
}
/* The possibilities of statement movement.  */
enum move_pos
  {
    MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
    MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
				   to become executed -- memory accesses, ... */
    MOVE_POSSIBLE		/* Unlimited movement.  */
  };
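
/* Illustrative only: a call to a pure function such as strlen is
   MOVE_PRESERVE_EXECUTION (it must not start executing on iterations where
   it originally did not), an assignment that could trap is likewise
   MOVE_PRESERVE_EXECUTION, a simple invariant arithmetic statement is
   MOVE_POSSIBLE, and a statement with volatile operands is
   MOVE_IMPOSSIBLE.  */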
/* If it is possible to hoist the statement STMT unconditionally,
   returns MOVE_POSSIBLE.
   If it is possible to hoist the statement STMT, but we must avoid making
   it executed if it would not be executed in the original program (e.g.
   because it may trap), return MOVE_PRESERVE_EXECUTION.
   Otherwise return MOVE_IMPOSSIBLE.  */
static enum move_pos
movement_possibility (gimple *stmt)
{
  tree lhs;
  enum move_pos ret = MOVE_POSSIBLE;

  if (flag_unswitch_loops
      && gimple_code (stmt) == GIMPLE_COND)
    {
      /* If we perform unswitching, force the operands of the invariant
	 condition to be moved out of the loop.  */
      return MOVE_POSSIBLE;
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_phi_num_args (stmt) <= 2
      && !virtual_operand_p (gimple_phi_result (stmt))
      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
    return MOVE_POSSIBLE;

  if (gimple_get_lhs (stmt) == NULL_TREE)
    return MOVE_IMPOSSIBLE;

  if (gimple_vdef (stmt))
    return MOVE_IMPOSSIBLE;

  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || gimple_has_side_effects (stmt)
      || stmt_could_throw_p (cfun, stmt))
    return MOVE_IMPOSSIBLE;

  if (is_gimple_call (stmt))
    {
      /* While pure or const call is guaranteed to have no side effects, we
	 cannot move it arbitrarily.  Consider code like

	   char *s = something ();

	   while (1)
	     {
	       if (s)
		 t = strlen (s);
	       else
		 t = 0;
	     }

	 Here the strlen call cannot be moved out of the loop, even though
	 s is invariant.  In addition to possibly creating a call with
	 invalid arguments, moving out a function call that is not executed
	 may cause performance regressions in case the call is costly and
	 not executed at all.  */
      ret = MOVE_PRESERVE_EXECUTION;
      lhs = gimple_call_lhs (stmt);
    }
  else if (is_gimple_assign (stmt))
    lhs = gimple_assign_lhs (stmt);
  else
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) != SSA_NAME
      || gimple_could_trap_p (stmt))
    return MOVE_PRESERVE_EXECUTION;

  /* Non local loads in a transaction cannot be hoisted out.  Well,
     unless the load happens on every path out of the loop, but we
     don't take this into account yet.  */
  if (flag_tm
      && gimple_in_transaction (stmt)
      && gimple_assign_single_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);
      if (DECL_P (rhs) && is_global_var (rhs))
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, "Cannot hoist conditional load of ");
	      print_generic_expr (dump_file, rhs, TDF_SLIM);
	      fprintf (dump_file, " because it is in a transaction.\n");
	    }
	  return MOVE_IMPOSSIBLE;
	}
    }

  return ret;
}
/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to that we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in that the value
   of DEF is invariant.  */

static struct loop *
outermost_invariant_loop (tree def, struct loop *loop)
{
  gimple *def_stmt;
  basic_block def_bb;
  struct loop *max_loop;
  struct lim_aux_data *lim_data;

  if (!def)
    return superloop_at_depth (loop, 1);

  if (TREE_CODE (def) != SSA_NAME)
    {
      gcc_assert (is_gimple_min_invariant (def));
      return superloop_at_depth (loop, 1);
    }

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = gimple_bb (def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  lim_data = get_lim_data (def_stmt);
  if (lim_data != NULL && lim_data->max_loop != NULL)
    max_loop = find_common_loop (max_loop,
				 loop_outer (lim_data->max_loop));
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);

  return max_loop;
}
/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in that value of DEF is invariant
   and record this in DATA->max_loop field.  If DEF itself is defined inside
   this loop as well (i.e. we need to hoist it out of the loop if we want
   to hoist the statement represented by DATA), record the statement in that
   DEF is defined to the DATA->depends list.  Additionally if ADD_COST is true,
   add the cost of the computation of DEF to the DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */

static bool
add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
		bool add_cost)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = gimple_bb (def_stmt);
  struct loop *max_loop;
  struct lim_aux_data *def_data;

  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  def_data = get_lim_data (def_stmt);
  if (!def_data)
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
	 i.e. if it is likely that by moving the invariants dependent
	 on it, we will be able to avoid creating a new register for
	 it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += def_data->cost;

  data->depends.safe_push (def_stmt);

  return true;
}
/* Returns an estimate for a cost of statement STMT.  The values here
   are just ad-hoc constants, similar to costs for inlining.  */

static unsigned
stmt_cost (gimple *stmt)
{
  /* Always try to create possibilities for unswitching.  */
  if (gimple_code (stmt) == GIMPLE_COND
      || gimple_code (stmt) == GIMPLE_PHI)
    return LIM_EXPENSIVE;

  /* We should be hoisting calls if possible.  */
  if (is_gimple_call (stmt))
    {
      tree fndecl;

      /* Unless the call is a builtin_constant_p; this always folds to a
	 constant, so moving it is useless.  */
      fndecl = gimple_call_fndecl (stmt);
      if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
	return 0;

      return LIM_EXPENSIVE;
    }

  /* Hoisting memory references out should almost surely be a win.  */
  if (gimple_references_memory_p (stmt))
    return LIM_EXPENSIVE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return 1;

  switch (gimple_assign_rhs_code (stmt))
    {
    case MULT_EXPR:
    case WIDEN_MULT_EXPR:
    case WIDEN_MULT_PLUS_EXPR:
    case WIDEN_MULT_MINUS_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case TRUNC_MOD_EXPR:
    case RDIV_EXPR:
      /* Division and multiplication are usually expensive.  */
      return LIM_EXPENSIVE;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case WIDEN_LSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      /* Shifts and rotates are usually expensive.  */
      return LIM_EXPENSIVE;

    case CONSTRUCTOR:
      /* Make vector construction cost proportional to the number
	 of elements.  */
      return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));

    case SSA_NAME:
    case PAREN_EXPR:
      /* Whether or not something is wrapped inside a PAREN_EXPR
	 should not change move cost.  Nor should an intermediate
	 unpropagated SSA name copy.  */
      return 0;

    default:
      return 1;
    }
}
/* Finds the outermost loop between OUTER and LOOP in that the memory reference
   REF is independent.  If REF is not independent in LOOP, NULL is returned
   instead.  */

static struct loop *
outermost_indep_loop (struct loop *outer, struct loop *loop, im_mem_ref *ref)
{
  struct loop *aloop;

  if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
    return NULL;

  for (aloop = outer;
       aloop != loop;
       aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
    if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
	&& ref_indep_loop_p (aloop, ref))
      return aloop;

  if (ref_indep_loop_p (loop, ref))
    return loop;
  else
    return NULL;
}
/* If there is a simple load or store to a memory reference in STMT, returns
   the location of the memory reference, and sets IS_STORE according to whether
   it is a store or load.  Otherwise, returns NULL.  */
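
/* Illustrative only (made-up GIMPLE): for a statement

     x_1 = a[i_2];      the address of a[i_2] is returned and *IS_STORE is
			set to false;
     a[i_2] = x_1;      the address of a[i_2] is returned and *IS_STORE is
			set to true.  */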
static tree *
simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
{
  tree *lhs, *rhs;

  /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
  if (!gimple_assign_single_p (stmt))
    return NULL;

  lhs = gimple_assign_lhs_ptr (stmt);
  rhs = gimple_assign_rhs1_ptr (stmt);

  if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
    {
      *is_store = false;
      return rhs;
    }
  else if (gimple_vdef (stmt)
	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
    {
      *is_store = true;
      return lhs;
    }
  else
    return NULL;
}
/* From a controlling predicate in DOM determine the arguments from
   the PHI node PHI that are chosen if the predicate evaluates to
   true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
   they are non-NULL.  Returns true if the arguments can be determined,
   else return false.  */
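
/* Illustrative only (SSA names made up): for a diamond

     if (a_1 < b_2)      <-- last statement of DOM
       x_3 = c_4;
     else
       x_5 = d_6;
     # x_7 = PHI <x_3, x_5>

   the function stores x_3 to *TRUE_ARG_P and x_5 to *FALSE_ARG_P.  */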
static bool
extract_true_false_args_from_phi (basic_block dom, gphi *phi,
				  tree *true_arg_p, tree *false_arg_p)
{
  edge te, fe;
  if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
					     &te, &fe))
    return false;

  if (true_arg_p)
    *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
  if (false_arg_p)
    *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);

  return true;
}
/* Determine the outermost loop to that it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in that the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
   we preserve the fact whether STMT is executed.  It also fills other related
   information to LIM_DATA (STMT).

   The function returns false if STMT cannot be hoisted outside of the loop it
   is defined in, and true otherwise.  */
static bool
determine_max_movement (gimple *stmt, bool must_preserve_exec)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *loop = bb->loop_father;
  struct loop *level;
  struct lim_aux_data *lim_data = get_lim_data (stmt);
  tree val;
  ssa_op_iter iter;

  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = level;

  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      use_operand_p use_p;
      unsigned min_cost = UINT_MAX;
      unsigned total_cost = 0;
      struct lim_aux_data *def_data;

      /* We will end up promoting dependencies to be unconditionally
	 evaluated.  For this reason the PHI cost (and thus the
	 cost we remove from the loop by doing the invariant motion)
	 is that of the cheapest PHI argument dependency chain.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  val = USE_FROM_PTR (use_p);

	  if (TREE_CODE (val) != SSA_NAME)
	    {
	      /* Assign const 1 to constants.  */
	      min_cost = MIN (min_cost, 1);
	      total_cost += 1;
	      continue;
	    }
	  if (!add_dependency (val, lim_data, loop, false))
	    return false;

	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
	  if (gimple_bb (def_stmt)
	      && gimple_bb (def_stmt)->loop_father == loop)
	    {
	      def_data = get_lim_data (def_stmt);
	      if (def_data)
		{
		  min_cost = MIN (min_cost, def_data->cost);
		  total_cost += def_data->cost;
		}
	    }
	}

      min_cost = MIN (min_cost, total_cost);
      lim_data->cost += min_cost;

      if (gimple_phi_num_args (phi) > 1)
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond;
	  if (gsi_end_p (gsi_last_bb (dom)))
	    return false;
	  cond = gsi_stmt (gsi_last_bb (dom));
	  if (gimple_code (cond) != GIMPLE_COND)
	    return false;
	  /* Verify that this is an extended form of a diamond and
	     the PHI arguments are completely controlled by the
	     predicate in DOM.  */
	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
	    return false;

	  /* Fold in dependencies and cost of the condition.  */
	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
	    {
	      if (!add_dependency (val, lim_data, loop, false))
		return false;
	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
	      if (def_data)
		lim_data->cost += def_data->cost;
	    }

	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative just claim we are not invariant for this case.
	     We also are not sure whether the control-flow inside the
	     loop will vanish.  */
	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
	      && !(min_cost != 0
		   && total_cost / min_cost <= 2))
	    return false;

	  /* Assume that the control-flow in the loop will vanish.
	     ??? We should verify this and not artificially increase
	     the cost if that is not the case.  */
	  lim_data->cost += stmt_cost (stmt);
	}

      return true;
    }
  else
    FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
      if (!add_dependency (val, lim_data, loop, true))
	return false;

  if (gimple_vuse (stmt))
    {
      im_mem_ref *ref
	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
      if (ref
	  && MEM_ANALYZABLE (ref))
	{
	  lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
						     loop, ref);
	  if (!lim_data->max_loop)
	    return false;
	}
      else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
	return false;
    }

  lim_data->cost += stmt_cost (stmt);

  return true;
}
/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
   and that one of the operands of this statement is computed by STMT.
   Ensure that STMT (together with all the statements that define its
   operands) is hoisted at least out of the loop LEVEL.  */
static void
set_level (gimple *stmt, struct loop *orig_loop, struct loop *level)
{
  struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
  struct lim_aux_data *lim_data;
  gimple *dep_stmt;
  unsigned i;

  stmt_loop = find_common_loop (orig_loop, stmt_loop);
  lim_data = get_lim_data (stmt);
  if (lim_data != NULL && lim_data->tgt_loop != NULL)
    stmt_loop = find_common_loop (stmt_loop,
				  loop_outer (lim_data->tgt_loop));
  if (flow_loop_nested_p (stmt_loop, level))
    return;

  gcc_assert (level == lim_data->max_loop
	      || flow_loop_nested_p (lim_data->max_loop, level));

  lim_data->tgt_loop = level;
  FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
    set_level (dep_stmt, orig_loop, level);
}
/* Determines an outermost loop from that we want to hoist the statement STMT.
   For now we choose the outermost possible loop.  TODO -- use profiling
   information to set it more sanely.  */
static void
set_profitable_level (gimple *stmt)
{
  set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
}
/* Returns true if STMT is a call that has side effects.  */

static bool
nonpure_call_p (gimple *stmt)
{
  if (gimple_code (stmt) != GIMPLE_CALL)
    return false;

  return gimple_has_side_effects (stmt);
}
/* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */
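
/* Illustrative only (SSA names made up): inside a loop where b_2 is
   invariant,

     x_3 = a_1 / b_2;

   becomes

     reciptmp_4 = 1.0 / b_2;    <-- invariant, candidate for hoisting
     x_3 = a_1 * reciptmp_4;

   so only the multiplication remains in the loop once the reciprocal is
   moved to the preheader.  */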
static gimple *
rewrite_reciprocal (gimple_stmt_iterator *bsi)
{
  gassign *stmt, *stmt1, *stmt2;
  tree name, lhs, type;
  tree real_one;
  gimple_stmt_iterator gsi;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);

  real_one = build_one_cst (type);

  name = make_temp_ssa_name (type, NULL, "reciptmp");
  stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
			       gimple_assign_rhs2 (stmt));
  stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
			       gimple_assign_rhs1 (stmt));

  /* Replace division stmt with reciprocal and multiply stmts.
     The multiply stmt is not invariant, so update iterator
     and avoid rescanning.  */
  gsi = *bsi;
  gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
  gsi_replace (&gsi, stmt2, true);

  /* Continue processing with invariant reciprocal statement.  */
  return stmt1;
}
/* Check if the pattern at *BSI is a bittest of the form
   (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
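
/* Illustrative only (SSA names made up): when B_3 is loop invariant but A_2
   is not,

     t_1 = A_2 >> B_3;
     c_4 = t_1 & 1;
     if (c_4 != 0) ...

   becomes

     mask_5 = 1 << B_3;         <-- invariant, can be hoisted
     t_6 = A_2 & mask_5;
     if (t_6 != 0) ...

   trading the per-iteration shift for an invariant mask computation.  */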
static gimple *
rewrite_bittest (gimple_stmt_iterator *bsi)
{
  gassign *stmt;
  gimple *stmt1;
  gassign *stmt2;
  gimple *use_stmt;
  gcond *cond_stmt;
  tree lhs, name, t, a, b;
  use_operand_p use;

  stmt = as_a <gassign *> (gsi_stmt (*bsi));
  lhs = gimple_assign_lhs (stmt);

  /* Verify that the single use of lhs is a comparison against zero.  */
  if (TREE_CODE (lhs) != SSA_NAME
      || !single_imm_use (lhs, &use, &use_stmt))
    return stmt;
  cond_stmt = dyn_cast <gcond *> (use_stmt);
  if (!cond_stmt)
    return stmt;
  if (gimple_cond_lhs (cond_stmt) != lhs
      || (gimple_cond_code (cond_stmt) != NE_EXPR
	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
      || !integer_zerop (gimple_cond_rhs (cond_stmt)))
    return stmt;

  /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
  stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  if (gimple_code (stmt1) != GIMPLE_ASSIGN)
    return stmt;

  /* There is a conversion in between possibly inserted by fold.  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
    {
      t = gimple_assign_rhs1 (stmt1);
      if (TREE_CODE (t) != SSA_NAME
	  || !has_single_use (t))
	return stmt;
      stmt1 = SSA_NAME_DEF_STMT (t);
      if (gimple_code (stmt1) != GIMPLE_ASSIGN)
	return stmt;
    }

  /* Verify that B is loop invariant but A is not.  Verify that with
     all the stmt walking we are still in the same loop.  */
  if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
      || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
    return stmt;

  a = gimple_assign_rhs1 (stmt1);
  b = gimple_assign_rhs2 (stmt1);

  if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
      && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
    {
      gimple_stmt_iterator rsi;

      /* 1 << B */
      t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
		       build_int_cst (TREE_TYPE (a), 1), b);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt1 = gimple_build_assign (name, t);

      /* A & (1 << B) */
      t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      stmt2 = gimple_build_assign (name, t);

      /* Replace the SSA_NAME we compare against zero.  Adjust
	 the type of zero accordingly.  */
      SET_USE (use, name);
      gimple_cond_set_rhs (cond_stmt,
			   build_int_cst_type (TREE_TYPE (name), 0));

      /* Don't use gsi_replace here, none of the new assignments sets
	 the variable originally set in stmt.  Move bsi to stmt1, and
	 then remove the original stmt, so that we get a chance to
	 retain debug info for it.  */
      rsi = *bsi;
      gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
      gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
      gimple *to_release = gsi_stmt (rsi);
      gsi_remove (&rsi, true);
      release_defs (to_release);

      return stmt1;
    }

  return stmt;
}
/* For each statement determines the outermost loop in that it is invariant,
   the statements on whose motion it depends, and the cost of the computation.
   This information is stored to the LIM_DATA structure associated with
   each statement.  */

class invariantness_dom_walker : public dom_walker
{
public:
  invariantness_dom_walker (cdi_direction direction)
    : dom_walker (direction) {}

  virtual edge before_dom_children (basic_block);
};

/* Determine the outermost loops in that statements in basic block BB are
   invariant, and record them to the LIM_DATA associated with the statements.
   Callback for dom_walker.  */
edge
invariantness_dom_walker::before_dom_children (basic_block bb)
{
  enum move_pos pos;
  gimple_stmt_iterator bsi;
  gimple *stmt;
  bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
  struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
  struct lim_aux_data *lim_data;

  if (!loop_outer (bb->loop_father))
    return NULL;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));

  /* Look at PHI nodes, but only if there is at most two.
     ??? We could relax this further by post-processing the inserted
     code and transforming adjacent cond-exprs with the same predicate
     to control flow again.  */
  bsi = gsi_start_phis (bb);
  if (!gsi_end_p (bsi)
      && ((gsi_next (&bsi), gsi_end_p (bsi))
	  || (gsi_next (&bsi), gsi_end_p (bsi))))
    for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
      {
	stmt = gsi_stmt (bsi);

	pos = movement_possibility (stmt);
	if (pos == MOVE_IMPOSSIBLE)
	  continue;

	lim_data = get_lim_data (stmt);
	if (! lim_data)
	  lim_data = init_lim_data (stmt);
	lim_data->always_executed_in = outermost;

	if (!determine_max_movement (stmt, false))
	  {
	    lim_data->max_loop = NULL;
	    continue;
	  }

	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    print_gimple_stmt (dump_file, stmt, 2);
	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		     loop_depth (lim_data->max_loop),
		     lim_data->cost);
	  }

	if (lim_data->cost >= LIM_EXPENSIVE)
	  set_profitable_level (stmt);
      }

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      stmt = gsi_stmt (bsi);

      pos = movement_possibility (stmt);
      if (pos == MOVE_IMPOSSIBLE)
	{
	  if (nonpure_call_p (stmt))
	    {
	      maybe_never = true;
	      outermost = NULL;
	    }
	  /* Make sure to note always_executed_in for stores to make
	     store-motion work.  */
	  else if (stmt_makes_single_store (stmt))
	    {
	      struct lim_aux_data *lim_data = get_lim_data (stmt);
	      if (! lim_data)
		lim_data = init_lim_data (stmt);
	      lim_data->always_executed_in = outermost;
	    }
	  continue;
	}

      if (is_gimple_assign (stmt)
	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
	      == GIMPLE_BINARY_RHS))
	{
	  tree op0 = gimple_assign_rhs1 (stmt);
	  tree op1 = gimple_assign_rhs2 (stmt);
	  struct loop *ol1 = outermost_invariant_loop (op1,
					loop_containing_stmt (stmt));

	  /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
	     to be hoisted out of loop, saving expensive divide.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
	      && flag_unsafe_math_optimizations
	      && !flag_trapping_math
	      && ol1 != NULL
	      && outermost_invariant_loop (op0, ol1) == NULL)
	    stmt = rewrite_reciprocal (&bsi);

	  /* If the shift count is invariant, convert (A >> B) & 1 to
	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
	     saving an expensive shift.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
	      && integer_onep (op1)
	      && TREE_CODE (op0) == SSA_NAME
	      && has_single_use (op0))
	    stmt = rewrite_bittest (&bsi);
	}

      lim_data = get_lim_data (stmt);
      if (! lim_data)
	lim_data = init_lim_data (stmt);
      lim_data->always_executed_in = outermost;

      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
	continue;

      if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
	{
	  lim_data->max_loop = NULL;
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  print_gimple_stmt (dump_file, stmt, 2);
	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		   loop_depth (lim_data->max_loop),
		   lim_data->cost);
	}

      if (lim_data->cost >= LIM_EXPENSIVE)
	set_profitable_level (stmt);
    }

  return NULL;
}
/* Hoist the statements in basic block BB out of the loops prescribed by
   data stored in LIM_DATA structures associated with each statement.  Callback
   for walk_dominator_tree.  */
static unsigned int
move_computations_worker (basic_block bb)
{
  struct loop *level;
  unsigned cost = 0;
  struct lim_aux_data *lim_data;
  unsigned int todo = 0;

  if (!loop_outer (bb->loop_father))
    return todo;

  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
    {
      gassign *new_stmt;
      gphi *stmt = bsi.phi ();

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (&bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      if (!level)
	{
	  gsi_next (&bsi);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving PHI node\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      if (gimple_phi_num_args (stmt) == 1)
	{
	  tree arg = PHI_ARG_DEF (stmt, 0);
	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  TREE_CODE (arg), arg);
	}
      else
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond = gsi_stmt (gsi_last_bb (dom));
	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
	  /* Get the PHI arguments corresponding to the true and false
	     edges of COND.  */
	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
	  gcc_assert (arg0 && arg1);
	  t = build2 (gimple_cond_code (cond), boolean_type_node,
		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
					  COND_EXPR, t, arg0, arg1);
	  todo |= TODO_cleanup_cfg;
	}
      if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || (ALWAYS_EXECUTED_IN (bb) != level
		  && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	{
	  tree lhs = gimple_assign_lhs (new_stmt);
	  SSA_NAME_RANGE_INFO (lhs) = NULL;
	}
      gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
      remove_phi_node (&bsi, false);
    }

  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
    {
      edge e;

      gimple *stmt = gsi_stmt (bsi);

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (&bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      if (!level)
	{
	  gsi_next (&bsi);
	  continue;
	}

      /* We do not really want to move conditionals out of the loop; we just
	 placed it here to force its operands to be moved if necessary.  */
      if (gimple_code (stmt) == GIMPLE_COND)
	{
	  gsi_next (&bsi);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving statement\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      e = loop_preheader_edge (level);
      gcc_assert (!gimple_vdef (stmt));
      if (gimple_vuse (stmt))
	{
	  /* The new VUSE is the one from the virtual PHI in the loop
	     header or the one already present.  */
	  gphi_iterator gsi2;
	  for (gsi2 = gsi_start_phis (e->dest);
	       !gsi_end_p (gsi2); gsi_next (&gsi2))
	    {
	      gphi *phi = gsi2.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		{
		  gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
		  break;
		}
	    }
	}
      gsi_remove (&bsi, false);
      if (gimple_has_lhs (stmt)
	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	{
	  tree lhs = gimple_get_lhs (stmt);
	  SSA_NAME_RANGE_INFO (lhs) = NULL;
	}
      /* In case this is a stmt that is not unconditionally executed
	 when the target loop header is executed and the stmt may
	 invoke undefined integer or pointer overflow rewrite it to
	 unsigned arithmetic.  */
      if (is_gimple_assign (stmt)
	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
	  && arith_code_with_undefined_signed_overflow
	       (gimple_assign_rhs_code (stmt))
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
      else
	gsi_insert_on_edge (e, stmt);
    }

  return todo;
}
/* Hoist the statements out of the loops prescribed by data stored in
   LIM_DATA structures associated with each statement.  */
static unsigned int
move_computations (void)
{
  int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
  int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
  unsigned todo = 0;

  for (int i = 0; i < n; ++i)
    todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (cfun, rpo[i]));

  free (rpo);

  gsi_commit_edge_inserts ();
  if (need_ssa_update_p (cfun))
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);

  return todo;
}
/* Checks whether the statement defining variable *INDEX can be hoisted
   out of the loop passed in DATA.  Callback for for_each_index.  */

static bool
may_move_till (tree ref, tree *index, void *data)
{
  struct loop *loop = (struct loop *) data, *max_loop;

  /* If REF is an array reference, check also that the step and the lower
     bound is invariant in LOOP.  */
  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = TREE_OPERAND (ref, 3);
      tree lbound = TREE_OPERAND (ref, 2);

      max_loop = outermost_invariant_loop (step, loop);
      if (!max_loop)
	return false;

      max_loop = outermost_invariant_loop (lbound, loop);
      if (!max_loop)
	return false;
    }

  max_loop = outermost_invariant_loop (*index, loop);
  if (!max_loop)
    return false;

  return true;
}
/* If OP is SSA NAME, force the statement that defines it to be
   moved out of the LOOP.  ORIG_LOOP is the loop in that EXPR is used.  */

static void
force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
{
  gimple *stmt;

  if (!op
      || is_gimple_min_invariant (op))
    return;

  gcc_assert (TREE_CODE (op) == SSA_NAME);

  stmt = SSA_NAME_DEF_STMT (op);
  if (gimple_nop_p (stmt))
    return;

  set_level (stmt, orig_loop, loop);
}
/* Forces statement defining invariants in REF (and *INDEX) to be moved out of
   the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
   for_each_index.  */

struct fmt_data
{
  struct loop *loop;
  struct loop *orig_loop;
};

static bool
force_move_till (tree ref, tree *index, void *data)
{
  struct fmt_data *fmt_data = (struct fmt_data *) data;

  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = TREE_OPERAND (ref, 3);
      tree lbound = TREE_OPERAND (ref, 2);

      force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
      force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
    }

  force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);

  return true;
}
/* A function to free the mem_ref object OBJ.  */

static void
memref_free (struct im_mem_ref *mem)
{
  mem->accesses_in_loop.release ();
}
/* Allocates and returns a memory reference description for MEM whose hash
   value is HASH and id is ID.  */

static im_mem_ref *
mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
{
  im_mem_ref *ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
  if (mem)
    ref->mem = *mem;
  else
    ao_ref_init (&ref->mem, error_mark_node);
  ref->id = id;
  ref->ref_canonical = false;
  ref->hash = hash;
  ref->stored = NULL;
  bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
  bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
  ref->accesses_in_loop.create (1);

  return ref;
}
/* Records memory reference location *LOC in LOOP to the memory reference
   description REF.  The reference occurs in statement STMT.  */

static void
record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
{
  mem_ref_loc aref;
  aref.stmt = stmt;
  aref.ref = loc;
  ref->accesses_in_loop.safe_push (aref);
}
/* Set the LOOP bit in REF stored bitmap and allocate that if
   necessary.  Return whether a bit was changed.  */

static bool
set_ref_stored_in_loop (im_mem_ref *ref, struct loop *loop)
{
  if (!ref->stored)
    ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
  return bitmap_set_bit (ref->stored, loop->num);
}
/* Marks reference REF as stored in LOOP.  */

static void
mark_ref_stored (im_mem_ref *ref, struct loop *loop)
{
  while (loop != current_loops->tree_root
	 && set_ref_stored_in_loop (ref, loop))
    loop = loop_outer (loop);
}
/* Gathers memory references in statement STMT in LOOP, storing the
   information about them in the memory_accesses structure.  Marks
   the vops accessed through unrecognized statements there as
   well.  */

static void
gather_mem_refs_stmt (struct loop *loop, gimple *stmt)
{
  tree *mem = NULL;
  hashval_t hash;
  im_mem_ref **slot;
  im_mem_ref *ref;
  bool is_stored;
  unsigned id;

  if (!gimple_vuse (stmt))
    return;

  mem = simple_mem_ref_in_stmt (stmt, &is_stored);
  if (!mem)
    {
      /* We use the shared mem_ref for all unanalyzable refs.  */
      id = UNANALYZABLE_MEM_ID;
      ref = memory_accesses.refs_list[id];
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      is_stored = gimple_vdef (stmt);
    }
  else
    {
      /* We are looking for equal refs that might differ in structure
	 such as a.b vs. MEM[&a + 4].  So we key off the ao_ref but
	 make sure we can canonicalize the ref in the hashtable if
	 non-operand_equal_p refs are found.  For the lookup we mark
	 the case we want strict equality with aor.max_size == -1.  */
      ao_ref aor;
      ao_ref_init (&aor, *mem);
      ao_ref_alias_set (&aor);
      HOST_WIDE_INT offset, size, max_size;
      poly_int64 saved_maxsize = aor.max_size, mem_off;
      tree mem_base;
      if (aor.max_size_known_p ()
	  && aor.offset.is_constant (&offset)
	  && aor.size.is_constant (&size)
	  && aor.max_size.is_constant (&max_size)
	  && (size % BITS_PER_UNIT) == 0
	  /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
	     size.  Make sure this is consistent with the extraction.  */
	  && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
	  && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
		       aor.size)
	  && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
	{
	  hash = iterative_hash_expr (ao_ref_base (&aor), 0);
	  hash = iterative_hash_host_wide_int (offset, hash);
	  hash = iterative_hash_host_wide_int (size, hash);
	}
      else
	{
	  hash = iterative_hash_expr (aor.ref, 0);
	  aor.max_size = -1;
	}
      slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
      aor.max_size = saved_maxsize;
      if (*slot)
	{
	  if (!(*slot)->ref_canonical
	      && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
	    {
	      /* If we didn't yet canonicalize the hashtable ref (which
		 we'll end up using for code insertion) and hit a second
		 equal ref that is not structurally equivalent create
		 a canonical ref which is a bare MEM_REF.  */
	      if (TREE_CODE (*mem) == MEM_REF
		  || TREE_CODE (*mem) == TARGET_MEM_REF)
		{
		  (*slot)->mem.ref = *mem;
		  (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
		}
	      else
		{
		  tree ref_alias_type = reference_alias_ptr_type (*mem);
		  unsigned int ref_align = get_object_alignment (*mem);
		  tree ref_type = TREE_TYPE (*mem);
		  tree tmp = build_fold_addr_expr (unshare_expr (mem_base));
		  if (TYPE_ALIGN (ref_type) != ref_align)
		    ref_type = build_aligned_type (ref_type, ref_align);
		  (*slot)->mem.ref
		    = fold_build2 (MEM_REF, ref_type, tmp,
				   build_int_cst (ref_alias_type, mem_off));
		  if ((*slot)->mem.volatile_p)
		    TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
		  gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
				       && is_gimple_mem_ref_addr
					    (TREE_OPERAND ((*slot)->mem.ref,
							   0)));
		  (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
		}
	      (*slot)->ref_canonical = true;
	    }
	  ref = *slot;
	  id = ref->id;
	}
      else
	{
	  id = memory_accesses.refs_list.length ();
	  ref = mem_ref_alloc (&aor, hash, id);
	  memory_accesses.refs_list.safe_push (ref);
	  *slot = ref;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Memory reference %u: ", id);
	      print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
	      fprintf (dump_file, "\n");
	    }
	}

      record_mem_ref_loc (ref, stmt, mem);
    }
  bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
  if (is_stored)
    {
      bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
      mark_ref_stored (ref, loop);
    }
  init_lim_data (stmt)->ref = ref->id;
}
static unsigned *bb_loop_postorder;

/* qsort sort function to sort blocks after their loop fathers postorder.  */

static int
sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
{
  basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
  basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
  struct loop *loop1 = bb1->loop_father;
  struct loop *loop2 = bb2->loop_father;
  if (loop1->num == loop2->num)
    return bb1->index - bb2->index;
  return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
}

/* qsort sort function to sort ref locs after their loop fathers postorder.  */

static int
sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
{
  mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
  mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
  struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
  struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
  if (loop1->num == loop2->num)
    return 0;
  return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
}
/* Gathers memory references in loops.  */

static void
analyze_memory_references (void)
{
  gimple_stmt_iterator bsi;
  basic_block bb, *bbs;
  struct loop *loop, *outer;
  unsigned i, n;

  /* Collect all basic-blocks in loops and sort them after their
     loops postorder.  */
  i = 0;
  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
  FOR_EACH_BB_FN (bb, cfun)
    if (bb->loop_father != current_loops->tree_root)
      bbs[i++] = bb;
  n = i;
  qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);

  /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
     That results in better locality for all the bitmaps.  */
  for (i = 0; i < n; ++i)
    {
      basic_block bb = bbs[i];
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
    }

  /* Sort the location list of gathered memory references after their
     loop postorder number.  */
  im_mem_ref *ref;
  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);

  free (bbs);
  // free (bb_loop_postorder);

  /* Propagate the information about accessed memory references up
     the loop hierarchy.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* Finalize the overall touched references (including subloops).  */
      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
		       &memory_accesses.refs_stored_in_loop[loop->num]);

      /* Propagate the information about accessed memory references up
	 the loop hierarchy.  */
      outer = loop_outer (loop);
      if (outer == current_loops->tree_root)
	continue;

      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
		       &memory_accesses.all_refs_stored_in_loop[loop->num]);
    }
}
/* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
   tree_to_aff_combination_expand.  */

static bool
mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
		      hash_map<tree, name_expansion *> **ttae_cache)
{
  /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
     object and their offset differ in such a way that the locations cannot
     overlap, then they cannot alias.  */
  poly_widest_int size1, size2;
  aff_tree off1, off2;

  /* Perform basic offset and type-based disambiguation.  */
  if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
    return false;

  /* The expansion of addresses may be a bit expensive, thus we only do
     the check at -O2 and higher optimization levels.  */
  if (optimize < 2)
    return true;

  get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
  get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
  aff_combination_expand (&off1, ttae_cache);
  aff_combination_expand (&off2, ttae_cache);
  aff_combination_scale (&off1, -1);
  aff_combination_add (&off2, &off1);

  if (aff_comb_cannot_overlap_p (&off2, size1, size2))
    return false;

  return true;
}
/* Compare function for bsearch searching for reference locations
   in a loop.  */

static int
find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
{
  struct loop *loop = (struct loop *)const_cast<void *>(loop_);
  mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
  struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
  if (loop->num == loc_loop->num
      || flow_loop_nested_p (loop, loc_loop))
    return 0;
  return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
	  ? -1 : 1);
}
/* Iterates over all locations of REF in LOOP and its subloops calling
   fn.operator() with the location as argument.  When that operator
   returns true the iteration is stopped and true is returned.
   Otherwise false is returned.  */

template <typename FN>
static bool
for_all_locs_in_loop (struct loop *loop, im_mem_ref *ref, FN fn)
{
  unsigned i;
  mem_ref_loc *loc;

  /* Search for the cluster of locs in the accesses_in_loop vector
     which is sorted after postorder index of the loop father.  */
  loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
  if (!loc)
    return false;

  /* We have found one location inside loop or its sub-loops.  Iterate
     both forward and backward to cover the whole cluster.  */
  i = loc - ref->accesses_in_loop.address ();
  while (i > 0)
    {
      --i;
      mem_ref_loc *l = &ref->accesses_in_loop[i];
      if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
	break;
      if (fn (l))
	return true;
    }
  for (i = loc - ref->accesses_in_loop.address ();
       i < ref->accesses_in_loop.length (); ++i)
    {
      mem_ref_loc *l = &ref->accesses_in_loop[i];
      if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
	break;
      if (fn (l))
	return true;
    }

  return false;
}
/* Rewrites location LOC by TMP_VAR.  */

struct rewrite_mem_ref_loc
{
  rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
  bool operator () (mem_ref_loc *loc);
  tree tmp_var;
};

bool
rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
{
  *loc->ref = tmp_var;
  update_stmt (loc->stmt);
  return false;
}
/* Rewrites all references to REF in LOOP by variable TMP_VAR.  */

static void
rewrite_mem_refs (struct loop *loop, im_mem_ref *ref, tree tmp_var)
{
  for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
}
/* Stores the first reference location in LOCP.  */

struct first_mem_ref_loc_1
{
  first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
  bool operator () (mem_ref_loc *loc);
  mem_ref_loc **locp;
};

bool
first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
{
  *locp = loc;
  return true;
}

/* Returns the first reference location to REF in LOOP.  */

static mem_ref_loc *
first_mem_ref_loc (struct loop *loop, im_mem_ref *ref)
{
  mem_ref_loc *locp = NULL;
  for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
  return locp;
}
struct prev_flag_edges {
  /* Edge to insert new flag comparison code.  */
  edge append_cond_position;

  /* Edge for fall through from previous flag comparison.  */
  edge last_cond_fallthru;
};
/* Helper function for execute_sm.  Emit code along edge EX to store TMP_VAR
   back into MEM.

   The store is only done if MEM has changed.  We do this so no
   changes to MEM occur on code paths that did not originally store
   into it.  The common case for execute_sm will transform a conditional
   store inside the loop into an unconditional store to a temporary plus a
   flag update; this function then generates the flag test and the guarded
   store back to MEM on the exit edge EX.  */
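
/* A hedged sketch of the generated shape (variable names are made up; the
   original comment illustrates the same idea in more detail):

     lsm_tmp = MEM;                 <-- load emitted in the preheader
     lsm_flag = false;
     for (...)
       {
	 if (foo)
	   {
	     lsm_tmp = TMP_VAR;     <-- original store rewritten
	     lsm_flag = true;
	   }
       }
     if (lsm_flag)                  <-- emitted by this function on edge EX
       MEM = lsm_tmp;
*/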
static void
execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
		       edge preheader, hash_set <basic_block> *flag_bbs)
{
  basic_block new_bb, then_bb, old_dest;
  bool loop_has_only_one_exit;
  edge then_old_edge, orig_ex = ex;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
  bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;

  profile_count count_sum = profile_count::zero ();
  int nbbs = 0, ncount = 0;
  profile_probability flag_probability = profile_probability::uninitialized ();

  /* Flag is set in FLAG_BBS.  Determine probability that flag will be true
     at loop exit.

     This code may look fancy, but it cannot update profile very realistically
     because we do not know the probability that flag will be true at given
     loop exit.

     We look for two interesting extremes
       - when exit is dominated by block setting the flag, we know it will
	 always be true.  This is a common case.
       - when all blocks setting the flag have very low frequency we know
	 it will likely be false.
     In all other cases we default to 2/3 for flag being true.  */

  for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
       it != flag_bbs->end (); ++it)
    {
      if ((*it)->count.initialized_p ())
	count_sum += (*it)->count, ncount ++;
      if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
	flag_probability = profile_probability::always ();
      nbbs++;
    }

  profile_probability cap = profile_probability::always ().apply_scale (2, 3);

  if (flag_probability.initialized_p ())
    ;
  else if (ncount == nbbs
	   && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
    {
      flag_probability = count_sum.probability_in (preheader->count ());
      if (flag_probability > cap)
	flag_probability = cap;
    }

  if (!flag_probability.initialized_p ())
    flag_probability = cap;

  /* ?? Insert store after previous store if applicable.  See note
     below.  */
  if (prev_edges)
    ex = prev_edges->append_cond_position;

  loop_has_only_one_exit = single_pred_p (ex->dest);

  if (loop_has_only_one_exit)
    ex = split_block_after_labels (ex->dest);
  else
    {
      for (gphi_iterator gpi = gsi_start_phis (ex->dest);
	   !gsi_end_p (gpi); gsi_next (&gpi))
	{
	  gphi *phi = gpi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    continue;

	  /* When the destination has a non-virtual PHI node with multiple
	     predecessors make sure we preserve the PHI structure by
	     forcing a forwarder block so that hoisting of that PHI will
	     still work.  */
	  split_edge (ex);
	  break;
	}
    }

  old_dest = ex->dest;
  new_bb = split_edge (ex);
  then_bb = create_empty_bb (new_bb);
  then_bb->count = new_bb->count.apply_probability (flag_probability);
  if (irr)
    then_bb->flags = BB_IRREDUCIBLE_LOOP;
  add_bb_to_loop (then_bb, new_bb->loop_father);

  gsi = gsi_start_bb (new_bb);
  stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
			    NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  gsi = gsi_start_bb (then_bb);
  /* Insert actual store.  */
  stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  edge e1 = single_succ_edge (new_bb);
  edge e2 = make_edge (new_bb, then_bb,
		       EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
  e2->probability = flag_probability;

  e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
  e1->flags &= ~EDGE_FALLTHRU;

  e1->probability = flag_probability.invert ();

  then_old_edge = make_single_succ_edge (then_bb, old_dest,
			EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));

  set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);

  if (prev_edges)
    {
      basic_block prevbb = prev_edges->last_cond_fallthru->src;
      redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
      set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
      set_immediate_dominator (CDI_DOMINATORS, old_dest,
			       recompute_dominator (CDI_DOMINATORS, old_dest));
    }

  /* ?? Because stores may alias, they must happen in the exact
     sequence they originally happened.  Save the position right after
     the (_lsm) store we just created so we can continue appending after
     it and maintain the original order.  */
  {
    struct prev_flag_edges *p;

    if (orig_ex->aux)
      orig_ex->aux = NULL;
    alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
    p = (struct prev_flag_edges *) orig_ex->aux;
    p->append_cond_position = then_old_edge;
    p->last_cond_fallthru = find_edge (new_bb, old_dest);
    orig_ex->aux = (void *) p;
  }

  if (!loop_has_only_one_exit)
    for (gphi_iterator gpi = gsi_start_phis (old_dest);
	 !gsi_end_p (gpi); gsi_next (&gpi))
      {
	gphi *phi = gpi.phi ();
	unsigned i;

	for (i = 0; i < gimple_phi_num_args (phi); i++)
	  if (gimple_phi_arg_edge (phi, i)->src == new_bb)
	    {
	      tree arg = gimple_phi_arg_def (phi, i);
	      add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
	    }
      }
}
2013 struct sm_set_flag_if_changed
2015 sm_set_flag_if_changed (tree flag_
, hash_set
<basic_block
> *bbs_
)
2016 : flag (flag_
), bbs (bbs_
) {}
2017 bool operator () (mem_ref_loc
*loc
);
2019 hash_set
<basic_block
> *bbs
;
2023 sm_set_flag_if_changed::operator () (mem_ref_loc
*loc
)
2025 /* Only set the flag for writes. */
2026 if (is_gimple_assign (loc
->stmt
)
2027 && gimple_assign_lhs_ptr (loc
->stmt
) == loc
->ref
)
2029 gimple_stmt_iterator gsi
= gsi_for_stmt (loc
->stmt
);
2030 gimple
*stmt
= gimple_build_assign (flag
, boolean_true_node
);
2031 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
2032 bbs
->add (gimple_bb (stmt
));
/* Helper function for execute_sm.  On every location where REF is
   set, set an appropriate flag indicating the store.  */

static tree
execute_sm_if_changed_flag_set (struct loop *loop, im_mem_ref *ref,
				hash_set <basic_block> *bbs)
{
  tree flag;
  char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
  flag = create_tmp_reg (boolean_type_node, str);
  for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
  return flag;
}
/* Executes store motion of memory reference REF from LOOP.
   Exits from the LOOP are stored in EXITS.  The initialization of the
   temporary variable is put to the preheader of the loop, and assignments
   to the reference from the temporary variable are emitted to exits.  */
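
/* Illustrative only (made-up names): for a candidate reference MEM that is
   both loaded and stored in the loop, store motion produces roughly

     lsm_tmp = MEM;                 <-- preheader
     for (...)
       {
	 ... = lsm_tmp;             <-- loads rewritten
	 lsm_tmp = ...;             <-- stores rewritten
       }
     MEM = lsm_tmp;                 <-- on each exit edge

   In the multi-threaded model the final store is additionally guarded by a
   flag, as emitted by execute_sm_if_changed above.  */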
2057 execute_sm (struct loop
*loop
, vec
<edge
> exits
, im_mem_ref
*ref
)
2059 tree tmp_var
, store_flag
= NULL_TREE
;
2062 struct fmt_data fmt_data
;
2064 struct lim_aux_data
*lim_data
;
2065 bool multi_threaded_model_p
= false;
2066 gimple_stmt_iterator gsi
;
2067 hash_set
<basic_block
> flag_bbs
;
2069 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2071 fprintf (dump_file
, "Executing store motion of ");
2072 print_generic_expr (dump_file
, ref
->mem
.ref
);
2073 fprintf (dump_file
, " from loop %d\n", loop
->num
);
2076 tmp_var
= create_tmp_reg (TREE_TYPE (ref
->mem
.ref
),
2077 get_lsm_tmp_name (ref
->mem
.ref
, ~0));
2079 fmt_data
.loop
= loop
;
2080 fmt_data
.orig_loop
= loop
;
2081 for_each_index (&ref
->mem
.ref
, force_move_till
, &fmt_data
);
2083 if (bb_in_transaction (loop_preheader_edge (loop
)->src
)
2084 || (! PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES
)
2085 && ! ref_always_accessed_p (loop
, ref
, true)))
2086 multi_threaded_model_p
= true;
2088 if (multi_threaded_model_p
)
2089 store_flag
= execute_sm_if_changed_flag_set (loop
, ref
, &flag_bbs
);
2091 rewrite_mem_refs (loop
, ref
, tmp_var
);
2093 /* Emit the load code on a random exit edge or into the latch if
2094 the loop does not exit, so that we are sure it will be processed
2095 by move_computations after all dependencies. */
2096 gsi
= gsi_for_stmt (first_mem_ref_loc (loop
, ref
)->stmt
);
2098 /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2099 load altogether, since the store is predicated by a flag. We
2100 could, do the load only if it was originally in the loop. */
2101 load
= gimple_build_assign (tmp_var
, unshare_expr (ref
->mem
.ref
));
2102 lim_data
= init_lim_data (load
);
2103 lim_data
->max_loop
= loop
;
2104 lim_data
->tgt_loop
= loop
;
2105 gsi_insert_before (&gsi
, load
, GSI_SAME_STMT
);
2107 if (multi_threaded_model_p
)
2109 load
= gimple_build_assign (store_flag
, boolean_false_node
);
2110 lim_data
= init_lim_data (load
);
2111 lim_data
->max_loop
= loop
;
2112 lim_data
->tgt_loop
= loop
;
2113 gsi_insert_before (&gsi
, load
, GSI_SAME_STMT
);
2116 /* Sink the store to every exit from the loop. */
2117 FOR_EACH_VEC_ELT (exits
, i
, ex
)
2118 if (!multi_threaded_model_p
)
2121 store
= gimple_build_assign (unshare_expr (ref
->mem
.ref
), tmp_var
);
2122 gsi_insert_on_edge (ex
, store
);
2125 execute_sm_if_changed (ex
, ref
->mem
.ref
, tmp_var
, store_flag
,
2126 loop_preheader_edge (loop
), &flag_bbs
);
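
/* Illustration of the transformation performed above (a rough source-level
   sketch only; "lsm" and "lsm_flag" are made-up names for the temporaries):

     for (...)
       if (cond)
         MEM = x;

   becomes, in the multi-threaded model and after move_computations has
   hoisted the load,

     lsm = MEM;
     lsm_flag = false;
     for (...)
       if (cond)
         {
           lsm = x;
           lsm_flag = true;
         }
     if (lsm_flag)
       MEM = lsm;

   with the final conditional store emitted on every exit edge by
   execute_sm_if_changed.  In the single-threaded model the exit store is
   emitted unconditionally and no flag is needed.  */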
/* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
   edges of the LOOP.  */

static void
hoist_memory_references (struct loop *loop, bitmap mem_refs,
                         vec<edge> exits)
{
  im_mem_ref *ref;
  unsigned i;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      execute_sm (loop, exits, ref);
    }
}
/* Closure for ref_always_accessed_p: returns true for a location whose
   statement is always executed in LOOP and, if STORED_P, stores to the
   reference.  */

struct ref_always_accessed
{
  ref_always_accessed (struct loop *loop_, bool stored_p_)
    : loop (loop_), stored_p (stored_p_) {}
  bool operator () (mem_ref_loc *loc);
  struct loop *loop;
  bool stored_p;
};

bool
ref_always_accessed::operator () (mem_ref_loc *loc)
{
  struct loop *must_exec;

  if (!get_lim_data (loc->stmt))
    return false;

  /* If we require an always executed store make sure the statement
     stores to the reference.  */
  if (stored_p)
    {
      tree lhs = gimple_get_lhs (loc->stmt);
      if (!lhs
          || lhs != *loc->ref)
        return false;
    }

  must_exec = get_lim_data (loc->stmt)->always_executed_in;
  if (!must_exec)
    return false;

  if (must_exec == loop
      || flow_loop_nested_p (must_exec, loop))
    return true;

  return false;
}
/* Returns true if REF is always accessed in LOOP.  If STORED_P is true
   make sure REF is always stored to in LOOP.  */

static bool
ref_always_accessed_p (struct loop *loop, im_mem_ref *ref, bool stored_p)
{
  return for_all_locs_in_loop (loop, ref,
                               ref_always_accessed (loop, stored_p));
}
/* Returns true if REF1 and REF2 are independent.  */

static bool
refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2)
{
  if (ref1 == ref2)
    return true;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Querying dependency of refs %u and %u: ",
             ref1->id, ref2->id);

  if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "dependent.\n");
      return false;
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "independent.\n");
      return true;
    }
}
/* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
   and its super-loops.  */

static void
record_dep_loop (struct loop *loop, im_mem_ref *ref, bool stored_p)
{
  /* We can propagate dependent-in-loop bits up the loop
     hierarchy to all outer loops.  */
  while (loop != current_loops->tree_root
         && bitmap_set_bit (&ref->dep_loop,
                            LOOP_DEP_BIT (loop->num, stored_p)))
    loop = loop_outer (loop);
}
/* Returns true if REF is independent of all other memory references
   in LOOP.  */

static bool
ref_indep_loop_p_1 (struct loop *loop, im_mem_ref *ref, bool stored_p)
{
  stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));

  bool indep_p = true;
  bitmap refs_to_check;

  if (stored_p)
    refs_to_check = &memory_accesses.refs_in_loop[loop->num];
  else
    refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];

  if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
    indep_p = false;
  else
    {
      if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
        return true;
      if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
        return false;

      struct loop *inner = loop->inner;
      while (inner)
        {
          if (!ref_indep_loop_p_1 (inner, ref, stored_p))
            {
              indep_p = false;
              break;
            }
          inner = inner->next;
        }

      if (indep_p)
        {
          unsigned i;
          bitmap_iterator bi;
          EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
            {
              im_mem_ref *aref = memory_accesses.refs_list[i];
              if (!refs_independent_p (ref, aref))
                {
                  indep_p = false;
                  break;
                }
            }
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
             ref->id, loop->num, indep_p ? "independent" : "dependent");

  /* Record the computed result in the cache.  */
  if (indep_p)
    {
      if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
          && stored_p)
        {
          /* If it's independent against all refs then it's independent
             against stores, too.  */
          bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
        }
    }
  else
    {
      record_dep_loop (loop, ref, stored_p);
      if (!stored_p)
        {
          /* If it's dependent against stores it's dependent against
             all refs, too.  */
          record_dep_loop (loop, ref, true);
        }
    }

  return indep_p;
}
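
/* Note on the cache recording above (descriptive only): it exploits two
   implications:

     independent of all refs in the loop  =>  independent of its stores
     dependent on the stores in the loop  =>  dependent on all its refs

   hence a positive result computed with STORED_P set also sets the
   !STORED_P bit, and a negative result computed with !STORED_P also
   records dependence with STORED_P set.  */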
/* Returns true if REF is independent of all other memory references
   in LOOP.  */

static bool
ref_indep_loop_p (struct loop *loop, im_mem_ref *ref)
{
  gcc_checking_assert (MEM_ANALYZABLE (ref));

  return ref_indep_loop_p_1 (loop, ref, false);
}
/* Returns true if we can perform store motion of REF from LOOP.  */

static bool
can_sm_ref_p (struct loop *loop, im_mem_ref *ref)
{
  tree base;

  /* Can't hoist unanalyzable refs.  */
  if (!MEM_ANALYZABLE (ref))
    return false;

  /* It should be movable.  */
  if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
      || TREE_THIS_VOLATILE (ref->mem.ref)
      || !for_each_index (&ref->mem.ref, may_move_till, loop))
    return false;

  /* If it can throw, fail; we do not properly update EH info.  */
  if (tree_could_throw_p (ref->mem.ref))
    return false;

  /* If it can trap, it must be always executed in LOOP.
     Readonly memory locations may trap when storing to them, but
     tree_could_trap_p is a predicate for rvalues, so check that
     explicitly.  */
  base = get_base_address (ref->mem.ref);
  if ((tree_could_trap_p (ref->mem.ref)
       || (DECL_P (base) && TREE_READONLY (base)))
      && !ref_always_accessed_p (loop, ref, true))
    return false;

  /* And it must be independent of all other memory references
     in LOOP.  */
  if (!ref_indep_loop_p (loop, ref))
    return false;

  return true;
}
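
/* Example of the trapping restriction checked above (for illustration
   only):

     for (i = 0; i < n; i++)
       if (p)
         *p = i;

   Here the store to *p is not always executed in the loop and *p may
   trap when P is null, so store motion of *p must be rejected unless
   the access is proven always executed.  */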
/* Marks the references in LOOP for which store motion should be performed
   in REFS_TO_SM.  SM_EXECUTED is the set of references for which store
   motion was performed in one of the outer loops.  */

static void
find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
{
  bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
  unsigned i;
  bitmap_iterator bi;
  im_mem_ref *ref;

  EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      if (can_sm_ref_p (loop, ref))
        bitmap_set_bit (refs_to_sm, i);
    }
}
/* Checks whether LOOP (with exits stored in the EXITS array) is suitable
   for a store motion optimization (i.e. whether we can insert statements
   on its exits).  */

static bool
loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
                      vec<edge> exits)
{
  unsigned i;
  edge ex;

  FOR_EACH_VEC_ELT (exits, i, ex)
    if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
      return false;

  return true;
}
/* Try to perform store motion for all memory references modified inside
   LOOP.  SM_EXECUTED is the bitmap of the memory references for which
   store motion was executed in one of the outer loops.  */

static void
store_motion_loop (struct loop *loop, bitmap sm_executed)
{
  vec<edge> exits = get_loop_exit_edges (loop);
  struct loop *subloop;
  bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);

  if (loop_suitable_for_sm (loop, exits))
    {
      find_refs_for_sm (loop, sm_executed, sm_in_loop);
      hoist_memory_references (loop, sm_in_loop, exits);
    }
  exits.release ();

  bitmap_ior_into (sm_executed, sm_in_loop);
  for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
    store_motion_loop (subloop, sm_executed);
  bitmap_and_compl_into (sm_executed, sm_in_loop);
  BITMAP_FREE (sm_in_loop);
}
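
/* Note (descriptive only): SM_EXECUTED is extended with SM_IN_LOOP before
   recursing into the subloops so that they do not redo store motion that
   was already performed at this level, and the bits are removed again
   afterwards so that sibling loops are not affected.  */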
/* Try to perform store motion for all memory references modified inside
   loops.  */

static void
store_motion (void)
{
  struct loop *loop;
  bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);

  for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
    store_motion_loop (loop, sm_executed);

  BITMAP_FREE (sm_executed);
  gsi_commit_edge_inserts ();
}
/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
   for each such basic block bb records the outermost loop for which
   execution of its header implies execution of bb.  CONTAINS_CALL is the
   bitmap of blocks that contain a nonpure call.  */

static void
fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call)
{
  basic_block bb = NULL, *bbs, last = NULL;
  unsigned i;
  edge e;
  struct loop *inn_loop = loop;

  if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
    {
      bbs = get_loop_body_in_dom_order (loop);

      for (i = 0; i < loop->num_nodes; i++)
        {
          edge_iterator ei;
          bb = bbs[i];

          if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
            last = bb;

          if (bitmap_bit_p (contains_call, bb->index))
            break;

          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              /* If there is an exit from this BB.  */
              if (!flow_bb_inside_loop_p (loop, e->dest))
                break;
              /* Or we enter a possibly non-finite loop.  */
              if (flow_loop_nested_p (bb->loop_father,
                                      e->dest->loop_father)
                  && ! finite_loop_p (e->dest->loop_father))
                break;
            }
          if (e)
            break;

          /* A loop might be infinite (TODO use simple loop analysis
             to disprove this if possible).  */
          if (bb->flags & BB_IRREDUCIBLE_LOOP)
            break;

          if (!flow_bb_inside_loop_p (inn_loop, bb))
            break;

          if (bb->loop_father->header == bb)
            {
              if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
                break;

              /* In a loop that is always entered we may proceed anyway.
                 But record that we entered it and stop once we leave it.  */
              inn_loop = bb->loop_father;
            }
        }

      while (1)
        {
          SET_ALWAYS_EXECUTED_IN (last, loop);
          if (last == loop->header)
            break;
          last = get_immediate_dominator (CDI_DOMINATORS, last);
        }

      free (bbs);
    }

  for (loop = loop->inner; loop; loop = loop->next)
    fill_always_executed_in_1 (loop, contains_call);
}
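
/* Illustration (not actual pass output): in a loop of the form

     for (;;)
       {
         x = a + b;      <- always executed when the header executes
         if (cond)
           y = *p;       <- executed only conditionally
         i++;            <- always executed
       }

   the header block and the block containing "i++" get ALWAYS_EXECUTED_IN
   set to this loop, while the conditional block does not.  */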
/* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
   for each such basic block bb records the outermost loop for which
   execution of its header implies execution of bb.  */

static void
fill_always_executed_in (void)
{
  basic_block bb;
  struct loop *loop;

  auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
  bitmap_clear (contains_call);
  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          if (nonpure_call_p (gsi_stmt (gsi)))
            break;
        }

      if (!gsi_end_p (gsi))
        bitmap_set_bit (contains_call, bb->index);
    }

  for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
    fill_always_executed_in_1 (loop, contains_call);
}
/* Compute the global information needed by the loop invariant motion pass.  */

static void
tree_ssa_lim_initialize (void)
{
  struct loop *loop;
  unsigned i;

  bitmap_obstack_initialize (&lim_bitmap_obstack);
  gcc_obstack_init (&mem_ref_obstack);
  lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;

  if (flag_tm)
    compute_transaction_bits ();

  alloc_aux_for_edges (0);

  memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
  memory_accesses.refs_list.create (100);
  /* Allocate a special, unanalyzable mem-ref with ID zero.  */
  memory_accesses.refs_list.quick_push
    (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));

  memory_accesses.refs_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
  memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));

  for (i = 0; i < number_of_loops (cfun); i++)
    {
      bitmap_initialize (&memory_accesses.refs_in_loop[i],
                         &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
                         &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
                         &lim_bitmap_obstack);
    }

  memory_accesses.ttae_cache = NULL;

  /* Initialize bb_loop_postorder with a mapping from loop->num to
     its postorder index.  */
  i = 0;
  bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    bb_loop_postorder[loop->num] = i++;
}
/* Cleans up after the invariant motion pass.  */

static void
tree_ssa_lim_finalize (void)
{
  basic_block bb;
  unsigned i;
  im_mem_ref *ref;

  free_aux_for_edges ();

  FOR_EACH_BB_FN (bb, cfun)
    SET_ALWAYS_EXECUTED_IN (bb, NULL);

  bitmap_obstack_release (&lim_bitmap_obstack);
  delete lim_aux_data_map;

  delete memory_accesses.refs;
  memory_accesses.refs = NULL;

  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    memref_free (ref);
  memory_accesses.refs_list.release ();
  obstack_free (&mem_ref_obstack, NULL);

  memory_accesses.refs_in_loop.release ();
  memory_accesses.refs_stored_in_loop.release ();
  memory_accesses.all_refs_stored_in_loop.release ();

  if (memory_accesses.ttae_cache)
    free_affine_expand_cache (&memory_accesses.ttae_cache);

  free (bb_loop_postorder);
}
/* Moves invariants from loops.  Only "expensive" invariants are moved out --
   i.e. those that are likely to be a win regardless of the register
   pressure.  */

static unsigned int
tree_ssa_lim (void)
{
  unsigned int todo;

  tree_ssa_lim_initialize ();

  /* Gathers information about memory accesses in the loops.  */
  analyze_memory_references ();

  /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
  fill_always_executed_in ();

  /* For each statement determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
  invariantness_dom_walker (CDI_DOMINATORS)
    .walk (cfun->cfg->x_entry_block_ptr);

  /* Execute store motion.  Force the necessary invariants to be moved
     out of the loops as well.  */
  store_motion ();

  /* Move the expressions that are expensive enough.  */
  todo = move_computations ();

  tree_ssa_lim_finalize ();

  return todo;
}
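
/* Illustration of the overall effect of the pass (a source-level sketch
   only; "tmp" is a made-up name for the introduced temporary):

     for (i = 0; i < n; i++)
       a[i] = x * y + z;

   becomes, provided the invariant computation is deemed expensive enough,

     tmp = x * y + z;
     for (i = 0; i < n; i++)
       a[i] = tmp;

   with the hoisted computation placed in the loop preheader.  */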
/* Loop invariant motion pass.  */

namespace {

const pass_data pass_data_lim =
{
  GIMPLE_PASS, /* type */
  "lim", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LIM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lim : public gimple_opt_pass
{
public:
  pass_lim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lim, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lim (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_loop_im != 0; }
  virtual unsigned int execute (function *);

}; // class pass_lim

unsigned int
pass_lim::execute (function *fun)
{
  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);

  if (number_of_loops (fun) <= 1)
    return 0;
  unsigned int todo = tree_ssa_lim ();

  if (!in_loop_pipeline)
    loop_optimizer_finalize ();

  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_lim (gcc::context *ctxt)
{
  return new pass_lim (ctxt);
}