gcc/tree-ssa-loop-im.c
1 /* Loop invariant motion.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "tree-eh.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-manip.h"
37 #include "tree-ssa-loop.h"
38 #include "tree-into-ssa.h"
39 #include "cfgloop.h"
40 #include "tree-affine.h"
41 #include "tree-ssa-propagate.h"
42 #include "trans-mem.h"
43 #include "gimple-fold.h"
44 #include "tree-scalar-evolution.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "alias.h"
47 #include "builtins.h"
48 #include "tree-dfa.h"
49 #include "dbgcnt.h"
51 /* TODO: Support for predicated code motion. I.e.
53 while (1)
55 if (cond)
57 a = inv;
58 something;
62 Where COND and INV are invariants, but evaluating INV may trap or be
63 invalid for some other reason if !COND. This may be transformed to
65 if (cond)
66 a = inv;
67 while (1)
69 if (cond)
70 something;
71 } */
73 /* The auxiliary data kept for each statement. */
75 struct lim_aux_data
77 class loop *max_loop; /* The outermost loop in that the statement
78 is invariant. */
80 class loop *tgt_loop; /* The loop out of that we want to move the
81 invariant. */
83 class loop *always_executed_in;
84 /* The outermost loop for that we are sure
85 the statement is executed if the loop
86 is entered. */
88 unsigned cost; /* Cost of the computation performed by the
89 statement. */
91 unsigned ref; /* The simple_mem_ref in this stmt or 0. */
93 vec<gimple *> depends; /* Vector of statements that must be also
94 hoisted out of the loop when this statement
95 is hoisted; i.e. those that define the
96 operands of the statement and are inside of
97 the MAX_LOOP loop. */
100 /* Maps statements to their lim_aux_data. */
102 static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
104 /* Description of a memory reference location. */
106 struct mem_ref_loc
108 tree *ref; /* The reference itself. */
109 gimple *stmt; /* The statement in that it occurs. */
113 /* Description of a memory reference. */
115 class im_mem_ref
117 public:
118 unsigned id : 30; /* ID assigned to the memory reference
119 (its index in memory_accesses.refs_list) */
120 unsigned ref_canonical : 1; /* Whether mem.ref was canonicalized. */
121 unsigned ref_decomposed : 1; /* Whether the ref was hashed from mem. */
122 hashval_t hash; /* Its hash value. */
124 /* The memory access itself and associated caching of alias-oracle
125 query meta-data. We are using mem.ref == error_mark_node for the
126 case the reference is represented by its single access stmt
127 in accesses_in_loop[0]. */
128 ao_ref mem;
130 bitmap stored; /* The set of loops in that this memory location
131 is stored to. */
132 bitmap loaded; /* The set of loops in that this memory location
133 is loaded from. */
134 vec<mem_ref_loc> accesses_in_loop;
135 /* The locations of the accesses. */
137 /* The following set is computed on demand. */
138 bitmap_head dep_loop; /* The set of loops in that the memory
139 reference is {in,}dependent in
140 different modes. */
143 /* We use six bits per loop in the ref->dep_loop bitmap to record
144 the dep_kind x dep_state combinations. */
146 enum dep_kind { lim_raw, sm_war, sm_waw };
147 enum dep_state { dep_unknown, dep_independent, dep_dependent };
149 /* Populate the loop dependence cache of REF for LOOP, KIND with STATE. */
151 static void
152 record_loop_dependence (class loop *loop, im_mem_ref *ref,
153 dep_kind kind, dep_state state)
155 gcc_assert (state != dep_unknown);
156 unsigned bit = 6 * loop->num + kind * 2 + (state == dep_dependent ? 1 : 0);
157 bitmap_set_bit (&ref->dep_loop, bit);
160 /* Query the loop dependence cache of REF for LOOP, KIND. */
162 static dep_state
163 query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
165 unsigned first_bit = 6 * loop->num + kind * 2;
166 if (bitmap_bit_p (&ref->dep_loop, first_bit))
167 return dep_independent;
168 else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
169 return dep_dependent;
170 return dep_unknown;
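/* As an illustrative example of the encoding above: for a loop with
   loop->num == 2 and kind == sm_war (== 1) the cache occupies bits
   6*2 + 1*2 = 14 and 15 of ref->dep_loop -- bit 14 set means
   dep_independent, bit 15 set means dep_dependent, neither set means
   dep_unknown.  A minimal usage sketch, with LOOP and REF assumed to
   be set up elsewhere:

     if (query_loop_dependence (loop, ref, sm_war) == dep_unknown)
       record_loop_dependence (loop, ref, sm_war, dep_independent);
 */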
173 /* Mem_ref hashtable helpers. */
175 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
177 typedef ao_ref *compare_type;
178 static inline hashval_t hash (const im_mem_ref *);
179 static inline bool equal (const im_mem_ref *, const ao_ref *);
182 /* A hash function for class im_mem_ref object OBJ. */
184 inline hashval_t
185 mem_ref_hasher::hash (const im_mem_ref *mem)
187 return mem->hash;
190 /* An equality function for class im_mem_ref object MEM1 with
191 memory reference OBJ2. */
193 inline bool
194 mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
196 if (obj2->max_size_known_p ())
197 return (mem1->ref_decomposed
198 && ((TREE_CODE (mem1->mem.base) == MEM_REF
199 && TREE_CODE (obj2->base) == MEM_REF
200 && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
201 TREE_OPERAND (obj2->base, 0), 0)
202 && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
203 mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
204 || (operand_equal_p (mem1->mem.base, obj2->base, 0)
205 && known_eq (mem1->mem.offset, obj2->offset)))
206 && known_eq (mem1->mem.size, obj2->size)
207 && known_eq (mem1->mem.max_size, obj2->max_size)
208 && mem1->mem.volatile_p == obj2->volatile_p
209 && (mem1->mem.ref_alias_set == obj2->ref_alias_set
210 /* We are not canonicalizing alias-sets but for the
211 special-case we didn't canonicalize yet and the
212 incoming ref is an alias-set zero MEM we pick
213 the correct one already. */
214 || (!mem1->ref_canonical
215 && (TREE_CODE (obj2->ref) == MEM_REF
216 || TREE_CODE (obj2->ref) == TARGET_MEM_REF)
217 && obj2->ref_alias_set == 0)
218 /* Likewise if there's a canonical ref with alias-set zero. */
219 || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
220 && types_compatible_p (TREE_TYPE (mem1->mem.ref),
221 TREE_TYPE (obj2->ref)));
222 else
223 return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
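/* In other words, when the incoming ao_ref has a known constant extent
   the comparison is done on the decomposed base, offset and size, so
   structurally different trees covering the same bytes can match; when
   max_size is unknown (as for the strict-equality lookup that
   temporarily sets aor.max_size to -1 below) only a structurally
   identical tree compares equal via operand_equal_p.  */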
227 /* Description of memory accesses in loops. */
229 static struct
231 /* The hash table of memory references accessed in loops. */
232 hash_table<mem_ref_hasher> *refs;
234 /* The list of memory references. */
235 vec<im_mem_ref *> refs_list;
237 /* The set of memory references accessed in each loop. */
238 vec<bitmap_head> refs_loaded_in_loop;
240 /* The set of memory references stored in each loop. */
241 vec<bitmap_head> refs_stored_in_loop;
243 /* The set of memory references stored in each loop, including subloops. */
244 vec<bitmap_head> all_refs_stored_in_loop;
246 /* Cache for expanding memory addresses. */
247 hash_map<tree, name_expansion *> *ttae_cache;
248 } memory_accesses;
250 /* Obstack for the bitmaps in the above data structures. */
251 static bitmap_obstack lim_bitmap_obstack;
252 static obstack mem_ref_obstack;
254 static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
255 static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
256 static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
258 /* Minimum cost of an expensive expression. */
259 #define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
261 /* The outermost loop for which execution of the header guarantees that the
262 block will be executed. */
263 #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
264 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
266 /* ID of the shared unanalyzable mem. */
267 #define UNANALYZABLE_MEM_ID 0
269 /* Whether the reference was analyzable. */
270 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
272 static struct lim_aux_data *
273 init_lim_data (gimple *stmt)
275 lim_aux_data *p = XCNEW (struct lim_aux_data);
276 lim_aux_data_map->put (stmt, p);
278 return p;
281 static struct lim_aux_data *
282 get_lim_data (gimple *stmt)
284 lim_aux_data **p = lim_aux_data_map->get (stmt);
285 if (!p)
286 return NULL;
288 return *p;
291 /* Releases the memory occupied by DATA. */
293 static void
294 free_lim_aux_data (struct lim_aux_data *data)
296 data->depends.release ();
297 free (data);
300 static void
301 clear_lim_data (gimple *stmt)
303 lim_aux_data **p = lim_aux_data_map->get (stmt);
304 if (!p)
305 return;
307 free_lim_aux_data (*p);
308 *p = NULL;
312 /* The possibilities of statement movement. */
313 enum move_pos
315 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
316 MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement
317 to become executed -- memory accesses, ... */
318 MOVE_POSSIBLE /* Unlimited movement. */
322 /* If it is possible to hoist the statement STMT unconditionally,
323 returns MOVE_POSSIBLE.
324 If it is possible to hoist the statement STMT, but we must avoid making
325 it executed if it would not be executed in the original program (e.g.
326 because it may trap), return MOVE_PRESERVE_EXECUTION.
327 Otherwise return MOVE_IMPOSSIBLE. */
329 enum move_pos
330 movement_possibility (gimple *stmt)
332 tree lhs;
333 enum move_pos ret = MOVE_POSSIBLE;
335 if (flag_unswitch_loops
336 && gimple_code (stmt) == GIMPLE_COND)
338 /* If we perform unswitching, force the operands of the invariant
339 condition to be moved out of the loop. */
340 return MOVE_POSSIBLE;
343 if (gimple_code (stmt) == GIMPLE_PHI
344 && gimple_phi_num_args (stmt) <= 2
345 && !virtual_operand_p (gimple_phi_result (stmt))
346 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
347 return MOVE_POSSIBLE;
349 if (gimple_get_lhs (stmt) == NULL_TREE)
350 return MOVE_IMPOSSIBLE;
352 if (gimple_vdef (stmt))
353 return MOVE_IMPOSSIBLE;
355 if (stmt_ends_bb_p (stmt)
356 || gimple_has_volatile_ops (stmt)
357 || gimple_has_side_effects (stmt)
358 || stmt_could_throw_p (cfun, stmt))
359 return MOVE_IMPOSSIBLE;
361 if (is_gimple_call (stmt))
363 /* While a pure or const call is guaranteed to have no side effects, we
364 cannot move it arbitrarily. Consider code like
366 char *s = something ();
368 while (1)
370 if (s)
371 t = strlen (s);
372 else
373 t = 0;
376 Here the strlen call cannot be moved out of the loop, even though
377 s is invariant. In addition to possibly creating a call with
378 invalid arguments, moving out a function call that is not executed
379 may cause performance regressions in case the call is costly and
380 not executed at all. */
381 ret = MOVE_PRESERVE_EXECUTION;
382 lhs = gimple_call_lhs (stmt);
384 else if (is_gimple_assign (stmt))
385 lhs = gimple_assign_lhs (stmt);
386 else
387 return MOVE_IMPOSSIBLE;
389 if (TREE_CODE (lhs) == SSA_NAME
390 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
391 return MOVE_IMPOSSIBLE;
393 if (TREE_CODE (lhs) != SSA_NAME
394 || gimple_could_trap_p (stmt))
395 return MOVE_PRESERVE_EXECUTION;
397 /* Non local loads in a transaction cannot be hoisted out. Well,
398 unless the load happens on every path out of the loop, but we
399 don't take this into account yet. */
400 if (flag_tm
401 && gimple_in_transaction (stmt)
402 && gimple_assign_single_p (stmt))
404 tree rhs = gimple_assign_rhs1 (stmt);
405 if (DECL_P (rhs) && is_global_var (rhs))
407 if (dump_file)
409 fprintf (dump_file, "Cannot hoist conditional load of ");
410 print_generic_expr (dump_file, rhs, TDF_SLIM);
411 fprintf (dump_file, " because it is in a transaction.\n");
413 return MOVE_IMPOSSIBLE;
417 return ret;
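/* As an illustration of the classification, in

     for (i = 0; i < n; i++)
       {
	 t = a / b;	<- may trap (division by zero): MOVE_PRESERVE_EXECUTION
	 u = x + y;	<- cannot trap: MOVE_POSSIBLE
	 *p = t + u;	<- has a VDEF (store): MOVE_IMPOSSIBLE
       }

   the variable names are arbitrary and only serve as an example.  */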
420 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
421 loop to that we could move the expression using DEF if it did not have
422 other operands, i.e. the outermost loop enclosing LOOP in that the value
423 of DEF is invariant. */
425 static class loop *
426 outermost_invariant_loop (tree def, class loop *loop)
428 gimple *def_stmt;
429 basic_block def_bb;
430 class loop *max_loop;
431 struct lim_aux_data *lim_data;
433 if (!def)
434 return superloop_at_depth (loop, 1);
436 if (TREE_CODE (def) != SSA_NAME)
438 gcc_assert (is_gimple_min_invariant (def));
439 return superloop_at_depth (loop, 1);
442 def_stmt = SSA_NAME_DEF_STMT (def);
443 def_bb = gimple_bb (def_stmt);
444 if (!def_bb)
445 return superloop_at_depth (loop, 1);
447 max_loop = find_common_loop (loop, def_bb->loop_father);
449 lim_data = get_lim_data (def_stmt);
450 if (lim_data != NULL && lim_data->max_loop != NULL)
451 max_loop = find_common_loop (max_loop,
452 loop_outer (lim_data->max_loop));
453 if (max_loop == loop)
454 return NULL;
455 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
457 return max_loop;
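/* As an example, with

     n_1 = ...;			<- defined before the outer loop
     for (i = ...)		<- loop A
       for (j = ...)		<- loop B
	 ... = n_1 + j_2;

   outermost_invariant_loop (n_1, B) is A, the outermost loop in that
   n_1 is still invariant, while outermost_invariant_loop (j_2, B) is
   NULL because j_2 varies inside B.  The names A, B, n_1 and j_2 are
   only illustrative.  */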
460 /* DATA is a structure containing information associated with a statement
461 inside LOOP. DEF is one of the operands of this statement.
463 Find the outermost loop enclosing LOOP in that value of DEF is invariant
464 and record this in DATA->max_loop field. If DEF itself is defined inside
465 this loop as well (i.e. we need to hoist it out of the loop if we want
466 to hoist the statement represented by DATA), record the statement in that
467 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
468 add the cost of the computation of DEF to the DATA->cost.
470 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
472 static bool
473 add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
474 bool add_cost)
476 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
477 basic_block def_bb = gimple_bb (def_stmt);
478 class loop *max_loop;
479 struct lim_aux_data *def_data;
481 if (!def_bb)
482 return true;
484 max_loop = outermost_invariant_loop (def, loop);
485 if (!max_loop)
486 return false;
488 if (flow_loop_nested_p (data->max_loop, max_loop))
489 data->max_loop = max_loop;
491 def_data = get_lim_data (def_stmt);
492 if (!def_data)
493 return true;
495 if (add_cost
496 /* Only add the cost if the statement defining DEF is inside LOOP,
497 i.e. if it is likely that by moving the invariants dependent
498 on it, we will be able to avoid creating a new register for
499 it (since it will be only used in these dependent invariants). */
500 && def_bb->loop_father == loop)
501 data->cost += def_data->cost;
503 data->depends.safe_push (def_stmt);
505 return true;
508 /* Returns an estimate for a cost of statement STMT. The values here
509 are just ad-hoc constants, similar to costs for inlining. */
511 static unsigned
512 stmt_cost (gimple *stmt)
514 /* Always try to create possibilities for unswitching. */
515 if (gimple_code (stmt) == GIMPLE_COND
516 || gimple_code (stmt) == GIMPLE_PHI)
517 return LIM_EXPENSIVE;
519 /* We should be hoisting calls if possible. */
520 if (is_gimple_call (stmt))
522 tree fndecl;
524 /* Unless the call is a builtin_constant_p; this always folds to a
525 constant, so moving it is useless. */
526 fndecl = gimple_call_fndecl (stmt);
527 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
528 return 0;
530 return LIM_EXPENSIVE;
533 /* Hoisting memory references out should almost surely be a win. */
534 if (gimple_references_memory_p (stmt))
535 return LIM_EXPENSIVE;
537 if (gimple_code (stmt) != GIMPLE_ASSIGN)
538 return 1;
540 switch (gimple_assign_rhs_code (stmt))
542 case MULT_EXPR:
543 case WIDEN_MULT_EXPR:
544 case WIDEN_MULT_PLUS_EXPR:
545 case WIDEN_MULT_MINUS_EXPR:
546 case DOT_PROD_EXPR:
547 case TRUNC_DIV_EXPR:
548 case CEIL_DIV_EXPR:
549 case FLOOR_DIV_EXPR:
550 case ROUND_DIV_EXPR:
551 case EXACT_DIV_EXPR:
552 case CEIL_MOD_EXPR:
553 case FLOOR_MOD_EXPR:
554 case ROUND_MOD_EXPR:
555 case TRUNC_MOD_EXPR:
556 case RDIV_EXPR:
557 /* Division and multiplication are usually expensive. */
558 return LIM_EXPENSIVE;
560 case LSHIFT_EXPR:
561 case RSHIFT_EXPR:
562 case WIDEN_LSHIFT_EXPR:
563 case LROTATE_EXPR:
564 case RROTATE_EXPR:
565 /* Shifts and rotates are usually expensive. */
566 return LIM_EXPENSIVE;
568 case CONSTRUCTOR:
569 /* Make vector construction cost proportional to the number
570 of elements. */
571 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
573 case SSA_NAME:
574 case PAREN_EXPR:
575 /* Whether or not something is wrapped inside a PAREN_EXPR
576 should not change move cost. Nor should an intermediate
577 unpropagated SSA name copy. */
578 return 0;
580 default:
581 return 1;
585 /* Finds the outermost loop between OUTER and LOOP in that the memory reference
586 REF is independent. If REF is not independent in LOOP, NULL is returned
587 instead. */
589 static class loop *
590 outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
592 class loop *aloop;
594 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
595 return NULL;
597 for (aloop = outer;
598 aloop != loop;
599 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
600 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
601 && ref_indep_loop_p (aloop, ref, lim_raw))
602 return aloop;
604 if (ref_indep_loop_p (loop, ref, lim_raw))
605 return loop;
606 else
607 return NULL;
610 /* If there is a simple load or store to a memory reference in STMT, returns
611 the location of the memory reference, and sets IS_STORE according to whether
612 it is a store or load. Otherwise, returns NULL. */
614 static tree *
615 simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
617 tree *lhs, *rhs;
619 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
620 if (!gimple_assign_single_p (stmt))
621 return NULL;
623 lhs = gimple_assign_lhs_ptr (stmt);
624 rhs = gimple_assign_rhs1_ptr (stmt);
626 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
628 *is_store = false;
629 return rhs;
631 else if (gimple_vdef (stmt)
632 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
634 *is_store = true;
635 return lhs;
637 else
638 return NULL;
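/* For instance, the loads and stores recognized above look like

     x_1 = a[i_2];	<- load: *IS_STORE = false, the rhs location is returned
     a[i_2] = x_1;	<- store: *IS_STORE = true, the lhs location is returned
     *p_1 = *q_2;	<- aggregate copy: neither side matches, NULL is returned

   where the names are only illustrative.  */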
641 /* From a controlling predicate in DOM determine the arguments from
642 the PHI node PHI that are chosen if the predicate evaluates to
643 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
644 they are non-NULL. Returns true if the arguments can be determined,
645 else return false. */
647 static bool
648 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
649 tree *true_arg_p, tree *false_arg_p)
651 edge te, fe;
652 if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
653 &te, &fe))
654 return false;
656 if (true_arg_p)
657 *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
658 if (false_arg_p)
659 *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
661 return true;
664 /* Determine the outermost loop to that it is possible to hoist a statement
665 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
666 the outermost loop in that the value computed by STMT is invariant.
667 If MUST_PRESERVE_EXEC is true, additionally choose the loop so that
668 we preserve whether STMT is executed. It also fills other related
669 information to LIM_DATA (STMT).
671 The function returns false if STMT cannot be hoisted outside of the loop it
672 is defined in, and true otherwise. */
674 static bool
675 determine_max_movement (gimple *stmt, bool must_preserve_exec)
677 basic_block bb = gimple_bb (stmt);
678 class loop *loop = bb->loop_father;
679 class loop *level;
680 struct lim_aux_data *lim_data = get_lim_data (stmt);
681 tree val;
682 ssa_op_iter iter;
684 if (must_preserve_exec)
685 level = ALWAYS_EXECUTED_IN (bb);
686 else
687 level = superloop_at_depth (loop, 1);
688 lim_data->max_loop = level;
690 if (gphi *phi = dyn_cast <gphi *> (stmt))
692 use_operand_p use_p;
693 unsigned min_cost = UINT_MAX;
694 unsigned total_cost = 0;
695 struct lim_aux_data *def_data;
697 /* We will end up promoting dependencies to be unconditionally
698 evaluated. For this reason the PHI cost (and thus the
699 cost we remove from the loop by doing the invariant motion)
700 is that of the cheapest PHI argument dependency chain. */
701 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
703 val = USE_FROM_PTR (use_p);
705 if (TREE_CODE (val) != SSA_NAME)
707 /* Assign cost 1 to constants. */
708 min_cost = MIN (min_cost, 1);
709 total_cost += 1;
710 continue;
712 if (!add_dependency (val, lim_data, loop, false))
713 return false;
715 gimple *def_stmt = SSA_NAME_DEF_STMT (val);
716 if (gimple_bb (def_stmt)
717 && gimple_bb (def_stmt)->loop_father == loop)
719 def_data = get_lim_data (def_stmt);
720 if (def_data)
722 min_cost = MIN (min_cost, def_data->cost);
723 total_cost += def_data->cost;
728 min_cost = MIN (min_cost, total_cost);
729 lim_data->cost += min_cost;
731 if (gimple_phi_num_args (phi) > 1)
733 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
734 gimple *cond;
735 if (gsi_end_p (gsi_last_bb (dom)))
736 return false;
737 cond = gsi_stmt (gsi_last_bb (dom));
738 if (gimple_code (cond) != GIMPLE_COND)
739 return false;
740 /* Verify that this is an extended form of a diamond and
741 the PHI arguments are completely controlled by the
742 predicate in DOM. */
743 if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
744 return false;
746 /* Fold in dependencies and cost of the condition. */
747 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
749 if (!add_dependency (val, lim_data, loop, false))
750 return false;
751 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
752 if (def_data)
753 lim_data->cost += def_data->cost;
756 /* We want to avoid unconditionally executing very expensive
757 operations. As costs for our dependencies cannot be
758 negative just claim we are not invariant for this case.
759 We also are not sure whether the control-flow inside the
760 loop will vanish. */
761 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
762 && !(min_cost != 0
763 && total_cost / min_cost <= 2))
764 return false;
766 /* Assume that the control-flow in the loop will vanish.
767 ??? We should verify this and not artificially increase
768 the cost if that is not the case. */
769 lim_data->cost += stmt_cost (stmt);
772 return true;
774 else
775 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
776 if (!add_dependency (val, lim_data, loop, true))
777 return false;
779 if (gimple_vuse (stmt))
781 im_mem_ref *ref
782 = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
783 if (ref
784 && MEM_ANALYZABLE (ref))
786 lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
787 loop, ref);
788 if (!lim_data->max_loop)
789 return false;
791 else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
792 return false;
795 lim_data->cost += stmt_cost (stmt);
797 return true;
800 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
801 and that one of the operands of this statement is computed by STMT.
802 Ensure that STMT (together with all the statements that define its
803 operands) is hoisted at least out of the loop LEVEL. */
805 static void
806 set_level (gimple *stmt, class loop *orig_loop, class loop *level)
808 class loop *stmt_loop = gimple_bb (stmt)->loop_father;
809 struct lim_aux_data *lim_data;
810 gimple *dep_stmt;
811 unsigned i;
813 stmt_loop = find_common_loop (orig_loop, stmt_loop);
814 lim_data = get_lim_data (stmt);
815 if (lim_data != NULL && lim_data->tgt_loop != NULL)
816 stmt_loop = find_common_loop (stmt_loop,
817 loop_outer (lim_data->tgt_loop));
818 if (flow_loop_nested_p (stmt_loop, level))
819 return;
821 gcc_assert (level == lim_data->max_loop
822 || flow_loop_nested_p (lim_data->max_loop, level));
824 lim_data->tgt_loop = level;
825 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
826 set_level (dep_stmt, orig_loop, level);
829 /* Determines an outermost loop from that we want to hoist the statement STMT.
830 For now we choose the outermost possible loop. TODO -- use profiling
831 information to set it more sanely. */
833 static void
834 set_profitable_level (gimple *stmt)
836 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
839 /* Returns true if STMT is a call that has side effects. */
841 static bool
842 nonpure_call_p (gimple *stmt)
844 if (gimple_code (stmt) != GIMPLE_CALL)
845 return false;
847 return gimple_has_side_effects (stmt);
850 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
852 static gimple *
853 rewrite_reciprocal (gimple_stmt_iterator *bsi)
855 gassign *stmt, *stmt1, *stmt2;
856 tree name, lhs, type;
857 tree real_one;
858 gimple_stmt_iterator gsi;
860 stmt = as_a <gassign *> (gsi_stmt (*bsi));
861 lhs = gimple_assign_lhs (stmt);
862 type = TREE_TYPE (lhs);
864 real_one = build_one_cst (type);
866 name = make_temp_ssa_name (type, NULL, "reciptmp");
867 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
868 gimple_assign_rhs2 (stmt));
869 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
870 gimple_assign_rhs1 (stmt));
872 /* Replace division stmt with reciprocal and multiply stmts.
873 The multiply stmt is not invariant, so update iterator
874 and avoid rescanning. */
875 gsi = *bsi;
876 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
877 gsi_replace (&gsi, stmt2, true);
879 /* Continue processing with invariant reciprocal statement. */
880 return stmt1;
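/* For example, with an invariant divisor b_1 the rewrite above turns

     for (...)
       x_2 = a_3 / b_1;

   into

     for (...)
       {
	 reciptmp_4 = 1.0 / b_1;	<- invariant, can be hoisted
	 x_2 = a_3 * reciptmp_4;
       }

   so that only the multiplication remains in the loop once the
   reciprocal has been moved out.  The SSA names are illustrative.  */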
883 /* Check if the pattern at *BSI is a bittest of the form
884 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
886 static gimple *
887 rewrite_bittest (gimple_stmt_iterator *bsi)
889 gassign *stmt;
890 gimple *stmt1;
891 gassign *stmt2;
892 gimple *use_stmt;
893 gcond *cond_stmt;
894 tree lhs, name, t, a, b;
895 use_operand_p use;
897 stmt = as_a <gassign *> (gsi_stmt (*bsi));
898 lhs = gimple_assign_lhs (stmt);
900 /* Verify that the single use of lhs is a comparison against zero. */
901 if (TREE_CODE (lhs) != SSA_NAME
902 || !single_imm_use (lhs, &use, &use_stmt))
903 return stmt;
904 cond_stmt = dyn_cast <gcond *> (use_stmt);
905 if (!cond_stmt)
906 return stmt;
907 if (gimple_cond_lhs (cond_stmt) != lhs
908 || (gimple_cond_code (cond_stmt) != NE_EXPR
909 && gimple_cond_code (cond_stmt) != EQ_EXPR)
910 || !integer_zerop (gimple_cond_rhs (cond_stmt)))
911 return stmt;
913 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
914 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
915 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
916 return stmt;
918 /* There is a conversion in between possibly inserted by fold. */
919 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
921 t = gimple_assign_rhs1 (stmt1);
922 if (TREE_CODE (t) != SSA_NAME
923 || !has_single_use (t))
924 return stmt;
925 stmt1 = SSA_NAME_DEF_STMT (t);
926 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
927 return stmt;
930 /* Verify that B is loop invariant but A is not. Verify that with
931 all the stmt walking we are still in the same loop. */
932 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
933 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
934 return stmt;
936 a = gimple_assign_rhs1 (stmt1);
937 b = gimple_assign_rhs2 (stmt1);
939 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
940 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
942 gimple_stmt_iterator rsi;
944 /* 1 << B */
945 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
946 build_int_cst (TREE_TYPE (a), 1), b);
947 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
948 stmt1 = gimple_build_assign (name, t);
950 /* A & (1 << B) */
951 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
952 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
953 stmt2 = gimple_build_assign (name, t);
955 /* Replace the SSA_NAME we compare against zero. Adjust
956 the type of zero accordingly. */
957 SET_USE (use, name);
958 gimple_cond_set_rhs (cond_stmt,
959 build_int_cst_type (TREE_TYPE (name),
960 0));
962 /* Don't use gsi_replace here, none of the new assignments sets
963 the variable originally set in stmt. Move bsi to stmt1, and
964 then remove the original stmt, so that we get a chance to
965 retain debug info for it. */
966 rsi = *bsi;
967 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
968 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
969 gimple *to_release = gsi_stmt (rsi);
970 gsi_remove (&rsi, true);
971 release_defs (to_release);
973 return stmt1;
976 return stmt;
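/* For example, when B_3 is invariant but A_2 is not, the rewrite above
   turns

     tmp_1 = A_2 >> B_3;
     tmp_4 = tmp_1 & 1;
     if (tmp_4 != 0) ...

   into

     shifttmp_5 = 1 << B_3;		<- invariant, can be hoisted
     shifttmp_6 = A_2 & shifttmp_5;
     if (shifttmp_6 != 0) ...

   so the shift by the invariant amount can be moved out of the loop.
   The SSA names are illustrative.  */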
979 /* Determine the outermost loops in that statements in basic block BB are
980 invariant, and record them to the LIM_DATA associated with the
981 statements. */
983 static void
984 compute_invariantness (basic_block bb)
986 enum move_pos pos;
987 gimple_stmt_iterator bsi;
988 gimple *stmt;
989 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
990 class loop *outermost = ALWAYS_EXECUTED_IN (bb);
991 struct lim_aux_data *lim_data;
993 if (!loop_outer (bb->loop_father))
994 return;
996 if (dump_file && (dump_flags & TDF_DETAILS))
997 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
998 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1000 /* Look at PHI nodes, but only if there are at most two.
1001 ??? We could relax this further by post-processing the inserted
1002 code and transforming adjacent cond-exprs with the same predicate
1003 to control flow again. */
1004 bsi = gsi_start_phis (bb);
1005 if (!gsi_end_p (bsi)
1006 && ((gsi_next (&bsi), gsi_end_p (bsi))
1007 || (gsi_next (&bsi), gsi_end_p (bsi))))
1008 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1010 stmt = gsi_stmt (bsi);
1012 pos = movement_possibility (stmt);
1013 if (pos == MOVE_IMPOSSIBLE)
1014 continue;
1016 lim_data = get_lim_data (stmt);
1017 if (! lim_data)
1018 lim_data = init_lim_data (stmt);
1019 lim_data->always_executed_in = outermost;
1021 if (!determine_max_movement (stmt, false))
1023 lim_data->max_loop = NULL;
1024 continue;
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1029 print_gimple_stmt (dump_file, stmt, 2);
1030 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1031 loop_depth (lim_data->max_loop),
1032 lim_data->cost);
1035 if (lim_data->cost >= LIM_EXPENSIVE)
1036 set_profitable_level (stmt);
1039 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1041 stmt = gsi_stmt (bsi);
1043 pos = movement_possibility (stmt);
1044 if (pos == MOVE_IMPOSSIBLE)
1046 if (nonpure_call_p (stmt))
1048 maybe_never = true;
1049 outermost = NULL;
1051 /* Make sure to note always_executed_in for stores to make
1052 store-motion work. */
1053 else if (stmt_makes_single_store (stmt))
1055 struct lim_aux_data *lim_data = get_lim_data (stmt);
1056 if (! lim_data)
1057 lim_data = init_lim_data (stmt);
1058 lim_data->always_executed_in = outermost;
1060 continue;
1063 if (is_gimple_assign (stmt)
1064 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1065 == GIMPLE_BINARY_RHS))
1067 tree op0 = gimple_assign_rhs1 (stmt);
1068 tree op1 = gimple_assign_rhs2 (stmt);
1069 class loop *ol1 = outermost_invariant_loop (op1,
1070 loop_containing_stmt (stmt));
1072 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1073 to be hoisted out of loop, saving expensive divide. */
1074 if (pos == MOVE_POSSIBLE
1075 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1076 && flag_unsafe_math_optimizations
1077 && !flag_trapping_math
1078 && ol1 != NULL
1079 && outermost_invariant_loop (op0, ol1) == NULL)
1080 stmt = rewrite_reciprocal (&bsi);
1082 /* If the shift count is invariant, convert (A >> B) & 1 to
1083 A & (1 << B) allowing the bit mask to be hoisted out of the loop
1084 saving an expensive shift. */
1085 if (pos == MOVE_POSSIBLE
1086 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1087 && integer_onep (op1)
1088 && TREE_CODE (op0) == SSA_NAME
1089 && has_single_use (op0))
1090 stmt = rewrite_bittest (&bsi);
1093 lim_data = get_lim_data (stmt);
1094 if (! lim_data)
1095 lim_data = init_lim_data (stmt);
1096 lim_data->always_executed_in = outermost;
1098 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1099 continue;
1101 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1103 lim_data->max_loop = NULL;
1104 continue;
1107 if (dump_file && (dump_flags & TDF_DETAILS))
1109 print_gimple_stmt (dump_file, stmt, 2);
1110 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1111 loop_depth (lim_data->max_loop),
1112 lim_data->cost);
1115 if (lim_data->cost >= LIM_EXPENSIVE)
1116 set_profitable_level (stmt);
1120 /* Hoist the statements in basic block BB out of the loops prescribed by
1121 data stored in LIM_DATA structures associated with each statement. Callback
1122 for walk_dominator_tree. */
1124 unsigned int
1125 move_computations_worker (basic_block bb)
1127 class loop *level;
1128 unsigned cost = 0;
1129 struct lim_aux_data *lim_data;
1130 unsigned int todo = 0;
1132 if (!loop_outer (bb->loop_father))
1133 return todo;
1135 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1137 gassign *new_stmt;
1138 gphi *stmt = bsi.phi ();
1140 lim_data = get_lim_data (stmt);
1141 if (lim_data == NULL)
1143 gsi_next (&bsi);
1144 continue;
1147 cost = lim_data->cost;
1148 level = lim_data->tgt_loop;
1149 clear_lim_data (stmt);
1151 if (!level)
1153 gsi_next (&bsi);
1154 continue;
1157 if (dump_file && (dump_flags & TDF_DETAILS))
1159 fprintf (dump_file, "Moving PHI node\n");
1160 print_gimple_stmt (dump_file, stmt, 0);
1161 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1162 cost, level->num);
1165 if (gimple_phi_num_args (stmt) == 1)
1167 tree arg = PHI_ARG_DEF (stmt, 0);
1168 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1169 TREE_CODE (arg), arg);
1171 else
1173 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1174 gimple *cond = gsi_stmt (gsi_last_bb (dom));
1175 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1176 /* Get the PHI arguments corresponding to the true and false
1177 edges of COND. */
1178 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1179 gcc_assert (arg0 && arg1);
1180 t = build2 (gimple_cond_code (cond), boolean_type_node,
1181 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1182 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1183 COND_EXPR, t, arg0, arg1);
1184 todo |= TODO_cleanup_cfg;
1186 if (!ALWAYS_EXECUTED_IN (bb)
1187 || (ALWAYS_EXECUTED_IN (bb) != level
1188 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
1189 reset_flow_sensitive_info (gimple_assign_lhs (new_stmt));
1190 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1191 remove_phi_node (&bsi, false);
1194 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1196 edge e;
1198 gimple *stmt = gsi_stmt (bsi);
1200 lim_data = get_lim_data (stmt);
1201 if (lim_data == NULL)
1203 gsi_next (&bsi);
1204 continue;
1207 cost = lim_data->cost;
1208 level = lim_data->tgt_loop;
1209 clear_lim_data (stmt);
1211 if (!level)
1213 gsi_next (&bsi);
1214 continue;
1217 /* We do not really want to move conditionals out of the loop; we just
1218 placed it here to force its operands to be moved if necessary. */
1219 if (gimple_code (stmt) == GIMPLE_COND)
1220 continue;
1222 if (dump_file && (dump_flags & TDF_DETAILS))
1224 fprintf (dump_file, "Moving statement\n");
1225 print_gimple_stmt (dump_file, stmt, 0);
1226 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1227 cost, level->num);
1230 e = loop_preheader_edge (level);
1231 gcc_assert (!gimple_vdef (stmt));
1232 if (gimple_vuse (stmt))
1234 /* The new VUSE is the one from the virtual PHI in the loop
1235 header or the one already present. */
1236 gphi_iterator gsi2;
1237 for (gsi2 = gsi_start_phis (e->dest);
1238 !gsi_end_p (gsi2); gsi_next (&gsi2))
1240 gphi *phi = gsi2.phi ();
1241 if (virtual_operand_p (gimple_phi_result (phi)))
1243 SET_USE (gimple_vuse_op (stmt),
1244 PHI_ARG_DEF_FROM_EDGE (phi, e));
1245 break;
1249 gsi_remove (&bsi, false);
1250 if (gimple_has_lhs (stmt)
1251 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1252 && (!ALWAYS_EXECUTED_IN (bb)
1253 || !(ALWAYS_EXECUTED_IN (bb) == level
1254 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1255 reset_flow_sensitive_info (gimple_get_lhs (stmt));
1256 /* In case this is a stmt that is not unconditionally executed
1257 when the target loop header is executed and the stmt may
1258 invoke undefined integer or pointer overflow rewrite it to
1259 unsigned arithmetic. */
1260 if (is_gimple_assign (stmt)
1261 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1262 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1263 && arith_code_with_undefined_signed_overflow
1264 (gimple_assign_rhs_code (stmt))
1265 && (!ALWAYS_EXECUTED_IN (bb)
1266 || !(ALWAYS_EXECUTED_IN (bb) == level
1267 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1268 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1269 else
1270 gsi_insert_on_edge (e, stmt);
1273 return todo;
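/* As an illustration of the PHI handling above, an invariant

     x_1 = PHI <a_2 (true edge), b_3 (false edge)>

   whose arguments are controlled by an invariant condition cond in the
   immediate dominator is materialized on the loop preheader edge as

     x_1 = cond ? a_2 : b_3;

   and the now-redundant control flow inside the loop is left to CFG
   cleanup (TODO_cleanup_cfg).  The names are illustrative.  */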
1276 /* Checks whether the statement defining variable *INDEX can be hoisted
1277 out of the loop passed in DATA. Callback for for_each_index. */
1279 static bool
1280 may_move_till (tree ref, tree *index, void *data)
1282 class loop *loop = (class loop *) data, *max_loop;
1284 /* If REF is an array reference, check also that the step and the lower
1285 bound is invariant in LOOP. */
1286 if (TREE_CODE (ref) == ARRAY_REF)
1288 tree step = TREE_OPERAND (ref, 3);
1289 tree lbound = TREE_OPERAND (ref, 2);
1291 max_loop = outermost_invariant_loop (step, loop);
1292 if (!max_loop)
1293 return false;
1295 max_loop = outermost_invariant_loop (lbound, loop);
1296 if (!max_loop)
1297 return false;
1300 max_loop = outermost_invariant_loop (*index, loop);
1301 if (!max_loop)
1302 return false;
1304 return true;
1307 /* If OP is SSA NAME, force the statement that defines it to be
1308 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
1310 static void
1311 force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1313 gimple *stmt;
1315 if (!op
1316 || is_gimple_min_invariant (op))
1317 return;
1319 gcc_assert (TREE_CODE (op) == SSA_NAME);
1321 stmt = SSA_NAME_DEF_STMT (op);
1322 if (gimple_nop_p (stmt))
1323 return;
1325 set_level (stmt, orig_loop, loop);
1328 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1329 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1330 for_each_index. */
1332 struct fmt_data
1334 class loop *loop;
1335 class loop *orig_loop;
1338 static bool
1339 force_move_till (tree ref, tree *index, void *data)
1341 struct fmt_data *fmt_data = (struct fmt_data *) data;
1343 if (TREE_CODE (ref) == ARRAY_REF)
1345 tree step = TREE_OPERAND (ref, 3);
1346 tree lbound = TREE_OPERAND (ref, 2);
1348 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1349 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1352 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1354 return true;
1357 /* A function to free the mem_ref object OBJ. */
1359 static void
1360 memref_free (class im_mem_ref *mem)
1362 mem->accesses_in_loop.release ();
1365 /* Allocates and returns a memory reference description for MEM whose hash
1366 value is HASH and id is ID. */
1368 static im_mem_ref *
1369 mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1371 im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1372 if (mem)
1373 ref->mem = *mem;
1374 else
1375 ao_ref_init (&ref->mem, error_mark_node);
1376 ref->id = id;
1377 ref->ref_canonical = false;
1378 ref->ref_decomposed = false;
1379 ref->hash = hash;
1380 ref->stored = NULL;
1381 ref->loaded = NULL;
1382 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1383 ref->accesses_in_loop.create (1);
1385 return ref;
1388 /* Records memory reference location *LOC in LOOP to the memory reference
1389 description REF. The reference occurs in statement STMT. */
1391 static void
1392 record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1394 mem_ref_loc aref;
1395 aref.stmt = stmt;
1396 aref.ref = loc;
1397 ref->accesses_in_loop.safe_push (aref);
1400 /* Set the LOOP bit in REF stored bitmap and allocate that if
1401 necessary. Return whether a bit was changed. */
1403 static bool
1404 set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1406 if (!ref->stored)
1407 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1408 return bitmap_set_bit (ref->stored, loop->num);
1411 /* Marks reference REF as stored in LOOP. */
1413 static void
1414 mark_ref_stored (im_mem_ref *ref, class loop *loop)
1416 while (loop != current_loops->tree_root
1417 && set_ref_stored_in_loop (ref, loop))
1418 loop = loop_outer (loop);
1421 /* Set the LOOP bit in REF loaded bitmap and allocate that if
1422 necessary. Return whether a bit was changed. */
1424 static bool
1425 set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1427 if (!ref->loaded)
1428 ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
1429 return bitmap_set_bit (ref->loaded, loop->num);
1432 /* Marks reference REF as loaded in LOOP. */
1434 static void
1435 mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1437 while (loop != current_loops->tree_root
1438 && set_ref_loaded_in_loop (ref, loop))
1439 loop = loop_outer (loop);
1442 /* Gathers memory references in statement STMT in LOOP, storing the
1443 information about them in the memory_accesses structure. Marks
1444 the vops accessed through unrecognized statements there as
1445 well. */
1447 static void
1448 gather_mem_refs_stmt (class loop *loop, gimple *stmt)
1450 tree *mem = NULL;
1451 hashval_t hash;
1452 im_mem_ref **slot;
1453 im_mem_ref *ref;
1454 bool is_stored;
1455 unsigned id;
1457 if (!gimple_vuse (stmt))
1458 return;
1460 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1461 if (!mem && is_gimple_assign (stmt))
1463 /* For aggregate copies record distinct references but use them
1464 only for disambiguation purposes. */
1465 id = memory_accesses.refs_list.length ();
1466 ref = mem_ref_alloc (NULL, 0, id);
1467 memory_accesses.refs_list.safe_push (ref);
1468 if (dump_file && (dump_flags & TDF_DETAILS))
1470 fprintf (dump_file, "Unhandled memory reference %u: ", id);
1471 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1473 record_mem_ref_loc (ref, stmt, mem);
1474 is_stored = gimple_vdef (stmt);
1476 else if (!mem)
1478 /* We use the shared mem_ref for all unanalyzable refs. */
1479 id = UNANALYZABLE_MEM_ID;
1480 ref = memory_accesses.refs_list[id];
1481 if (dump_file && (dump_flags & TDF_DETAILS))
1483 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1484 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1486 is_stored = gimple_vdef (stmt);
1488 else
1490 /* We are looking for equal refs that might differ in structure
1491 such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but
1492 make sure we can canonicalize the ref in the hashtable if
1493 non-operand_equal_p refs are found. For the lookup we mark
1494 the case we want strict equality with aor.max_size == -1. */
1495 ao_ref aor;
1496 ao_ref_init (&aor, *mem);
1497 ao_ref_base (&aor);
1498 ao_ref_alias_set (&aor);
1499 HOST_WIDE_INT offset, size, max_size;
1500 poly_int64 saved_maxsize = aor.max_size, mem_off;
1501 tree mem_base;
1502 bool ref_decomposed;
1503 if (aor.max_size_known_p ()
1504 && aor.offset.is_constant (&offset)
1505 && aor.size.is_constant (&size)
1506 && aor.max_size.is_constant (&max_size)
1507 && size == max_size
1508 && (size % BITS_PER_UNIT) == 0
1509 /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1510 size. Make sure this is consistent with the extraction. */
1511 && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1512 && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1513 aor.size)
1514 && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1516 ref_decomposed = true;
1517 tree base = ao_ref_base (&aor);
1518 poly_int64 moffset;
1519 HOST_WIDE_INT mcoffset;
1520 if (TREE_CODE (base) == MEM_REF
1521 && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
1522 && moffset.is_constant (&mcoffset))
1524 hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
1525 hash = iterative_hash_host_wide_int (mcoffset, hash);
1527 else
1529 hash = iterative_hash_expr (base, 0);
1530 hash = iterative_hash_host_wide_int (offset, hash);
1532 hash = iterative_hash_host_wide_int (size, hash);
1534 else
1536 ref_decomposed = false;
1537 hash = iterative_hash_expr (aor.ref, 0);
1538 aor.max_size = -1;
1540 slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1541 aor.max_size = saved_maxsize;
1542 if (*slot)
1544 if (!(*slot)->ref_canonical
1545 && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1547 /* If we didn't yet canonicalize the hashtable ref (which
1548 we'll end up using for code insertion) and hit a second
1549 equal ref that is not structurally equivalent create
1550 a canonical ref which is a bare MEM_REF. */
1551 if (TREE_CODE (*mem) == MEM_REF
1552 || TREE_CODE (*mem) == TARGET_MEM_REF)
1554 (*slot)->mem.ref = *mem;
1555 (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1557 else
1559 tree ref_alias_type = reference_alias_ptr_type (*mem);
1560 unsigned int ref_align = get_object_alignment (*mem);
1561 tree ref_type = TREE_TYPE (*mem);
1562 tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1563 unshare_expr (mem_base));
1564 if (TYPE_ALIGN (ref_type) != ref_align)
1565 ref_type = build_aligned_type (ref_type, ref_align);
1566 (*slot)->mem.ref
1567 = fold_build2 (MEM_REF, ref_type, tmp,
1568 build_int_cst (ref_alias_type, mem_off));
1569 if ((*slot)->mem.volatile_p)
1570 TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
1571 gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1572 && is_gimple_mem_ref_addr
1573 (TREE_OPERAND ((*slot)->mem.ref,
1574 0)));
1575 (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1577 (*slot)->ref_canonical = true;
1579 ref = *slot;
1580 id = ref->id;
1582 else
1584 id = memory_accesses.refs_list.length ();
1585 ref = mem_ref_alloc (&aor, hash, id);
1586 ref->ref_decomposed = ref_decomposed;
1587 memory_accesses.refs_list.safe_push (ref);
1588 *slot = ref;
1590 if (dump_file && (dump_flags & TDF_DETAILS))
1592 fprintf (dump_file, "Memory reference %u: ", id);
1593 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1594 fprintf (dump_file, "\n");
1598 record_mem_ref_loc (ref, stmt, mem);
1600 if (is_stored)
1602 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1603 mark_ref_stored (ref, loop);
1605 /* A non-simple memory op is also a read when it is a write. */
1606 if (!is_stored || id == UNANALYZABLE_MEM_ID
1607 || ref->mem.ref == error_mark_node)
1609 bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
1610 mark_ref_loaded (ref, loop);
1612 init_lim_data (stmt)->ref = ref->id;
1613 return;
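/* For example, if a loop contains the two structurally different but
   semantically equal references

     ... = a.b;
     MEM[&a + 4] = ...;

   they decompose to the same base, offset and size and therefore share
   one im_mem_ref; on seeing the second form the stored mem.ref is
   canonicalized to the MEM_REF form so that later code insertion uses
   a tree valid for both.  The 4-byte offset of the field b is only
   illustrative.  */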
1616 static unsigned *bb_loop_postorder;
1618 /* qsort sort function to sort blocks after their loop fathers postorder. */
1620 static int
1621 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1622 void *bb_loop_postorder_)
1624 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1625 basic_block bb1 = *(const basic_block *)bb1_;
1626 basic_block bb2 = *(const basic_block *)bb2_;
1627 class loop *loop1 = bb1->loop_father;
1628 class loop *loop2 = bb2->loop_father;
1629 if (loop1->num == loop2->num)
1630 return bb1->index - bb2->index;
1631 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1634 /* qsort sort function to sort ref locs after their loop fathers postorder. */
1636 static int
1637 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1638 void *bb_loop_postorder_)
1640 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1641 const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1642 const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1643 class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1644 class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1645 if (loop1->num == loop2->num)
1646 return 0;
1647 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1650 /* Gathers memory references in loops. */
1652 static void
1653 analyze_memory_references (bool store_motion)
1655 gimple_stmt_iterator bsi;
1656 basic_block bb, *bbs;
1657 class loop *outer;
1658 unsigned i, n;
1660 /* Collect all basic-blocks in loops and sort them after their
1661 loops postorder. */
1662 i = 0;
1663 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1664 FOR_EACH_BB_FN (bb, cfun)
1665 if (bb->loop_father != current_loops->tree_root)
1666 bbs[i++] = bb;
1667 n = i;
1668 gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1669 bb_loop_postorder);
1671 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1672 That results in better locality for all the bitmaps. It also
1673 automatically sorts the location list of gathered memory references
1674 after their loop postorder number, allowing it to be binary-searched. */
1675 for (i = 0; i < n; ++i)
1677 basic_block bb = bbs[i];
1678 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1679 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1682 /* Verify the list of gathered memory references is sorted after their
1683 loop postorder number. */
1684 if (flag_checking)
1686 im_mem_ref *ref;
1687 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1688 for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
1689 gcc_assert (sort_locs_in_loop_postorder_cmp
1690 (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
1691 bb_loop_postorder) <= 0);
1694 free (bbs);
1696 if (!store_motion)
1697 return;
1699 /* Propagate the information about accessed memory references up
1700 the loop hierarchy. */
1701 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1703 /* Finalize the overall touched references (including subloops). */
1704 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1705 &memory_accesses.refs_stored_in_loop[loop->num]);
1707 /* Propagate the information about accessed memory references up
1708 the loop hierarchy. */
1709 outer = loop_outer (loop);
1710 if (outer == current_loops->tree_root)
1711 continue;
1713 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1714 &memory_accesses.all_refs_stored_in_loop[loop->num]);
1718 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1719 tree_to_aff_combination_expand. */
1721 static bool
1722 mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1723 hash_map<tree, name_expansion *> **ttae_cache,
1724 bool tbaa_p)
1726 gcc_checking_assert (mem1->mem.ref != error_mark_node
1727 && mem2->mem.ref != error_mark_node);
1729 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1730 object and their offset differ in such a way that the locations cannot
1731 overlap, then they cannot alias. */
1732 poly_widest_int size1, size2;
1733 aff_tree off1, off2;
1735 /* Perform basic offset and type-based disambiguation. */
1736 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
1737 return false;
1739 /* The expansion of addresses may be a bit expensive, thus we only do
1740 the check at -O2 and higher optimization levels. */
1741 if (optimize < 2)
1742 return true;
1744 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1745 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1746 aff_combination_expand (&off1, ttae_cache);
1747 aff_combination_expand (&off2, ttae_cache);
1748 aff_combination_scale (&off1, -1);
1749 aff_combination_add (&off2, &off1);
1751 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1752 return false;
1754 return true;
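/* For example, after base + offset expansion the two references

     p_1->a[i_2]
     p_1->a[i_2 + 1]

   share the same base and their offsets differ by exactly one element,
   so aff_comb_cannot_overlap_p shows the accessed bytes cannot overlap
   and the references are disambiguated even when TBAA cannot help.
   The names are illustrative.  */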
1757 /* Compare function for bsearch searching for reference locations
1758 in a loop. */
1760 static int
1761 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1762 void *bb_loop_postorder_)
1764 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1765 class loop *loop = (class loop *)const_cast<void *>(loop_);
1766 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1767 class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1768 if (loop->num == loc_loop->num
1769 || flow_loop_nested_p (loop, loc_loop))
1770 return 0;
1771 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1772 ? -1 : 1);
1775 /* Iterates over all locations of REF in LOOP and its subloops calling
1776 fn.operator() with the location as argument. When that operator
1777 returns true the iteration is stopped and true is returned.
1778 Otherwise false is returned. */
1780 template <typename FN>
1781 static bool
1782 for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
1784 unsigned i;
1785 mem_ref_loc *loc;
1787 /* Search for the cluster of locs in the accesses_in_loop vector
1788 which is sorted after postorder index of the loop father. */
1789 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1790 bb_loop_postorder);
1791 if (!loc)
1792 return false;
1794 /* We have found one location inside loop or its sub-loops. Iterate
1795 both forward and backward to cover the whole cluster. */
1796 i = loc - ref->accesses_in_loop.address ();
1797 while (i > 0)
1799 --i;
1800 mem_ref_loc *l = &ref->accesses_in_loop[i];
1801 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1802 break;
1803 if (fn (l))
1804 return true;
1806 for (i = loc - ref->accesses_in_loop.address ();
1807 i < ref->accesses_in_loop.length (); ++i)
1809 mem_ref_loc *l = &ref->accesses_in_loop[i];
1810 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1811 break;
1812 if (fn (l))
1813 return true;
1816 return false;
1819 /* Rewrites location LOC by TMP_VAR. */
1821 class rewrite_mem_ref_loc
1823 public:
1824 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1825 bool operator () (mem_ref_loc *loc);
1826 tree tmp_var;
1829 bool
1830 rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1832 *loc->ref = tmp_var;
1833 update_stmt (loc->stmt);
1834 return false;
1837 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1839 static void
1840 rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
1842 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1845 /* Stores the first reference location in LOCP. */
1847 class first_mem_ref_loc_1
1849 public:
1850 first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1851 bool operator () (mem_ref_loc *loc);
1852 mem_ref_loc **locp;
1855 bool
1856 first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
1858 *locp = loc;
1859 return true;
1862 /* Returns the first reference location to REF in LOOP. */
1864 static mem_ref_loc *
1865 first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
1867 mem_ref_loc *locp = NULL;
1868 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1869 return locp;
1872 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1873 MEM along edge EX.
1875 The store is only done if MEM has changed. We do this so no
1876 changes to MEM occur on code paths that did not originally store
1877 into it.
1879 The common case for execute_sm will transform:
1881 for (...) {
1882 if (foo)
1883 stuff;
1884 else
1885 MEM = TMP_VAR;
1888 into:
1890 lsm = MEM;
1891 for (...) {
1892 if (foo)
1893 stuff;
1894 else
1895 lsm = TMP_VAR;
1897 MEM = lsm;
1899 This function will generate:
1901 lsm = MEM;
1903 lsm_flag = false;
1905 for (...) {
1906 if (foo)
1907 stuff;
1908 else {
1909 lsm = TMP_VAR;
1910 lsm_flag = true;
1913 if (lsm_flag) <--
1914 MEM = lsm; <-- (X)
1916 In case MEM and TMP_VAR are NULL the function will return the then
1917 block so the caller can insert (X) and other related stmts.
1920 static basic_block
1921 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
1922 edge preheader, hash_set <basic_block> *flag_bbs,
1923 edge &append_cond_position, edge &last_cond_fallthru)
1925 basic_block new_bb, then_bb, old_dest;
1926 bool loop_has_only_one_exit;
1927 edge then_old_edge;
1928 gimple_stmt_iterator gsi;
1929 gimple *stmt;
1930 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1932 profile_count count_sum = profile_count::zero ();
1933 int nbbs = 0, ncount = 0;
1934 profile_probability flag_probability = profile_probability::uninitialized ();
1936 /* Flag is set in FLAG_BBS. Determine probability that flag will be true
1937 at loop exit.
1939 This code may look fancy, but it cannot update the profile very realistically
1940 because we do not know the probability that the flag will be true at a given
1941 loop exit.
1943 We look for two interesting extremes:
1944 - when the exit is dominated by a block setting the flag, we know it will
1945 always be true. This is a common case.
1946 - when all blocks setting the flag have very low frequency we know
1947 it will likely be false.
1948 In all other cases we default to 2/3 for flag being true. */
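   /* A rough worked example (hypothetical counts, not from any testcase):
      if all flag-setting blocks have initialized counts summing to 10 and
      the preheader edge count is 100, the flag is estimated to be true
      with probability 10/100 = 10%; if one of the flag-setting blocks
      dominates the exit, the probability is taken as always; otherwise
      the 2/3 cap is used.  */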
1950 for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
1951 it != flag_bbs->end (); ++it)
1953 if ((*it)->count.initialized_p ())
1954 count_sum += (*it)->count, ncount ++;
1955 if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
1956 flag_probability = profile_probability::always ();
1957 nbbs++;
1960 profile_probability cap = profile_probability::always ().apply_scale (2, 3);
1962 if (flag_probability.initialized_p ())
1964 else if (ncount == nbbs
1965 && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
1967 flag_probability = count_sum.probability_in (preheader->count ());
1968 if (flag_probability > cap)
1969 flag_probability = cap;
1972 if (!flag_probability.initialized_p ())
1973 flag_probability = cap;
1975 /* ?? Insert store after previous store if applicable. See note
1976 below. */
1977 if (append_cond_position)
1978 ex = append_cond_position;
1980 loop_has_only_one_exit = single_pred_p (ex->dest);
1982 if (loop_has_only_one_exit)
1983 ex = split_block_after_labels (ex->dest);
1984 else
1986 for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1987 !gsi_end_p (gpi); gsi_next (&gpi))
1989 gphi *phi = gpi.phi ();
1990 if (virtual_operand_p (gimple_phi_result (phi)))
1991 continue;
1993 /* When the destination has a non-virtual PHI node with multiple
1994 predecessors make sure we preserve the PHI structure by
1995 forcing a forwarder block so that hoisting of that PHI will
1996 still work. */
1997 split_edge (ex);
1998 break;
2002 old_dest = ex->dest;
2003 new_bb = split_edge (ex);
2004 then_bb = create_empty_bb (new_bb);
2005 then_bb->count = new_bb->count.apply_probability (flag_probability);
2006 if (irr)
2007 then_bb->flags = BB_IRREDUCIBLE_LOOP;
2008 add_bb_to_loop (then_bb, new_bb->loop_father);
2010 gsi = gsi_start_bb (new_bb);
2011 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2012 NULL_TREE, NULL_TREE);
2013 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2015 /* Insert actual store. */
2016 if (mem)
2018 gsi = gsi_start_bb (then_bb);
2019 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2020 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2023 edge e1 = single_succ_edge (new_bb);
2024 edge e2 = make_edge (new_bb, then_bb,
2025 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2026 e2->probability = flag_probability;
2028 e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
2029 e1->flags &= ~EDGE_FALLTHRU;
2031 e1->probability = flag_probability.invert ();
2033 then_old_edge = make_single_succ_edge (then_bb, old_dest,
2034 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2036 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2038 if (append_cond_position)
2040 basic_block prevbb = last_cond_fallthru->src;
2041 redirect_edge_succ (last_cond_fallthru, new_bb);
2042 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2043 set_immediate_dominator (CDI_DOMINATORS, old_dest,
2044 recompute_dominator (CDI_DOMINATORS, old_dest));
2047 /* ?? Because stores may alias, they must happen in the exact
2048 sequence they originally happened. Save the position right after
2049 the (_lsm) store we just created so we can continue appending after
2050 it and maintain the original order. */
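   /* For illustration (simplified, hypothetical refs): sinking two
      flag-guarded stores to the same exit this way produces

        if (flag1)
          MEM1 = lsm1;
        if (flag2)
          MEM2 = lsm2;

      with each new conditional appended after the previous one, so the
      original store order is preserved.  */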
2051 append_cond_position = then_old_edge;
2052 last_cond_fallthru = find_edge (new_bb, old_dest);
2054 if (!loop_has_only_one_exit)
2055 for (gphi_iterator gpi = gsi_start_phis (old_dest);
2056 !gsi_end_p (gpi); gsi_next (&gpi))
2058 gphi *phi = gpi.phi ();
2059 unsigned i;
2061 for (i = 0; i < gimple_phi_num_args (phi); i++)
2062 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2064 tree arg = gimple_phi_arg_def (phi, i);
2065 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2066 update_stmt (phi);
2070 return then_bb;
2073 /* When the location is a store to REF, set a flag indicating the store. */
2075 class sm_set_flag_if_changed
2077 public:
2078 sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2079 : flag (flag_), bbs (bbs_) {}
2080 bool operator () (mem_ref_loc *loc);
2081 tree flag;
2082 hash_set <basic_block> *bbs;
2085 bool
2086 sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2088 /* Only set the flag for writes. */
2089 if (is_gimple_assign (loc->stmt)
2090 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2092 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2093 gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2094 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2095 bbs->add (gimple_bb (stmt));
2097 return false;
2100 /* Helper function for execute_sm. On every location where REF is
2101 stored to, set an appropriate flag indicating the store. */
2103 static tree
2104 execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2105 hash_set <basic_block> *bbs)
2107 tree flag;
2108 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2109 flag = create_tmp_reg (boolean_type_node, str);
2110 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2111 return flag;
2114 struct sm_aux
2116 tree tmp_var;
2117 tree store_flag;
2118 hash_set <basic_block> flag_bbs;
2121 /* Executes store motion of memory reference REF from LOOP.
2122 Exits from the LOOP are stored in EXITS. The initialization of the
2123 temporary variable is put to the preheader of the loop, and assignments
2124 to the reference from the temporary variable are emitted to exits. */
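/* As a simplified sketch (the unconditional variant; the flag-guarded
   variant is illustrated above for execute_sm_if_changed):

     for (...)
       MEM = f (...);

   becomes

     lsm = MEM;           <- initialization in the preheader
     for (...)
       lsm = f (...);     <- references rewritten to the temporary
     MEM = lsm;           <- store emitted on each exit

   where lsm is the temporary variable created for REF.  */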
2126 static void
2127 execute_sm (class loop *loop, im_mem_ref *ref,
2128 hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
2129 bool use_other_flag_var)
2131 gassign *load;
2132 struct fmt_data fmt_data;
2133 struct lim_aux_data *lim_data;
2134 bool multi_threaded_model_p = false;
2135 gimple_stmt_iterator gsi;
2136 sm_aux *aux = new sm_aux;
2138 if (dump_file && (dump_flags & TDF_DETAILS))
2140 fprintf (dump_file, "Executing store motion of ");
2141 print_generic_expr (dump_file, ref->mem.ref);
2142 fprintf (dump_file, " from loop %d\n", loop->num);
2145 aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2146 get_lsm_tmp_name (ref->mem.ref, ~0));
2148 fmt_data.loop = loop;
2149 fmt_data.orig_loop = loop;
2150 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2152 bool always_stored = ref_always_accessed_p (loop, ref, true);
2153 if (maybe_mt
2154 && (bb_in_transaction (loop_preheader_edge (loop)->src)
2155 || (! flag_store_data_races && ! always_stored)))
2156 multi_threaded_model_p = true;
2158 if (multi_threaded_model_p && !use_other_flag_var)
2159 aux->store_flag
2160 = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
2161 else
2162 aux->store_flag = NULL_TREE;
2164 /* Remember variable setup. */
2165 aux_map.put (ref, aux);
2167 rewrite_mem_refs (loop, ref, aux->tmp_var);
2169 /* Emit the load code at the position of the first reference of REF
2170 in the loop, so that we are sure it will be processed
2171 by move_computations after all dependencies. */
2172 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2174 /* Avoid doing a load if there was no load of the ref in the loop.
2175 Esp. when the ref is not always stored we cannot optimize the load
2176 away later. But when it is not always stored we must use a conditional
2177 store then. */
2178 if ((!always_stored && !multi_threaded_model_p)
2179 || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
2180 load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
2181 else
2183 /* If not emitting a load, mark the uninitialized state on the
2184 loop entry as not to be warned about. */
2185 tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
2186 suppress_warning (uninit, OPT_Wuninitialized);
2187 load = gimple_build_assign (aux->tmp_var, uninit);
2189 lim_data = init_lim_data (load);
2190 lim_data->max_loop = loop;
2191 lim_data->tgt_loop = loop;
2192 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2194 if (aux->store_flag)
2196 load = gimple_build_assign (aux->store_flag, boolean_false_node);
2197 lim_data = init_lim_data (load);
2198 lim_data->max_loop = loop;
2199 lim_data->tgt_loop = loop;
2200 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2204 /* sm_ord is used for ordinary stores for which we can retain the order
2205 with respect to other stores,
2206 sm_unord is used for conditionally executed stores which need to be
2207 able to execute in arbitrary order with respect to other stores,
2208 sm_other is used for stores we do not try to apply store motion to. */
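/* For example (hypothetical), in

     for (...) { a = ...; if (c) b = ...; }

   the store to 'a' can keep its position relative to other stores when
   re-materialized on the exits (sm_ord), while the store to 'b', if its
   order cannot be preserved, may be emitted as a flag-guarded store in
   arbitrary order (sm_unord); stores left untouched are sm_other.  */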
2209 enum sm_kind { sm_ord, sm_unord, sm_other };
2210 struct seq_entry
2212 seq_entry () {}
2213 seq_entry (unsigned f, sm_kind k, tree fr = NULL)
2214 : first (f), second (k), from (fr) {}
2215 unsigned first;
2216 sm_kind second;
2217 tree from;
2220 static void
2221 execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2222 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
2223 edge &append_cond_position, edge &last_cond_fallthru)
2225 /* Sink the stores to exit from the loop. */
2226 for (unsigned i = seq.length (); i > 0; --i)
2228 im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2229 if (seq[i-1].second == sm_other)
2231 gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2232 if (dump_file && (dump_flags & TDF_DETAILS))
2234 fprintf (dump_file, "Re-issuing dependent store of ");
2235 print_generic_expr (dump_file, ref->mem.ref);
2236 fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2237 loop->num, ex->src->index, ex->dest->index);
2239 gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
2240 seq[i-1].from);
2241 gsi_insert_on_edge (ex, store);
2243 else
2245 sm_aux *aux = *aux_map.get (ref);
2246 if (!aux->store_flag || kind == sm_ord)
2248 gassign *store;
2249 store = gimple_build_assign (unshare_expr (ref->mem.ref),
2250 aux->tmp_var);
2251 gsi_insert_on_edge (ex, store);
2253 else
2254 execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
2255 aux->store_flag,
2256 loop_preheader_edge (loop), &aux->flag_bbs,
2257 append_cond_position, last_cond_fallthru);
2262 /* Push the SM candidate at index PTR in the sequence SEQ down until
2263 we hit the next SM candidate. Return true if that went OK and
2264 false if we could not disambiguate against another unrelated ref.
2265 Update *AT to the index where the candidate now resides. */
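/* A small worked example with hypothetical refs A, B and C: given

     SEQ = { A:sm_ord, B:sm_other (from == NULL), C:sm_ord }

   and PTR == 2, C is swapped past B (provided B and C are independent),
   yielding

     SEQ = { A:sm_ord, C:sm_ord, B:sm_other }

   with *AT == 1; the walk stops at A because it is sm_ord.  If B and C
   cannot be disambiguated, false is returned instead.  */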
2267 static bool
2268 sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
2270 *at = ptr;
2271 for (; ptr > 0; --ptr)
2273 seq_entry &new_cand = seq[ptr];
2274 seq_entry &against = seq[ptr-1];
2275 if (against.second == sm_ord
2276 || (against.second == sm_other && against.from != NULL_TREE))
2277 /* Found the tail of the sequence. */
2278 break;
2279 /* We may not ignore self-dependences here. */
2280 if (new_cand.first == against.first
2281 || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
2282 memory_accesses.refs_list[against.first],
2283 false))
2284 /* ??? Prune new_cand from the list of refs to apply SM to. */
2285 return false;
2286 std::swap (new_cand, against);
2287 *at = ptr - 1;
2289 return true;
2292 /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
2293 walking backwards from VDEF (or the end of BB if VDEF is NULL). */
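/* For example (hypothetical), for a block containing the stores

     a = ...; b = ...; c = ...;

   where 'a' and 'c' are SM candidates but 'b' is not, walking the
   virtual operands backwards yields roughly

     SEQ = { c:sm_ord, b:sm_other (from = stored value), a:sm_ord }

   i.e. the stores in reverse program order, with the non-candidate
   store recorded together with its value so it can be re-issued on
   the exits in its original position.  */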
2295 static int
2296 sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
2297 vec<seq_entry> &seq, bitmap refs_not_in_seq,
2298 bitmap refs_not_supported, bool forked,
2299 bitmap fully_visited)
2301 if (!vdef)
2302 for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
2303 gsi_prev (&gsi))
2305 vdef = gimple_vdef (gsi_stmt (gsi));
2306 if (vdef)
2307 break;
2309 if (!vdef)
2311 gphi *vphi = get_virtual_phi (bb);
2312 if (vphi)
2313 vdef = gimple_phi_result (vphi);
2315 if (!vdef)
2317 if (single_pred_p (bb))
2318 /* This handles the perfect nest case. */
2319 return sm_seq_valid_bb (loop, single_pred (bb), vdef,
2320 seq, refs_not_in_seq, refs_not_supported,
2321 forked, fully_visited);
2322 return 0;
2326 gimple *def = SSA_NAME_DEF_STMT (vdef);
2327 if (gimple_bb (def) != bb)
2329 /* If we forked by processing a PHI do not allow our walk to
2330 merge again until we handle that robustly. */
2331 if (forked)
2333 /* Mark refs_not_in_seq as unsupported. */
2334 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2335 return 1;
2337 /* Otherwise it doesn't really matter if we end up in different
2338 BBs. */
2339 bb = gimple_bb (def);
2341 if (gphi *phi = dyn_cast <gphi *> (def))
2343 /* Handle CFG merges. Until we handle forks (gimple_bb (def) != bb)
2344 this is still linear.
2345 Eventually we want to cache intermediate results per BB
2346 (but we can't easily cache for different exits?). */
2347 /* Stop at PHIs with possible backedges. */
2348 if (bb == bb->loop_father->header
2349 || bb->flags & BB_IRREDUCIBLE_LOOP)
2351 /* Mark refs_not_in_seq as unsupported. */
2352 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2353 return 1;
2355 if (gimple_phi_num_args (phi) == 1)
2356 return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2357 gimple_phi_arg_def (phi, 0), seq,
2358 refs_not_in_seq, refs_not_supported,
2359 false, fully_visited);
2360 if (bitmap_bit_p (fully_visited,
2361 SSA_NAME_VERSION (gimple_phi_result (phi))))
2362 return 1;
2363 auto_vec<seq_entry> first_edge_seq;
2364 auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
2365 int eret;
2366 bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2367 eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2368 gimple_phi_arg_def (phi, 0),
2369 first_edge_seq,
2370 tem_refs_not_in_seq, refs_not_supported,
2371 true, fully_visited);
2372 if (eret != 1)
2373 return -1;
2374 /* Simplify our lives by pruning the sequence of !sm_ord. */
2375 while (!first_edge_seq.is_empty ()
2376 && first_edge_seq.last ().second != sm_ord)
2377 first_edge_seq.pop ();
2378 for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
2380 tree vuse = gimple_phi_arg_def (phi, i);
2381 edge e = gimple_phi_arg_edge (phi, i);
2382 auto_vec<seq_entry> edge_seq;
2383 bitmap_and_compl (tem_refs_not_in_seq,
2384 refs_not_in_seq, refs_not_supported);
2385 /* If we've marked all refs we search for as unsupported
2386 we can stop processing and use the sequence as before
2387 the PHI. */
2388 if (bitmap_empty_p (tem_refs_not_in_seq))
2389 return 1;
2390 eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
2391 tem_refs_not_in_seq, refs_not_supported,
2392 true, fully_visited);
2393 if (eret != 1)
2394 return -1;
2395 /* Simplify our lives by pruning the sequence of !sm_ord. */
2396 while (!edge_seq.is_empty ()
2397 && edge_seq.last ().second != sm_ord)
2398 edge_seq.pop ();
2399 unsigned min_len = MIN(first_edge_seq.length (),
2400 edge_seq.length ());
2401 /* Incrementally merge seqs into first_edge_seq. */
2402 int first_uneq = -1;
2403 auto_vec<seq_entry, 2> extra_refs;
2404 for (unsigned int i = 0; i < min_len; ++i)
2406 /* ??? We can more intelligently merge when we face different
2407 order by additional sinking operations in one sequence.
2408 For now we simply mark them as to be processed by the
2409 non-order-preserving SM code. */
2410 if (first_edge_seq[i].first != edge_seq[i].first)
2412 if (first_edge_seq[i].second == sm_ord)
2413 bitmap_set_bit (refs_not_supported,
2414 first_edge_seq[i].first);
2415 if (edge_seq[i].second == sm_ord)
2416 bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2417 first_edge_seq[i].second = sm_other;
2418 first_edge_seq[i].from = NULL_TREE;
2419 /* Record the dropped refs for later processing. */
2420 if (first_uneq == -1)
2421 first_uneq = i;
2422 extra_refs.safe_push (seq_entry (edge_seq[i].first,
2423 sm_other, NULL_TREE));
2425 /* sm_other prevails. */
2426 else if (first_edge_seq[i].second != edge_seq[i].second)
2428 /* Make sure the ref is marked as not supported. */
2429 bitmap_set_bit (refs_not_supported,
2430 first_edge_seq[i].first);
2431 first_edge_seq[i].second = sm_other;
2432 first_edge_seq[i].from = NULL_TREE;
2434 else if (first_edge_seq[i].second == sm_other
2435 && first_edge_seq[i].from != NULL_TREE
2436 && (edge_seq[i].from == NULL_TREE
2437 || !operand_equal_p (first_edge_seq[i].from,
2438 edge_seq[i].from, 0)))
2439 first_edge_seq[i].from = NULL_TREE;
2441 /* Any excess elements become sm_other since they are now
2442 conditionally executed. */
2443 if (first_edge_seq.length () > edge_seq.length ())
2445 for (unsigned i = edge_seq.length ();
2446 i < first_edge_seq.length (); ++i)
2448 if (first_edge_seq[i].second == sm_ord)
2449 bitmap_set_bit (refs_not_supported,
2450 first_edge_seq[i].first);
2451 first_edge_seq[i].second = sm_other;
2454 else if (edge_seq.length () > first_edge_seq.length ())
2456 if (first_uneq == -1)
2457 first_uneq = first_edge_seq.length ();
2458 for (unsigned i = first_edge_seq.length ();
2459 i < edge_seq.length (); ++i)
2461 if (edge_seq[i].second == sm_ord)
2462 bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2463 extra_refs.safe_push (seq_entry (edge_seq[i].first,
2464 sm_other, NULL_TREE));
2467 /* Put unmerged refs at first_uneq to force dependence checking
2468 on them. */
2469 if (first_uneq != -1)
2471 /* Missing ordered_splice_at. */
2472 if ((unsigned)first_uneq == first_edge_seq.length ())
2473 first_edge_seq.safe_splice (extra_refs);
2474 else
2476 unsigned fes_length = first_edge_seq.length ();
2477 first_edge_seq.safe_grow (fes_length
2478 + extra_refs.length ());
2479 memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
2480 &first_edge_seq[first_uneq],
2481 (fes_length - first_uneq) * sizeof (seq_entry));
2482 memcpy (&first_edge_seq[first_uneq],
2483 extra_refs.address (),
2484 extra_refs.length () * sizeof (seq_entry));
2488 /* Use the sequence from the first edge and push SMs down. */
2489 for (unsigned i = 0; i < first_edge_seq.length (); ++i)
2491 unsigned id = first_edge_seq[i].first;
2492 seq.safe_push (first_edge_seq[i]);
2493 unsigned new_idx;
2494 if ((first_edge_seq[i].second == sm_ord
2495 || (first_edge_seq[i].second == sm_other
2496 && first_edge_seq[i].from != NULL_TREE))
2497 && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
2499 if (first_edge_seq[i].second == sm_ord)
2500 bitmap_set_bit (refs_not_supported, id);
2501 /* Mark it sm_other. */
2502 seq[new_idx].second = sm_other;
2503 seq[new_idx].from = NULL_TREE;
2506 bitmap_set_bit (fully_visited,
2507 SSA_NAME_VERSION (gimple_phi_result (phi)));
2508 return 1;
2510 lim_aux_data *data = get_lim_data (def);
2511 gcc_assert (data);
2512 if (data->ref == UNANALYZABLE_MEM_ID)
2513 return -1;
2514 /* Stop at memory references which we can't move. */
2515 else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node)
2517 /* Mark refs_not_in_seq as unsupported. */
2518 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2519 return 1;
2521 /* One of the stores we want to apply SM to and we've not yet seen. */
2522 else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
2524 seq.safe_push (seq_entry (data->ref, sm_ord));
2526 /* 1) push it down the queue until an SMed
2527 and not ignored ref is reached, skipping all not SMed refs
2528 and refs ignored via non-TBAA disambiguation. */
2529 unsigned new_idx;
2530 if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
2531 /* If that fails but we did not fork yet continue, we'll see
2532 to re-materialize all of the stores in the sequence then.
2533 Further stores will only be pushed up to this one. */
2534 && forked)
2536 bitmap_set_bit (refs_not_supported, data->ref);
2537 /* Mark it sm_other. */
2538 seq[new_idx].second = sm_other;
2541 /* 2) check whether we've seen all refs we want to SM and if so
2542 declare success for the active exit. */
2543 if (bitmap_empty_p (refs_not_in_seq))
2544 return 1;
2546 else
2547 /* Another store not part of the final sequence. Simply push it. */
2548 seq.safe_push (seq_entry (data->ref, sm_other,
2549 gimple_assign_rhs1 (def)));
2551 vdef = gimple_vuse (def);
2553 while (1);
2556 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2557 edges of the LOOP. */
2559 static void
2560 hoist_memory_references (class loop *loop, bitmap mem_refs,
2561 const vec<edge> &exits)
2563 im_mem_ref *ref;
2564 unsigned i;
2565 bitmap_iterator bi;
2567 /* There's a special case where we can use ordered re-materialization for
2568 conditionally executed stores, which is when all stores in the loop
2569 happen in the same basic-block. In that case we know we'll reach
2570 all stores and thus can simply process that BB and emit a single
2571 conditional block of ordered materializations. See PR102436. */
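  /* E.g. (hypothetical):

       for (...)
         if (c)
           { a = ...; b = ...; }

     Both stores are in one basic block, so whenever that block executes
     we know the complete store order and a single flag-guarded block of
     ordered stores on the exits suffices.  */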
2572 basic_block single_store_bb = NULL;
2573 EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
2574 0, i, bi)
2576 bool fail = false;
2577 ref = memory_accesses.refs_list[i];
2578 for (auto loc : ref->accesses_in_loop)
2579 if (!gimple_vdef (loc.stmt))
2581 else if (!single_store_bb)
2583 single_store_bb = gimple_bb (loc.stmt);
2584 bool conditional = false;
2585 for (edge e : exits)
2586 if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
2588 /* Conditional as seen from e. */
2589 conditional = true;
2590 break;
2592 if (!conditional)
2594 fail = true;
2595 break;
2598 else if (single_store_bb != gimple_bb (loc.stmt))
2600 fail = true;
2601 break;
2603 if (fail)
2605 single_store_bb = NULL;
2606 break;
2609 if (single_store_bb)
2611 /* Analyze the single block with stores. */
2612 auto_bitmap fully_visited;
2613 auto_bitmap refs_not_supported;
2614 auto_bitmap refs_not_in_seq;
2615 auto_vec<seq_entry> seq;
2616 bitmap_copy (refs_not_in_seq, mem_refs);
2617 int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
2618 seq, refs_not_in_seq, refs_not_supported,
2619 false, fully_visited);
2620 if (res != 1)
2622 /* Unhandled refs can still fail this. */
2623 bitmap_clear (mem_refs);
2624 return;
2627 /* We cannot handle sm_other since we neither remember the
2628 stored location nor the value at the point we execute them. */
2629 for (unsigned i = 0; i < seq.length (); ++i)
2631 unsigned new_i;
2632 if (seq[i].second == sm_other
2633 && seq[i].from != NULL_TREE)
2634 seq[i].from = NULL_TREE;
2635 else if ((seq[i].second == sm_ord
2636 || (seq[i].second == sm_other
2637 && seq[i].from != NULL_TREE))
2638 && !sm_seq_push_down (seq, i, &new_i))
2640 bitmap_set_bit (refs_not_supported, seq[new_i].first);
2641 seq[new_i].second = sm_other;
2642 seq[new_i].from = NULL_TREE;
2645 bitmap_and_compl_into (mem_refs, refs_not_supported);
2646 if (bitmap_empty_p (mem_refs))
2647 return;
2649 /* Prune seq. */
2650 while (seq.last ().second == sm_other
2651 && seq.last ().from == NULL_TREE)
2652 seq.pop ();
2654 hash_map<im_mem_ref *, sm_aux *> aux_map;
2656 /* Execute SM but delay the store materialization for ordered
2657 sequences on exit. */
2658 bool first_p = true;
2659 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2661 ref = memory_accesses.refs_list[i];
2662 execute_sm (loop, ref, aux_map, true, !first_p);
2663 first_p = false;
2666 /* Get at the single flag variable we eventually produced. */
2667 im_mem_ref *ref
2668 = memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
2669 sm_aux *aux = *aux_map.get (ref);
2671 /* Materialize ordered store sequences on exits. */
2672 edge e;
2673 FOR_EACH_VEC_ELT (exits, i, e)
2675 edge append_cond_position = NULL;
2676 edge last_cond_fallthru = NULL;
2677 edge insert_e = e;
2678 /* Construct the single flag variable control flow and insert
2679 the ordered seq of stores in the then block. With
2680 -fstore-data-races we can do the stores unconditionally. */
2681 if (aux->store_flag)
2682 insert_e
2683 = single_pred_edge
2684 (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
2685 aux->store_flag,
2686 loop_preheader_edge (loop),
2687 &aux->flag_bbs, append_cond_position,
2688 last_cond_fallthru));
2689 execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
2690 append_cond_position, last_cond_fallthru);
2691 gsi_commit_one_edge_insert (insert_e, NULL);
2694 for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2695 iter != aux_map.end (); ++iter)
2696 delete (*iter).second;
2698 return;
2701 /* To address PR57359 before actually applying store-motion check
2702 the candidates found for validity with regards to reordering
2703 relative to other stores which we until here disambiguated using
2704 TBAA which isn't valid.
2705 What matters is the order of the last stores to the mem_refs
2706 with respect to the other stores of the loop at the point of the
2707 loop exits. */
2709 /* For each exit compute the store order, pruning from mem_refs
2710 on the fly. */
2711 /* The complexity of this is at least
2712 O(number of exits * number of SM refs) but more approaching
2713 O(number of exits * number of SM refs * number of stores). */
2714 /* ??? Somehow do this in a single sweep over the loop body. */
2715 auto_vec<std::pair<edge, vec<seq_entry> > > sms;
2716 auto_bitmap refs_not_supported (&lim_bitmap_obstack);
2717 edge e;
2718 FOR_EACH_VEC_ELT (exits, i, e)
2720 vec<seq_entry> seq;
2721 seq.create (4);
2722 auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
2723 bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
2724 if (bitmap_empty_p (refs_not_in_seq))
2726 seq.release ();
2727 break;
2729 auto_bitmap fully_visited;
2730 int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
2731 seq, refs_not_in_seq,
2732 refs_not_supported, false,
2733 fully_visited);
2734 if (res != 1)
2736 bitmap_copy (refs_not_supported, mem_refs);
2737 seq.release ();
2738 break;
2740 sms.safe_push (std::make_pair (e, seq));
2743 /* Prune refs that became unsupported from the sequences of earlier processed exits. */
2744 bool changed = !bitmap_empty_p (refs_not_supported);
2745 while (changed)
2747 changed = false;
2748 std::pair<edge, vec<seq_entry> > *seq;
2749 FOR_EACH_VEC_ELT (sms, i, seq)
2751 bool need_to_push = false;
2752 for (unsigned i = 0; i < seq->second.length (); ++i)
2754 sm_kind kind = seq->second[i].second;
2755 if (kind == sm_other && seq->second[i].from == NULL_TREE)
2756 break;
2757 unsigned id = seq->second[i].first;
2758 unsigned new_idx;
2759 if (kind == sm_ord
2760 && bitmap_bit_p (refs_not_supported, id))
2762 seq->second[i].second = sm_other;
2763 gcc_assert (seq->second[i].from == NULL_TREE);
2764 need_to_push = true;
2766 else if (need_to_push
2767 && !sm_seq_push_down (seq->second, i, &new_idx))
2769 /* We need to push down both sm_ord and sm_other
2770 but for the latter we need to disqualify all
2771 following refs. */
2772 if (kind == sm_ord)
2774 if (bitmap_set_bit (refs_not_supported, id))
2775 changed = true;
2776 seq->second[new_idx].second = sm_other;
2778 else
2780 for (unsigned j = seq->second.length () - 1;
2781 j > new_idx; --j)
2782 if (seq->second[j].second == sm_ord
2783 && bitmap_set_bit (refs_not_supported,
2784 seq->second[j].first))
2785 changed = true;
2786 seq->second.truncate (new_idx);
2787 break;
2793 std::pair<edge, vec<seq_entry> > *seq;
2794 FOR_EACH_VEC_ELT (sms, i, seq)
2796 /* Prune sm_other from the end. */
2797 while (!seq->second.is_empty ()
2798 && seq->second.last ().second == sm_other)
2799 seq->second.pop ();
2800 /* Prune duplicates from the start. */
2801 auto_bitmap seen (&lim_bitmap_obstack);
2802 unsigned j, k;
2803 for (j = k = 0; j < seq->second.length (); ++j)
2804 if (bitmap_set_bit (seen, seq->second[j].first))
2806 if (k != j)
2807 seq->second[k] = seq->second[j];
2808 ++k;
2810 seq->second.truncate (k);
2811 /* And verify. */
2812 seq_entry *e;
2813 FOR_EACH_VEC_ELT (seq->second, j, e)
2814 gcc_assert (e->second == sm_ord
2815 || (e->second == sm_other && e->from != NULL_TREE));
2818 /* Verify dependence for refs we cannot handle with the order preserving
2819 code (refs_not_supported) or prune them from mem_refs. */
2820 auto_vec<seq_entry> unord_refs;
2821 EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
2823 ref = memory_accesses.refs_list[i];
2824 if (!ref_indep_loop_p (loop, ref, sm_waw))
2825 bitmap_clear_bit (mem_refs, i);
2826 /* We've now verified that the store order of ref with respect to all
2827 other stores in the loop does not matter. */
2828 else
2829 unord_refs.safe_push (seq_entry (i, sm_unord));
2832 hash_map<im_mem_ref *, sm_aux *> aux_map;
2834 /* Execute SM but delay the store materialization for ordered
2835 sequences on exit. */
2836 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2838 ref = memory_accesses.refs_list[i];
2839 execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
2840 false);
2843 /* Materialize ordered store sequences on exits. */
2844 FOR_EACH_VEC_ELT (exits, i, e)
2846 edge append_cond_position = NULL;
2847 edge last_cond_fallthru = NULL;
2848 if (i < sms.length ())
2850 gcc_assert (sms[i].first == e);
2851 execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
2852 append_cond_position, last_cond_fallthru);
2853 sms[i].second.release ();
2855 if (!unord_refs.is_empty ())
2856 execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
2857 append_cond_position, last_cond_fallthru);
2858 /* Commit edge inserts here to preserve the order of stores
2859 when an exit exits multiple loops. */
2860 gsi_commit_one_edge_insert (e, NULL);
2863 for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2864 iter != aux_map.end (); ++iter)
2865 delete (*iter).second;
2868 class ref_always_accessed
2870 public:
2871 ref_always_accessed (class loop *loop_, bool stored_p_)
2872 : loop (loop_), stored_p (stored_p_) {}
2873 bool operator () (mem_ref_loc *loc);
2874 class loop *loop;
2875 bool stored_p;
2878 bool
2879 ref_always_accessed::operator () (mem_ref_loc *loc)
2881 class loop *must_exec;
2883 struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
2884 if (!lim_data)
2885 return false;
2887 /* If we require an always executed store make sure the statement
2888 is a store. */
2889 if (stored_p)
2891 tree lhs = gimple_get_lhs (loc->stmt);
2892 if (!lhs
2893 || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
2894 return false;
2897 must_exec = lim_data->always_executed_in;
2898 if (!must_exec)
2899 return false;
2901 if (must_exec == loop
2902 || flow_loop_nested_p (must_exec, loop))
2903 return true;
2905 return false;
2908 /* Returns true if REF is always accessed in LOOP. If STORED_P is true
2909 make sure REF is always stored to in LOOP. */
2911 static bool
2912 ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
2914 return for_all_locs_in_loop (loop, ref,
2915 ref_always_accessed (loop, stored_p));
2918 /* Returns true if REF1 and REF2 are independent. */
2920 static bool
2921 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
2923 if (ref1 == ref2)
2924 return true;
2926 if (dump_file && (dump_flags & TDF_DETAILS))
2927 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2928 ref1->id, ref2->id);
2930 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
2932 if (dump_file && (dump_flags & TDF_DETAILS))
2933 fprintf (dump_file, "dependent.\n");
2934 return false;
2936 else
2938 if (dump_file && (dump_flags & TDF_DETAILS))
2939 fprintf (dump_file, "independent.\n");
2940 return true;
2944 /* Returns true if REF is independent of all other accesses in LOOP.
2945 KIND specifies the kind of dependence to consider.
2946 lim_raw assumes REF is not stored in LOOP and disambiguates RAW
2947 dependences so if true REF can be hoisted out of LOOP
2948 sm_war disambiguates a store REF against all other loads to see
2949 whether the store can be sunk across loads out of LOOP
2950 sm_waw disambiguates a store REF against all other stores to see
2951 whether the store can be sunk across stores out of LOOP. */
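/* For instance (hypothetical), in

     for (...) { t = *p; *q = t; }

   hoisting the load of *p queries lim_raw independence against the
   store to *q, while sinking the store to *q out of the loop queries
   sm_war against the load of *p and sm_waw against the other stores
   in the loop.  */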
2953 static bool
2954 ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
2956 bool indep_p = true;
2957 bitmap refs_to_check;
2959 if (kind == sm_war)
2960 refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
2961 else
2962 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2964 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
2965 || ref->mem.ref == error_mark_node)
2966 indep_p = false;
2967 else
2969 /* tri-state, { unknown, independent, dependent } */
2970 dep_state state = query_loop_dependence (loop, ref, kind);
2971 if (state != dep_unknown)
2972 return state == dep_independent ? true : false;
2974 class loop *inner = loop->inner;
2975 while (inner)
2977 if (!ref_indep_loop_p (inner, ref, kind))
2979 indep_p = false;
2980 break;
2982 inner = inner->next;
2985 if (indep_p)
2987 unsigned i;
2988 bitmap_iterator bi;
2989 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2991 im_mem_ref *aref = memory_accesses.refs_list[i];
2992 if (aref->mem.ref == error_mark_node)
2994 gimple *stmt = aref->accesses_in_loop[0].stmt;
2995 if ((kind == sm_war
2996 && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
2997 kind != sm_waw))
2998 || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
2999 kind != sm_waw))
3001 indep_p = false;
3002 break;
3005 else if (!refs_independent_p (ref, aref, kind != sm_waw))
3007 indep_p = false;
3008 break;
3014 if (dump_file && (dump_flags & TDF_DETAILS))
3015 fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
3016 kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
3017 ref->id, loop->num, indep_p ? "independent" : "dependent");
3019 /* Record the computed result in the cache. */
3020 record_loop_dependence (loop, ref, kind,
3021 indep_p ? dep_independent : dep_dependent);
3023 return indep_p;
3027 /* Returns true if we can perform store motion of REF from LOOP. */
3029 static bool
3030 can_sm_ref_p (class loop *loop, im_mem_ref *ref)
3032 tree base;
3034 /* Can't hoist unanalyzable refs. */
3035 if (!MEM_ANALYZABLE (ref))
3036 return false;
3038 /* Can't hoist/sink aggregate copies. */
3039 if (ref->mem.ref == error_mark_node)
3040 return false;
3042 /* It should be movable. */
3043 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
3044 || TREE_THIS_VOLATILE (ref->mem.ref)
3045 || !for_each_index (&ref->mem.ref, may_move_till, loop))
3046 return false;
3048 /* If it can throw, fail; we do not properly update EH info. */
3049 if (tree_could_throw_p (ref->mem.ref))
3050 return false;
3052 /* If it can trap, it must be always executed in LOOP.
3053 Readonly memory locations may trap when storing to them, but
3054 tree_could_trap_p is a predicate for rvalues, so check that
3055 explicitly. */
3056 base = get_base_address (ref->mem.ref);
3057 if ((tree_could_trap_p (ref->mem.ref)
3058 || (DECL_P (base) && TREE_READONLY (base)))
3059 /* ??? We can at least use false here, allowing loads? We
3060 are forcing conditional stores if the ref is not always
3061 stored to later anyway. So this would only guard
3062 the load we need to emit. Thus when the ref is not
3063 loaded we can elide this completely? */
3064 && !ref_always_accessed_p (loop, ref, true))
3065 return false;
3067 /* Verify all loads of ref can be hoisted. */
3068 if (ref->loaded
3069 && bitmap_bit_p (ref->loaded, loop->num)
3070 && !ref_indep_loop_p (loop, ref, lim_raw))
3071 return false;
3073 /* Verify the candidate can be disambiguated against all loads,
3074 that is, we can elide all in-loop stores. Disambiguation
3075 against stores is done later when we cannot guarantee preserving
3076 the order of stores. */
3077 if (!ref_indep_loop_p (loop, ref, sm_war))
3078 return false;
3080 return true;
3083 /* Marks the references in LOOP for which store motion should be performed
3084 in REFS_TO_SM. SM_EXECUTED is the set of references for which store
3085 motion was performed in one of the outer loops. */
3087 static void
3088 find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
3090 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
3091 unsigned i;
3092 bitmap_iterator bi;
3093 im_mem_ref *ref;
3095 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
3097 ref = memory_accesses.refs_list[i];
3098 if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
3099 bitmap_set_bit (refs_to_sm, i);
3103 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
3104 for a store motion optimization (i.e. whether we can insert statements
3105 on its exits). */
3107 static bool
3108 loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
3109 const vec<edge> &exits)
3111 unsigned i;
3112 edge ex;
3114 FOR_EACH_VEC_ELT (exits, i, ex)
3115 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
3116 return false;
3118 return true;
3121 /* Try to perform store motion for all memory references modified inside
3122 LOOP. SM_EXECUTED is the bitmap of the memory references for which
3123 store motion was executed in one of the outer loops. */
3125 static void
3126 store_motion_loop (class loop *loop, bitmap sm_executed)
3128 auto_vec<edge> exits = get_loop_exit_edges (loop);
3129 class loop *subloop;
3130 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
3132 if (loop_suitable_for_sm (loop, exits))
3134 find_refs_for_sm (loop, sm_executed, sm_in_loop);
3135 if (!bitmap_empty_p (sm_in_loop))
3136 hoist_memory_references (loop, sm_in_loop, exits);
3139 bitmap_ior_into (sm_executed, sm_in_loop);
3140 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
3141 store_motion_loop (subloop, sm_executed);
3142 bitmap_and_compl_into (sm_executed, sm_in_loop);
3143 BITMAP_FREE (sm_in_loop);
3146 /* Try to perform store motion for all memory references modified inside
3147 loops. */
3149 static void
3150 do_store_motion (void)
3152 class loop *loop;
3153 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
3155 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3156 store_motion_loop (loop, sm_executed);
3158 BITMAP_FREE (sm_executed);
3161 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
3162 for each such basic block bb records the outermost loop for which execution
3163 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
3164 blocks that contain a nonpure call. */
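/* For example (hypothetical, loop in do-while form):

     do
       {
         x = ...;       <- always executed once the header runs
         if (c)
           break;
         y = ...;       <- not always executed
       }
     while (...);

   The block containing x is ALWAYS_EXECUTED_IN the loop, while the
   block containing y is not, since the loop may be left through the
   break before reaching it.  */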
3166 static void
3167 fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
3169 basic_block bb = NULL, last = NULL;
3170 edge e;
3171 class loop *inn_loop = loop;
3173 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
3175 auto_vec<basic_block, 64> worklist;
3176 worklist.reserve_exact (loop->num_nodes);
3177 worklist.quick_push (loop->header);
3180 edge_iterator ei;
3181 bb = worklist.pop ();
3183 if (!flow_bb_inside_loop_p (inn_loop, bb))
3185 /* When we are leaving a possibly infinite inner loop
3186 we have to stop processing. */
3187 if (!finite_loop_p (inn_loop))
3188 break;
3189 /* If the loop was finite we can continue with processing
3190 the loop we exited to. */
3191 inn_loop = bb->loop_father;
3194 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
3195 last = bb;
3197 if (bitmap_bit_p (contains_call, bb->index))
3198 break;
3200 /* If LOOP exits from this BB stop processing. */
3201 FOR_EACH_EDGE (e, ei, bb->succs)
3202 if (!flow_bb_inside_loop_p (loop, e->dest))
3203 break;
3204 if (e)
3205 break;
3207 /* A loop might be infinite (TODO use simple loop analysis
3208 to disprove this if possible). */
3209 if (bb->flags & BB_IRREDUCIBLE_LOOP)
3210 break;
3212 if (bb->loop_father->header == bb)
3213 /* Record that we enter into a subloop since it might not
3214 be finite. */
3215 /* ??? Entering into a not always executed subloop makes
3216 fill_always_executed_in quadratic in loop depth since
3217 we walk those loops N times. This is not a problem
3218 in practice though, see PR102253 for a worst-case testcase. */
3219 inn_loop = bb->loop_father;
3221 /* Walk the body of LOOP sorted by dominance relation. Additionally,
3222 if a basic block S dominates the latch, then only blocks dominated
3223 by S are after it.
3224 This is get_loop_body_in_dom_order using a worklist algorithm and
3225 stopping once we are no longer interested in visiting further
3226 blocks. */
3227 unsigned old_len = worklist.length ();
3228 unsigned postpone = 0;
3229 for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
3230 son;
3231 son = next_dom_son (CDI_DOMINATORS, son))
3233 if (!flow_bb_inside_loop_p (loop, son))
3234 continue;
3235 if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
3236 postpone = worklist.length ();
3237 worklist.quick_push (son);
3239 if (postpone)
3240 /* Postponing the block that dominates the latch means
3241 processing it last and thus putting it earliest in the
3242 worklist. */
3243 std::swap (worklist[old_len], worklist[postpone]);
3245 while (!worklist.is_empty ());
3247 while (1)
3249 if (dump_enabled_p ())
3250 dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
3251 last->index, loop->num);
3252 SET_ALWAYS_EXECUTED_IN (last, loop);
3253 if (last == loop->header)
3254 break;
3255 last = get_immediate_dominator (CDI_DOMINATORS, last);
3259 for (loop = loop->inner; loop; loop = loop->next)
3260 fill_always_executed_in_1 (loop, contains_call);
3263 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
3264 for each such basic block bb records the outermost loop for which execution
3265 of its header implies execution of bb. */
3267 static void
3268 fill_always_executed_in (void)
3270 basic_block bb;
3271 class loop *loop;
3273 auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
3274 bitmap_clear (contains_call);
3275 FOR_EACH_BB_FN (bb, cfun)
3277 gimple_stmt_iterator gsi;
3278 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3280 if (nonpure_call_p (gsi_stmt (gsi)))
3281 break;
3284 if (!gsi_end_p (gsi))
3285 bitmap_set_bit (contains_call, bb->index);
3288 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
3289 fill_always_executed_in_1 (loop, contains_call);
3293 /* Compute the global information needed by the loop invariant motion pass. */
3295 static void
3296 tree_ssa_lim_initialize (bool store_motion)
3298 unsigned i;
3300 bitmap_obstack_initialize (&lim_bitmap_obstack);
3301 gcc_obstack_init (&mem_ref_obstack);
3302 lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
3304 if (flag_tm)
3305 compute_transaction_bits ();
3307 memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
3308 memory_accesses.refs_list.create (100);
3309 /* Allocate a special, unanalyzable mem-ref with ID zero. */
3310 memory_accesses.refs_list.quick_push
3311 (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
3313 memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
3314 memory_accesses.refs_loaded_in_loop.quick_grow (number_of_loops (cfun));
3315 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
3316 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
3317 if (store_motion)
3319 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
3320 memory_accesses.all_refs_stored_in_loop.quick_grow
3321 (number_of_loops (cfun));
3324 for (i = 0; i < number_of_loops (cfun); i++)
3326 bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
3327 &lim_bitmap_obstack);
3328 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
3329 &lim_bitmap_obstack);
3330 if (store_motion)
3331 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
3332 &lim_bitmap_obstack);
3335 memory_accesses.ttae_cache = NULL;
3337 /* Initialize bb_loop_postorder with a mapping from loop->num to
3338 its postorder index. */
3339 i = 0;
3340 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
3341 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
3342 bb_loop_postorder[loop->num] = i++;
3345 /* Cleans up after the invariant motion pass. */
3347 static void
3348 tree_ssa_lim_finalize (void)
3350 basic_block bb;
3351 unsigned i;
3352 im_mem_ref *ref;
3354 FOR_EACH_BB_FN (bb, cfun)
3355 SET_ALWAYS_EXECUTED_IN (bb, NULL);
3357 bitmap_obstack_release (&lim_bitmap_obstack);
3358 delete lim_aux_data_map;
3360 delete memory_accesses.refs;
3361 memory_accesses.refs = NULL;
3363 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
3364 memref_free (ref);
3365 memory_accesses.refs_list.release ();
3366 obstack_free (&mem_ref_obstack, NULL);
3368 memory_accesses.refs_loaded_in_loop.release ();
3369 memory_accesses.refs_stored_in_loop.release ();
3370 memory_accesses.all_refs_stored_in_loop.release ();
3372 if (memory_accesses.ttae_cache)
3373 free_affine_expand_cache (&memory_accesses.ttae_cache);
3375 free (bb_loop_postorder);
3378 /* Moves invariants from loops. Only "expensive" invariants are moved out --
3379 i.e. those that are likely to be a win regardless of the register pressure.
3380 Only perform store motion if STORE_MOTION is true. */
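/* For example (hypothetical), in

     for (i = 0; i < n; i++)
       a[i] = x / y;

   the invariant division x / y is expensive enough to be moved in front
   of the loop, whereas a cheap invariant such as x + 1 may be left in
   place when moving it would only increase register pressure.  */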
3382 unsigned int
3383 loop_invariant_motion_in_fun (function *fun, bool store_motion)
3385 unsigned int todo = 0;
3387 tree_ssa_lim_initialize (store_motion);
3389 /* Gathers information about memory accesses in the loops. */
3390 analyze_memory_references (store_motion);
3392 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
3393 fill_always_executed_in ();
3395 int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3396 int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3398 /* For each statement determine the outermost loop in which it is
3399 invariant and the cost of computing the invariant. */
3400 for (int i = 0; i < n; ++i)
3401 compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3403 /* Execute store motion. Force the necessary invariants to be moved
3404 out of the loops as well. */
3405 if (store_motion)
3406 do_store_motion ();
3408 free (rpo);
3409 rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3410 n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3412 /* Move the expressions that are expensive enough. */
3413 for (int i = 0; i < n; ++i)
3414 todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3416 free (rpo);
3418 gsi_commit_edge_inserts ();
3419 if (need_ssa_update_p (fun))
3420 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3422 tree_ssa_lim_finalize ();
3424 return todo;
3427 /* Loop invariant motion pass. */
3429 namespace {
3431 const pass_data pass_data_lim =
3433 GIMPLE_PASS, /* type */
3434 "lim", /* name */
3435 OPTGROUP_LOOP, /* optinfo_flags */
3436 TV_LIM, /* tv_id */
3437 PROP_cfg, /* properties_required */
3438 0, /* properties_provided */
3439 0, /* properties_destroyed */
3440 0, /* todo_flags_start */
3441 0, /* todo_flags_finish */
3444 class pass_lim : public gimple_opt_pass
3446 public:
3447 pass_lim (gcc::context *ctxt)
3448 : gimple_opt_pass (pass_data_lim, ctxt)
3451 /* opt_pass methods: */
3452 opt_pass * clone () { return new pass_lim (m_ctxt); }
3453 virtual bool gate (function *) { return flag_tree_loop_im != 0; }
3454 virtual unsigned int execute (function *);
3456 }; // class pass_lim
3458 unsigned int
3459 pass_lim::execute (function *fun)
3461 bool in_loop_pipeline = scev_initialized_p ();
3462 if (!in_loop_pipeline)
3463 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
3465 if (number_of_loops (fun) <= 1)
3466 return 0;
3467 unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);
3469 if (!in_loop_pipeline)
3470 loop_optimizer_finalize ();
3471 else
3472 scev_reset ();
3473 return todo;
3476 } // anon namespace
3478 gimple_opt_pass *
3479 make_pass_lim (gcc::context *ctxt)
3481 return new pass_lim (ctxt);