Default to dwarf version 4 on hppa64-hpux
[official-gcc.git] / gcc / tree-ssa-loop-im.c
blob4b187c2cdafe586fe159343c1f5b22abfde7446a
1 /* Loop invariant motion.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "tree-eh.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-manip.h"
37 #include "tree-ssa-loop.h"
38 #include "tree-into-ssa.h"
39 #include "cfgloop.h"
40 #include "tree-affine.h"
41 #include "tree-ssa-propagate.h"
42 #include "trans-mem.h"
43 #include "gimple-fold.h"
44 #include "tree-scalar-evolution.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "alias.h"
47 #include "builtins.h"
48 #include "tree-dfa.h"
49 #include "dbgcnt.h"
51 /* TODO: Support for predicated code motion. I.e.
53 while (1)
55 if (cond)
57 a = inv;
58 something;
62 Where COND and INV are invariants, but evaluating INV may trap or be
63 invalid from some other reason if !COND. This may be transformed to
65 if (cond)
66 a = inv;
67 while (1)
69 if (cond)
70 something;
71 } */
73 /* The auxiliary data kept for each statement. */
75 struct lim_aux_data
77 class loop *max_loop; /* The outermost loop in that the statement
78 is invariant. */
80 class loop *tgt_loop; /* The loop out of that we want to move the
81 invariant. */
83 class loop *always_executed_in;
84 /* The outermost loop for that we are sure
85 the statement is executed if the loop
86 is entered. */
88 unsigned cost; /* Cost of the computation performed by the
89 statement. */
91 unsigned ref; /* The simple_mem_ref in this stmt or 0. */
93 vec<gimple *> depends; /* Vector of statements that must be also
94 hoisted out of the loop when this statement
95 is hoisted; i.e. those that define the
96 operands of the statement and are inside of
97 the MAX_LOOP loop. */
100 /* Maps statements to their lim_aux_data. */
102 static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
104 /* Description of a memory reference location. */
106 struct mem_ref_loc
108 tree *ref; /* The reference itself. */
109 gimple *stmt; /* The statement in that it occurs. */
113 /* Description of a memory reference. */
115 class im_mem_ref
117 public:
118 unsigned id : 30; /* ID assigned to the memory reference
119 (its index in memory_accesses.refs_list) */
120 unsigned ref_canonical : 1; /* Whether mem.ref was canonicalized. */
121 unsigned ref_decomposed : 1; /* Whether the ref was hashed from mem. */
122 hashval_t hash; /* Its hash value. */
124 /* The memory access itself and associated caching of alias-oracle
125 query meta-data. We are using mem.ref == error_mark_node for the
126 case the reference is represented by its single access stmt
127 in accesses_in_loop[0]. */
128 ao_ref mem;
130 bitmap stored; /* The set of loops in that this memory location
131 is stored to. */
132 bitmap loaded; /* The set of loops in that this memory location
133 is loaded from. */
134 vec<mem_ref_loc> accesses_in_loop;
135 /* The locations of the accesses. */
137 /* The following set is computed on demand. */
138 bitmap_head dep_loop; /* The set of loops in that the memory
139 reference is {in,}dependent in
140 different modes. */
143 /* We use six bits per loop in the ref->dep_loop bitmap to record
144 the dep_kind x dep_state combinations. */
146 enum dep_kind { lim_raw, sm_war, sm_waw };
147 enum dep_state { dep_unknown, dep_independent, dep_dependent };
149 /* Populate the loop dependence cache of REF for LOOP, KIND with STATE. */
151 static void
152 record_loop_dependence (class loop *loop, im_mem_ref *ref,
153 dep_kind kind, dep_state state)
155 gcc_assert (state != dep_unknown);
156 unsigned bit = 6 * loop->num + kind * 2 + state == dep_dependent ? 1 : 0;
157 bitmap_set_bit (&ref->dep_loop, bit);
160 /* Query the loop dependence cache of REF for LOOP, KIND. */
162 static dep_state
163 query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
165 unsigned first_bit = 6 * loop->num + kind * 2;
166 if (bitmap_bit_p (&ref->dep_loop, first_bit))
167 return dep_independent;
168 else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
169 return dep_dependent;
170 return dep_unknown;
173 /* Mem_ref hashtable helpers. */
175 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
177 typedef ao_ref *compare_type;
178 static inline hashval_t hash (const im_mem_ref *);
179 static inline bool equal (const im_mem_ref *, const ao_ref *);
182 /* A hash function for class im_mem_ref object OBJ. */
184 inline hashval_t
185 mem_ref_hasher::hash (const im_mem_ref *mem)
187 return mem->hash;
190 /* An equality function for class im_mem_ref object MEM1 with
191 memory reference OBJ2. */
193 inline bool
194 mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
196 if (obj2->max_size_known_p ())
197 return (mem1->ref_decomposed
198 && ((TREE_CODE (mem1->mem.base) == MEM_REF
199 && TREE_CODE (obj2->base) == MEM_REF
200 && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
201 TREE_OPERAND (obj2->base, 0), 0)
202 && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
203 mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
204 || (operand_equal_p (mem1->mem.base, obj2->base, 0)
205 && known_eq (mem1->mem.offset, obj2->offset)))
206 && known_eq (mem1->mem.size, obj2->size)
207 && known_eq (mem1->mem.max_size, obj2->max_size)
208 && mem1->mem.volatile_p == obj2->volatile_p
209 && (mem1->mem.ref_alias_set == obj2->ref_alias_set
210 /* We are not canonicalizing alias-sets but for the
211 special-case we didn't canonicalize yet and the
212 incoming ref is a alias-set zero MEM we pick
213 the correct one already. */
214 || (!mem1->ref_canonical
215 && (TREE_CODE (obj2->ref) == MEM_REF
216 || TREE_CODE (obj2->ref) == TARGET_MEM_REF)
217 && obj2->ref_alias_set == 0)
218 /* Likewise if there's a canonical ref with alias-set zero. */
219 || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
220 && types_compatible_p (TREE_TYPE (mem1->mem.ref),
221 TREE_TYPE (obj2->ref)));
222 else
223 return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
227 /* Description of memory accesses in loops. */
229 static struct
231 /* The hash table of memory references accessed in loops. */
232 hash_table<mem_ref_hasher> *refs;
234 /* The list of memory references. */
235 vec<im_mem_ref *> refs_list;
237 /* The set of memory references accessed in each loop. */
238 vec<bitmap_head> refs_loaded_in_loop;
240 /* The set of memory references stored in each loop. */
241 vec<bitmap_head> refs_stored_in_loop;
243 /* The set of memory references stored in each loop, including subloops . */
244 vec<bitmap_head> all_refs_stored_in_loop;
246 /* Cache for expanding memory addresses. */
247 hash_map<tree, name_expansion *> *ttae_cache;
248 } memory_accesses;
250 /* Obstack for the bitmaps in the above data structures. */
251 static bitmap_obstack lim_bitmap_obstack;
252 static obstack mem_ref_obstack;
254 static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
255 static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
256 static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);
258 /* Minimum cost of an expensive expression. */
259 #define LIM_EXPENSIVE ((unsigned) param_lim_expensive)
261 /* The outermost loop for which execution of the header guarantees that the
262 block will be executed. */
263 #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
264 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
266 /* ID of the shared unanalyzable mem. */
267 #define UNANALYZABLE_MEM_ID 0
269 /* Whether the reference was analyzable. */
270 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
272 static struct lim_aux_data *
273 init_lim_data (gimple *stmt)
275 lim_aux_data *p = XCNEW (struct lim_aux_data);
276 lim_aux_data_map->put (stmt, p);
278 return p;
281 static struct lim_aux_data *
282 get_lim_data (gimple *stmt)
284 lim_aux_data **p = lim_aux_data_map->get (stmt);
285 if (!p)
286 return NULL;
288 return *p;
291 /* Releases the memory occupied by DATA. */
293 static void
294 free_lim_aux_data (struct lim_aux_data *data)
296 data->depends.release ();
297 free (data);
300 static void
301 clear_lim_data (gimple *stmt)
303 lim_aux_data **p = lim_aux_data_map->get (stmt);
304 if (!p)
305 return;
307 free_lim_aux_data (*p);
308 *p = NULL;
312 /* The possibilities of statement movement. */
313 enum move_pos
315 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
316 MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement
317 become executed -- memory accesses, ... */
318 MOVE_POSSIBLE /* Unlimited movement. */
322 /* If it is possible to hoist the statement STMT unconditionally,
323 returns MOVE_POSSIBLE.
324 If it is possible to hoist the statement STMT, but we must avoid making
325 it executed if it would not be executed in the original program (e.g.
326 because it may trap), return MOVE_PRESERVE_EXECUTION.
327 Otherwise return MOVE_IMPOSSIBLE. */
329 enum move_pos
330 movement_possibility (gimple *stmt)
332 tree lhs;
333 enum move_pos ret = MOVE_POSSIBLE;
335 if (flag_unswitch_loops
336 && gimple_code (stmt) == GIMPLE_COND)
338 /* If we perform unswitching, force the operands of the invariant
339 condition to be moved out of the loop. */
340 return MOVE_POSSIBLE;
343 if (gimple_code (stmt) == GIMPLE_PHI
344 && gimple_phi_num_args (stmt) <= 2
345 && !virtual_operand_p (gimple_phi_result (stmt))
346 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
347 return MOVE_POSSIBLE;
349 if (gimple_get_lhs (stmt) == NULL_TREE)
350 return MOVE_IMPOSSIBLE;
352 if (gimple_vdef (stmt))
353 return MOVE_IMPOSSIBLE;
355 if (stmt_ends_bb_p (stmt)
356 || gimple_has_volatile_ops (stmt)
357 || gimple_has_side_effects (stmt)
358 || stmt_could_throw_p (cfun, stmt))
359 return MOVE_IMPOSSIBLE;
361 if (is_gimple_call (stmt))
363 /* While pure or const call is guaranteed to have no side effects, we
364 cannot move it arbitrarily. Consider code like
366 char *s = something ();
368 while (1)
370 if (s)
371 t = strlen (s);
372 else
373 t = 0;
376 Here the strlen call cannot be moved out of the loop, even though
377 s is invariant. In addition to possibly creating a call with
378 invalid arguments, moving out a function call that is not executed
379 may cause performance regressions in case the call is costly and
380 not executed at all. */
381 ret = MOVE_PRESERVE_EXECUTION;
382 lhs = gimple_call_lhs (stmt);
384 else if (is_gimple_assign (stmt))
385 lhs = gimple_assign_lhs (stmt);
386 else
387 return MOVE_IMPOSSIBLE;
389 if (TREE_CODE (lhs) == SSA_NAME
390 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
391 return MOVE_IMPOSSIBLE;
393 if (TREE_CODE (lhs) != SSA_NAME
394 || gimple_could_trap_p (stmt))
395 return MOVE_PRESERVE_EXECUTION;
397 /* Non local loads in a transaction cannot be hoisted out. Well,
398 unless the load happens on every path out of the loop, but we
399 don't take this into account yet. */
400 if (flag_tm
401 && gimple_in_transaction (stmt)
402 && gimple_assign_single_p (stmt))
404 tree rhs = gimple_assign_rhs1 (stmt);
405 if (DECL_P (rhs) && is_global_var (rhs))
407 if (dump_file)
409 fprintf (dump_file, "Cannot hoist conditional load of ");
410 print_generic_expr (dump_file, rhs, TDF_SLIM);
411 fprintf (dump_file, " because it is in a transaction.\n");
413 return MOVE_IMPOSSIBLE;
417 return ret;
420 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
421 loop to that we could move the expression using DEF if it did not have
422 other operands, i.e. the outermost loop enclosing LOOP in that the value
423 of DEF is invariant. */
425 static class loop *
426 outermost_invariant_loop (tree def, class loop *loop)
428 gimple *def_stmt;
429 basic_block def_bb;
430 class loop *max_loop;
431 struct lim_aux_data *lim_data;
433 if (!def)
434 return superloop_at_depth (loop, 1);
436 if (TREE_CODE (def) != SSA_NAME)
438 gcc_assert (is_gimple_min_invariant (def));
439 return superloop_at_depth (loop, 1);
442 def_stmt = SSA_NAME_DEF_STMT (def);
443 def_bb = gimple_bb (def_stmt);
444 if (!def_bb)
445 return superloop_at_depth (loop, 1);
447 max_loop = find_common_loop (loop, def_bb->loop_father);
449 lim_data = get_lim_data (def_stmt);
450 if (lim_data != NULL && lim_data->max_loop != NULL)
451 max_loop = find_common_loop (max_loop,
452 loop_outer (lim_data->max_loop));
453 if (max_loop == loop)
454 return NULL;
455 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
457 return max_loop;
460 /* DATA is a structure containing information associated with a statement
461 inside LOOP. DEF is one of the operands of this statement.
463 Find the outermost loop enclosing LOOP in that value of DEF is invariant
464 and record this in DATA->max_loop field. If DEF itself is defined inside
465 this loop as well (i.e. we need to hoist it out of the loop if we want
466 to hoist the statement represented by DATA), record the statement in that
467 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
468 add the cost of the computation of DEF to the DATA->cost.
470 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
472 static bool
473 add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
474 bool add_cost)
476 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
477 basic_block def_bb = gimple_bb (def_stmt);
478 class loop *max_loop;
479 struct lim_aux_data *def_data;
481 if (!def_bb)
482 return true;
484 max_loop = outermost_invariant_loop (def, loop);
485 if (!max_loop)
486 return false;
488 if (flow_loop_nested_p (data->max_loop, max_loop))
489 data->max_loop = max_loop;
491 def_data = get_lim_data (def_stmt);
492 if (!def_data)
493 return true;
495 if (add_cost
496 /* Only add the cost if the statement defining DEF is inside LOOP,
497 i.e. if it is likely that by moving the invariants dependent
498 on it, we will be able to avoid creating a new register for
499 it (since it will be only used in these dependent invariants). */
500 && def_bb->loop_father == loop)
501 data->cost += def_data->cost;
503 data->depends.safe_push (def_stmt);
505 return true;
508 /* Returns an estimate for a cost of statement STMT. The values here
509 are just ad-hoc constants, similar to costs for inlining. */
511 static unsigned
512 stmt_cost (gimple *stmt)
514 /* Always try to create possibilities for unswitching. */
515 if (gimple_code (stmt) == GIMPLE_COND
516 || gimple_code (stmt) == GIMPLE_PHI)
517 return LIM_EXPENSIVE;
519 /* We should be hoisting calls if possible. */
520 if (is_gimple_call (stmt))
522 tree fndecl;
524 /* Unless the call is a builtin_constant_p; this always folds to a
525 constant, so moving it is useless. */
526 fndecl = gimple_call_fndecl (stmt);
527 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
528 return 0;
530 return LIM_EXPENSIVE;
533 /* Hoisting memory references out should almost surely be a win. */
534 if (gimple_references_memory_p (stmt))
535 return LIM_EXPENSIVE;
537 if (gimple_code (stmt) != GIMPLE_ASSIGN)
538 return 1;
540 switch (gimple_assign_rhs_code (stmt))
542 case MULT_EXPR:
543 case WIDEN_MULT_EXPR:
544 case WIDEN_MULT_PLUS_EXPR:
545 case WIDEN_MULT_MINUS_EXPR:
546 case DOT_PROD_EXPR:
547 case TRUNC_DIV_EXPR:
548 case CEIL_DIV_EXPR:
549 case FLOOR_DIV_EXPR:
550 case ROUND_DIV_EXPR:
551 case EXACT_DIV_EXPR:
552 case CEIL_MOD_EXPR:
553 case FLOOR_MOD_EXPR:
554 case ROUND_MOD_EXPR:
555 case TRUNC_MOD_EXPR:
556 case RDIV_EXPR:
557 /* Division and multiplication are usually expensive. */
558 return LIM_EXPENSIVE;
560 case LSHIFT_EXPR:
561 case RSHIFT_EXPR:
562 case WIDEN_LSHIFT_EXPR:
563 case LROTATE_EXPR:
564 case RROTATE_EXPR:
565 /* Shifts and rotates are usually expensive. */
566 return LIM_EXPENSIVE;
568 case CONSTRUCTOR:
569 /* Make vector construction cost proportional to the number
570 of elements. */
571 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
573 case SSA_NAME:
574 case PAREN_EXPR:
575 /* Whether or not something is wrapped inside a PAREN_EXPR
576 should not change move cost. Nor should an intermediate
577 unpropagated SSA name copy. */
578 return 0;
580 default:
581 return 1;
585 /* Finds the outermost loop between OUTER and LOOP in that the memory reference
586 REF is independent. If REF is not independent in LOOP, NULL is returned
587 instead. */
589 static class loop *
590 outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
592 class loop *aloop;
594 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
595 return NULL;
597 for (aloop = outer;
598 aloop != loop;
599 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
600 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
601 && ref_indep_loop_p (aloop, ref, lim_raw))
602 return aloop;
604 if (ref_indep_loop_p (loop, ref, lim_raw))
605 return loop;
606 else
607 return NULL;
610 /* If there is a simple load or store to a memory reference in STMT, returns
611 the location of the memory reference, and sets IS_STORE according to whether
612 it is a store or load. Otherwise, returns NULL. */
614 static tree *
615 simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
617 tree *lhs, *rhs;
619 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
620 if (!gimple_assign_single_p (stmt))
621 return NULL;
623 lhs = gimple_assign_lhs_ptr (stmt);
624 rhs = gimple_assign_rhs1_ptr (stmt);
626 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
628 *is_store = false;
629 return rhs;
631 else if (gimple_vdef (stmt)
632 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
634 *is_store = true;
635 return lhs;
637 else
638 return NULL;
641 /* From a controlling predicate in DOM determine the arguments from
642 the PHI node PHI that are chosen if the predicate evaluates to
643 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
644 they are non-NULL. Returns true if the arguments can be determined,
645 else return false. */
647 static bool
648 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
649 tree *true_arg_p, tree *false_arg_p)
651 edge te, fe;
652 if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
653 &te, &fe))
654 return false;
656 if (true_arg_p)
657 *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
658 if (false_arg_p)
659 *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
661 return true;
664 /* Determine the outermost loop to that it is possible to hoist a statement
665 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
666 the outermost loop in that the value computed by STMT is invariant.
667 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
668 we preserve the fact whether STMT is executed. It also fills other related
669 information to LIM_DATA (STMT).
671 The function returns false if STMT cannot be hoisted outside of the loop it
672 is defined in, and true otherwise. */
674 static bool
675 determine_max_movement (gimple *stmt, bool must_preserve_exec)
677 basic_block bb = gimple_bb (stmt);
678 class loop *loop = bb->loop_father;
679 class loop *level;
680 struct lim_aux_data *lim_data = get_lim_data (stmt);
681 tree val;
682 ssa_op_iter iter;
684 if (must_preserve_exec)
685 level = ALWAYS_EXECUTED_IN (bb);
686 else
687 level = superloop_at_depth (loop, 1);
688 lim_data->max_loop = level;
690 if (gphi *phi = dyn_cast <gphi *> (stmt))
692 use_operand_p use_p;
693 unsigned min_cost = UINT_MAX;
694 unsigned total_cost = 0;
695 struct lim_aux_data *def_data;
697 /* We will end up promoting dependencies to be unconditionally
698 evaluated. For this reason the PHI cost (and thus the
699 cost we remove from the loop by doing the invariant motion)
700 is that of the cheapest PHI argument dependency chain. */
701 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
703 val = USE_FROM_PTR (use_p);
705 if (TREE_CODE (val) != SSA_NAME)
707 /* Assign const 1 to constants. */
708 min_cost = MIN (min_cost, 1);
709 total_cost += 1;
710 continue;
712 if (!add_dependency (val, lim_data, loop, false))
713 return false;
715 gimple *def_stmt = SSA_NAME_DEF_STMT (val);
716 if (gimple_bb (def_stmt)
717 && gimple_bb (def_stmt)->loop_father == loop)
719 def_data = get_lim_data (def_stmt);
720 if (def_data)
722 min_cost = MIN (min_cost, def_data->cost);
723 total_cost += def_data->cost;
728 min_cost = MIN (min_cost, total_cost);
729 lim_data->cost += min_cost;
731 if (gimple_phi_num_args (phi) > 1)
733 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
734 gimple *cond;
735 if (gsi_end_p (gsi_last_bb (dom)))
736 return false;
737 cond = gsi_stmt (gsi_last_bb (dom));
738 if (gimple_code (cond) != GIMPLE_COND)
739 return false;
740 /* Verify that this is an extended form of a diamond and
741 the PHI arguments are completely controlled by the
742 predicate in DOM. */
743 if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
744 return false;
746 /* Fold in dependencies and cost of the condition. */
747 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
749 if (!add_dependency (val, lim_data, loop, false))
750 return false;
751 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
752 if (def_data)
753 lim_data->cost += def_data->cost;
756 /* We want to avoid unconditionally executing very expensive
757 operations. As costs for our dependencies cannot be
758 negative just claim we are not invariand for this case.
759 We also are not sure whether the control-flow inside the
760 loop will vanish. */
761 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
762 && !(min_cost != 0
763 && total_cost / min_cost <= 2))
764 return false;
766 /* Assume that the control-flow in the loop will vanish.
767 ??? We should verify this and not artificially increase
768 the cost if that is not the case. */
769 lim_data->cost += stmt_cost (stmt);
772 return true;
774 else
775 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
776 if (!add_dependency (val, lim_data, loop, true))
777 return false;
779 if (gimple_vuse (stmt))
781 im_mem_ref *ref
782 = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
783 if (ref
784 && MEM_ANALYZABLE (ref))
786 lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
787 loop, ref);
788 if (!lim_data->max_loop)
789 return false;
791 else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
792 return false;
795 lim_data->cost += stmt_cost (stmt);
797 return true;
800 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
801 and that one of the operands of this statement is computed by STMT.
802 Ensure that STMT (together with all the statements that define its
803 operands) is hoisted at least out of the loop LEVEL. */
805 static void
806 set_level (gimple *stmt, class loop *orig_loop, class loop *level)
808 class loop *stmt_loop = gimple_bb (stmt)->loop_father;
809 struct lim_aux_data *lim_data;
810 gimple *dep_stmt;
811 unsigned i;
813 stmt_loop = find_common_loop (orig_loop, stmt_loop);
814 lim_data = get_lim_data (stmt);
815 if (lim_data != NULL && lim_data->tgt_loop != NULL)
816 stmt_loop = find_common_loop (stmt_loop,
817 loop_outer (lim_data->tgt_loop));
818 if (flow_loop_nested_p (stmt_loop, level))
819 return;
821 gcc_assert (level == lim_data->max_loop
822 || flow_loop_nested_p (lim_data->max_loop, level));
824 lim_data->tgt_loop = level;
825 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
826 set_level (dep_stmt, orig_loop, level);
829 /* Determines an outermost loop from that we want to hoist the statement STMT.
830 For now we chose the outermost possible loop. TODO -- use profiling
831 information to set it more sanely. */
833 static void
834 set_profitable_level (gimple *stmt)
836 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
839 /* Returns true if STMT is a call that has side effects. */
841 static bool
842 nonpure_call_p (gimple *stmt)
844 if (gimple_code (stmt) != GIMPLE_CALL)
845 return false;
847 return gimple_has_side_effects (stmt);
850 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
852 static gimple *
853 rewrite_reciprocal (gimple_stmt_iterator *bsi)
855 gassign *stmt, *stmt1, *stmt2;
856 tree name, lhs, type;
857 tree real_one;
858 gimple_stmt_iterator gsi;
860 stmt = as_a <gassign *> (gsi_stmt (*bsi));
861 lhs = gimple_assign_lhs (stmt);
862 type = TREE_TYPE (lhs);
864 real_one = build_one_cst (type);
866 name = make_temp_ssa_name (type, NULL, "reciptmp");
867 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
868 gimple_assign_rhs2 (stmt));
869 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
870 gimple_assign_rhs1 (stmt));
872 /* Replace division stmt with reciprocal and multiply stmts.
873 The multiply stmt is not invariant, so update iterator
874 and avoid rescanning. */
875 gsi = *bsi;
876 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
877 gsi_replace (&gsi, stmt2, true);
879 /* Continue processing with invariant reciprocal statement. */
880 return stmt1;
883 /* Check if the pattern at *BSI is a bittest of the form
884 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
886 static gimple *
887 rewrite_bittest (gimple_stmt_iterator *bsi)
889 gassign *stmt;
890 gimple *stmt1;
891 gassign *stmt2;
892 gimple *use_stmt;
893 gcond *cond_stmt;
894 tree lhs, name, t, a, b;
895 use_operand_p use;
897 stmt = as_a <gassign *> (gsi_stmt (*bsi));
898 lhs = gimple_assign_lhs (stmt);
900 /* Verify that the single use of lhs is a comparison against zero. */
901 if (TREE_CODE (lhs) != SSA_NAME
902 || !single_imm_use (lhs, &use, &use_stmt))
903 return stmt;
904 cond_stmt = dyn_cast <gcond *> (use_stmt);
905 if (!cond_stmt)
906 return stmt;
907 if (gimple_cond_lhs (cond_stmt) != lhs
908 || (gimple_cond_code (cond_stmt) != NE_EXPR
909 && gimple_cond_code (cond_stmt) != EQ_EXPR)
910 || !integer_zerop (gimple_cond_rhs (cond_stmt)))
911 return stmt;
913 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
914 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
915 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
916 return stmt;
918 /* There is a conversion in between possibly inserted by fold. */
919 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
921 t = gimple_assign_rhs1 (stmt1);
922 if (TREE_CODE (t) != SSA_NAME
923 || !has_single_use (t))
924 return stmt;
925 stmt1 = SSA_NAME_DEF_STMT (t);
926 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
927 return stmt;
930 /* Verify that B is loop invariant but A is not. Verify that with
931 all the stmt walking we are still in the same loop. */
932 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
933 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
934 return stmt;
936 a = gimple_assign_rhs1 (stmt1);
937 b = gimple_assign_rhs2 (stmt1);
939 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
940 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
942 gimple_stmt_iterator rsi;
944 /* 1 << B */
945 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
946 build_int_cst (TREE_TYPE (a), 1), b);
947 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
948 stmt1 = gimple_build_assign (name, t);
950 /* A & (1 << B) */
951 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
952 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
953 stmt2 = gimple_build_assign (name, t);
955 /* Replace the SSA_NAME we compare against zero. Adjust
956 the type of zero accordingly. */
957 SET_USE (use, name);
958 gimple_cond_set_rhs (cond_stmt,
959 build_int_cst_type (TREE_TYPE (name),
960 0));
962 /* Don't use gsi_replace here, none of the new assignments sets
963 the variable originally set in stmt. Move bsi to stmt1, and
964 then remove the original stmt, so that we get a chance to
965 retain debug info for it. */
966 rsi = *bsi;
967 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
968 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
969 gimple *to_release = gsi_stmt (rsi);
970 gsi_remove (&rsi, true);
971 release_defs (to_release);
973 return stmt1;
976 return stmt;
979 /* Determine the outermost loops in that statements in basic block BB are
980 invariant, and record them to the LIM_DATA associated with the
981 statements. */
983 static void
984 compute_invariantness (basic_block bb)
986 enum move_pos pos;
987 gimple_stmt_iterator bsi;
988 gimple *stmt;
989 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
990 class loop *outermost = ALWAYS_EXECUTED_IN (bb);
991 struct lim_aux_data *lim_data;
993 if (!loop_outer (bb->loop_father))
994 return;
996 if (dump_file && (dump_flags & TDF_DETAILS))
997 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
998 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1000 /* Look at PHI nodes, but only if there is at most two.
1001 ??? We could relax this further by post-processing the inserted
1002 code and transforming adjacent cond-exprs with the same predicate
1003 to control flow again. */
1004 bsi = gsi_start_phis (bb);
1005 if (!gsi_end_p (bsi)
1006 && ((gsi_next (&bsi), gsi_end_p (bsi))
1007 || (gsi_next (&bsi), gsi_end_p (bsi))))
1008 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1010 stmt = gsi_stmt (bsi);
1012 pos = movement_possibility (stmt);
1013 if (pos == MOVE_IMPOSSIBLE)
1014 continue;
1016 lim_data = get_lim_data (stmt);
1017 if (! lim_data)
1018 lim_data = init_lim_data (stmt);
1019 lim_data->always_executed_in = outermost;
1021 if (!determine_max_movement (stmt, false))
1023 lim_data->max_loop = NULL;
1024 continue;
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1029 print_gimple_stmt (dump_file, stmt, 2);
1030 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1031 loop_depth (lim_data->max_loop),
1032 lim_data->cost);
1035 if (lim_data->cost >= LIM_EXPENSIVE)
1036 set_profitable_level (stmt);
1039 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1041 stmt = gsi_stmt (bsi);
1043 pos = movement_possibility (stmt);
1044 if (pos == MOVE_IMPOSSIBLE)
1046 if (nonpure_call_p (stmt))
1048 maybe_never = true;
1049 outermost = NULL;
1051 /* Make sure to note always_executed_in for stores to make
1052 store-motion work. */
1053 else if (stmt_makes_single_store (stmt))
1055 struct lim_aux_data *lim_data = get_lim_data (stmt);
1056 if (! lim_data)
1057 lim_data = init_lim_data (stmt);
1058 lim_data->always_executed_in = outermost;
1060 continue;
1063 if (is_gimple_assign (stmt)
1064 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1065 == GIMPLE_BINARY_RHS))
1067 tree op0 = gimple_assign_rhs1 (stmt);
1068 tree op1 = gimple_assign_rhs2 (stmt);
1069 class loop *ol1 = outermost_invariant_loop (op1,
1070 loop_containing_stmt (stmt));
1072 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1073 to be hoisted out of loop, saving expensive divide. */
1074 if (pos == MOVE_POSSIBLE
1075 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1076 && flag_unsafe_math_optimizations
1077 && !flag_trapping_math
1078 && ol1 != NULL
1079 && outermost_invariant_loop (op0, ol1) == NULL)
1080 stmt = rewrite_reciprocal (&bsi);
1082 /* If the shift count is invariant, convert (A >> B) & 1 to
1083 A & (1 << B) allowing the bit mask to be hoisted out of the loop
1084 saving an expensive shift. */
1085 if (pos == MOVE_POSSIBLE
1086 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1087 && integer_onep (op1)
1088 && TREE_CODE (op0) == SSA_NAME
1089 && has_single_use (op0))
1090 stmt = rewrite_bittest (&bsi);
1093 lim_data = get_lim_data (stmt);
1094 if (! lim_data)
1095 lim_data = init_lim_data (stmt);
1096 lim_data->always_executed_in = outermost;
1098 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1099 continue;
1101 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1103 lim_data->max_loop = NULL;
1104 continue;
1107 if (dump_file && (dump_flags & TDF_DETAILS))
1109 print_gimple_stmt (dump_file, stmt, 2);
1110 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1111 loop_depth (lim_data->max_loop),
1112 lim_data->cost);
1115 if (lim_data->cost >= LIM_EXPENSIVE)
1116 set_profitable_level (stmt);
1120 /* Hoist the statements in basic block BB out of the loops prescribed by
1121 data stored in LIM_DATA structures associated with each statement. Callback
1122 for walk_dominator_tree. */
1124 unsigned int
1125 move_computations_worker (basic_block bb)
1127 class loop *level;
1128 unsigned cost = 0;
1129 struct lim_aux_data *lim_data;
1130 unsigned int todo = 0;
1132 if (!loop_outer (bb->loop_father))
1133 return todo;
1135 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1137 gassign *new_stmt;
1138 gphi *stmt = bsi.phi ();
1140 lim_data = get_lim_data (stmt);
1141 if (lim_data == NULL)
1143 gsi_next (&bsi);
1144 continue;
1147 cost = lim_data->cost;
1148 level = lim_data->tgt_loop;
1149 clear_lim_data (stmt);
1151 if (!level)
1153 gsi_next (&bsi);
1154 continue;
1157 if (dump_file && (dump_flags & TDF_DETAILS))
1159 fprintf (dump_file, "Moving PHI node\n");
1160 print_gimple_stmt (dump_file, stmt, 0);
1161 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1162 cost, level->num);
1165 if (gimple_phi_num_args (stmt) == 1)
1167 tree arg = PHI_ARG_DEF (stmt, 0);
1168 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1169 TREE_CODE (arg), arg);
1171 else
1173 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1174 gimple *cond = gsi_stmt (gsi_last_bb (dom));
1175 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1176 /* Get the PHI arguments corresponding to the true and false
1177 edges of COND. */
1178 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1179 gcc_assert (arg0 && arg1);
1180 t = build2 (gimple_cond_code (cond), boolean_type_node,
1181 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1182 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1183 COND_EXPR, t, arg0, arg1);
1184 todo |= TODO_cleanup_cfg;
1186 if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1187 && (!ALWAYS_EXECUTED_IN (bb)
1188 || (ALWAYS_EXECUTED_IN (bb) != level
1189 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1191 tree lhs = gimple_assign_lhs (new_stmt);
1192 SSA_NAME_RANGE_INFO (lhs) = NULL;
1194 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1195 remove_phi_node (&bsi, false);
1198 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1200 edge e;
1202 gimple *stmt = gsi_stmt (bsi);
1204 lim_data = get_lim_data (stmt);
1205 if (lim_data == NULL)
1207 gsi_next (&bsi);
1208 continue;
1211 cost = lim_data->cost;
1212 level = lim_data->tgt_loop;
1213 clear_lim_data (stmt);
1215 if (!level)
1217 gsi_next (&bsi);
1218 continue;
1221 /* We do not really want to move conditionals out of the loop; we just
1222 placed it here to force its operands to be moved if necessary. */
1223 if (gimple_code (stmt) == GIMPLE_COND)
1224 continue;
1226 if (dump_file && (dump_flags & TDF_DETAILS))
1228 fprintf (dump_file, "Moving statement\n");
1229 print_gimple_stmt (dump_file, stmt, 0);
1230 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1231 cost, level->num);
1234 e = loop_preheader_edge (level);
1235 gcc_assert (!gimple_vdef (stmt));
1236 if (gimple_vuse (stmt))
1238 /* The new VUSE is the one from the virtual PHI in the loop
1239 header or the one already present. */
1240 gphi_iterator gsi2;
1241 for (gsi2 = gsi_start_phis (e->dest);
1242 !gsi_end_p (gsi2); gsi_next (&gsi2))
1244 gphi *phi = gsi2.phi ();
1245 if (virtual_operand_p (gimple_phi_result (phi)))
1247 SET_USE (gimple_vuse_op (stmt),
1248 PHI_ARG_DEF_FROM_EDGE (phi, e));
1249 break;
1253 gsi_remove (&bsi, false);
1254 if (gimple_has_lhs (stmt)
1255 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1256 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1257 && (!ALWAYS_EXECUTED_IN (bb)
1258 || !(ALWAYS_EXECUTED_IN (bb) == level
1259 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1261 tree lhs = gimple_get_lhs (stmt);
1262 SSA_NAME_RANGE_INFO (lhs) = NULL;
1264 /* In case this is a stmt that is not unconditionally executed
1265 when the target loop header is executed and the stmt may
1266 invoke undefined integer or pointer overflow rewrite it to
1267 unsigned arithmetic. */
1268 if (is_gimple_assign (stmt)
1269 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1270 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1271 && arith_code_with_undefined_signed_overflow
1272 (gimple_assign_rhs_code (stmt))
1273 && (!ALWAYS_EXECUTED_IN (bb)
1274 || !(ALWAYS_EXECUTED_IN (bb) == level
1275 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1276 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1277 else
1278 gsi_insert_on_edge (e, stmt);
1281 return todo;
1284 /* Checks whether the statement defining variable *INDEX can be hoisted
1285 out of the loop passed in DATA. Callback for for_each_index. */
1287 static bool
1288 may_move_till (tree ref, tree *index, void *data)
1290 class loop *loop = (class loop *) data, *max_loop;
1292 /* If REF is an array reference, check also that the step and the lower
1293 bound is invariant in LOOP. */
1294 if (TREE_CODE (ref) == ARRAY_REF)
1296 tree step = TREE_OPERAND (ref, 3);
1297 tree lbound = TREE_OPERAND (ref, 2);
1299 max_loop = outermost_invariant_loop (step, loop);
1300 if (!max_loop)
1301 return false;
1303 max_loop = outermost_invariant_loop (lbound, loop);
1304 if (!max_loop)
1305 return false;
1308 max_loop = outermost_invariant_loop (*index, loop);
1309 if (!max_loop)
1310 return false;
1312 return true;
1315 /* If OP is SSA NAME, force the statement that defines it to be
1316 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
1318 static void
1319 force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1321 gimple *stmt;
1323 if (!op
1324 || is_gimple_min_invariant (op))
1325 return;
1327 gcc_assert (TREE_CODE (op) == SSA_NAME);
1329 stmt = SSA_NAME_DEF_STMT (op);
1330 if (gimple_nop_p (stmt))
1331 return;
1333 set_level (stmt, orig_loop, loop);
1336 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1337 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1338 for_each_index. */
1340 struct fmt_data
1342 class loop *loop;
1343 class loop *orig_loop;
1346 static bool
1347 force_move_till (tree ref, tree *index, void *data)
1349 struct fmt_data *fmt_data = (struct fmt_data *) data;
1351 if (TREE_CODE (ref) == ARRAY_REF)
1353 tree step = TREE_OPERAND (ref, 3);
1354 tree lbound = TREE_OPERAND (ref, 2);
1356 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1357 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1360 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1362 return true;
1365 /* A function to free the mem_ref object OBJ. */
1367 static void
1368 memref_free (class im_mem_ref *mem)
1370 mem->accesses_in_loop.release ();
1373 /* Allocates and returns a memory reference description for MEM whose hash
1374 value is HASH and id is ID. */
1376 static im_mem_ref *
1377 mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1379 im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1380 if (mem)
1381 ref->mem = *mem;
1382 else
1383 ao_ref_init (&ref->mem, error_mark_node);
1384 ref->id = id;
1385 ref->ref_canonical = false;
1386 ref->ref_decomposed = false;
1387 ref->hash = hash;
1388 ref->stored = NULL;
1389 ref->loaded = NULL;
1390 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1391 ref->accesses_in_loop.create (1);
1393 return ref;
1396 /* Records memory reference location *LOC in LOOP to the memory reference
1397 description REF. The reference occurs in statement STMT. */
1399 static void
1400 record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1402 mem_ref_loc aref;
1403 aref.stmt = stmt;
1404 aref.ref = loc;
1405 ref->accesses_in_loop.safe_push (aref);
1408 /* Set the LOOP bit in REF stored bitmap and allocate that if
1409 necessary. Return whether a bit was changed. */
1411 static bool
1412 set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1414 if (!ref->stored)
1415 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1416 return bitmap_set_bit (ref->stored, loop->num);
1419 /* Marks reference REF as stored in LOOP. */
1421 static void
1422 mark_ref_stored (im_mem_ref *ref, class loop *loop)
1424 while (loop != current_loops->tree_root
1425 && set_ref_stored_in_loop (ref, loop))
1426 loop = loop_outer (loop);
1429 /* Set the LOOP bit in REF loaded bitmap and allocate that if
1430 necessary. Return whether a bit was changed. */
1432 static bool
1433 set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1435 if (!ref->loaded)
1436 ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
1437 return bitmap_set_bit (ref->loaded, loop->num);
1440 /* Marks reference REF as loaded in LOOP. */
1442 static void
1443 mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1445 while (loop != current_loops->tree_root
1446 && set_ref_loaded_in_loop (ref, loop))
1447 loop = loop_outer (loop);
1450 /* Gathers memory references in statement STMT in LOOP, storing the
1451 information about them in the memory_accesses structure. Marks
1452 the vops accessed through unrecognized statements there as
1453 well. */
1455 static void
1456 gather_mem_refs_stmt (class loop *loop, gimple *stmt)
1458 tree *mem = NULL;
1459 hashval_t hash;
1460 im_mem_ref **slot;
1461 im_mem_ref *ref;
1462 bool is_stored;
1463 unsigned id;
1465 if (!gimple_vuse (stmt))
1466 return;
1468 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1469 if (!mem && is_gimple_assign (stmt))
1471 /* For aggregate copies record distinct references but use them
1472 only for disambiguation purposes. */
1473 id = memory_accesses.refs_list.length ();
1474 ref = mem_ref_alloc (NULL, 0, id);
1475 memory_accesses.refs_list.safe_push (ref);
1476 if (dump_file && (dump_flags & TDF_DETAILS))
1478 fprintf (dump_file, "Unhandled memory reference %u: ", id);
1479 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1481 record_mem_ref_loc (ref, stmt, mem);
1482 is_stored = gimple_vdef (stmt);
1484 else if (!mem)
1486 /* We use the shared mem_ref for all unanalyzable refs. */
1487 id = UNANALYZABLE_MEM_ID;
1488 ref = memory_accesses.refs_list[id];
1489 if (dump_file && (dump_flags & TDF_DETAILS))
1491 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1492 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1494 is_stored = gimple_vdef (stmt);
1496 else
1498 /* We are looking for equal refs that might differ in structure
1499 such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but
1500 make sure we can canonicalize the ref in the hashtable if
1501 non-operand_equal_p refs are found. For the lookup we mark
1502 the case we want strict equality with aor.max_size == -1. */
1503 ao_ref aor;
1504 ao_ref_init (&aor, *mem);
1505 ao_ref_base (&aor);
1506 ao_ref_alias_set (&aor);
1507 HOST_WIDE_INT offset, size, max_size;
1508 poly_int64 saved_maxsize = aor.max_size, mem_off;
1509 tree mem_base;
1510 bool ref_decomposed;
1511 if (aor.max_size_known_p ()
1512 && aor.offset.is_constant (&offset)
1513 && aor.size.is_constant (&size)
1514 && aor.max_size.is_constant (&max_size)
1515 && size == max_size
1516 && (size % BITS_PER_UNIT) == 0
1517 /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1518 size. Make sure this is consistent with the extraction. */
1519 && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1520 && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1521 aor.size)
1522 && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1524 ref_decomposed = true;
1525 tree base = ao_ref_base (&aor);
1526 poly_int64 moffset;
1527 HOST_WIDE_INT mcoffset;
1528 if (TREE_CODE (base) == MEM_REF
1529 && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
1530 && moffset.is_constant (&mcoffset))
1532 hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
1533 hash = iterative_hash_host_wide_int (mcoffset, hash);
1535 else
1537 hash = iterative_hash_expr (base, 0);
1538 hash = iterative_hash_host_wide_int (offset, hash);
1540 hash = iterative_hash_host_wide_int (size, hash);
1542 else
1544 ref_decomposed = false;
1545 hash = iterative_hash_expr (aor.ref, 0);
1546 aor.max_size = -1;
1548 slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1549 aor.max_size = saved_maxsize;
1550 if (*slot)
1552 if (!(*slot)->ref_canonical
1553 && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1555 /* If we didn't yet canonicalize the hashtable ref (which
1556 we'll end up using for code insertion) and hit a second
1557 equal ref that is not structurally equivalent create
1558 a canonical ref which is a bare MEM_REF. */
1559 if (TREE_CODE (*mem) == MEM_REF
1560 || TREE_CODE (*mem) == TARGET_MEM_REF)
1562 (*slot)->mem.ref = *mem;
1563 (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1565 else
1567 tree ref_alias_type = reference_alias_ptr_type (*mem);
1568 unsigned int ref_align = get_object_alignment (*mem);
1569 tree ref_type = TREE_TYPE (*mem);
1570 tree tmp = build1 (ADDR_EXPR, ptr_type_node,
1571 unshare_expr (mem_base));
1572 if (TYPE_ALIGN (ref_type) != ref_align)
1573 ref_type = build_aligned_type (ref_type, ref_align);
1574 (*slot)->mem.ref
1575 = fold_build2 (MEM_REF, ref_type, tmp,
1576 build_int_cst (ref_alias_type, mem_off));
1577 if ((*slot)->mem.volatile_p)
1578 TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
1579 gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1580 && is_gimple_mem_ref_addr
1581 (TREE_OPERAND ((*slot)->mem.ref,
1582 0)));
1583 (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1585 (*slot)->ref_canonical = true;
1587 ref = *slot;
1588 id = ref->id;
1590 else
1592 id = memory_accesses.refs_list.length ();
1593 ref = mem_ref_alloc (&aor, hash, id);
1594 ref->ref_decomposed = ref_decomposed;
1595 memory_accesses.refs_list.safe_push (ref);
1596 *slot = ref;
1598 if (dump_file && (dump_flags & TDF_DETAILS))
1600 fprintf (dump_file, "Memory reference %u: ", id);
1601 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1602 fprintf (dump_file, "\n");
1606 record_mem_ref_loc (ref, stmt, mem);
1608 if (is_stored)
1610 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1611 mark_ref_stored (ref, loop);
1613 /* A not simple memory op is also a read when it is a write. */
1614 if (!is_stored || id == UNANALYZABLE_MEM_ID
1615 || ref->mem.ref == error_mark_node)
1617 bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
1618 mark_ref_loaded (ref, loop);
1620 init_lim_data (stmt)->ref = ref->id;
1621 return;
1624 static unsigned *bb_loop_postorder;
1626 /* qsort sort function to sort blocks after their loop fathers postorder. */
1628 static int
1629 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1630 void *bb_loop_postorder_)
1632 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1633 basic_block bb1 = *(const basic_block *)bb1_;
1634 basic_block bb2 = *(const basic_block *)bb2_;
1635 class loop *loop1 = bb1->loop_father;
1636 class loop *loop2 = bb2->loop_father;
1637 if (loop1->num == loop2->num)
1638 return bb1->index - bb2->index;
1639 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1642 /* qsort sort function to sort ref locs after their loop fathers postorder. */
1644 static int
1645 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1646 void *bb_loop_postorder_)
1648 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1649 const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1650 const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1651 class loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1652 class loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1653 if (loop1->num == loop2->num)
1654 return 0;
1655 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1658 /* Gathers memory references in loops. */
1660 static void
1661 analyze_memory_references (bool store_motion)
1663 gimple_stmt_iterator bsi;
1664 basic_block bb, *bbs;
1665 class loop *outer;
1666 unsigned i, n;
1668 /* Collect all basic-blocks in loops and sort them after their
1669 loops postorder. */
1670 i = 0;
1671 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1672 FOR_EACH_BB_FN (bb, cfun)
1673 if (bb->loop_father != current_loops->tree_root)
1674 bbs[i++] = bb;
1675 n = i;
1676 gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
1677 bb_loop_postorder);
1679 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1680 That results in better locality for all the bitmaps. It also
1681 automatically sorts the location list of gathered memory references
1682 after their loop postorder number allowing to binary-search it. */
1683 for (i = 0; i < n; ++i)
1685 basic_block bb = bbs[i];
1686 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1687 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1690 /* Verify the list of gathered memory references is sorted after their
1691 loop postorder number. */
1692 if (flag_checking)
1694 im_mem_ref *ref;
1695 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1696 for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
1697 gcc_assert (sort_locs_in_loop_postorder_cmp
1698 (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
1699 bb_loop_postorder) <= 0);
1702 free (bbs);
1704 if (!store_motion)
1705 return;
1707 /* Propagate the information about accessed memory references up
1708 the loop hierarchy. */
1709 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1711 /* Finalize the overall touched references (including subloops). */
1712 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1713 &memory_accesses.refs_stored_in_loop[loop->num]);
1715 /* Propagate the information about accessed memory references up
1716 the loop hierarchy. */
1717 outer = loop_outer (loop);
1718 if (outer == current_loops->tree_root)
1719 continue;
1721 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1722 &memory_accesses.all_refs_stored_in_loop[loop->num]);
1726 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1727 tree_to_aff_combination_expand. */
1729 static bool
1730 mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1731 hash_map<tree, name_expansion *> **ttae_cache,
1732 bool tbaa_p)
1734 gcc_checking_assert (mem1->mem.ref != error_mark_node
1735 && mem2->mem.ref != error_mark_node);
1737 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1738 object and their offset differ in such a way that the locations cannot
1739 overlap, then they cannot alias. */
1740 poly_widest_int size1, size2;
1741 aff_tree off1, off2;
1743 /* Perform basic offset and type-based disambiguation. */
1744 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
1745 return false;
1747 /* The expansion of addresses may be a bit expensive, thus we only do
1748 the check at -O2 and higher optimization levels. */
1749 if (optimize < 2)
1750 return true;
1752 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1753 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1754 aff_combination_expand (&off1, ttae_cache);
1755 aff_combination_expand (&off2, ttae_cache);
1756 aff_combination_scale (&off1, -1);
1757 aff_combination_add (&off2, &off1);
1759 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1760 return false;
1762 return true;
1765 /* Compare function for bsearch searching for reference locations
1766 in a loop. */
1768 static int
1769 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1770 void *bb_loop_postorder_)
1772 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1773 class loop *loop = (class loop *)const_cast<void *>(loop_);
1774 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1775 class loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1776 if (loop->num == loc_loop->num
1777 || flow_loop_nested_p (loop, loc_loop))
1778 return 0;
1779 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1780 ? -1 : 1);
1783 /* Iterates over all locations of REF in LOOP and its subloops calling
1784 fn.operator() with the location as argument. When that operator
1785 returns true the iteration is stopped and true is returned.
1786 Otherwise false is returned. */
1788 template <typename FN>
1789 static bool
1790 for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
1792 unsigned i;
1793 mem_ref_loc *loc;
1795 /* Search for the cluster of locs in the accesses_in_loop vector
1796 which is sorted after postorder index of the loop father. */
1797 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp,
1798 bb_loop_postorder);
1799 if (!loc)
1800 return false;
1802 /* We have found one location inside loop or its sub-loops. Iterate
1803 both forward and backward to cover the whole cluster. */
1804 i = loc - ref->accesses_in_loop.address ();
1805 while (i > 0)
1807 --i;
1808 mem_ref_loc *l = &ref->accesses_in_loop[i];
1809 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1810 break;
1811 if (fn (l))
1812 return true;
1814 for (i = loc - ref->accesses_in_loop.address ();
1815 i < ref->accesses_in_loop.length (); ++i)
1817 mem_ref_loc *l = &ref->accesses_in_loop[i];
1818 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1819 break;
1820 if (fn (l))
1821 return true;
1824 return false;
1827 /* Rewrites location LOC by TMP_VAR. */
1829 class rewrite_mem_ref_loc
1831 public:
1832 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1833 bool operator () (mem_ref_loc *loc);
1834 tree tmp_var;
1837 bool
1838 rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1840 *loc->ref = tmp_var;
1841 update_stmt (loc->stmt);
1842 return false;
1845 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1847 static void
1848 rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
1850 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1853 /* Stores the first reference location in LOCP. */
1855 class first_mem_ref_loc_1
1857 public:
1858 first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1859 bool operator () (mem_ref_loc *loc);
1860 mem_ref_loc **locp;
1863 bool
1864 first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
1866 *locp = loc;
1867 return true;
1870 /* Returns the first reference location to REF in LOOP. */
1872 static mem_ref_loc *
1873 first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
1875 mem_ref_loc *locp = NULL;
1876 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1877 return locp;
1880 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1881 MEM along edge EX.
1883 The store is only done if MEM has changed. We do this so no
1884 changes to MEM occur on code paths that did not originally store
1885 into it.
1887 The common case for execute_sm will transform:
1889 for (...) {
1890 if (foo)
1891 stuff;
1892 else
1893 MEM = TMP_VAR;
1896 into:
1898 lsm = MEM;
1899 for (...) {
1900 if (foo)
1901 stuff;
1902 else
1903 lsm = TMP_VAR;
1905 MEM = lsm;
1907 This function will generate:
1909 lsm = MEM;
1911 lsm_flag = false;
1913 for (...) {
1914 if (foo)
1915 stuff;
1916 else {
1917 lsm = TMP_VAR;
1918 lsm_flag = true;
1921 if (lsm_flag) <--
1922 MEM = lsm; <--
1925 static void
1926 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
1927 edge preheader, hash_set <basic_block> *flag_bbs,
1928 edge &append_cond_position, edge &last_cond_fallthru)
1930 basic_block new_bb, then_bb, old_dest;
1931 bool loop_has_only_one_exit;
1932 edge then_old_edge;
1933 gimple_stmt_iterator gsi;
1934 gimple *stmt;
1935 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1937 profile_count count_sum = profile_count::zero ();
1938 int nbbs = 0, ncount = 0;
1939 profile_probability flag_probability = profile_probability::uninitialized ();
1941 /* Flag is set in FLAG_BBS. Determine probability that flag will be true
1942 at loop exit.
1944 This code may look fancy, but it cannot update the profile very realistically
1945 because we do not know the probability that the flag will be true at a given
1946 loop exit.
1948 We look for two interesting extremes:
1949 - when the exit is dominated by a block setting the flag, we know it will
1950 always be true. This is a common case.
1951 - when all blocks setting the flag have very low frequency we know
1952 it will likely be false.
1953 In all other cases we default to 2/3 for the flag being true. */
1955 for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
1956 it != flag_bbs->end (); ++it)
1958 if ((*it)->count.initialized_p ())
1959 count_sum += (*it)->count, ncount ++;
1960 if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
1961 flag_probability = profile_probability::always ();
1962 nbbs++;
1965 profile_probability cap = profile_probability::always ().apply_scale (2, 3);
1967 if (flag_probability.initialized_p ())
1969 else if (ncount == nbbs
1970 && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
1972 flag_probability = count_sum.probability_in (preheader->count ());
1973 if (flag_probability > cap)
1974 flag_probability = cap;
1977 if (!flag_probability.initialized_p ())
1978 flag_probability = cap;
1980 /* ?? Insert store after previous store if applicable. See note
1981 below. */
1982 if (append_cond_position)
1983 ex = append_cond_position;
1985 loop_has_only_one_exit = single_pred_p (ex->dest);
1987 if (loop_has_only_one_exit)
1988 ex = split_block_after_labels (ex->dest);
1989 else
1991 for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1992 !gsi_end_p (gpi); gsi_next (&gpi))
1994 gphi *phi = gpi.phi ();
1995 if (virtual_operand_p (gimple_phi_result (phi)))
1996 continue;
1998 /* When the destination has a non-virtual PHI node with multiple
1999 predecessors make sure we preserve the PHI structure by
2000 forcing a forwarder block so that hoisting of that PHI will
2001 still work. */
2002 split_edge (ex);
2003 break;
2007 old_dest = ex->dest;
2008 new_bb = split_edge (ex);
2009 then_bb = create_empty_bb (new_bb);
2010 then_bb->count = new_bb->count.apply_probability (flag_probability);
2011 if (irr)
2012 then_bb->flags = BB_IRREDUCIBLE_LOOP;
2013 add_bb_to_loop (then_bb, new_bb->loop_father);
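/* Emit the test of FLAG at the end of NEW_BB. */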
2015 gsi = gsi_start_bb (new_bb);
2016 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2017 NULL_TREE, NULL_TREE);
2018 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2020 gsi = gsi_start_bb (then_bb);
2021 /* Insert actual store. */
2022 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2023 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
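/* Wire up the CFG: NEW_BB branches to THEN_BB when FLAG is true and to
OLD_DEST otherwise; THEN_BB falls through to OLD_DEST. */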
2025 edge e1 = single_succ_edge (new_bb);
2026 edge e2 = make_edge (new_bb, then_bb,
2027 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2028 e2->probability = flag_probability;
2030 e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
2031 e1->flags &= ~EDGE_FALLTHRU;
2033 e1->probability = flag_probability.invert ();
2035 then_old_edge = make_single_succ_edge (then_bb, old_dest,
2036 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
2038 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2040 if (append_cond_position)
2042 basic_block prevbb = last_cond_fallthru->src;
2043 redirect_edge_succ (last_cond_fallthru, new_bb);
2044 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2045 set_immediate_dominator (CDI_DOMINATORS, old_dest,
2046 recompute_dominator (CDI_DOMINATORS, old_dest));
2049 /* ?? Because stores may alias, they must happen in the exact
2050 sequence they originally happened. Save the position right after
2051 the (_lsm) store we just created so we can continue appending after
2052 it and maintain the original order. */
2053 append_cond_position = then_old_edge;
2054 last_cond_fallthru = find_edge (new_bb, old_dest);
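/* If OLD_DEST has PHI nodes, the new edge from THEN_BB needs arguments
as well; reuse the values that arrive via NEW_BB. */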
2056 if (!loop_has_only_one_exit)
2057 for (gphi_iterator gpi = gsi_start_phis (old_dest);
2058 !gsi_end_p (gpi); gsi_next (&gpi))
2060 gphi *phi = gpi.phi ();
2061 unsigned i;
2063 for (i = 0; i < gimple_phi_num_args (phi); i++)
2064 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2066 tree arg = gimple_phi_arg_def (phi, i);
2067 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2068 update_stmt (phi);
2073 /* When REF is written at the location, set a flag indicating the store. */
2075 class sm_set_flag_if_changed
2077 public:
2078 sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2079 : flag (flag_), bbs (bbs_) {}
2080 bool operator () (mem_ref_loc *loc);
2081 tree flag;
2082 hash_set <basic_block> *bbs;
2085 bool
2086 sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2088 /* Only set the flag for writes. */
2089 if (is_gimple_assign (loc->stmt)
2090 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2092 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2093 gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2094 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
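/* Record the block containing the flag store; execute_sm_if_changed uses
these blocks to estimate the probability of the flag being set at the
loop exits. */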
2095 bbs->add (gimple_bb (stmt));
2097 return false;
2100 /* Helper function for execute_sm. On every location where REF is
2101 written, set an appropriate flag indicating the store. */
2103 static tree
2104 execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2105 hash_set <basic_block> *bbs)
2107 tree flag;
2108 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2109 flag = create_tmp_reg (boolean_type_node, str);
2110 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2111 return flag;
2114 struct sm_aux
2116 tree tmp_var;
2117 tree store_flag;
2118 hash_set <basic_block> flag_bbs;
2121 /* Executes store motion of memory reference REF from LOOP.
2122 Exits from the LOOP are stored in EXITS. The initialization of the
2123 temporary variable is put to the preheader of the loop, and assignments
2124 to the reference from the temporary variable are emitted to exits. */
2126 static void
2127 execute_sm (class loop *loop, im_mem_ref *ref,
2128 hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt)
2130 gassign *load;
2131 struct fmt_data fmt_data;
2132 struct lim_aux_data *lim_data;
2133 bool multi_threaded_model_p = false;
2134 gimple_stmt_iterator gsi;
2135 sm_aux *aux = new sm_aux;
2137 if (dump_file && (dump_flags & TDF_DETAILS))
2139 fprintf (dump_file, "Executing store motion of ");
2140 print_generic_expr (dump_file, ref->mem.ref);
2141 fprintf (dump_file, " from loop %d\n", loop->num);
2144 aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2145 get_lsm_tmp_name (ref->mem.ref, ~0));
2147 fmt_data.loop = loop;
2148 fmt_data.orig_loop = loop;
2149 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
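/* Decide whether we need the conditional store (multi-threaded) model:
either the loop preheader is inside a transaction, or the ref is not
always stored in the loop and store data races are not allowed. */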
2151 bool always_stored = ref_always_accessed_p (loop, ref, true);
2152 if (maybe_mt
2153 && (bb_in_transaction (loop_preheader_edge (loop)->src)
2154 || (! flag_store_data_races && ! always_stored)))
2155 multi_threaded_model_p = true;
2157 if (multi_threaded_model_p)
2158 aux->store_flag
2159 = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
2160 else
2161 aux->store_flag = NULL_TREE;
2163 /* Remember variable setup. */
2164 aux_map.put (ref, aux);
2166 rewrite_mem_refs (loop, ref, aux->tmp_var);
2168 /* Emit the load code at the position of the first reference to the ref
2169 in the loop, so that we are sure it will be processed by
2170 move_computations after all of its dependencies. */
2171 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2173 /* Avoid emitting a load if there was no load of the ref in the loop.
2174 Especially when the ref is not always stored we cannot optimize it
2175 away later. But when it is not always stored we must then use a
2176 conditional store. */
2177 if ((!always_stored && !multi_threaded_model_p)
2178 || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
2179 load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
2180 else
2182 /* If we do not emit a load, mark the uninitialized state on
2183 loop entry as not to be warned about. */
2184 tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
2185 suppress_warning (uninit, OPT_Wuninitialized);
2186 load = gimple_build_assign (aux->tmp_var, uninit);
2188 lim_data = init_lim_data (load);
2189 lim_data->max_loop = loop;
2190 lim_data->tgt_loop = loop;
2191 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2193 if (multi_threaded_model_p)
2195 load = gimple_build_assign (aux->store_flag, boolean_false_node);
2196 lim_data = init_lim_data (load);
2197 lim_data->max_loop = loop;
2198 lim_data->tgt_loop = loop;
2199 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2203 /* sm_ord is used for ordinary stores for which we can retain the order
2204 with respect to other stores,
2205 sm_unord is used for conditionally executed stores which need to be
2206 able to execute in arbitrary order with respect to other stores,
2207 sm_other is used for stores we do not try to apply store motion to. */
2208 enum sm_kind { sm_ord, sm_unord, sm_other };
2209 struct seq_entry
2211 seq_entry () {}
2212 seq_entry (unsigned f, sm_kind k, tree fr = NULL)
2213 : first (f), second (k), from (fr) {}
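/* FIRST is the id of the memory reference (its index in
memory_accesses.refs_list), SECOND the store motion kind applied to it
and FROM the stored value for sm_other entries whose store is re-issued
on the exits. */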
2214 unsigned first;
2215 sm_kind second;
2216 tree from;
2219 static void
2220 execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
2221 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
2222 edge &append_cond_position, edge &last_cond_fallthru)
2224 /* Sink the stores to exit from the loop. */
2225 for (unsigned i = seq.length (); i > 0; --i)
2227 im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
2228 if (seq[i-1].second == sm_other)
2230 gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
2231 if (dump_file && (dump_flags & TDF_DETAILS))
2233 fprintf (dump_file, "Re-issuing dependent store of ");
2234 print_generic_expr (dump_file, ref->mem.ref);
2235 fprintf (dump_file, " from loop %d on exit %d -> %d\n",
2236 loop->num, ex->src->index, ex->dest->index);
2238 gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
2239 seq[i-1].from);
2240 gsi_insert_on_edge (ex, store);
2242 else
2244 sm_aux *aux = *aux_map.get (ref);
2245 if (!aux->store_flag || kind == sm_ord)
2247 gassign *store;
2248 store = gimple_build_assign (unshare_expr (ref->mem.ref),
2249 aux->tmp_var);
2250 gsi_insert_on_edge (ex, store);
2252 else
2253 execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
2254 aux->store_flag,
2255 loop_preheader_edge (loop), &aux->flag_bbs,
2256 append_cond_position, last_cond_fallthru);
2261 /* Push the SM candidate at index PTR in the sequence SEQ down until
2262 we hit the next SM candidate. Return true if that went OK and
2263 false if we could not disambiguate against another unrelated ref.
2264 Update *AT to the index where the candidate now resides. */
2266 static bool
2267 sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
2269 *at = ptr;
2270 for (; ptr > 0; --ptr)
2272 seq_entry &new_cand = seq[ptr];
2273 seq_entry &against = seq[ptr-1];
2274 if (against.second == sm_ord
2275 || (against.second == sm_other && against.from != NULL_TREE))
2276 /* Found the tail of the sequence. */
2277 break;
2278 /* We may not ignore self-dependences here. */
2279 if (new_cand.first == against.first
2280 || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
2281 memory_accesses.refs_list[against.first],
2282 false))
2283 /* ??? Prune new_cand from the list of refs to apply SM to. */
2284 return false;
2285 std::swap (new_cand, against);
2286 *at = ptr - 1;
2288 return true;
2291 /* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
2292 walking backwards from VDEF (or the end of BB if VDEF is NULL). */
2294 static int
2295 sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
2296 vec<seq_entry> &seq, bitmap refs_not_in_seq,
2297 bitmap refs_not_supported, bool forked,
2298 bitmap fully_visited)
2300 if (!vdef)
2301 for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
2302 gsi_prev (&gsi))
2304 vdef = gimple_vdef (gsi_stmt (gsi));
2305 if (vdef)
2306 break;
2308 if (!vdef)
2310 gphi *vphi = get_virtual_phi (bb);
2311 if (vphi)
2312 vdef = gimple_phi_result (vphi);
2314 if (!vdef)
2316 if (single_pred_p (bb))
2317 /* This handles the perfect nest case. */
2318 return sm_seq_valid_bb (loop, single_pred (bb), vdef,
2319 seq, refs_not_in_seq, refs_not_supported,
2320 forked, fully_visited);
2321 return 0;
2325 gimple *def = SSA_NAME_DEF_STMT (vdef);
2326 if (gimple_bb (def) != bb)
2328 /* If we forked by processing a PHI do not allow our walk to
2329 merge again until we handle that robustly. */
2330 if (forked)
2332 /* Mark refs_not_in_seq as unsupported. */
2333 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2334 return 1;
2336 /* Otherwise it doesn't really matter if we end up in different
2337 BBs. */
2338 bb = gimple_bb (def);
2340 if (gphi *phi = dyn_cast <gphi *> (def))
2342 /* Handle CFG merges. Until we handle forks (gimple_bb (def) != bb)
2343 this is still linear.
2344 Eventually we want to cache intermediate results per BB
2345 (but we can't easily cache for different exits?). */
2346 /* Stop at PHIs with possible backedges. */
2347 if (bb == bb->loop_father->header
2348 || bb->flags & BB_IRREDUCIBLE_LOOP)
2350 /* Mark refs_not_in_seq as unsupported. */
2351 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2352 return 1;
2354 if (gimple_phi_num_args (phi) == 1)
2355 return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2356 gimple_phi_arg_def (phi, 0), seq,
2357 refs_not_in_seq, refs_not_supported,
2358 false, fully_visited);
2359 if (bitmap_bit_p (fully_visited,
2360 SSA_NAME_VERSION (gimple_phi_result (phi))))
2361 return 1;
2362 auto_vec<seq_entry> first_edge_seq;
2363 auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
2364 int eret;
2365 bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
2366 eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src,
2367 gimple_phi_arg_def (phi, 0),
2368 first_edge_seq,
2369 tem_refs_not_in_seq, refs_not_supported,
2370 true, fully_visited);
2371 if (eret != 1)
2372 return -1;
2373 /* Simplify our lives by pruning trailing !sm_ord entries from the sequence. */
2374 while (!first_edge_seq.is_empty ()
2375 && first_edge_seq.last ().second != sm_ord)
2376 first_edge_seq.pop ();
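/* Merge the sequences of the remaining PHI edges into FIRST_EDGE_SEQ,
demoting any mismatching entries to sm_other. */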
2377 for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i)
2379 tree vuse = gimple_phi_arg_def (phi, i);
2380 edge e = gimple_phi_arg_edge (phi, i);
2381 auto_vec<seq_entry> edge_seq;
2382 bitmap_and_compl (tem_refs_not_in_seq,
2383 refs_not_in_seq, refs_not_supported);
2384 /* If we've marked all refs we search for as unsupported
2385 we can stop processing and use the sequence as before
2386 the PHI. */
2387 if (bitmap_empty_p (tem_refs_not_in_seq))
2388 return 1;
2389 eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
2390 tem_refs_not_in_seq, refs_not_supported,
2391 true, fully_visited);
2392 if (eret != 1)
2393 return -1;
2394 /* Simplify our lives by pruning trailing !sm_ord entries from the sequence. */
2395 while (!edge_seq.is_empty ()
2396 && edge_seq.last ().second != sm_ord)
2397 edge_seq.pop ();
2398 unsigned min_len = MIN(first_edge_seq.length (),
2399 edge_seq.length ());
2400 /* Incrementally merge seqs into first_edge_seq. */
2401 int first_uneq = -1;
2402 auto_vec<seq_entry, 2> extra_refs;
2403 for (unsigned int i = 0; i < min_len; ++i)
2405 /* ??? We could merge more intelligently when we face a different
2406 order, by performing additional sinking operations in one sequence.
2407 For now we simply mark them to be processed by the
2408 non-order-preserving SM code. */
2409 if (first_edge_seq[i].first != edge_seq[i].first)
2411 if (first_edge_seq[i].second == sm_ord)
2412 bitmap_set_bit (refs_not_supported,
2413 first_edge_seq[i].first);
2414 if (edge_seq[i].second == sm_ord)
2415 bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2416 first_edge_seq[i].second = sm_other;
2417 first_edge_seq[i].from = NULL_TREE;
2418 /* Record the dropped refs for later processing. */
2419 if (first_uneq == -1)
2420 first_uneq = i;
2421 extra_refs.safe_push (seq_entry (edge_seq[i].first,
2422 sm_other, NULL_TREE));
2424 /* sm_other prevails. */
2425 else if (first_edge_seq[i].second != edge_seq[i].second)
2427 /* Make sure the ref is marked as not supported. */
2428 bitmap_set_bit (refs_not_supported,
2429 first_edge_seq[i].first);
2430 first_edge_seq[i].second = sm_other;
2431 first_edge_seq[i].from = NULL_TREE;
2433 else if (first_edge_seq[i].second == sm_other
2434 && first_edge_seq[i].from != NULL_TREE
2435 && (edge_seq[i].from == NULL_TREE
2436 || !operand_equal_p (first_edge_seq[i].from,
2437 edge_seq[i].from, 0)))
2438 first_edge_seq[i].from = NULL_TREE;
2440 /* Any excess elements become sm_other since they are now
2441 conditionally executed. */
2442 if (first_edge_seq.length () > edge_seq.length ())
2444 for (unsigned i = edge_seq.length ();
2445 i < first_edge_seq.length (); ++i)
2447 if (first_edge_seq[i].second == sm_ord)
2448 bitmap_set_bit (refs_not_supported,
2449 first_edge_seq[i].first);
2450 first_edge_seq[i].second = sm_other;
2453 else if (edge_seq.length () > first_edge_seq.length ())
2455 if (first_uneq == -1)
2456 first_uneq = first_edge_seq.length ();
2457 for (unsigned i = first_edge_seq.length ();
2458 i < edge_seq.length (); ++i)
2460 if (edge_seq[i].second == sm_ord)
2461 bitmap_set_bit (refs_not_supported, edge_seq[i].first);
2462 extra_refs.safe_push (seq_entry (edge_seq[i].first,
2463 sm_other, NULL_TREE));
2466 /* Put unmerged refs at first_uneq to force dependence checking
2467 on them. */
2468 if (first_uneq != -1)
2470 /* Missing ordered_splice_at. */
2471 if ((unsigned)first_uneq == first_edge_seq.length ())
2472 first_edge_seq.safe_splice (extra_refs);
2473 else
2475 unsigned fes_length = first_edge_seq.length ();
2476 first_edge_seq.safe_grow (fes_length
2477 + extra_refs.length ());
2478 memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
2479 &first_edge_seq[first_uneq],
2480 (fes_length - first_uneq) * sizeof (seq_entry));
2481 memcpy (&first_edge_seq[first_uneq],
2482 extra_refs.address (),
2483 extra_refs.length () * sizeof (seq_entry));
2487 /* Use the sequence from the first edge and push SMs down. */
2488 for (unsigned i = 0; i < first_edge_seq.length (); ++i)
2490 unsigned id = first_edge_seq[i].first;
2491 seq.safe_push (first_edge_seq[i]);
2492 unsigned new_idx;
2493 if ((first_edge_seq[i].second == sm_ord
2494 || (first_edge_seq[i].second == sm_other
2495 && first_edge_seq[i].from != NULL_TREE))
2496 && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
2498 if (first_edge_seq[i].second == sm_ord)
2499 bitmap_set_bit (refs_not_supported, id);
2500 /* Mark it sm_other. */
2501 seq[new_idx].second = sm_other;
2502 seq[new_idx].from = NULL_TREE;
2505 bitmap_set_bit (fully_visited,
2506 SSA_NAME_VERSION (gimple_phi_result (phi)));
2507 return 1;
2509 lim_aux_data *data = get_lim_data (def);
2510 gcc_assert (data);
2511 if (data->ref == UNANALYZABLE_MEM_ID)
2512 return -1;
2513 /* Stop at memory references which we can't move. */
2514 else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node)
2516 /* Mark refs_not_in_seq as unsupported. */
2517 bitmap_ior_into (refs_not_supported, refs_not_in_seq);
2518 return 1;
2520 /* One of the stores we want to apply SM to that we've not yet seen. */
2521 else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
2523 seq.safe_push (seq_entry (data->ref, sm_ord));
2525 /* 1) Push it down the queue until a SMed
2526 and not ignored ref is reached, skipping all not SMed refs
2527 and refs ignored via non-TBAA disambiguation. */
2528 unsigned new_idx;
2529 if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
2530 /* If that fails but we did not fork yet, continue; we'll then
2531 re-materialize all of the stores in the sequence.
2532 Further stores will only be pushed up to this one. */
2533 && forked)
2535 bitmap_set_bit (refs_not_supported, data->ref);
2536 /* Mark it sm_other. */
2537 seq[new_idx].second = sm_other;
2540 /* 2) Check whether we've seen all refs we want to SM and if so
2541 declare success for the active exit. */
2542 if (bitmap_empty_p (refs_not_in_seq))
2543 return 1;
2545 else
2546 /* Another store not part of the final sequence. Simply push it. */
2547 seq.safe_push (seq_entry (data->ref, sm_other,
2548 gimple_assign_rhs1 (def)));
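/* Continue the walk upwards along the virtual use-def chain. */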
2550 vdef = gimple_vuse (def);
2552 while (1);
2555 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2556 edges of the LOOP. */
2558 static void
2559 hoist_memory_references (class loop *loop, bitmap mem_refs,
2560 const vec<edge> &exits)
2562 im_mem_ref *ref;
2563 unsigned i;
2564 bitmap_iterator bi;
2566 /* To address PR57359, before actually applying store motion check
2567 the candidates found for validity with regard to reordering
2568 relative to other stores, which up to here we have disambiguated
2569 using TBAA, which isn't valid for this purpose.
2570 What matters is the order of the last stores to the mem_refs
2571 with respect to the other stores of the loop at the point of the
2572 loop exits. */
2574 /* For each exit compute the store order, pruning from mem_refs
2575 on the fly. */
2576 /* The complexity of this is at least
2577 O(number of exits * number of SM refs) but in practice closer to
2578 O(number of exits * number of SM refs * number of stores). */
2579 /* ??? Somehow do this in a single sweep over the loop body. */
2580 auto_vec<std::pair<edge, vec<seq_entry> > > sms;
2581 auto_bitmap refs_not_supported (&lim_bitmap_obstack);
2582 edge e;
2583 FOR_EACH_VEC_ELT (exits, i, e)
2585 vec<seq_entry> seq;
2586 seq.create (4);
2587 auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
2588 bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
2589 if (bitmap_empty_p (refs_not_in_seq))
2591 seq.release ();
2592 break;
2594 auto_bitmap fully_visited;
2595 int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
2596 seq, refs_not_in_seq,
2597 refs_not_supported, false,
2598 fully_visited);
2599 if (res != 1)
2601 bitmap_copy (refs_not_supported, mem_refs);
2602 seq.release ();
2603 break;
2605 sms.safe_push (std::make_pair (e, seq));
2608 /* Prune refs found unsupported at later exits from the sequences of earlier processed exits, iterating to a fixed point. */
2609 bool changed = !bitmap_empty_p (refs_not_supported);
2610 while (changed)
2612 changed = false;
2613 std::pair<edge, vec<seq_entry> > *seq;
2614 FOR_EACH_VEC_ELT (sms, i, seq)
2616 bool need_to_push = false;
2617 for (unsigned i = 0; i < seq->second.length (); ++i)
2619 sm_kind kind = seq->second[i].second;
2620 if (kind == sm_other && seq->second[i].from == NULL_TREE)
2621 break;
2622 unsigned id = seq->second[i].first;
2623 unsigned new_idx;
2624 if (kind == sm_ord
2625 && bitmap_bit_p (refs_not_supported, id))
2627 seq->second[i].second = sm_other;
2628 gcc_assert (seq->second[i].from == NULL_TREE);
2629 need_to_push = true;
2631 else if (need_to_push
2632 && !sm_seq_push_down (seq->second, i, &new_idx))
2634 /* We need to push down both sm_ord and sm_other
2635 but for the latter we need to disqualify all
2636 following refs. */
2637 if (kind == sm_ord)
2639 if (bitmap_set_bit (refs_not_supported, id))
2640 changed = true;
2641 seq->second[new_idx].second = sm_other;
2643 else
2645 for (unsigned j = seq->second.length () - 1;
2646 j > new_idx; --j)
2647 if (seq->second[j].second == sm_ord
2648 && bitmap_set_bit (refs_not_supported,
2649 seq->second[j].first))
2650 changed = true;
2651 seq->second.truncate (new_idx);
2652 break;
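/* Finally clean up each exit sequence: drop trailing sm_other entries,
remove duplicate refs and verify the remaining entries are well formed. */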
2658 std::pair<edge, vec<seq_entry> > *seq;
2659 FOR_EACH_VEC_ELT (sms, i, seq)
2661 /* Prune sm_other from the end. */
2662 while (!seq->second.is_empty ()
2663 && seq->second.last ().second == sm_other)
2664 seq->second.pop ();
2665 /* Prune duplicates from the start. */
2666 auto_bitmap seen (&lim_bitmap_obstack);
2667 unsigned j, k;
2668 for (j = k = 0; j < seq->second.length (); ++j)
2669 if (bitmap_set_bit (seen, seq->second[j].first))
2671 if (k != j)
2672 seq->second[k] = seq->second[j];
2673 ++k;
2675 seq->second.truncate (k);
2676 /* And verify. */
2677 seq_entry *e;
2678 FOR_EACH_VEC_ELT (seq->second, j, e)
2679 gcc_assert (e->second == sm_ord
2680 || (e->second == sm_other && e->from != NULL_TREE));
2683 /* Verify dependence for refs we cannot handle with the order preserving
2684 code (refs_not_supported) or prune them from mem_refs. */
2685 auto_vec<seq_entry> unord_refs;
2686 EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
2688 ref = memory_accesses.refs_list[i];
2689 if (!ref_indep_loop_p (loop, ref, sm_waw))
2690 bitmap_clear_bit (mem_refs, i);
2691 /* We've now verified that the store order for ref with respect to all
2692 other stores in the loop does not matter. */
2693 else
2694 unord_refs.safe_push (seq_entry (i, sm_unord));
2697 hash_map<im_mem_ref *, sm_aux *> aux_map;
2699 /* Execute SM but delay the store materialization for ordered
2700 sequences on exit. */
2701 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2703 ref = memory_accesses.refs_list[i];
2704 execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i));
2707 /* Materialize ordered store sequences on exits. */
2708 FOR_EACH_VEC_ELT (exits, i, e)
2710 edge append_cond_position = NULL;
2711 edge last_cond_fallthru = NULL;
2712 if (i < sms.length ())
2714 gcc_assert (sms[i].first == e);
2715 execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord,
2716 append_cond_position, last_cond_fallthru);
2717 sms[i].second.release ();
2719 if (!unord_refs.is_empty ())
2720 execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord,
2721 append_cond_position, last_cond_fallthru);
2722 /* Commit edge inserts here to preserve the order of stores
2723 when an exit exits multiple loops. */
2724 gsi_commit_one_edge_insert (e, NULL);
2727 for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2728 iter != aux_map.end (); ++iter)
2729 delete (*iter).second;
2732 class ref_always_accessed
2734 public:
2735 ref_always_accessed (class loop *loop_, bool stored_p_)
2736 : loop (loop_), stored_p (stored_p_) {}
2737 bool operator () (mem_ref_loc *loc);
2738 class loop *loop;
2739 bool stored_p;
2742 bool
2743 ref_always_accessed::operator () (mem_ref_loc *loc)
2745 class loop *must_exec;
2747 struct lim_aux_data *lim_data = get_lim_data (loc->stmt);
2748 if (!lim_data)
2749 return false;
2751 /* If we require an always executed store make sure the statement
2752 is a store. */
2753 if (stored_p)
2755 tree lhs = gimple_get_lhs (loc->stmt);
2756 if (!lhs
2757 || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
2758 return false;
2761 must_exec = lim_data->always_executed_in;
2762 if (!must_exec)
2763 return false;
2765 if (must_exec == loop
2766 || flow_loop_nested_p (must_exec, loop))
2767 return true;
2769 return false;
2772 /* Returns true if REF is always accessed in LOOP. If STORED_P is true
2773 make sure REF is always stored to in LOOP. */
2775 static bool
2776 ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
2778 return for_all_locs_in_loop (loop, ref,
2779 ref_always_accessed (loop, stored_p));
2782 /* Returns true if REF1 and REF2 are independent. */
2784 static bool
2785 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
2787 if (ref1 == ref2)
2788 return true;
2790 if (dump_file && (dump_flags & TDF_DETAILS))
2791 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2792 ref1->id, ref2->id);
2794 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p))
2796 if (dump_file && (dump_flags & TDF_DETAILS))
2797 fprintf (dump_file, "dependent.\n");
2798 return false;
2800 else
2802 if (dump_file && (dump_flags & TDF_DETAILS))
2803 fprintf (dump_file, "independent.\n");
2804 return true;
2808 /* Returns true if REF is independent of all other accesses in LOOP.
2809 KIND specifies the kind of dependence to consider.
2810 lim_raw assumes REF is not stored in LOOP and disambiguates RAW
2811 dependences so if true REF can be hoisted out of LOOP
2812 sm_war disambiguates a store REF against all other loads to see
2813 whether the store can be sunk across loads out of LOOP
2814 sm_waw disambiguates a store REF against all other stores to see
2815 whether the store can be sunk across stores out of LOOP. */
2817 static bool
2818 ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
2820 bool indep_p = true;
2821 bitmap refs_to_check;
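/* WAR dependences are checked against the loads in LOOP, RAW and WAW
dependences against the stores. */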
2823 if (kind == sm_war)
2824 refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
2825 else
2826 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2828 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
2829 || ref->mem.ref == error_mark_node)
2830 indep_p = false;
2831 else
2833 /* tri-state, { unknown, independent, dependent } */
2834 dep_state state = query_loop_dependence (loop, ref, kind);
2835 if (state != dep_unknown)
2836 return state == dep_independent ? true : false;
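/* Check the immediate subloops first; REF has to be independent of the
accesses in each of them as well. */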
2838 class loop *inner = loop->inner;
2839 while (inner)
2841 if (!ref_indep_loop_p (inner, ref, kind))
2843 indep_p = false;
2844 break;
2846 inner = inner->next;
2849 if (indep_p)
2851 unsigned i;
2852 bitmap_iterator bi;
2853 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2855 im_mem_ref *aref = memory_accesses.refs_list[i];
2856 if (aref->mem.ref == error_mark_node)
2858 gimple *stmt = aref->accesses_in_loop[0].stmt;
2859 if ((kind == sm_war
2860 && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
2861 kind != sm_waw))
2862 || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
2863 kind != sm_waw))
2865 indep_p = false;
2866 break;
2869 else if (!refs_independent_p (ref, aref, kind != sm_waw))
2871 indep_p = false;
2872 break;
2878 if (dump_file && (dump_flags & TDF_DETAILS))
2879 fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n",
2880 kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
2881 ref->id, loop->num, indep_p ? "independent" : "dependent");
2883 /* Record the computed result in the cache. */
2884 record_loop_dependence (loop, ref, kind,
2885 indep_p ? dep_independent : dep_dependent);
2887 return indep_p;
2891 /* Returns true if we can perform store motion of REF from LOOP. */
2893 static bool
2894 can_sm_ref_p (class loop *loop, im_mem_ref *ref)
2896 tree base;
2898 /* Can't hoist unanalyzable refs. */
2899 if (!MEM_ANALYZABLE (ref))
2900 return false;
2902 /* Can't hoist/sink aggregate copies. */
2903 if (ref->mem.ref == error_mark_node)
2904 return false;
2906 /* It should be movable. */
2907 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2908 || TREE_THIS_VOLATILE (ref->mem.ref)
2909 || !for_each_index (&ref->mem.ref, may_move_till, loop))
2910 return false;
2912 /* If it can throw fail, we do not properly update EH info. */
2913 if (tree_could_throw_p (ref->mem.ref))
2914 return false;
2916 /* If it can trap, it must be always executed in LOOP.
2917 Readonly memory locations may trap when storing to them, but
2918 tree_could_trap_p is a predicate for rvalues, so check that
2919 explicitly. */
2920 base = get_base_address (ref->mem.ref);
2921 if ((tree_could_trap_p (ref->mem.ref)
2922 || (DECL_P (base) && TREE_READONLY (base)))
2923 /* ??? We can at least use false here, allowing loads? We
2924 are forcing conditional stores if the ref is not always
2925 stored to later anyway. So this would only guard
2926 the load we need to emit. Thus when the ref is not
2927 loaded we can elide this completely? */
2928 && !ref_always_accessed_p (loop, ref, true))
2929 return false;
2931 /* Verify all loads of ref can be hoisted. */
2932 if (ref->loaded
2933 && bitmap_bit_p (ref->loaded, loop->num)
2934 && !ref_indep_loop_p (loop, ref, lim_raw))
2935 return false;
2937 /* Verify the candidate can be disambiguated against all loads,
2938 that is, we can elide all in-loop stores. Disambiguation
2939 against stores is done later when we cannot guarantee preserving
2940 the order of stores. */
2941 if (!ref_indep_loop_p (loop, ref, sm_war))
2942 return false;
2944 return true;
2947 /* Marks in REFS_TO_SM the references in LOOP for which store motion
2948 should be performed. SM_EXECUTED is the set of references for which
2949 store motion was performed in one of the outer loops. */
2951 static void
2952 find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2954 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2955 unsigned i;
2956 bitmap_iterator bi;
2957 im_mem_ref *ref;
2959 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2961 ref = memory_accesses.refs_list[i];
2962 if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
2963 bitmap_set_bit (refs_to_sm, i);
2967 /* Checks whether LOOP (with exits stored in the EXITS array) is suitable
2968 for a store motion optimization (i.e. whether we can insert statements
2969 on its exits). */
2971 static bool
2972 loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
2973 const vec<edge> &exits)
2975 unsigned i;
2976 edge ex;
2978 FOR_EACH_VEC_ELT (exits, i, ex)
2979 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2980 return false;
2982 return true;
2985 /* Try to perform store motion for all memory references modified inside
2986 LOOP. SM_EXECUTED is the bitmap of the memory references for which
2987 store motion was executed in one of the outer loops. */
2989 static void
2990 store_motion_loop (class loop *loop, bitmap sm_executed)
2992 auto_vec<edge> exits = get_loop_exit_edges (loop);
2993 class loop *subloop;
2994 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2996 if (loop_suitable_for_sm (loop, exits))
2998 find_refs_for_sm (loop, sm_executed, sm_in_loop);
2999 if (!bitmap_empty_p (sm_in_loop))
3000 hoist_memory_references (loop, sm_in_loop, exits);
3003 bitmap_ior_into (sm_executed, sm_in_loop);
3004 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
3005 store_motion_loop (subloop, sm_executed);
3006 bitmap_and_compl_into (sm_executed, sm_in_loop);
3007 BITMAP_FREE (sm_in_loop);
3010 /* Try to perform store motion for all memory references modified inside
3011 loops. */
3013 static void
3014 do_store_motion (void)
3016 class loop *loop;
3017 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
3019 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3020 store_motion_loop (loop, sm_executed);
3022 BITMAP_FREE (sm_executed);
3025 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
3026 for each such basic block bb records the outermost loop for which execution
3027 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
3028 blocks that contain a nonpure call. */
3030 static void
3031 fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
3033 basic_block bb = NULL, last = NULL;
3034 edge e;
3035 class loop *inn_loop = loop;
3037 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
3039 auto_vec<basic_block, 64> worklist;
3040 worklist.reserve_exact (loop->num_nodes);
3041 worklist.quick_push (loop->header);
3044 edge_iterator ei;
3045 bb = worklist.pop ();
3047 if (!flow_bb_inside_loop_p (inn_loop, bb))
3049 /* When we are leaving a possibly infinite inner loop
3050 we have to stop processing. */
3051 if (!finite_loop_p (inn_loop))
3052 break;
3053 /* If the loop was finite we can continue with processing
3054 the loop we exited to. */
3055 inn_loop = bb->loop_father;
3058 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
3059 last = bb;
3061 if (bitmap_bit_p (contains_call, bb->index))
3062 break;
3064 /* If LOOP exits from this BB stop processing. */
3065 FOR_EACH_EDGE (e, ei, bb->succs)
3066 if (!flow_bb_inside_loop_p (loop, e->dest))
3067 break;
3068 if (e)
3069 break;
3071 /* A loop might be infinite (TODO use simple loop analysis
3072 to disprove this if possible). */
3073 if (bb->flags & BB_IRREDUCIBLE_LOOP)
3074 break;
3076 if (bb->loop_father->header == bb)
3077 /* Record that we enter into a subloop since it might not
3078 be finite. */
3079 /* ??? Entering a subloop that is not always executed makes
3080 fill_always_executed_in quadratic in loop depth since
3081 we walk those loops N times. This is not a problem
3082 in practice though, see PR102253 for a worst-case testcase. */
3083 inn_loop = bb->loop_father;
3085 /* Walk the body of LOOP sorted by dominance relation. Additionally,
3086 if a basic block S dominates the latch, then only blocks dominated
3087 by S are after it.
3088 This is get_loop_body_in_dom_order using a worklist algorithm and
3089 stopping once we are no longer interested in visiting further
3090 blocks. */
3091 unsigned old_len = worklist.length ();
3092 unsigned postpone = 0;
3093 for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
3094 son;
3095 son = next_dom_son (CDI_DOMINATORS, son))
3097 if (!flow_bb_inside_loop_p (loop, son))
3098 continue;
3099 if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
3100 postpone = worklist.length ();
3101 worklist.quick_push (son);
3103 if (postpone)
3104 /* Postponing the block that dominates the latch means
3105 processing it last and thus putting it earliest in the
3106 worklist. */
3107 std::swap (worklist[old_len], worklist[postpone]);
3109 while (!worklist.is_empty ());
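/* LAST is the last block we determined to be always executed; it and
every block on its dominator chain up to the loop header are always
executed in LOOP. */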
3111 while (1)
3113 if (dump_enabled_p ())
3114 dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
3115 last->index, loop->num);
3116 SET_ALWAYS_EXECUTED_IN (last, loop);
3117 if (last == loop->header)
3118 break;
3119 last = get_immediate_dominator (CDI_DOMINATORS, last);
3123 for (loop = loop->inner; loop; loop = loop->next)
3124 fill_always_executed_in_1 (loop, contains_call);
3127 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
3128 for each such basic block bb records the outermost loop for which execution
3129 of its header implies execution of bb. */
3131 static void
3132 fill_always_executed_in (void)
3134 basic_block bb;
3135 class loop *loop;
3137 auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
3138 bitmap_clear (contains_call);
3139 FOR_EACH_BB_FN (bb, cfun)
3141 gimple_stmt_iterator gsi;
3142 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3144 if (nonpure_call_p (gsi_stmt (gsi)))
3145 break;
3148 if (!gsi_end_p (gsi))
3149 bitmap_set_bit (contains_call, bb->index);
3152 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
3153 fill_always_executed_in_1 (loop, contains_call);
3157 /* Compute the global information needed by the loop invariant motion pass. */
3159 static void
3160 tree_ssa_lim_initialize (bool store_motion)
3162 unsigned i;
3164 bitmap_obstack_initialize (&lim_bitmap_obstack);
3165 gcc_obstack_init (&mem_ref_obstack);
3166 lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
3168 if (flag_tm)
3169 compute_transaction_bits ();
3171 memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
3172 memory_accesses.refs_list.create (100);
3173 /* Allocate a special, unanalyzable mem-ref with ID zero. */
3174 memory_accesses.refs_list.quick_push
3175 (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
3177 memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun));
3178 memory_accesses.refs_loaded_in_loop.quick_grow (number_of_loops (cfun));
3179 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
3180 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
3181 if (store_motion)
3183 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
3184 memory_accesses.all_refs_stored_in_loop.quick_grow
3185 (number_of_loops (cfun));
3188 for (i = 0; i < number_of_loops (cfun); i++)
3190 bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i],
3191 &lim_bitmap_obstack);
3192 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
3193 &lim_bitmap_obstack);
3194 if (store_motion)
3195 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
3196 &lim_bitmap_obstack);
3199 memory_accesses.ttae_cache = NULL;
3201 /* Initialize bb_loop_postorder with a mapping from loop->num to
3202 its postorder index. */
3203 i = 0;
3204 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
3205 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
3206 bb_loop_postorder[loop->num] = i++;
3209 /* Cleans up after the invariant motion pass. */
3211 static void
3212 tree_ssa_lim_finalize (void)
3214 basic_block bb;
3215 unsigned i;
3216 im_mem_ref *ref;
3218 FOR_EACH_BB_FN (bb, cfun)
3219 SET_ALWAYS_EXECUTED_IN (bb, NULL);
3221 bitmap_obstack_release (&lim_bitmap_obstack);
3222 delete lim_aux_data_map;
3224 delete memory_accesses.refs;
3225 memory_accesses.refs = NULL;
3227 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
3228 memref_free (ref);
3229 memory_accesses.refs_list.release ();
3230 obstack_free (&mem_ref_obstack, NULL);
3232 memory_accesses.refs_loaded_in_loop.release ();
3233 memory_accesses.refs_stored_in_loop.release ();
3234 memory_accesses.all_refs_stored_in_loop.release ();
3236 if (memory_accesses.ttae_cache)
3237 free_affine_expand_cache (&memory_accesses.ttae_cache);
3239 free (bb_loop_postorder);
3242 /* Moves invariants from loops. Only "expensive" invariants are moved out --
3243 i.e. those that are likely to be a win regardless of the register pressure.
3244 Only perform store motion if STORE_MOTION is true. */
3246 unsigned int
3247 loop_invariant_motion_in_fun (function *fun, bool store_motion)
3249 unsigned int todo = 0;
3251 tree_ssa_lim_initialize (store_motion);
3253 /* Gathers information about memory accesses in the loops. */
3254 analyze_memory_references (store_motion);
3256 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
3257 fill_always_executed_in ();
3259 int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3260 int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3262 /* For each statement determine the outermost loop in which it is
3263 invariant and the cost of computing the invariant. */
3264 for (int i = 0; i < n; ++i)
3265 compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3267 /* Execute store motion. Force the necessary invariants to be moved
3268 out of the loops as well. */
3269 if (store_motion)
3270 do_store_motion ();
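/* Store motion may have created new blocks and statements, so recompute
the RPO order before moving the computations. */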
3272 free (rpo);
3273 rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
3274 n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
3276 /* Move the expressions that are expensive enough. */
3277 for (int i = 0; i < n; ++i)
3278 todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
3280 free (rpo);
3282 gsi_commit_edge_inserts ();
3283 if (need_ssa_update_p (fun))
3284 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3286 tree_ssa_lim_finalize ();
3288 return todo;
3291 /* Loop invariant motion pass. */
3293 namespace {
3295 const pass_data pass_data_lim =
3297 GIMPLE_PASS, /* type */
3298 "lim", /* name */
3299 OPTGROUP_LOOP, /* optinfo_flags */
3300 TV_LIM, /* tv_id */
3301 PROP_cfg, /* properties_required */
3302 0, /* properties_provided */
3303 0, /* properties_destroyed */
3304 0, /* todo_flags_start */
3305 0, /* todo_flags_finish */
3308 class pass_lim : public gimple_opt_pass
3310 public:
3311 pass_lim (gcc::context *ctxt)
3312 : gimple_opt_pass (pass_data_lim, ctxt)
3315 /* opt_pass methods: */
3316 opt_pass * clone () { return new pass_lim (m_ctxt); }
3317 virtual bool gate (function *) { return flag_tree_loop_im != 0; }
3318 virtual unsigned int execute (function *);
3320 }; // class pass_lim
3322 unsigned int
3323 pass_lim::execute (function *fun)
3325 bool in_loop_pipeline = scev_initialized_p ();
3326 if (!in_loop_pipeline)
3327 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
3329 if (number_of_loops (fun) <= 1)
3330 return 0;
3331 unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);
3333 if (!in_loop_pipeline)
3334 loop_optimizer_finalize ();
3335 else
3336 scev_reset ();
3337 return todo;
3340 } // anon namespace
3342 gimple_opt_pass *
3343 make_pass_lim (gcc::context *ctxt)
3345 return new pass_lim (ctxt);