gcc/tree-ssa-loop-im.c (official-gcc.git)
1 /* Loop invariant motion.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "tree-eh.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-manip.h"
37 #include "tree-ssa-loop.h"
38 #include "tree-into-ssa.h"
39 #include "cfgloop.h"
40 #include "domwalk.h"
41 #include "params.h"
42 #include "tree-affine.h"
43 #include "tree-ssa-propagate.h"
44 #include "trans-mem.h"
45 #include "gimple-fold.h"
46 #include "tree-scalar-evolution.h"
47 #include "tree-ssa-loop-niter.h"
48 #include "alias.h"
49 #include "builtins.h"
50 #include "tree-dfa.h"
52 /* TODO: Support for predicated code motion. I.e.
54 while (1)
56 if (cond)
58 a = inv;
59 something;
63 Where COND and INV are invariants, but evaluating INV may trap or be
 64 invalid for some other reason if !COND. This may be transformed to
66 if (cond)
67 a = inv;
68 while (1)
70 if (cond)
71 something;
72 } */
74 /* The auxiliary data kept for each statement. */
76 struct lim_aux_data
 78 struct loop *max_loop; /* The outermost loop in which the statement
79 is invariant. */
 81 struct loop *tgt_loop; /* The loop out of which we want to move the
82 invariant. */
84 struct loop *always_executed_in;
 85 /* The outermost loop for which we are sure
86 the statement is executed if the loop
87 is entered. */
89 unsigned cost; /* Cost of the computation performed by the
90 statement. */
92 unsigned ref; /* The simple_mem_ref in this stmt or 0. */
94 vec<gimple *> depends; /* Vector of statements that must be also
95 hoisted out of the loop when this statement
96 is hoisted; i.e. those that define the
97 operands of the statement and are inside of
98 the MAX_LOOP loop. */
101 /* Maps statements to their lim_aux_data. */
103 static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
105 /* Description of a memory reference location. */
107 struct mem_ref_loc
109 tree *ref; /* The reference itself. */
 110 gimple *stmt; /* The statement in which it occurs. */
114 /* Description of a memory reference. */
116 struct im_mem_ref
118 unsigned id : 31; /* ID assigned to the memory reference
119 (its index in memory_accesses.refs_list) */
120 unsigned ref_canonical : 1; /* Whether mem.ref was canonicalized. */
121 hashval_t hash; /* Its hash value. */
123 /* The memory access itself and associated caching of alias-oracle
124 query meta-data. */
125 ao_ref mem;
 127 bitmap stored; /* The set of loops in which this memory location
128 is stored to. */
129 vec<mem_ref_loc> accesses_in_loop;
130 /* The locations of the accesses. Vector
131 indexed by the loop number. */
133 /* The following sets are computed on demand. We keep both set and
134 its complement, so that we know whether the information was
135 already computed or not. */
 136 bitmap_head indep_loop; /* The set of loops in which the memory
137 reference is independent, meaning:
138 If it is stored in the loop, this store
 139 is independent of all other loads and
140 stores.
141 If it is only loaded, then it is independent
 142 of all stores in the loop. */
143 bitmap_head dep_loop; /* The complement of INDEP_LOOP. */
146 /* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
147 to record (in)dependence against stores in the loop and its subloops, the
148 second to record (in)dependence against all references in the loop
149 and its subloops. */
150 #define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
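/* For illustration: for loop number 3 the pair of bits is
   LOOP_DEP_BIT (3, false) == 6 and LOOP_DEP_BIT (3, true) == 7, i.e.
   each loop owns two consecutive bits in these bitmaps.  */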
152 /* Mem_ref hashtable helpers. */
154 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
156 typedef ao_ref *compare_type;
157 static inline hashval_t hash (const im_mem_ref *);
158 static inline bool equal (const im_mem_ref *, const ao_ref *);
161 /* A hash function for struct im_mem_ref object OBJ. */
163 inline hashval_t
164 mem_ref_hasher::hash (const im_mem_ref *mem)
166 return mem->hash;
169 /* An equality function for struct im_mem_ref object MEM1 with
170 memory reference OBJ2. */
172 inline bool
173 mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
175 if (obj2->max_size_known_p ())
176 return (operand_equal_p (mem1->mem.base, obj2->base, 0)
177 && known_eq (mem1->mem.offset, obj2->offset)
178 && known_eq (mem1->mem.size, obj2->size)
179 && known_eq (mem1->mem.max_size, obj2->max_size)
180 && mem1->mem.volatile_p == obj2->volatile_p
181 && (mem1->mem.ref_alias_set == obj2->ref_alias_set
 182 /* We are not canonicalizing alias-sets, but for the
 183 special case where we didn't canonicalize yet and the
 184 incoming ref is an alias-set zero MEM we pick
185 the correct one already. */
186 || (!mem1->ref_canonical
187 && (TREE_CODE (obj2->ref) == MEM_REF
188 || TREE_CODE (obj2->ref) == TARGET_MEM_REF)
189 && obj2->ref_alias_set == 0)
190 /* Likewise if there's a canonical ref with alias-set zero. */
191 || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
192 && types_compatible_p (TREE_TYPE (mem1->mem.ref),
193 TREE_TYPE (obj2->ref)));
194 else
195 return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
199 /* Description of memory accesses in loops. */
201 static struct
203 /* The hash table of memory references accessed in loops. */
204 hash_table<mem_ref_hasher> *refs;
206 /* The list of memory references. */
207 vec<im_mem_ref *> refs_list;
209 /* The set of memory references accessed in each loop. */
210 vec<bitmap_head> refs_in_loop;
212 /* The set of memory references stored in each loop. */
213 vec<bitmap_head> refs_stored_in_loop;
 215 /* The set of memory references stored in each loop, including subloops. */
216 vec<bitmap_head> all_refs_stored_in_loop;
218 /* Cache for expanding memory addresses. */
219 hash_map<tree, name_expansion *> *ttae_cache;
220 } memory_accesses;
222 /* Obstack for the bitmaps in the above data structures. */
223 static bitmap_obstack lim_bitmap_obstack;
224 static obstack mem_ref_obstack;
226 static bool ref_indep_loop_p (struct loop *, im_mem_ref *);
227 static bool ref_always_accessed_p (struct loop *, im_mem_ref *, bool);
229 /* Minimum cost of an expensive expression. */
230 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
232 /* The outermost loop for which execution of the header guarantees that the
233 block will be executed. */
234 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
235 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
237 /* ID of the shared unanalyzable mem. */
238 #define UNANALYZABLE_MEM_ID 0
240 /* Whether the reference was analyzable. */
241 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
243 static struct lim_aux_data *
244 init_lim_data (gimple *stmt)
246 lim_aux_data *p = XCNEW (struct lim_aux_data);
247 lim_aux_data_map->put (stmt, p);
249 return p;
252 static struct lim_aux_data *
253 get_lim_data (gimple *stmt)
255 lim_aux_data **p = lim_aux_data_map->get (stmt);
256 if (!p)
257 return NULL;
259 return *p;
262 /* Releases the memory occupied by DATA. */
264 static void
265 free_lim_aux_data (struct lim_aux_data *data)
267 data->depends.release ();
268 free (data);
271 static void
272 clear_lim_data (gimple *stmt)
274 lim_aux_data **p = lim_aux_data_map->get (stmt);
275 if (!p)
276 return;
278 free_lim_aux_data (*p);
279 *p = NULL;
283 /* The possibilities of statement movement. */
284 enum move_pos
286 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
287 MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement
 288 to become executed -- memory accesses, ... */
289 MOVE_POSSIBLE /* Unlimited movement. */
293 /* If it is possible to hoist the statement STMT unconditionally,
294 returns MOVE_POSSIBLE.
295 If it is possible to hoist the statement STMT, but we must avoid making
296 it executed if it would not be executed in the original program (e.g.
297 because it may trap), return MOVE_PRESERVE_EXECUTION.
298 Otherwise return MOVE_IMPOSSIBLE. */
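/* Illustrative examples (not exhaustive): a store or a statement with
   volatile operands is MOVE_IMPOSSIBLE; a division that may trap or a
   possibly trapping memory load is MOVE_PRESERVE_EXECUTION; a simple
   arithmetic statement such as a_1 = b_2 + c_3 is MOVE_POSSIBLE.  */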
300 enum move_pos
301 movement_possibility (gimple *stmt)
303 tree lhs;
304 enum move_pos ret = MOVE_POSSIBLE;
306 if (flag_unswitch_loops
307 && gimple_code (stmt) == GIMPLE_COND)
309 /* If we perform unswitching, force the operands of the invariant
310 condition to be moved out of the loop. */
311 return MOVE_POSSIBLE;
314 if (gimple_code (stmt) == GIMPLE_PHI
315 && gimple_phi_num_args (stmt) <= 2
316 && !virtual_operand_p (gimple_phi_result (stmt))
317 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
318 return MOVE_POSSIBLE;
320 if (gimple_get_lhs (stmt) == NULL_TREE)
321 return MOVE_IMPOSSIBLE;
323 if (gimple_vdef (stmt))
324 return MOVE_IMPOSSIBLE;
326 if (stmt_ends_bb_p (stmt)
327 || gimple_has_volatile_ops (stmt)
328 || gimple_has_side_effects (stmt)
329 || stmt_could_throw_p (cfun, stmt))
330 return MOVE_IMPOSSIBLE;
332 if (is_gimple_call (stmt))
 334 /* While a pure or const call is guaranteed to have no side effects, we
335 cannot move it arbitrarily. Consider code like
337 char *s = something ();
339 while (1)
341 if (s)
342 t = strlen (s);
343 else
344 t = 0;
347 Here the strlen call cannot be moved out of the loop, even though
348 s is invariant. In addition to possibly creating a call with
349 invalid arguments, moving out a function call that is not executed
350 may cause performance regressions in case the call is costly and
351 not executed at all. */
352 ret = MOVE_PRESERVE_EXECUTION;
353 lhs = gimple_call_lhs (stmt);
355 else if (is_gimple_assign (stmt))
356 lhs = gimple_assign_lhs (stmt);
357 else
358 return MOVE_IMPOSSIBLE;
360 if (TREE_CODE (lhs) == SSA_NAME
361 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
362 return MOVE_IMPOSSIBLE;
364 if (TREE_CODE (lhs) != SSA_NAME
365 || gimple_could_trap_p (stmt))
366 return MOVE_PRESERVE_EXECUTION;
368 /* Non local loads in a transaction cannot be hoisted out. Well,
369 unless the load happens on every path out of the loop, but we
370 don't take this into account yet. */
371 if (flag_tm
372 && gimple_in_transaction (stmt)
373 && gimple_assign_single_p (stmt))
375 tree rhs = gimple_assign_rhs1 (stmt);
376 if (DECL_P (rhs) && is_global_var (rhs))
378 if (dump_file)
380 fprintf (dump_file, "Cannot hoist conditional load of ");
381 print_generic_expr (dump_file, rhs, TDF_SLIM);
382 fprintf (dump_file, " because it is in a transaction.\n");
384 return MOVE_IMPOSSIBLE;
388 return ret;
391 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
 392 loop to which we could move the expression using DEF if it did not have
 393 other operands, i.e. the outermost loop enclosing LOOP in which the value
394 of DEF is invariant. */
396 static struct loop *
397 outermost_invariant_loop (tree def, struct loop *loop)
399 gimple *def_stmt;
400 basic_block def_bb;
401 struct loop *max_loop;
402 struct lim_aux_data *lim_data;
404 if (!def)
405 return superloop_at_depth (loop, 1);
407 if (TREE_CODE (def) != SSA_NAME)
409 gcc_assert (is_gimple_min_invariant (def));
410 return superloop_at_depth (loop, 1);
413 def_stmt = SSA_NAME_DEF_STMT (def);
414 def_bb = gimple_bb (def_stmt);
415 if (!def_bb)
416 return superloop_at_depth (loop, 1);
418 max_loop = find_common_loop (loop, def_bb->loop_father);
420 lim_data = get_lim_data (def_stmt);
421 if (lim_data != NULL && lim_data->max_loop != NULL)
422 max_loop = find_common_loop (max_loop,
423 loop_outer (lim_data->max_loop));
424 if (max_loop == loop)
425 return NULL;
426 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
428 return max_loop;
431 /* DATA is a structure containing information associated with a statement
432 inside LOOP. DEF is one of the operands of this statement.
 434 Find the outermost loop enclosing LOOP in which the value of DEF is invariant
 435 and record this in the DATA->max_loop field. If DEF itself is defined inside
 436 this loop as well (i.e. we need to hoist it out of the loop if we want
 437 to hoist the statement represented by DATA), record the statement in which
 438 DEF is defined in the DATA->depends list. Additionally, if ADD_COST is true,
439 add the cost of the computation of DEF to the DATA->cost.
441 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
443 static bool
444 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
445 bool add_cost)
447 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
448 basic_block def_bb = gimple_bb (def_stmt);
449 struct loop *max_loop;
450 struct lim_aux_data *def_data;
452 if (!def_bb)
453 return true;
455 max_loop = outermost_invariant_loop (def, loop);
456 if (!max_loop)
457 return false;
459 if (flow_loop_nested_p (data->max_loop, max_loop))
460 data->max_loop = max_loop;
462 def_data = get_lim_data (def_stmt);
463 if (!def_data)
464 return true;
466 if (add_cost
467 /* Only add the cost if the statement defining DEF is inside LOOP,
468 i.e. if it is likely that by moving the invariants dependent
469 on it, we will be able to avoid creating a new register for
470 it (since it will be only used in these dependent invariants). */
471 && def_bb->loop_father == loop)
472 data->cost += def_data->cost;
474 data->depends.safe_push (def_stmt);
476 return true;
 479 /* Returns an estimate of the cost of statement STMT. The values here
480 are just ad-hoc constants, similar to costs for inlining. */
482 static unsigned
483 stmt_cost (gimple *stmt)
485 /* Always try to create possibilities for unswitching. */
486 if (gimple_code (stmt) == GIMPLE_COND
487 || gimple_code (stmt) == GIMPLE_PHI)
488 return LIM_EXPENSIVE;
490 /* We should be hoisting calls if possible. */
491 if (is_gimple_call (stmt))
493 tree fndecl;
495 /* Unless the call is a builtin_constant_p; this always folds to a
496 constant, so moving it is useless. */
497 fndecl = gimple_call_fndecl (stmt);
498 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P))
499 return 0;
501 return LIM_EXPENSIVE;
504 /* Hoisting memory references out should almost surely be a win. */
505 if (gimple_references_memory_p (stmt))
506 return LIM_EXPENSIVE;
508 if (gimple_code (stmt) != GIMPLE_ASSIGN)
509 return 1;
511 switch (gimple_assign_rhs_code (stmt))
513 case MULT_EXPR:
514 case WIDEN_MULT_EXPR:
515 case WIDEN_MULT_PLUS_EXPR:
516 case WIDEN_MULT_MINUS_EXPR:
517 case DOT_PROD_EXPR:
518 case TRUNC_DIV_EXPR:
519 case CEIL_DIV_EXPR:
520 case FLOOR_DIV_EXPR:
521 case ROUND_DIV_EXPR:
522 case EXACT_DIV_EXPR:
523 case CEIL_MOD_EXPR:
524 case FLOOR_MOD_EXPR:
525 case ROUND_MOD_EXPR:
526 case TRUNC_MOD_EXPR:
527 case RDIV_EXPR:
528 /* Division and multiplication are usually expensive. */
529 return LIM_EXPENSIVE;
531 case LSHIFT_EXPR:
532 case RSHIFT_EXPR:
533 case WIDEN_LSHIFT_EXPR:
534 case LROTATE_EXPR:
535 case RROTATE_EXPR:
536 /* Shifts and rotates are usually expensive. */
537 return LIM_EXPENSIVE;
539 case CONSTRUCTOR:
540 /* Make vector construction cost proportional to the number
541 of elements. */
542 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
544 case SSA_NAME:
545 case PAREN_EXPR:
546 /* Whether or not something is wrapped inside a PAREN_EXPR
547 should not change move cost. Nor should an intermediate
548 unpropagated SSA name copy. */
549 return 0;
551 default:
552 return 1;
 556 /* Finds the outermost loop between OUTER and LOOP in which the memory reference
557 REF is independent. If REF is not independent in LOOP, NULL is returned
558 instead. */
560 static struct loop *
561 outermost_indep_loop (struct loop *outer, struct loop *loop, im_mem_ref *ref)
563 struct loop *aloop;
565 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
566 return NULL;
568 for (aloop = outer;
569 aloop != loop;
570 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
571 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
572 && ref_indep_loop_p (aloop, ref))
573 return aloop;
575 if (ref_indep_loop_p (loop, ref))
576 return loop;
577 else
578 return NULL;
581 /* If there is a simple load or store to a memory reference in STMT, returns
582 the location of the memory reference, and sets IS_STORE according to whether
583 it is a store or load. Otherwise, returns NULL. */
585 static tree *
586 simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
588 tree *lhs, *rhs;
590 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
591 if (!gimple_assign_single_p (stmt))
592 return NULL;
594 lhs = gimple_assign_lhs_ptr (stmt);
595 rhs = gimple_assign_rhs1_ptr (stmt);
597 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
599 *is_store = false;
600 return rhs;
602 else if (gimple_vdef (stmt)
603 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
605 *is_store = true;
606 return lhs;
608 else
609 return NULL;
612 /* From a controlling predicate in DOM determine the arguments from
613 the PHI node PHI that are chosen if the predicate evaluates to
614 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
615 they are non-NULL. Returns true if the arguments can be determined,
616 else return false. */
618 static bool
619 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
620 tree *true_arg_p, tree *false_arg_p)
622 edge te, fe;
623 if (! extract_true_false_controlled_edges (dom, gimple_bb (phi),
624 &te, &fe))
625 return false;
627 if (true_arg_p)
628 *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
629 if (false_arg_p)
630 *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
632 return true;
 635 /* Determine the outermost loop to which it is possible to hoist a statement
 636 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
 637 the outermost loop in which the value computed by STMT is invariant.
 638 If MUST_PRESERVE_EXEC is true, additionally choose the loop such that
 639 we preserve whether STMT is executed. It also fills in other related
 640 information in LIM_DATA (STMT).
642 The function returns false if STMT cannot be hoisted outside of the loop it
643 is defined in, and true otherwise. */
645 static bool
646 determine_max_movement (gimple *stmt, bool must_preserve_exec)
648 basic_block bb = gimple_bb (stmt);
649 struct loop *loop = bb->loop_father;
650 struct loop *level;
651 struct lim_aux_data *lim_data = get_lim_data (stmt);
652 tree val;
653 ssa_op_iter iter;
655 if (must_preserve_exec)
656 level = ALWAYS_EXECUTED_IN (bb);
657 else
658 level = superloop_at_depth (loop, 1);
659 lim_data->max_loop = level;
661 if (gphi *phi = dyn_cast <gphi *> (stmt))
663 use_operand_p use_p;
664 unsigned min_cost = UINT_MAX;
665 unsigned total_cost = 0;
666 struct lim_aux_data *def_data;
668 /* We will end up promoting dependencies to be unconditionally
669 evaluated. For this reason the PHI cost (and thus the
670 cost we remove from the loop by doing the invariant motion)
671 is that of the cheapest PHI argument dependency chain. */
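/* For example (illustrative costs): for x_1 = PHI <a_2, 5> where the
   dependency chain of a_2 costs LIM_EXPENSIVE and the constant argument
   counts as 1, min_cost ends up as 1, which is all the motion can save.  */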
672 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
674 val = USE_FROM_PTR (use_p);
676 if (TREE_CODE (val) != SSA_NAME)
 678 /* Assign cost 1 to constants. */
679 min_cost = MIN (min_cost, 1);
680 total_cost += 1;
681 continue;
683 if (!add_dependency (val, lim_data, loop, false))
684 return false;
686 gimple *def_stmt = SSA_NAME_DEF_STMT (val);
687 if (gimple_bb (def_stmt)
688 && gimple_bb (def_stmt)->loop_father == loop)
690 def_data = get_lim_data (def_stmt);
691 if (def_data)
693 min_cost = MIN (min_cost, def_data->cost);
694 total_cost += def_data->cost;
699 min_cost = MIN (min_cost, total_cost);
700 lim_data->cost += min_cost;
702 if (gimple_phi_num_args (phi) > 1)
704 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
705 gimple *cond;
706 if (gsi_end_p (gsi_last_bb (dom)))
707 return false;
708 cond = gsi_stmt (gsi_last_bb (dom));
709 if (gimple_code (cond) != GIMPLE_COND)
710 return false;
711 /* Verify that this is an extended form of a diamond and
712 the PHI arguments are completely controlled by the
713 predicate in DOM. */
714 if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
715 return false;
717 /* Fold in dependencies and cost of the condition. */
718 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
720 if (!add_dependency (val, lim_data, loop, false))
721 return false;
722 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
723 if (def_data)
724 lim_data->cost += def_data->cost;
727 /* We want to avoid unconditionally executing very expensive
728 operations. As costs for our dependencies cannot be
 729 negative, just claim we are not invariant for this case.
730 We also are not sure whether the control-flow inside the
731 loop will vanish. */
732 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
733 && !(min_cost != 0
734 && total_cost / min_cost <= 2))
735 return false;
737 /* Assume that the control-flow in the loop will vanish.
738 ??? We should verify this and not artificially increase
739 the cost if that is not the case. */
740 lim_data->cost += stmt_cost (stmt);
743 return true;
745 else
746 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
747 if (!add_dependency (val, lim_data, loop, true))
748 return false;
750 if (gimple_vuse (stmt))
752 im_mem_ref *ref
753 = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
754 if (ref
755 && MEM_ANALYZABLE (ref))
757 lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
758 loop, ref);
759 if (!lim_data->max_loop)
760 return false;
762 else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
763 return false;
766 lim_data->cost += stmt_cost (stmt);
768 return true;
771 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
772 and that one of the operands of this statement is computed by STMT.
773 Ensure that STMT (together with all the statements that define its
774 operands) is hoisted at least out of the loop LEVEL. */
776 static void
777 set_level (gimple *stmt, struct loop *orig_loop, struct loop *level)
779 struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
780 struct lim_aux_data *lim_data;
781 gimple *dep_stmt;
782 unsigned i;
784 stmt_loop = find_common_loop (orig_loop, stmt_loop);
785 lim_data = get_lim_data (stmt);
786 if (lim_data != NULL && lim_data->tgt_loop != NULL)
787 stmt_loop = find_common_loop (stmt_loop,
788 loop_outer (lim_data->tgt_loop));
789 if (flow_loop_nested_p (stmt_loop, level))
790 return;
792 gcc_assert (level == lim_data->max_loop
793 || flow_loop_nested_p (lim_data->max_loop, level));
795 lim_data->tgt_loop = level;
796 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
797 set_level (dep_stmt, orig_loop, level);
 800 /* Determines an outermost loop from which we want to hoist the statement STMT.
 801 For now we choose the outermost possible loop. TODO -- use profiling
802 information to set it more sanely. */
804 static void
805 set_profitable_level (gimple *stmt)
807 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
810 /* Returns true if STMT is a call that has side effects. */
812 static bool
813 nonpure_call_p (gimple *stmt)
815 if (gimple_code (stmt) != GIMPLE_CALL)
816 return false;
818 return gimple_has_side_effects (stmt);
821 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
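/* A sketch of the rewrite on GIMPLE (temporary named as created below):

     lhs = a / b;      becomes      reciptmp = 1.0 / b;
                                    lhs = a * reciptmp;

   Only the reciprocal statement is returned for further invariant
   processing; the multiplication stays at the original position.  */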
823 static gimple *
824 rewrite_reciprocal (gimple_stmt_iterator *bsi)
826 gassign *stmt, *stmt1, *stmt2;
827 tree name, lhs, type;
828 tree real_one;
829 gimple_stmt_iterator gsi;
831 stmt = as_a <gassign *> (gsi_stmt (*bsi));
832 lhs = gimple_assign_lhs (stmt);
833 type = TREE_TYPE (lhs);
835 real_one = build_one_cst (type);
837 name = make_temp_ssa_name (type, NULL, "reciptmp");
838 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
839 gimple_assign_rhs2 (stmt));
840 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
841 gimple_assign_rhs1 (stmt));
843 /* Replace division stmt with reciprocal and multiply stmts.
844 The multiply stmt is not invariant, so update iterator
845 and avoid rescanning. */
846 gsi = *bsi;
847 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
848 gsi_replace (&gsi, stmt2, true);
850 /* Continue processing with invariant reciprocal statement. */
851 return stmt1;
854 /* Check if the pattern at *BSI is a bittest of the form
 855 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
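/* For example, when B is loop-invariant but A is not (illustrative GIMPLE,
   temporaries named as created below):

     tmp_1 = A >> B;               shifttmp_1 = 1 << B;
     tmp_2 = tmp_1 & 1;   becomes  shifttmp_2 = A & shifttmp_1;
     if (tmp_2 != 0) ...           if (shifttmp_2 != 0) ...

   so the computation of the mask 1 << B can be hoisted out of the loop.  */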
857 static gimple *
858 rewrite_bittest (gimple_stmt_iterator *bsi)
860 gassign *stmt;
861 gimple *stmt1;
862 gassign *stmt2;
863 gimple *use_stmt;
864 gcond *cond_stmt;
865 tree lhs, name, t, a, b;
866 use_operand_p use;
868 stmt = as_a <gassign *> (gsi_stmt (*bsi));
869 lhs = gimple_assign_lhs (stmt);
871 /* Verify that the single use of lhs is a comparison against zero. */
872 if (TREE_CODE (lhs) != SSA_NAME
873 || !single_imm_use (lhs, &use, &use_stmt))
874 return stmt;
875 cond_stmt = dyn_cast <gcond *> (use_stmt);
876 if (!cond_stmt)
877 return stmt;
878 if (gimple_cond_lhs (cond_stmt) != lhs
879 || (gimple_cond_code (cond_stmt) != NE_EXPR
880 && gimple_cond_code (cond_stmt) != EQ_EXPR)
881 || !integer_zerop (gimple_cond_rhs (cond_stmt)))
882 return stmt;
884 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
885 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
886 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
887 return stmt;
889 /* There is a conversion in between possibly inserted by fold. */
890 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
892 t = gimple_assign_rhs1 (stmt1);
893 if (TREE_CODE (t) != SSA_NAME
894 || !has_single_use (t))
895 return stmt;
896 stmt1 = SSA_NAME_DEF_STMT (t);
897 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
898 return stmt;
901 /* Verify that B is loop invariant but A is not. Verify that with
902 all the stmt walking we are still in the same loop. */
903 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
904 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
905 return stmt;
907 a = gimple_assign_rhs1 (stmt1);
908 b = gimple_assign_rhs2 (stmt1);
910 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
911 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
913 gimple_stmt_iterator rsi;
915 /* 1 << B */
916 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
917 build_int_cst (TREE_TYPE (a), 1), b);
918 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
919 stmt1 = gimple_build_assign (name, t);
921 /* A & (1 << B) */
922 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
923 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
924 stmt2 = gimple_build_assign (name, t);
926 /* Replace the SSA_NAME we compare against zero. Adjust
927 the type of zero accordingly. */
928 SET_USE (use, name);
929 gimple_cond_set_rhs (cond_stmt,
930 build_int_cst_type (TREE_TYPE (name),
931 0));
933 /* Don't use gsi_replace here, none of the new assignments sets
934 the variable originally set in stmt. Move bsi to stmt1, and
935 then remove the original stmt, so that we get a chance to
936 retain debug info for it. */
937 rsi = *bsi;
938 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
939 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
940 gimple *to_release = gsi_stmt (rsi);
941 gsi_remove (&rsi, true);
942 release_defs (to_release);
944 return stmt1;
947 return stmt;
 950 /* For each statement determines the outermost loop in which it is invariant,
 951 the statements on whose motion it depends and the cost of the computation.
 952 This information is stored in the LIM_DATA structure associated with
 953 each statement. */
954 class invariantness_dom_walker : public dom_walker
956 public:
957 invariantness_dom_walker (cdi_direction direction)
958 : dom_walker (direction) {}
960 virtual edge before_dom_children (basic_block);
 963 /* Determine the outermost loops in which statements in basic block BB are
 964 invariant, and record them in the LIM_DATA associated with the statements.
965 Callback for dom_walker. */
967 edge
968 invariantness_dom_walker::before_dom_children (basic_block bb)
970 enum move_pos pos;
971 gimple_stmt_iterator bsi;
972 gimple *stmt;
973 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
974 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
975 struct lim_aux_data *lim_data;
977 if (!loop_outer (bb->loop_father))
978 return NULL;
980 if (dump_file && (dump_flags & TDF_DETAILS))
981 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
982 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
 984 /* Look at PHI nodes, but only if there are at most two.
985 ??? We could relax this further by post-processing the inserted
986 code and transforming adjacent cond-exprs with the same predicate
987 to control flow again. */
988 bsi = gsi_start_phis (bb);
989 if (!gsi_end_p (bsi)
990 && ((gsi_next (&bsi), gsi_end_p (bsi))
991 || (gsi_next (&bsi), gsi_end_p (bsi))))
992 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
994 stmt = gsi_stmt (bsi);
996 pos = movement_possibility (stmt);
997 if (pos == MOVE_IMPOSSIBLE)
998 continue;
1000 lim_data = get_lim_data (stmt);
1001 if (! lim_data)
1002 lim_data = init_lim_data (stmt);
1003 lim_data->always_executed_in = outermost;
1005 if (!determine_max_movement (stmt, false))
1007 lim_data->max_loop = NULL;
1008 continue;
1011 if (dump_file && (dump_flags & TDF_DETAILS))
1013 print_gimple_stmt (dump_file, stmt, 2);
1014 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1015 loop_depth (lim_data->max_loop),
1016 lim_data->cost);
1019 if (lim_data->cost >= LIM_EXPENSIVE)
1020 set_profitable_level (stmt);
1023 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1025 stmt = gsi_stmt (bsi);
1027 pos = movement_possibility (stmt);
1028 if (pos == MOVE_IMPOSSIBLE)
1030 if (nonpure_call_p (stmt))
1032 maybe_never = true;
1033 outermost = NULL;
1035 /* Make sure to note always_executed_in for stores to make
1036 store-motion work. */
1037 else if (stmt_makes_single_store (stmt))
1039 struct lim_aux_data *lim_data = get_lim_data (stmt);
1040 if (! lim_data)
1041 lim_data = init_lim_data (stmt);
1042 lim_data->always_executed_in = outermost;
1044 continue;
1047 if (is_gimple_assign (stmt)
1048 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1049 == GIMPLE_BINARY_RHS))
1051 tree op0 = gimple_assign_rhs1 (stmt);
1052 tree op1 = gimple_assign_rhs2 (stmt);
1053 struct loop *ol1 = outermost_invariant_loop (op1,
1054 loop_containing_stmt (stmt));
1056 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1057 to be hoisted out of loop, saving expensive divide. */
1058 if (pos == MOVE_POSSIBLE
1059 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1060 && flag_unsafe_math_optimizations
1061 && !flag_trapping_math
1062 && ol1 != NULL
1063 && outermost_invariant_loop (op0, ol1) == NULL)
1064 stmt = rewrite_reciprocal (&bsi);
1066 /* If the shift count is invariant, convert (A >> B) & 1 to
1067 A & (1 << B) allowing the bit mask to be hoisted out of the loop
1068 saving an expensive shift. */
1069 if (pos == MOVE_POSSIBLE
1070 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1071 && integer_onep (op1)
1072 && TREE_CODE (op0) == SSA_NAME
1073 && has_single_use (op0))
1074 stmt = rewrite_bittest (&bsi);
1077 lim_data = get_lim_data (stmt);
1078 if (! lim_data)
1079 lim_data = init_lim_data (stmt);
1080 lim_data->always_executed_in = outermost;
1082 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1083 continue;
1085 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1087 lim_data->max_loop = NULL;
1088 continue;
1091 if (dump_file && (dump_flags & TDF_DETAILS))
1093 print_gimple_stmt (dump_file, stmt, 2);
1094 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1095 loop_depth (lim_data->max_loop),
1096 lim_data->cost);
1099 if (lim_data->cost >= LIM_EXPENSIVE)
1100 set_profitable_level (stmt);
1102 return NULL;
1105 /* Hoist the statements in basic block BB out of the loops prescribed by
1106 data stored in LIM_DATA structures associated with each statement. Callback
1107 for walk_dominator_tree. */
1109 unsigned int
1110 move_computations_worker (basic_block bb)
1112 struct loop *level;
1113 unsigned cost = 0;
1114 struct lim_aux_data *lim_data;
1115 unsigned int todo = 0;
1117 if (!loop_outer (bb->loop_father))
1118 return todo;
1120 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1122 gassign *new_stmt;
1123 gphi *stmt = bsi.phi ();
1125 lim_data = get_lim_data (stmt);
1126 if (lim_data == NULL)
1128 gsi_next (&bsi);
1129 continue;
1132 cost = lim_data->cost;
1133 level = lim_data->tgt_loop;
1134 clear_lim_data (stmt);
1136 if (!level)
1138 gsi_next (&bsi);
1139 continue;
1142 if (dump_file && (dump_flags & TDF_DETAILS))
1144 fprintf (dump_file, "Moving PHI node\n");
1145 print_gimple_stmt (dump_file, stmt, 0);
1146 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1147 cost, level->num);
1150 if (gimple_phi_num_args (stmt) == 1)
1152 tree arg = PHI_ARG_DEF (stmt, 0);
1153 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1154 TREE_CODE (arg), arg);
1156 else
1158 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1159 gimple *cond = gsi_stmt (gsi_last_bb (dom));
1160 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1161 /* Get the PHI arguments corresponding to the true and false
1162 edges of COND. */
1163 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1164 gcc_assert (arg0 && arg1);
1165 t = build2 (gimple_cond_code (cond), boolean_type_node,
1166 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1167 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1168 COND_EXPR, t, arg0, arg1);
1169 todo |= TODO_cleanup_cfg;
1171 if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1172 && (!ALWAYS_EXECUTED_IN (bb)
1173 || (ALWAYS_EXECUTED_IN (bb) != level
1174 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1176 tree lhs = gimple_assign_lhs (new_stmt);
1177 SSA_NAME_RANGE_INFO (lhs) = NULL;
1179 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1180 remove_phi_node (&bsi, false);
1183 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1185 edge e;
1187 gimple *stmt = gsi_stmt (bsi);
1189 lim_data = get_lim_data (stmt);
1190 if (lim_data == NULL)
1192 gsi_next (&bsi);
1193 continue;
1196 cost = lim_data->cost;
1197 level = lim_data->tgt_loop;
1198 clear_lim_data (stmt);
1200 if (!level)
1202 gsi_next (&bsi);
1203 continue;
1206 /* We do not really want to move conditionals out of the loop; we just
1207 placed it here to force its operands to be moved if necessary. */
1208 if (gimple_code (stmt) == GIMPLE_COND)
1209 continue;
1211 if (dump_file && (dump_flags & TDF_DETAILS))
1213 fprintf (dump_file, "Moving statement\n");
1214 print_gimple_stmt (dump_file, stmt, 0);
1215 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1216 cost, level->num);
1219 e = loop_preheader_edge (level);
1220 gcc_assert (!gimple_vdef (stmt));
1221 if (gimple_vuse (stmt))
1223 /* The new VUSE is the one from the virtual PHI in the loop
1224 header or the one already present. */
1225 gphi_iterator gsi2;
1226 for (gsi2 = gsi_start_phis (e->dest);
1227 !gsi_end_p (gsi2); gsi_next (&gsi2))
1229 gphi *phi = gsi2.phi ();
1230 if (virtual_operand_p (gimple_phi_result (phi)))
1232 gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1233 break;
1237 gsi_remove (&bsi, false);
1238 if (gimple_has_lhs (stmt)
1239 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1240 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1241 && (!ALWAYS_EXECUTED_IN (bb)
1242 || !(ALWAYS_EXECUTED_IN (bb) == level
1243 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1245 tree lhs = gimple_get_lhs (stmt);
1246 SSA_NAME_RANGE_INFO (lhs) = NULL;
1248 /* In case this is a stmt that is not unconditionally executed
1249 when the target loop header is executed and the stmt may
1250 invoke undefined integer or pointer overflow rewrite it to
1251 unsigned arithmetic. */
1252 if (is_gimple_assign (stmt)
1253 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1254 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1255 && arith_code_with_undefined_signed_overflow
1256 (gimple_assign_rhs_code (stmt))
1257 && (!ALWAYS_EXECUTED_IN (bb)
1258 || !(ALWAYS_EXECUTED_IN (bb) == level
1259 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1260 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1261 else
1262 gsi_insert_on_edge (e, stmt);
1265 return todo;
1268 /* Hoist the statements out of the loops prescribed by data stored in
 1269 LIM_DATA structures associated with each statement. */
1271 static unsigned int
1272 move_computations (void)
1274 int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
1275 int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
1276 unsigned todo = 0;
1278 for (int i = 0; i < n; ++i)
1279 todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (cfun, rpo[i]));
1281 free (rpo);
1283 gsi_commit_edge_inserts ();
1284 if (need_ssa_update_p (cfun))
1285 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1287 return todo;
1290 /* Checks whether the statement defining variable *INDEX can be hoisted
1291 out of the loop passed in DATA. Callback for for_each_index. */
1293 static bool
1294 may_move_till (tree ref, tree *index, void *data)
1296 struct loop *loop = (struct loop *) data, *max_loop;
1298 /* If REF is an array reference, check also that the step and the lower
 1299 bound are invariant in LOOP. */
1300 if (TREE_CODE (ref) == ARRAY_REF)
1302 tree step = TREE_OPERAND (ref, 3);
1303 tree lbound = TREE_OPERAND (ref, 2);
1305 max_loop = outermost_invariant_loop (step, loop);
1306 if (!max_loop)
1307 return false;
1309 max_loop = outermost_invariant_loop (lbound, loop);
1310 if (!max_loop)
1311 return false;
1314 max_loop = outermost_invariant_loop (*index, loop);
1315 if (!max_loop)
1316 return false;
1318 return true;
 1321 /* If OP is an SSA_NAME, force the statement that defines it to be
 1322 moved out of the LOOP. ORIG_LOOP is the loop in which EXPR is used. */
1324 static void
1325 force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1327 gimple *stmt;
1329 if (!op
1330 || is_gimple_min_invariant (op))
1331 return;
1333 gcc_assert (TREE_CODE (op) == SSA_NAME);
1335 stmt = SSA_NAME_DEF_STMT (op);
1336 if (gimple_nop_p (stmt))
1337 return;
1339 set_level (stmt, orig_loop, loop);
 1342 /* Forces statements defining invariants in REF (and *INDEX) to be moved out of
1343 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1344 for_each_index. */
1346 struct fmt_data
1348 struct loop *loop;
1349 struct loop *orig_loop;
1352 static bool
1353 force_move_till (tree ref, tree *index, void *data)
1355 struct fmt_data *fmt_data = (struct fmt_data *) data;
1357 if (TREE_CODE (ref) == ARRAY_REF)
1359 tree step = TREE_OPERAND (ref, 3);
1360 tree lbound = TREE_OPERAND (ref, 2);
1362 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1363 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1366 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1368 return true;
1371 /* A function to free the mem_ref object OBJ. */
1373 static void
1374 memref_free (struct im_mem_ref *mem)
1376 mem->accesses_in_loop.release ();
1379 /* Allocates and returns a memory reference description for MEM whose hash
1380 value is HASH and id is ID. */
1382 static im_mem_ref *
1383 mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1385 im_mem_ref *ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
1386 if (mem)
1387 ref->mem = *mem;
1388 else
1389 ao_ref_init (&ref->mem, error_mark_node);
1390 ref->id = id;
1391 ref->ref_canonical = false;
1392 ref->hash = hash;
1393 ref->stored = NULL;
1394 bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
1395 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1396 ref->accesses_in_loop.create (1);
1398 return ref;
1401 /* Records memory reference location *LOC in LOOP to the memory reference
1402 description REF. The reference occurs in statement STMT. */
1404 static void
1405 record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1407 mem_ref_loc aref;
1408 aref.stmt = stmt;
1409 aref.ref = loc;
1410 ref->accesses_in_loop.safe_push (aref);
 1413 /* Set the LOOP bit in REF's stored bitmap and allocate it if
1414 necessary. Return whether a bit was changed. */
1416 static bool
1417 set_ref_stored_in_loop (im_mem_ref *ref, struct loop *loop)
1419 if (!ref->stored)
1420 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1421 return bitmap_set_bit (ref->stored, loop->num);
1424 /* Marks reference REF as stored in LOOP. */
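/* For example, a store first recorded in a loop at depth 3 sets the stored
   bit for that loop and then for each enclosing loop, stopping as soon as
   a bit is found to be already set.  */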
1426 static void
1427 mark_ref_stored (im_mem_ref *ref, struct loop *loop)
1429 while (loop != current_loops->tree_root
1430 && set_ref_stored_in_loop (ref, loop))
1431 loop = loop_outer (loop);
1434 /* Gathers memory references in statement STMT in LOOP, storing the
1435 information about them in the memory_accesses structure. Marks
1436 the vops accessed through unrecognized statements there as
1437 well. */
1439 static void
1440 gather_mem_refs_stmt (struct loop *loop, gimple *stmt)
1442 tree *mem = NULL;
1443 hashval_t hash;
1444 im_mem_ref **slot;
1445 im_mem_ref *ref;
1446 bool is_stored;
1447 unsigned id;
1449 if (!gimple_vuse (stmt))
1450 return;
1452 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1453 if (!mem)
1455 /* We use the shared mem_ref for all unanalyzable refs. */
1456 id = UNANALYZABLE_MEM_ID;
1457 ref = memory_accesses.refs_list[id];
1458 if (dump_file && (dump_flags & TDF_DETAILS))
1460 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1461 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1463 is_stored = gimple_vdef (stmt);
1465 else
1467 /* We are looking for equal refs that might differ in structure
1468 such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but
1469 make sure we can canonicalize the ref in the hashtable if
1470 non-operand_equal_p refs are found. For the lookup we mark
1471 the case we want strict equality with aor.max_size == -1. */
1472 ao_ref aor;
1473 ao_ref_init (&aor, *mem);
1474 ao_ref_base (&aor);
1475 ao_ref_alias_set (&aor);
1476 HOST_WIDE_INT offset, size, max_size;
1477 poly_int64 saved_maxsize = aor.max_size, mem_off;
1478 tree mem_base;
1479 if (aor.max_size_known_p ()
1480 && aor.offset.is_constant (&offset)
1481 && aor.size.is_constant (&size)
1482 && aor.max_size.is_constant (&max_size)
1483 && size == max_size
1484 && (size % BITS_PER_UNIT) == 0
1485 /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
1486 size. Make sure this is consistent with the extraction. */
1487 && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
1488 && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
1489 aor.size)
1490 && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
1492 hash = iterative_hash_expr (ao_ref_base (&aor), 0);
1493 hash = iterative_hash_host_wide_int (offset, hash);
1494 hash = iterative_hash_host_wide_int (size, hash);
1496 else
1498 hash = iterative_hash_expr (aor.ref, 0);
1499 aor.max_size = -1;
1501 slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
1502 aor.max_size = saved_maxsize;
1503 if (*slot)
1505 if (!(*slot)->ref_canonical
1506 && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
1508 /* If we didn't yet canonicalize the hashtable ref (which
1509 we'll end up using for code insertion) and hit a second
1510 equal ref that is not structurally equivalent create
1511 a canonical ref which is a bare MEM_REF. */
1512 if (TREE_CODE (*mem) == MEM_REF
1513 || TREE_CODE (*mem) == TARGET_MEM_REF)
1515 (*slot)->mem.ref = *mem;
1516 (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
1518 else
1520 tree ref_alias_type = reference_alias_ptr_type (*mem);
1521 unsigned int ref_align = get_object_alignment (*mem);
1522 tree ref_type = TREE_TYPE (*mem);
1523 tree tmp = build_fold_addr_expr (unshare_expr (mem_base));
1524 if (TYPE_ALIGN (ref_type) != ref_align)
1525 ref_type = build_aligned_type (ref_type, ref_align);
1526 (*slot)->mem.ref
1527 = fold_build2 (MEM_REF, ref_type, tmp,
1528 build_int_cst (ref_alias_type, mem_off));
1529 if ((*slot)->mem.volatile_p)
1530 TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
1531 gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
1532 && is_gimple_mem_ref_addr
1533 (TREE_OPERAND ((*slot)->mem.ref,
1534 0)));
1535 (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
1537 (*slot)->ref_canonical = true;
1539 ref = *slot;
1540 id = ref->id;
1542 else
1544 id = memory_accesses.refs_list.length ();
1545 ref = mem_ref_alloc (&aor, hash, id);
1546 memory_accesses.refs_list.safe_push (ref);
1547 *slot = ref;
1549 if (dump_file && (dump_flags & TDF_DETAILS))
1551 fprintf (dump_file, "Memory reference %u: ", id);
1552 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1553 fprintf (dump_file, "\n");
1557 record_mem_ref_loc (ref, stmt, mem);
1559 bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
1560 if (is_stored)
1562 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1563 mark_ref_stored (ref, loop);
1565 init_lim_data (stmt)->ref = ref->id;
1566 return;
1569 static unsigned *bb_loop_postorder;
 1571 /* qsort sort function to sort blocks by their loop father's postorder. */
1573 static int
1574 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
1576 basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
1577 basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
1578 struct loop *loop1 = bb1->loop_father;
1579 struct loop *loop2 = bb2->loop_father;
1580 if (loop1->num == loop2->num)
1581 return bb1->index - bb2->index;
1582 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
 1585 /* qsort sort function to sort ref locs by their loop father's postorder. */
1587 static int
1588 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
1590 mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
1591 mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
1592 struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1593 struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1594 if (loop1->num == loop2->num)
1595 return 0;
1596 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1599 /* Gathers memory references in loops. */
1601 static void
1602 analyze_memory_references (void)
1604 gimple_stmt_iterator bsi;
1605 basic_block bb, *bbs;
1606 struct loop *loop, *outer;
1607 unsigned i, n;
 1609 /* Collect all basic-blocks in loops and sort them by their
 1610 loop's postorder. */
1611 i = 0;
1612 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1613 FOR_EACH_BB_FN (bb, cfun)
1614 if (bb->loop_father != current_loops->tree_root)
1615 bbs[i++] = bb;
1616 n = i;
1617 qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);
1619 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1620 That results in better locality for all the bitmaps. */
1621 for (i = 0; i < n; ++i)
1623 basic_block bb = bbs[i];
1624 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1625 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
 1628 /* Sort the location list of gathered memory references by their
1629 loop postorder number. */
1630 im_mem_ref *ref;
1631 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1632 ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);
1634 free (bbs);
1635 // free (bb_loop_postorder);
1637 /* Propagate the information about accessed memory references up
1638 the loop hierarchy. */
1639 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1641 /* Finalize the overall touched references (including subloops). */
1642 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1643 &memory_accesses.refs_stored_in_loop[loop->num]);
1645 /* Propagate the information about accessed memory references up
1646 the loop hierarchy. */
1647 outer = loop_outer (loop);
1648 if (outer == current_loops->tree_root)
1649 continue;
1651 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1652 &memory_accesses.all_refs_stored_in_loop[loop->num]);
1656 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1657 tree_to_aff_combination_expand. */
1659 static bool
1660 mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
1661 hash_map<tree, name_expansion *> **ttae_cache)
1663 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1664 object and their offset differ in such a way that the locations cannot
1665 overlap, then they cannot alias. */
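/* For instance (illustrative), a[i] and a[i + 1] expand to the same base
   with affine offsets differing by the element size, so with known access
   sizes the two locations cannot overlap and hence cannot alias.  */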
1666 poly_widest_int size1, size2;
1667 aff_tree off1, off2;
1669 /* Perform basic offset and type-based disambiguation. */
1670 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
1671 return false;
1673 /* The expansion of addresses may be a bit expensive, thus we only do
1674 the check at -O2 and higher optimization levels. */
1675 if (optimize < 2)
1676 return true;
1678 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1679 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1680 aff_combination_expand (&off1, ttae_cache);
1681 aff_combination_expand (&off2, ttae_cache);
1682 aff_combination_scale (&off1, -1);
1683 aff_combination_add (&off2, &off1);
1685 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1686 return false;
1688 return true;
1691 /* Compare function for bsearch searching for reference locations
1692 in a loop. */
1694 static int
1695 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
1697 struct loop *loop = (struct loop *)const_cast<void *>(loop_);
1698 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1699 struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1700 if (loop->num == loc_loop->num
1701 || flow_loop_nested_p (loop, loc_loop))
1702 return 0;
1703 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1704 ? -1 : 1);
1707 /* Iterates over all locations of REF in LOOP and its subloops calling
1708 fn.operator() with the location as argument. When that operator
1709 returns true the iteration is stopped and true is returned.
1710 Otherwise false is returned. */
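/* For example, rewrite_mem_refs below uses this with rewrite_mem_ref_loc
   to replace every access to a reference in LOOP by a scalar temporary.  */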
1712 template <typename FN>
1713 static bool
1714 for_all_locs_in_loop (struct loop *loop, im_mem_ref *ref, FN fn)
1716 unsigned i;
1717 mem_ref_loc *loc;
1719 /* Search for the cluster of locs in the accesses_in_loop vector
 1720 which is sorted by the postorder index of the loop father. */
1721 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
1722 if (!loc)
1723 return false;
1725 /* We have found one location inside loop or its sub-loops. Iterate
1726 both forward and backward to cover the whole cluster. */
1727 i = loc - ref->accesses_in_loop.address ();
1728 while (i > 0)
1730 --i;
1731 mem_ref_loc *l = &ref->accesses_in_loop[i];
1732 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1733 break;
1734 if (fn (l))
1735 return true;
1737 for (i = loc - ref->accesses_in_loop.address ();
1738 i < ref->accesses_in_loop.length (); ++i)
1740 mem_ref_loc *l = &ref->accesses_in_loop[i];
1741 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1742 break;
1743 if (fn (l))
1744 return true;
1747 return false;
1750 /* Rewrites location LOC by TMP_VAR. */
1752 struct rewrite_mem_ref_loc
1754 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1755 bool operator () (mem_ref_loc *loc);
1756 tree tmp_var;
1759 bool
1760 rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1762 *loc->ref = tmp_var;
1763 update_stmt (loc->stmt);
1764 return false;
1767 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1769 static void
1770 rewrite_mem_refs (struct loop *loop, im_mem_ref *ref, tree tmp_var)
1772 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1775 /* Stores the first reference location in LOCP. */
1777 struct first_mem_ref_loc_1
1779 first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
1780 bool operator () (mem_ref_loc *loc);
1781 mem_ref_loc **locp;
1784 bool
1785 first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
1787 *locp = loc;
1788 return true;
1791 /* Returns the first reference location to REF in LOOP. */
1793 static mem_ref_loc *
1794 first_mem_ref_loc (struct loop *loop, im_mem_ref *ref)
1796 mem_ref_loc *locp = NULL;
1797 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1798 return locp;
1801 struct prev_flag_edges {
1802 /* Edge to insert new flag comparison code. */
1803 edge append_cond_position;
1805 /* Edge for fall through from previous flag comparison. */
1806 edge last_cond_fallthru;
1809 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1810 MEM along edge EX.
1812 The store is only done if MEM has changed. We do this so no
1813 changes to MEM occur on code paths that did not originally store
1814 into it.
1816 The common case for execute_sm will transform:
1818 for (...) {
1819 if (foo)
1820 stuff;
1821 else
1822 MEM = TMP_VAR;
1825 into:
1827 lsm = MEM;
1828 for (...) {
1829 if (foo)
1830 stuff;
1831 else
1832 lsm = TMP_VAR;
1834 MEM = lsm;
1836 This function will generate:
1838 lsm = MEM;
1840 lsm_flag = false;
1842 for (...) {
1843 if (foo)
1844 stuff;
1845 else {
1846 lsm = TMP_VAR;
1847 lsm_flag = true;
1850 if (lsm_flag) <--
 1851 MEM = lsm; <-- */
1854 static void
1855 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
1856 edge preheader, hash_set <basic_block> *flag_bbs)
1858 basic_block new_bb, then_bb, old_dest;
1859 bool loop_has_only_one_exit;
1860 edge then_old_edge, orig_ex = ex;
1861 gimple_stmt_iterator gsi;
1862 gimple *stmt;
1863 struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1864 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1866 profile_count count_sum = profile_count::zero ();
1867 int nbbs = 0, ncount = 0;
1868 profile_probability flag_probability = profile_probability::uninitialized ();
1870 /* Flag is set in FLAG_BBS. Determine probability that flag will be true
1871 at loop exit.
1873 This code may look fancy, but it cannot update profile very realistically
 1874 because we do not know the probability that the flag will be true at a given
1875 loop exit.
1877 We look for two interesting extremes
1878 - when exit is dominated by block setting the flag, we know it will
1879 always be true. This is a common case.
1880 - when all blocks setting the flag have very low frequency we know
1881 it will likely be false.
1882 In all other cases we default to 2/3 for flag being true. */
1884 for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
1885 it != flag_bbs->end (); ++it)
1887 if ((*it)->count.initialized_p ())
1888 count_sum += (*it)->count, ncount ++;
1889 if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
1890 flag_probability = profile_probability::always ();
1891 nbbs++;
1894 profile_probability cap = profile_probability::always ().apply_scale (2, 3);
1896 if (flag_probability.initialized_p ())
1898 else if (ncount == nbbs
1899 && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
1901 flag_probability = count_sum.probability_in (preheader->count ());
1902 if (flag_probability > cap)
1903 flag_probability = cap;
1906 if (!flag_probability.initialized_p ())
1907 flag_probability = cap;
1909 /* ?? Insert store after previous store if applicable. See note
1910 below. */
1911 if (prev_edges)
1912 ex = prev_edges->append_cond_position;
1914 loop_has_only_one_exit = single_pred_p (ex->dest);
1916 if (loop_has_only_one_exit)
1917 ex = split_block_after_labels (ex->dest);
1918 else
1920 for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1921 !gsi_end_p (gpi); gsi_next (&gpi))
1923 gphi *phi = gpi.phi ();
1924 if (virtual_operand_p (gimple_phi_result (phi)))
1925 continue;
1927 /* When the destination has a non-virtual PHI node with multiple
1928 predecessors make sure we preserve the PHI structure by
1929 forcing a forwarder block so that hoisting of that PHI will
1930 still work. */
1931 split_edge (ex);
1932 break;
1936 old_dest = ex->dest;
1937 new_bb = split_edge (ex);
1938 then_bb = create_empty_bb (new_bb);
1939 then_bb->count = new_bb->count.apply_probability (flag_probability);
1940 if (irr)
1941 then_bb->flags = BB_IRREDUCIBLE_LOOP;
1942 add_bb_to_loop (then_bb, new_bb->loop_father);
1944 gsi = gsi_start_bb (new_bb);
1945 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1946 NULL_TREE, NULL_TREE);
1947 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1949 gsi = gsi_start_bb (then_bb);
1950 /* Insert actual store. */
1951 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1952 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1954 edge e1 = single_succ_edge (new_bb);
1955 edge e2 = make_edge (new_bb, then_bb,
1956 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1957 e2->probability = flag_probability;
1959 e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
1960 e1->flags &= ~EDGE_FALLTHRU;
1962 e1->probability = flag_probability.invert ();
1964 then_old_edge = make_single_succ_edge (then_bb, old_dest,
1965 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1967 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
1969 if (prev_edges)
1971 basic_block prevbb = prev_edges->last_cond_fallthru->src;
1972 redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
1973 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
1974 set_immediate_dominator (CDI_DOMINATORS, old_dest,
1975 recompute_dominator (CDI_DOMINATORS, old_dest));
1978 /* ?? Because stores may alias, they must happen in the exact
1979 sequence in which they originally happened. Save the position right after
1980 the (_lsm) store we just created so we can continue appending after
1981 it and maintain the original order. */
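/* Illustration (editor's sketch; p, q, x and y are invented): if the
   loop body originally performed

       *p = x;
       *q = y;    where *q may alias *p

   then the flag-guarded copies emitted on the exit must also write *p
   before *q, or an aliased location could keep the wrong final value.
   Chaining new stores after append_cond_position preserves that order
   when this function is called again for the same exit.  */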
1983 struct prev_flag_edges *p;
1985 if (orig_ex->aux)
1986 orig_ex->aux = NULL;
1987 alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
1988 p = (struct prev_flag_edges *) orig_ex->aux;
1989 p->append_cond_position = then_old_edge;
1990 p->last_cond_fallthru = find_edge (new_bb, old_dest);
1991 orig_ex->aux = (void *) p;
1994 if (!loop_has_only_one_exit)
1995 for (gphi_iterator gpi = gsi_start_phis (old_dest);
1996 !gsi_end_p (gpi); gsi_next (&gpi))
1998 gphi *phi = gpi.phi ();
1999 unsigned i;
2001 for (i = 0; i < gimple_phi_num_args (phi); i++)
2002 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2004 tree arg = gimple_phi_arg_def (phi, i);
2005 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2006 update_stmt (phi);
2011 /* When REF is set (i.e. stored to) at the location, set the flag indicating the store. */
2013 struct sm_set_flag_if_changed
2015 sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
2016 : flag (flag_), bbs (bbs_) {}
2017 bool operator () (mem_ref_loc *loc);
2018 tree flag;
2019 hash_set <basic_block> *bbs;
2022 bool
2023 sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2025 /* Only set the flag for writes. */
2026 if (is_gimple_assign (loc->stmt)
2027 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2029 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2030 gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2031 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2032 bbs->add (gimple_bb (stmt));
2034 return false;
2037 /* Helper function for execute_sm. On every location where REF is
2038 set, set an appropriate flag indicating the store. */
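/* Illustration (editor's sketch; MEM and the flag name are invented):
   every statement in the loop that stores to the reference, e.g.

       MEM = x;

   gets a new statement

       MEM_flag = true;

   inserted right after it, so the exit code emitted by
   execute_sm_if_changed can test the flag before writing MEM back.  */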
2040 static tree
2041 execute_sm_if_changed_flag_set (struct loop *loop, im_mem_ref *ref,
2042 hash_set <basic_block> *bbs)
2044 tree flag;
2045 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
2046 flag = create_tmp_reg (boolean_type_node, str);
2047 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs));
2048 return flag;
2051 /* Executes store motion of memory reference REF from LOOP.
2052 Exits from the LOOP are stored in EXITS. The initialization of the
2053 temporary variable is put in the preheader of the loop, and assignments
2054 to the reference from the temporary variable are emitted on the exits. */
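/* Editor's sketch of the simple (non-flag) variant, with invented names;
   the flag-guarded variant is illustrated above execute_sm_if_changed:

       for (...)
         if (foo)
           MEM = x;

   becomes

       MEM_lsm = MEM;       load, hoisted into the preheader
       for (...)
         if (foo)
           MEM_lsm = x;
       MEM = MEM_lsm;       unconditional store on every exit edge

   The unconditional store may introduce a write the original loop did
   not perform, so this variant is only used when the preheader is not
   in a transaction and either store data races are allowed or the
   reference is provably stored to whenever the loop is entered;
   otherwise the flag-guarded variant is emitted.  */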
2056 static void
2057 execute_sm (struct loop *loop, vec<edge> exits, im_mem_ref *ref)
2059 tree tmp_var, store_flag = NULL_TREE;
2060 unsigned i;
2061 gassign *load;
2062 struct fmt_data fmt_data;
2063 edge ex;
2064 struct lim_aux_data *lim_data;
2065 bool multi_threaded_model_p = false;
2066 gimple_stmt_iterator gsi;
2067 hash_set<basic_block> flag_bbs;
2069 if (dump_file && (dump_flags & TDF_DETAILS))
2071 fprintf (dump_file, "Executing store motion of ");
2072 print_generic_expr (dump_file, ref->mem.ref);
2073 fprintf (dump_file, " from loop %d\n", loop->num);
2076 tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2077 get_lsm_tmp_name (ref->mem.ref, ~0));
2079 fmt_data.loop = loop;
2080 fmt_data.orig_loop = loop;
2081 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2083 if (bb_in_transaction (loop_preheader_edge (loop)->src)
2084 || (! PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES)
2085 && ! ref_always_accessed_p (loop, ref, true)))
2086 multi_threaded_model_p = true;
2088 if (multi_threaded_model_p)
2089 store_flag = execute_sm_if_changed_flag_set (loop, ref, &flag_bbs);
2091 rewrite_mem_refs (loop, ref, tmp_var);
2093 /* Emit the load code on a random exit edge or into the latch if
2094 the loop does not exit, so that we are sure it will be processed
2095 by move_computations after all dependencies. */
2096 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2098 /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2099 load altogether, since the store is predicated by a flag. We
2100 could do the load only if it was originally in the loop. */
2101 load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
2102 lim_data = init_lim_data (load);
2103 lim_data->max_loop = loop;
2104 lim_data->tgt_loop = loop;
2105 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2107 if (multi_threaded_model_p)
2109 load = gimple_build_assign (store_flag, boolean_false_node);
2110 lim_data = init_lim_data (load);
2111 lim_data->max_loop = loop;
2112 lim_data->tgt_loop = loop;
2113 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2116 /* Sink the store to every exit from the loop. */
2117 FOR_EACH_VEC_ELT (exits, i, ex)
2118 if (!multi_threaded_model_p)
2120 gassign *store;
2121 store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
2122 gsi_insert_on_edge (ex, store);
2124 else
2125 execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag,
2126 loop_preheader_edge (loop), &flag_bbs);
2129 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2130 edges of the LOOP. */
2132 static void
2133 hoist_memory_references (struct loop *loop, bitmap mem_refs,
2134 vec<edge> exits)
2136 im_mem_ref *ref;
2137 unsigned i;
2138 bitmap_iterator bi;
2140 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2142 ref = memory_accesses.refs_list[i];
2143 execute_sm (loop, exits, ref);
2147 struct ref_always_accessed
2149 ref_always_accessed (struct loop *loop_, bool stored_p_)
2150 : loop (loop_), stored_p (stored_p_) {}
2151 bool operator () (mem_ref_loc *loc);
2152 struct loop *loop;
2153 bool stored_p;
2156 bool
2157 ref_always_accessed::operator () (mem_ref_loc *loc)
2159 struct loop *must_exec;
2161 if (!get_lim_data (loc->stmt))
2162 return false;
2164 /* If we require an always executed store, make sure the statement
2165 stores to the reference. */
2166 if (stored_p)
2168 tree lhs = gimple_get_lhs (loc->stmt);
2169 if (!lhs
2170 || lhs != *loc->ref)
2171 return false;
2174 must_exec = get_lim_data (loc->stmt)->always_executed_in;
2175 if (!must_exec)
2176 return false;
2178 if (must_exec == loop
2179 || flow_loop_nested_p (must_exec, loop))
2180 return true;
2182 return false;
2185 /* Returns true if REF is always accessed in LOOP. If STORED_P is true,
2186 make sure REF is always stored to in LOOP. */
2188 static bool
2189 ref_always_accessed_p (struct loop *loop, im_mem_ref *ref, bool stored_p)
2191 return for_all_locs_in_loop (loop, ref,
2192 ref_always_accessed (loop, stored_p));
2195 /* Returns true if REF1 and REF2 are independent. */
2197 static bool
2198 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2)
2200 if (ref1 == ref2)
2201 return true;
2203 if (dump_file && (dump_flags & TDF_DETAILS))
2204 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2205 ref1->id, ref2->id);
2207 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
2209 if (dump_file && (dump_flags & TDF_DETAILS))
2210 fprintf (dump_file, "dependent.\n");
2211 return false;
2213 else
2215 if (dump_file && (dump_flags & TDF_DETAILS))
2216 fprintf (dump_file, "independent.\n");
2217 return true;
2221 /* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
2222 and its super-loops. */
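/* Editor's example (loop numbering invented): if REF is found to be
   dependent inside a loop L3 nested in L1, the dep_loop bit is set for
   L3 and then for L1 as well, since a conflict inside L3 is also a
   conflict for every enclosing loop.  The walk stops early when it
   reaches the loop tree root or a bit that is already set.  */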
2224 static void
2225 record_dep_loop (struct loop *loop, im_mem_ref *ref, bool stored_p)
2227 /* We can propagate dependent-in-loop bits up the loop
2228 hierarchy to all outer loops. */
2229 while (loop != current_loops->tree_root
2230 && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2231 loop = loop_outer (loop);
2234 /* Returns true if REF is independent of all other memory
2235 references in LOOP. */
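/* Editor's example (array names invented): if REF is a load from a[i]
   and LOOP or one of its inner loops stores to b[j], where a and b may
   alias, the query below answers "dependent" and record_dep_loop caches
   that answer for LOOP and all enclosing loops; an "independent" answer
   is likewise cached in indep_loop for the queried loop, so later
   queries can return immediately.  */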
2237 static bool
2238 ref_indep_loop_p_1 (struct loop *loop, im_mem_ref *ref, bool stored_p)
2240 stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
2242 bool indep_p = true;
2243 bitmap refs_to_check;
2245 if (stored_p)
2246 refs_to_check = &memory_accesses.refs_in_loop[loop->num];
2247 else
2248 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2250 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
2251 indep_p = false;
2252 else
2254 if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2255 return true;
2256 if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2257 return false;
2259 struct loop *inner = loop->inner;
2260 while (inner)
2262 if (!ref_indep_loop_p_1 (inner, ref, stored_p))
2264 indep_p = false;
2265 break;
2267 inner = inner->next;
2270 if (indep_p)
2272 unsigned i;
2273 bitmap_iterator bi;
2274 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2276 im_mem_ref *aref = memory_accesses.refs_list[i];
2277 if (!refs_independent_p (ref, aref))
2279 indep_p = false;
2280 break;
2286 if (dump_file && (dump_flags & TDF_DETAILS))
2287 fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
2288 ref->id, loop->num, indep_p ? "independent" : "dependent");
2290 /* Record the computed result in the cache. */
2291 if (indep_p)
2293 if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
2294 && stored_p)
2296 /* If it's independent of all refs then it's independent
2297 of stores, too. */
2298 bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
2301 else
2303 record_dep_loop (loop, ref, stored_p);
2304 if (!stored_p)
2306 /* If it's dependent on stores, it's dependent on
2307 all refs, too. */
2308 record_dep_loop (loop, ref, true);
2312 return indep_p;
2315 /* Returns true if REF is independent of all other memory references in
2316 LOOP. */
2318 static bool
2319 ref_indep_loop_p (struct loop *loop, im_mem_ref *ref)
2321 gcc_checking_assert (MEM_ANALYZABLE (ref));
2323 return ref_indep_loop_p_1 (loop, ref, false);
2326 /* Returns true if we can perform store motion of REF from LOOP. */
2328 static bool
2329 can_sm_ref_p (struct loop *loop, im_mem_ref *ref)
2331 tree base;
2333 /* Can't hoist unanalyzable refs. */
2334 if (!MEM_ANALYZABLE (ref))
2335 return false;
2337 /* It should be movable. */
2338 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2339 || TREE_THIS_VOLATILE (ref->mem.ref)
2340 || !for_each_index (&ref->mem.ref, may_move_till, loop))
2341 return false;
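/* Editor's example (invented): a reference such as a[i] whose index i
   changes on every iteration fails the may_move_till check above, since
   store motion requires the accessed location to be the same throughout
   the loop; volatile accesses are rejected as well.  */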
2343 /* If it can throw, fail; we do not properly update EH info. */
2344 if (tree_could_throw_p (ref->mem.ref))
2345 return false;
2347 /* If it can trap, it must be always executed in LOOP.
2348 Readonly memory locations may trap when storing to them, but
2349 tree_could_trap_p is a predicate for rvalues, so check that
2350 explicitly. */
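/* Editor's example (names invented): in

       for (...)
         if (p)
           p->x = i;

   the access p->x may trap when p is NULL, and on such iterations the
   loop never touches it, so a hoisted load of p->x in the preheader or
   an unconditional store sunk onto the exits could introduce a fault.
   Such refs are only handled when a store to them is always executed
   in LOOP.  */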
2351 base = get_base_address (ref->mem.ref);
2352 if ((tree_could_trap_p (ref->mem.ref)
2353 || (DECL_P (base) && TREE_READONLY (base)))
2354 && !ref_always_accessed_p (loop, ref, true))
2355 return false;
2357 /* And it must be independent of all other memory references
2358 in LOOP. */
2359 if (!ref_indep_loop_p (loop, ref))
2360 return false;
2362 return true;
2365 /* Marks in REFS_TO_SM the references in LOOP for which store motion
2366 should be performed. SM_EXECUTED is the set of references for which
2367 store motion was already performed in one of the outer loops. */
2369 static void
2370 find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2372 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2373 unsigned i;
2374 bitmap_iterator bi;
2375 im_mem_ref *ref;
2377 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2379 ref = memory_accesses.refs_list[i];
2380 if (can_sm_ref_p (loop, ref))
2381 bitmap_set_bit (refs_to_sm, i);
2385 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2386 for a store motion optimization (i.e. whether we can insert statements
2387 on its exits). */
2389 static bool
2390 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
2391 vec<edge> exits)
2393 unsigned i;
2394 edge ex;
2396 FOR_EACH_VEC_ELT (exits, i, ex)
2397 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2398 return false;
2400 return true;
2403 /* Try to perform store motion for all memory references modified inside
2404 LOOP. SM_EXECUTED is the bitmap of the memory references for which
2405 store motion was already executed in one of the outer loops. */
2407 static void
2408 store_motion_loop (struct loop *loop, bitmap sm_executed)
2410 vec<edge> exits = get_loop_exit_edges (loop);
2411 struct loop *subloop;
2412 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2414 if (loop_suitable_for_sm (loop, exits))
2416 find_refs_for_sm (loop, sm_executed, sm_in_loop);
2417 hoist_memory_references (loop, sm_in_loop, exits);
2419 exits.release ();
2421 bitmap_ior_into (sm_executed, sm_in_loop);
2422 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2423 store_motion_loop (subloop, sm_executed);
2424 bitmap_and_compl_into (sm_executed, sm_in_loop);
2425 BITMAP_FREE (sm_in_loop);
2428 /* Try to perform store motion for all memory references modified inside
2429 loops. */
2431 static void
2432 store_motion (void)
2434 struct loop *loop;
2435 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
2437 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2438 store_motion_loop (loop, sm_executed);
2440 BITMAP_FREE (sm_executed);
2441 gsi_commit_edge_inserts ();
2444 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2445 for each such basic block bb records the outermost loop for which execution
2446 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
2447 blocks that contain a nonpure call. */
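/* Editor's example (invented bottom-exit loop, no nonpure calls):

       do {
         H;              header
         if (c)
           B;
         C;              dominates the latch
       } while (cond);   exit test after C

   H and C get ALWAYS_EXECUTED_IN set to the loop, while B does not:
   whenever the header runs, H and C are reached on that iteration, but
   B may be skipped.  A block containing a nonpure call or an earlier
   exit edge stops the scan, since blocks after it need not be
   reached.  */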
2449 static void
2450 fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call)
2452 basic_block bb = NULL, *bbs, last = NULL;
2453 unsigned i;
2454 edge e;
2455 struct loop *inn_loop = loop;
2457 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2459 bbs = get_loop_body_in_dom_order (loop);
2461 for (i = 0; i < loop->num_nodes; i++)
2463 edge_iterator ei;
2464 bb = bbs[i];
2466 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2467 last = bb;
2469 if (bitmap_bit_p (contains_call, bb->index))
2470 break;
2472 FOR_EACH_EDGE (e, ei, bb->succs)
2474 /* If there is an exit from this BB. */
2475 if (!flow_bb_inside_loop_p (loop, e->dest))
2476 break;
2477 /* Or we enter a possibly non-finite loop. */
2478 if (flow_loop_nested_p (bb->loop_father,
2479 e->dest->loop_father)
2480 && ! finite_loop_p (e->dest->loop_father))
2481 break;
2483 if (e)
2484 break;
2486 /* A loop might be infinite (TODO use simple loop analysis
2487 to disprove this if possible). */
2488 if (bb->flags & BB_IRREDUCIBLE_LOOP)
2489 break;
2491 if (!flow_bb_inside_loop_p (inn_loop, bb))
2492 break;
2494 if (bb->loop_father->header == bb)
2496 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2497 break;
2499 /* In a loop that is always entered we may proceed anyway.
2500 But record that we entered it and stop once we leave it. */
2501 inn_loop = bb->loop_father;
2505 while (1)
2507 SET_ALWAYS_EXECUTED_IN (last, loop);
2508 if (last == loop->header)
2509 break;
2510 last = get_immediate_dominator (CDI_DOMINATORS, last);
2513 free (bbs);
2516 for (loop = loop->inner; loop; loop = loop->next)
2517 fill_always_executed_in_1 (loop, contains_call);
2520 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
2521 for each such basic block bb records the outermost loop for which execution
2522 of its header implies execution of bb. */
2524 static void
2525 fill_always_executed_in (void)
2527 basic_block bb;
2528 struct loop *loop;
2530 auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
2531 bitmap_clear (contains_call);
2532 FOR_EACH_BB_FN (bb, cfun)
2534 gimple_stmt_iterator gsi;
2535 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2537 if (nonpure_call_p (gsi_stmt (gsi)))
2538 break;
2541 if (!gsi_end_p (gsi))
2542 bitmap_set_bit (contains_call, bb->index);
2545 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
2546 fill_always_executed_in_1 (loop, contains_call);
2550 /* Compute the global information needed by the loop invariant motion pass. */
2552 static void
2553 tree_ssa_lim_initialize (void)
2555 struct loop *loop;
2556 unsigned i;
2558 bitmap_obstack_initialize (&lim_bitmap_obstack);
2559 gcc_obstack_init (&mem_ref_obstack);
2560 lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;
2562 if (flag_tm)
2563 compute_transaction_bits ();
2565 alloc_aux_for_edges (0);
2567 memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
2568 memory_accesses.refs_list.create (100);
2569 /* Allocate a special, unanalyzable mem-ref with ID zero. */
2570 memory_accesses.refs_list.quick_push
2571 (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
2573 memory_accesses.refs_in_loop.create (number_of_loops (cfun));
2574 memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
2575 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
2576 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2577 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
2578 memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2580 for (i = 0; i < number_of_loops (cfun); i++)
2582 bitmap_initialize (&memory_accesses.refs_in_loop[i],
2583 &lim_bitmap_obstack);
2584 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
2585 &lim_bitmap_obstack);
2586 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
2587 &lim_bitmap_obstack);
2590 memory_accesses.ttae_cache = NULL;
2592 /* Initialize bb_loop_postorder with a mapping from loop->num to
2593 its postorder index. */
2594 i = 0;
2595 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
2596 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2597 bb_loop_postorder[loop->num] = i++;
2600 /* Cleans up after the invariant motion pass. */
2602 static void
2603 tree_ssa_lim_finalize (void)
2605 basic_block bb;
2606 unsigned i;
2607 im_mem_ref *ref;
2609 free_aux_for_edges ();
2611 FOR_EACH_BB_FN (bb, cfun)
2612 SET_ALWAYS_EXECUTED_IN (bb, NULL);
2614 bitmap_obstack_release (&lim_bitmap_obstack);
2615 delete lim_aux_data_map;
2617 delete memory_accesses.refs;
2618 memory_accesses.refs = NULL;
2620 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
2621 memref_free (ref);
2622 memory_accesses.refs_list.release ();
2623 obstack_free (&mem_ref_obstack, NULL);
2625 memory_accesses.refs_in_loop.release ();
2626 memory_accesses.refs_stored_in_loop.release ();
2627 memory_accesses.all_refs_stored_in_loop.release ();
2629 if (memory_accesses.ttae_cache)
2630 free_affine_expand_cache (&memory_accesses.ttae_cache);
2632 free (bb_loop_postorder);
2635 /* Moves invariants from loops. Only "expensive" invariants are moved out --
2636 i.e. those that are likely to be a win regardless of the register pressure. */
2638 static unsigned int
2639 tree_ssa_lim (void)
2641 unsigned int todo;
2643 tree_ssa_lim_initialize ();
2645 /* Gathers information about memory accesses in the loops. */
2646 analyze_memory_references ();
2648 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
2649 fill_always_executed_in ();
2651 /* For each statement determine the outermost loop in which it is
2652 invariant and the cost of computing the invariant. */
2653 invariantness_dom_walker (CDI_DOMINATORS)
2654 .walk (cfun->cfg->x_entry_block_ptr);
2656 /* Execute store motion. Force the necessary invariants to be moved
2657 out of the loops as well. */
2658 store_motion ();
2660 /* Move the expressions that are expensive enough. */
2661 todo = move_computations ();
2663 tree_ssa_lim_finalize ();
2665 return todo;
2668 /* Loop invariant motion pass. */
2670 namespace {
2672 const pass_data pass_data_lim =
2674 GIMPLE_PASS, /* type */
2675 "lim", /* name */
2676 OPTGROUP_LOOP, /* optinfo_flags */
2677 TV_LIM, /* tv_id */
2678 PROP_cfg, /* properties_required */
2679 0, /* properties_provided */
2680 0, /* properties_destroyed */
2681 0, /* todo_flags_start */
2682 0, /* todo_flags_finish */
2685 class pass_lim : public gimple_opt_pass
2687 public:
2688 pass_lim (gcc::context *ctxt)
2689 : gimple_opt_pass (pass_data_lim, ctxt)
2692 /* opt_pass methods: */
2693 opt_pass * clone () { return new pass_lim (m_ctxt); }
2694 virtual bool gate (function *) { return flag_tree_loop_im != 0; }
2695 virtual unsigned int execute (function *);
2697 }; // class pass_lim
2699 unsigned int
2700 pass_lim::execute (function *fun)
2702 bool in_loop_pipeline = scev_initialized_p ();
2703 if (!in_loop_pipeline)
2704 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2706 if (number_of_loops (fun) <= 1)
2707 return 0;
2708 unsigned int todo = tree_ssa_lim ();
2710 if (!in_loop_pipeline)
2711 loop_optimizer_finalize ();
2712 else
2713 scev_reset ();
2714 return todo;
2717 } // anon namespace
2719 gimple_opt_pass *
2720 make_pass_lim (gcc::context *ctxt)
2722 return new pass_lim (ctxt);