gcc/tree-ssa-loop-im.c  (official-gcc.git, merge from trunk)
1 /* Loop invariant motion.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "tree.h"
25 #include "tm_p.h"
26 #include "basic-block.h"
27 #include "gimple-pretty-print.h"
28 #include "pointer-set.h"
29 #include "hash-map.h"
30 #include "hash-table.h"
31 #include "tree-ssa-alias.h"
32 #include "internal-fn.h"
33 #include "tree-eh.h"
34 #include "gimple-expr.h"
35 #include "is-a.h"
36 #include "gimple.h"
37 #include "gimplify.h"
38 #include "gimple-iterator.h"
39 #include "gimple-ssa.h"
40 #include "tree-cfg.h"
41 #include "tree-phinodes.h"
42 #include "ssa-iterators.h"
43 #include "stringpool.h"
44 #include "tree-ssanames.h"
45 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-into-ssa.h"
48 #include "cfgloop.h"
49 #include "domwalk.h"
50 #include "params.h"
51 #include "tree-pass.h"
52 #include "flags.h"
53 #include "tree-affine.h"
54 #include "tree-ssa-propagate.h"
55 #include "trans-mem.h"
56 #include "gimple-fold.h"
58 /* TODO:  Support for predicated code motion.  I.e.
60    while (1)
61      {
62        if (cond)
63          {
64            a = inv;
65            something;
66          }
67      }
69    where COND and INV are invariants, but evaluating INV may trap or be
70    invalid for some other reason if !COND.  This may be transformed to
72    if (cond)
73      a = inv;
74    while (1)
75      {
76        if (cond)
77          something;
78      } */
80 /* The auxiliary data kept for each statement. */
82 struct lim_aux_data
83 {
84 struct loop *max_loop; /* The outermost loop in that the statement
85 is invariant. */
87 struct loop *tgt_loop; /* The loop out of that we want to move the
88 invariant. */
90 struct loop *always_executed_in;
91 /* The outermost loop for that we are sure
92 the statement is executed if the loop
93 is entered. */
95 unsigned cost; /* Cost of the computation performed by the
96 statement. */
98 vec<gimple> depends; /* Vector of statements that must be also
99 hoisted out of the loop when this statement
100 is hoisted; i.e. those that define the
101 operands of the statement and are inside of
102                                    the MAX_LOOP loop.  */
103 };
105 /* Maps statements to their lim_aux_data. */
107 static hash_map<gimple, lim_aux_data *> *lim_aux_data_map;
109 /* Description of a memory reference location. */
111 typedef struct mem_ref_loc
112 {
113 tree *ref; /* The reference itself. */
114 gimple stmt; /* The statement in that it occurs. */
115 } *mem_ref_loc_p;
118 /* Description of a memory reference. */
120 typedef struct mem_ref
121 {
122 unsigned id; /* ID assigned to the memory reference
123 (its index in memory_accesses.refs_list) */
124 hashval_t hash; /* Its hash value. */
126 /* The memory access itself and associated caching of alias-oracle
127 query meta-data. */
128 ao_ref mem;
130 bitmap stored; /* The set of loops in that this memory location
131 is stored to. */
132 vec<mem_ref_loc> accesses_in_loop;
133 /* The locations of the accesses. Vector
134 indexed by the loop number. */
136 /* The following sets are computed on demand. We keep both set and
137 its complement, so that we know whether the information was
138 already computed or not. */
139 bitmap_head indep_loop; /* The set of loops in that the memory
140 reference is independent, meaning:
141 If it is stored in the loop, this store
142 is independent on all other loads and
143 stores.
144 If it is only loaded, then it is independent
145 on all stores in the loop. */
146 bitmap_head dep_loop; /* The complement of INDEP_LOOP. */
147 } *mem_ref_p;
149 /* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
150 to record (in)dependence against stores in the loop and its subloops, the
151 second to record (in)dependence against all references in the loop
152 and its subloops. */
153 #define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
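/* For example, a loop with num == 3 occupies bits 6 and 7 of these bitmaps:
   LOOP_DEP_BIT (3, 0) == 6 and LOOP_DEP_BIT (3, 1) == 7.  */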
155 /* Mem_ref hashtable helpers. */
157 struct mem_ref_hasher : typed_noop_remove <mem_ref>
158 {
159 typedef mem_ref value_type;
160 typedef tree_node compare_type;
161 static inline hashval_t hash (const value_type *);
162   static inline bool equal (const value_type *, const compare_type *);
163 };
165 /* A hash function for struct mem_ref object OBJ. */
167 inline hashval_t
168 mem_ref_hasher::hash (const value_type *mem)
170 return mem->hash;
173 /* An equality function for struct mem_ref object MEM1 with
174 memory reference OBJ2. */
176 inline bool
177 mem_ref_hasher::equal (const value_type *mem1, const compare_type *obj2)
179 return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
183 /* Description of memory accesses in loops. */
185 static struct
186 {
187 /* The hash table of memory references accessed in loops. */
188 hash_table<mem_ref_hasher> *refs;
190 /* The list of memory references. */
191 vec<mem_ref_p> refs_list;
193 /* The set of memory references accessed in each loop. */
194 vec<bitmap_head> refs_in_loop;
196 /* The set of memory references stored in each loop. */
197 vec<bitmap_head> refs_stored_in_loop;
199   /* The set of memory references stored in each loop, including subloops.  */
200 vec<bitmap_head> all_refs_stored_in_loop;
202 /* Cache for expanding memory addresses. */
203 struct pointer_map_t *ttae_cache;
204 } memory_accesses;
206 /* Obstack for the bitmaps in the above data structures. */
207 static bitmap_obstack lim_bitmap_obstack;
208 static obstack mem_ref_obstack;
210 static bool ref_indep_loop_p (struct loop *, mem_ref_p);
212 /* Minimum cost of an expensive expression. */
213 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
215 /* The outermost loop for which execution of the header guarantees that the
216 block will be executed. */
217 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
218 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
220 /* ID of the shared unanalyzable mem. */
221 #define UNANALYZABLE_MEM_ID 0
223 /* Whether the reference was analyzable. */
224 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
226 static struct lim_aux_data *
227 init_lim_data (gimple stmt)
229 lim_aux_data *p = XCNEW (struct lim_aux_data);
230 lim_aux_data_map->put (stmt, p);
232 return p;
235 static struct lim_aux_data *
236 get_lim_data (gimple stmt)
238 lim_aux_data **p = lim_aux_data_map->get (stmt);
239 if (!p)
240 return NULL;
242 return *p;
245 /* Releases the memory occupied by DATA. */
247 static void
248 free_lim_aux_data (struct lim_aux_data *data)
250 data->depends.release ();
251 free (data);
254 static void
255 clear_lim_data (gimple stmt)
257 lim_aux_data **p = lim_aux_data_map->get (stmt);
258 if (!p)
259 return;
261 free_lim_aux_data (*p);
262 *p = NULL;
266 /* The possibilities of statement movement. */
267 enum move_pos
268   {
269     MOVE_IMPOSSIBLE,          /* No movement -- side effect expression.  */
270     MOVE_PRESERVE_EXECUTION,  /* Must not cause the non-executed statement
271                                  to become executed -- memory accesses, ... */
272     MOVE_POSSIBLE             /* Unlimited movement.  */
273   };
276 /* If it is possible to hoist the statement STMT unconditionally,
277 returns MOVE_POSSIBLE.
278 If it is possible to hoist the statement STMT, but we must avoid making
279 it executed if it would not be executed in the original program (e.g.
280 because it may trap), return MOVE_PRESERVE_EXECUTION.
281 Otherwise return MOVE_IMPOSSIBLE. */
283 enum move_pos
284 movement_possibility (gimple stmt)
286 tree lhs;
287 enum move_pos ret = MOVE_POSSIBLE;
289 if (flag_unswitch_loops
290 && gimple_code (stmt) == GIMPLE_COND)
292 /* If we perform unswitching, force the operands of the invariant
293 condition to be moved out of the loop. */
294 return MOVE_POSSIBLE;
297 if (gimple_code (stmt) == GIMPLE_PHI
298 && gimple_phi_num_args (stmt) <= 2
299 && !virtual_operand_p (gimple_phi_result (stmt))
300 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
301 return MOVE_POSSIBLE;
303 if (gimple_get_lhs (stmt) == NULL_TREE)
304 return MOVE_IMPOSSIBLE;
306 if (gimple_vdef (stmt))
307 return MOVE_IMPOSSIBLE;
309 if (stmt_ends_bb_p (stmt)
310 || gimple_has_volatile_ops (stmt)
311 || gimple_has_side_effects (stmt)
312 || stmt_could_throw_p (stmt))
313 return MOVE_IMPOSSIBLE;
315 if (is_gimple_call (stmt))
317 /* While pure or const call is guaranteed to have no side effects, we
318 cannot move it arbitrarily. Consider code like
320 char *s = something ();
322 while (1)
324 if (s)
325 t = strlen (s);
326 else
327 t = 0;
330 Here the strlen call cannot be moved out of the loop, even though
331 s is invariant. In addition to possibly creating a call with
332 invalid arguments, moving out a function call that is not executed
333 may cause performance regressions in case the call is costly and
334 not executed at all. */
335 ret = MOVE_PRESERVE_EXECUTION;
336 lhs = gimple_call_lhs (stmt);
338 else if (is_gimple_assign (stmt))
339 lhs = gimple_assign_lhs (stmt);
340 else
341 return MOVE_IMPOSSIBLE;
343 if (TREE_CODE (lhs) == SSA_NAME
344 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
345 return MOVE_IMPOSSIBLE;
347 if (TREE_CODE (lhs) != SSA_NAME
348 || gimple_could_trap_p (stmt))
349 return MOVE_PRESERVE_EXECUTION;
351 /* Non local loads in a transaction cannot be hoisted out. Well,
352 unless the load happens on every path out of the loop, but we
353 don't take this into account yet. */
354 if (flag_tm
355 && gimple_in_transaction (stmt)
356 && gimple_assign_single_p (stmt))
358 tree rhs = gimple_assign_rhs1 (stmt);
359 if (DECL_P (rhs) && is_global_var (rhs))
361 if (dump_file)
363 fprintf (dump_file, "Cannot hoist conditional load of ");
364 print_generic_expr (dump_file, rhs, TDF_SLIM);
365 fprintf (dump_file, " because it is in a transaction.\n");
367 return MOVE_IMPOSSIBLE;
371 return ret;
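/* A simplified illustration of the classification above (statement shapes
   are sketches, not literal GIMPLE):
     x_1 = y_2 + z_3;      typically MOVE_POSSIBLE (pure SSA arithmetic that
                           cannot trap)
     t_4 = strlen (s_5);   MOVE_PRESERVE_EXECUTION (pure call, see above)
     a[i_6] = x_1;         MOVE_IMPOSSIBLE (the store creates a virtual def)
   Volatile operands, side effects, statements that may throw and statements
   ending a basic block are MOVE_IMPOSSIBLE; a statement that may merely trap
   gets MOVE_PRESERVE_EXECUTION.  */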
374 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
375 loop to that we could move the expression using DEF if it did not have
376 other operands, i.e. the outermost loop enclosing LOOP in that the value
377 of DEF is invariant. */
379 static struct loop *
380 outermost_invariant_loop (tree def, struct loop *loop)
382 gimple def_stmt;
383 basic_block def_bb;
384 struct loop *max_loop;
385 struct lim_aux_data *lim_data;
387 if (!def)
388 return superloop_at_depth (loop, 1);
390 if (TREE_CODE (def) != SSA_NAME)
392 gcc_assert (is_gimple_min_invariant (def));
393 return superloop_at_depth (loop, 1);
396 def_stmt = SSA_NAME_DEF_STMT (def);
397 def_bb = gimple_bb (def_stmt);
398 if (!def_bb)
399 return superloop_at_depth (loop, 1);
401 max_loop = find_common_loop (loop, def_bb->loop_father);
403 lim_data = get_lim_data (def_stmt);
404 if (lim_data != NULL && lim_data->max_loop != NULL)
405 max_loop = find_common_loop (max_loop,
406 loop_outer (lim_data->max_loop));
407 if (max_loop == loop)
408 return NULL;
409 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
411 return max_loop;
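/* For instance (an illustration of the logic above, not a literal case):
   if LOOP is at depth 3 and DEF is defined inside its enclosing loop at
   depth 1, the common loop is that depth-1 loop, so the function returns
   the depth-2 superloop of LOOP -- the outermost loop enclosing LOOP in
   which DEF does not vary.  */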
414 /* DATA is a structure containing information associated with a statement
415 inside LOOP. DEF is one of the operands of this statement.
417 Find the outermost loop enclosing LOOP in that value of DEF is invariant
418 and record this in DATA->max_loop field. If DEF itself is defined inside
419 this loop as well (i.e. we need to hoist it out of the loop if we want
420 to hoist the statement represented by DATA), record the statement in that
421 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
422 add the cost of the computation of DEF to the DATA->cost.
424 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
426 static bool
427 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
428 bool add_cost)
430 gimple def_stmt = SSA_NAME_DEF_STMT (def);
431 basic_block def_bb = gimple_bb (def_stmt);
432 struct loop *max_loop;
433 struct lim_aux_data *def_data;
435 if (!def_bb)
436 return true;
438 max_loop = outermost_invariant_loop (def, loop);
439 if (!max_loop)
440 return false;
442 if (flow_loop_nested_p (data->max_loop, max_loop))
443 data->max_loop = max_loop;
445 def_data = get_lim_data (def_stmt);
446 if (!def_data)
447 return true;
449 if (add_cost
450 /* Only add the cost if the statement defining DEF is inside LOOP,
451 i.e. if it is likely that by moving the invariants dependent
452 on it, we will be able to avoid creating a new register for
453 it (since it will be only used in these dependent invariants). */
454 && def_bb->loop_father == loop)
455 data->cost += def_data->cost;
457 data->depends.safe_push (def_stmt);
459 return true;
462 /* Returns an estimate for a cost of statement STMT. The values here
463 are just ad-hoc constants, similar to costs for inlining. */
465 static unsigned
466 stmt_cost (gimple stmt)
468 /* Always try to create possibilities for unswitching. */
469 if (gimple_code (stmt) == GIMPLE_COND
470 || gimple_code (stmt) == GIMPLE_PHI)
471 return LIM_EXPENSIVE;
473 /* We should be hoisting calls if possible. */
474 if (is_gimple_call (stmt))
476 tree fndecl;
478 /* Unless the call is a builtin_constant_p; this always folds to a
479 constant, so moving it is useless. */
480 fndecl = gimple_call_fndecl (stmt);
481 if (fndecl
482 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
483 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
484 return 0;
486 return LIM_EXPENSIVE;
489 /* Hoisting memory references out should almost surely be a win. */
490 if (gimple_references_memory_p (stmt))
491 return LIM_EXPENSIVE;
493 if (gimple_code (stmt) != GIMPLE_ASSIGN)
494 return 1;
496 switch (gimple_assign_rhs_code (stmt))
498 case MULT_EXPR:
499 case WIDEN_MULT_EXPR:
500 case WIDEN_MULT_PLUS_EXPR:
501 case WIDEN_MULT_MINUS_EXPR:
502 case DOT_PROD_EXPR:
503 case FMA_EXPR:
504 case TRUNC_DIV_EXPR:
505 case CEIL_DIV_EXPR:
506 case FLOOR_DIV_EXPR:
507 case ROUND_DIV_EXPR:
508 case EXACT_DIV_EXPR:
509 case CEIL_MOD_EXPR:
510 case FLOOR_MOD_EXPR:
511 case ROUND_MOD_EXPR:
512 case TRUNC_MOD_EXPR:
513 case RDIV_EXPR:
514 /* Division and multiplication are usually expensive. */
515 return LIM_EXPENSIVE;
517 case LSHIFT_EXPR:
518 case RSHIFT_EXPR:
519 case WIDEN_LSHIFT_EXPR:
520 case LROTATE_EXPR:
521 case RROTATE_EXPR:
522 /* Shifts and rotates are usually expensive. */
523 return LIM_EXPENSIVE;
525 case CONSTRUCTOR:
526 /* Make vector construction cost proportional to the number
527 of elements. */
528 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
530 case SSA_NAME:
531 case PAREN_EXPR:
532 /* Whether or not something is wrapped inside a PAREN_EXPR
533 should not change move cost. Nor should an intermediate
534 unpropagated SSA name copy. */
535 return 0;
537 default:
538 return 1;
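/* In summary: conditions, PHIs, calls (other than __builtin_constant_p),
   memory references, multiplications, divisions, shifts and rotates are
   all charged LIM_EXPENSIVE (the --param lim-expensive value); vector
   CONSTRUCTORs cost one unit per element; plain SSA copies and PAREN_EXPRs
   are free; everything else costs 1.  */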
542 /* Finds the outermost loop between OUTER and LOOP in that the memory reference
543 REF is independent. If REF is not independent in LOOP, NULL is returned
544 instead. */
546 static struct loop *
547 outermost_indep_loop (struct loop *outer, struct loop *loop, mem_ref_p ref)
549 struct loop *aloop;
551 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
552 return NULL;
554 for (aloop = outer;
555 aloop != loop;
556 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
557 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
558 && ref_indep_loop_p (aloop, ref))
559 return aloop;
561 if (ref_indep_loop_p (loop, ref))
562 return loop;
563 else
564 return NULL;
567 /* If there is a simple load or store to a memory reference in STMT, returns
568 the location of the memory reference, and sets IS_STORE according to whether
569 it is a store or load. Otherwise, returns NULL. */
571 static tree *
572 simple_mem_ref_in_stmt (gimple stmt, bool *is_store)
574 tree *lhs, *rhs;
576 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
577 if (!gimple_assign_single_p (stmt))
578 return NULL;
580 lhs = gimple_assign_lhs_ptr (stmt);
581 rhs = gimple_assign_rhs1_ptr (stmt);
583 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
585 *is_store = false;
586 return rhs;
588 else if (gimple_vdef (stmt)
589 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
591 *is_store = true;
592 return lhs;
594 else
595 return NULL;
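/* For example (sketched GIMPLE, names illustrative):
     x_1 = *p_2;      load:  *IS_STORE = false, address of the RHS is returned
     *p_2 = x_1;      store: *IS_STORE = true, address of the LHS is returned
   Aggregate copies (memory on both sides), calls and other statements
   yield NULL.  */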
598 /* Returns the memory reference contained in STMT. */
600 static mem_ref_p
601 mem_ref_in_stmt (gimple stmt)
603 bool store;
604 tree *mem = simple_mem_ref_in_stmt (stmt, &store);
605 hashval_t hash;
606 mem_ref_p ref;
608 if (!mem)
609 return NULL;
610 gcc_assert (!store);
612 hash = iterative_hash_expr (*mem, 0);
613 ref = memory_accesses.refs->find_with_hash (*mem, hash);
615 gcc_assert (ref != NULL);
616 return ref;
619 /* From a controlling predicate in DOM determine the arguments from
620 the PHI node PHI that are chosen if the predicate evaluates to
621 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
622 they are non-NULL. Returns true if the arguments can be determined,
623 else return false. */
625 static bool
626 extract_true_false_args_from_phi (basic_block dom, gimple phi,
627 tree *true_arg_p, tree *false_arg_p)
629 basic_block bb = gimple_bb (phi);
630 edge true_edge, false_edge, tem;
631 tree arg0 = NULL_TREE, arg1 = NULL_TREE;
633 /* We have to verify that one edge into the PHI node is dominated
634 by the true edge of the predicate block and the other edge
635 dominated by the false edge. This ensures that the PHI argument
636 we are going to take is completely determined by the path we
637 take from the predicate block.
638 We can only use BB dominance checks below if the destination of
639 the true/false edges are dominated by their edge, thus only
640 have a single predecessor. */
641 extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
642 tem = EDGE_PRED (bb, 0);
643 if (tem == true_edge
644 || (single_pred_p (true_edge->dest)
645 && (tem->src == true_edge->dest
646 || dominated_by_p (CDI_DOMINATORS,
647 tem->src, true_edge->dest))))
648 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
649 else if (tem == false_edge
650 || (single_pred_p (false_edge->dest)
651 && (tem->src == false_edge->dest
652 || dominated_by_p (CDI_DOMINATORS,
653 tem->src, false_edge->dest))))
654 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
655 else
656 return false;
657 tem = EDGE_PRED (bb, 1);
658 if (tem == true_edge
659 || (single_pred_p (true_edge->dest)
660 && (tem->src == true_edge->dest
661 || dominated_by_p (CDI_DOMINATORS,
662 tem->src, true_edge->dest))))
663 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
664 else if (tem == false_edge
665 || (single_pred_p (false_edge->dest)
666 && (tem->src == false_edge->dest
667 || dominated_by_p (CDI_DOMINATORS,
668 tem->src, false_edge->dest))))
669 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
670 else
671 return false;
672 if (!arg0 || !arg1)
673 return false;
675 if (true_arg_p)
676 *true_arg_p = arg0;
677 if (false_arg_p)
678 *false_arg_p = arg1;
680 return true;
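/* A sketch of the shape this handles (names illustrative): DOM ends in
   "if (cond)", its true edge reaches -- possibly through intermediate
   dominated blocks -- the edge that brings ARG0 into the PHI, and its
   false edge likewise reaches the edge bringing ARG1:

        if (cond)        <-- DOM
         /      \
      arg0      arg1
         \      /
       x = PHI <arg0, arg1>  */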
683 /* Determine the outermost loop to that it is possible to hoist a statement
684 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
685 the outermost loop in that the value computed by STMT is invariant.
686 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
687 we preserve the fact whether STMT is executed. It also fills other related
688 information to LIM_DATA (STMT).
690 The function returns false if STMT cannot be hoisted outside of the loop it
691 is defined in, and true otherwise. */
693 static bool
694 determine_max_movement (gimple stmt, bool must_preserve_exec)
696 basic_block bb = gimple_bb (stmt);
697 struct loop *loop = bb->loop_father;
698 struct loop *level;
699 struct lim_aux_data *lim_data = get_lim_data (stmt);
700 tree val;
701 ssa_op_iter iter;
703 if (must_preserve_exec)
704 level = ALWAYS_EXECUTED_IN (bb);
705 else
706 level = superloop_at_depth (loop, 1);
707 lim_data->max_loop = level;
709 if (gimple_code (stmt) == GIMPLE_PHI)
711 use_operand_p use_p;
712 unsigned min_cost = UINT_MAX;
713 unsigned total_cost = 0;
714 struct lim_aux_data *def_data;
716 /* We will end up promoting dependencies to be unconditionally
717 evaluated. For this reason the PHI cost (and thus the
718 cost we remove from the loop by doing the invariant motion)
719 is that of the cheapest PHI argument dependency chain. */
720 FOR_EACH_PHI_ARG (use_p, stmt, iter, SSA_OP_USE)
722 val = USE_FROM_PTR (use_p);
724 if (TREE_CODE (val) != SSA_NAME)
726 /* Assign const 1 to constants. */
727 min_cost = MIN (min_cost, 1);
728 total_cost += 1;
729 continue;
731 if (!add_dependency (val, lim_data, loop, false))
732 return false;
734 gimple def_stmt = SSA_NAME_DEF_STMT (val);
735 if (gimple_bb (def_stmt)
736 && gimple_bb (def_stmt)->loop_father == loop)
738 def_data = get_lim_data (def_stmt);
739 if (def_data)
741 min_cost = MIN (min_cost, def_data->cost);
742 total_cost += def_data->cost;
747 min_cost = MIN (min_cost, total_cost);
748 lim_data->cost += min_cost;
750 if (gimple_phi_num_args (stmt) > 1)
752 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
753 gimple cond;
754 if (gsi_end_p (gsi_last_bb (dom)))
755 return false;
756 cond = gsi_stmt (gsi_last_bb (dom));
757 if (gimple_code (cond) != GIMPLE_COND)
758 return false;
759 /* Verify that this is an extended form of a diamond and
760 the PHI arguments are completely controlled by the
761 predicate in DOM. */
762 if (!extract_true_false_args_from_phi (dom, stmt, NULL, NULL))
763 return false;
765 /* Fold in dependencies and cost of the condition. */
766 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
768 if (!add_dependency (val, lim_data, loop, false))
769 return false;
770 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
771 if (def_data)
772 total_cost += def_data->cost;
775 /* We want to avoid unconditionally executing very expensive
776 operations. As costs for our dependencies cannot be
777          negative just claim we are not invariant for this case.
778 We also are not sure whether the control-flow inside the
779 loop will vanish. */
780 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
781 && !(min_cost != 0
782 && total_cost / min_cost <= 2))
783 return false;
785 /* Assume that the control-flow in the loop will vanish.
786 ??? We should verify this and not artificially increase
787 the cost if that is not the case. */
788 lim_data->cost += stmt_cost (stmt);
791 return true;
793 else
794 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
795 if (!add_dependency (val, lim_data, loop, true))
796 return false;
798 if (gimple_vuse (stmt))
800 mem_ref_p ref = mem_ref_in_stmt (stmt);
802 if (ref)
804 lim_data->max_loop
805 = outermost_indep_loop (lim_data->max_loop, loop, ref);
806 if (!lim_data->max_loop)
807 return false;
809 else
811 if ((val = gimple_vuse (stmt)) != NULL_TREE)
813 if (!add_dependency (val, lim_data, loop, false))
814 return false;
819 lim_data->cost += stmt_cost (stmt);
821 return true;
824 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
825 and that one of the operands of this statement is computed by STMT.
826 Ensure that STMT (together with all the statements that define its
827 operands) is hoisted at least out of the loop LEVEL. */
829 static void
830 set_level (gimple stmt, struct loop *orig_loop, struct loop *level)
832 struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
833 struct lim_aux_data *lim_data;
834 gimple dep_stmt;
835 unsigned i;
837 stmt_loop = find_common_loop (orig_loop, stmt_loop);
838 lim_data = get_lim_data (stmt);
839 if (lim_data != NULL && lim_data->tgt_loop != NULL)
840 stmt_loop = find_common_loop (stmt_loop,
841 loop_outer (lim_data->tgt_loop));
842 if (flow_loop_nested_p (stmt_loop, level))
843 return;
845 gcc_assert (level == lim_data->max_loop
846 || flow_loop_nested_p (lim_data->max_loop, level));
848 lim_data->tgt_loop = level;
849 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
850 set_level (dep_stmt, orig_loop, level);
853 /* Determines an outermost loop from that we want to hoist the statement STMT.
854 For now we chose the outermost possible loop. TODO -- use profiling
855 information to set it more sanely. */
857 static void
858 set_profitable_level (gimple stmt)
860 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
863 /* Returns true if STMT is a call that has side effects. */
865 static bool
866 nonpure_call_p (gimple stmt)
868 if (gimple_code (stmt) != GIMPLE_CALL)
869 return false;
871 return gimple_has_side_effects (stmt);
874 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
876 static gimple
877 rewrite_reciprocal (gimple_stmt_iterator *bsi)
879 gimple stmt, stmt1, stmt2;
880 tree name, lhs, type;
881 tree real_one;
882 gimple_stmt_iterator gsi;
884 stmt = gsi_stmt (*bsi);
885 lhs = gimple_assign_lhs (stmt);
886 type = TREE_TYPE (lhs);
888 real_one = build_one_cst (type);
890 name = make_temp_ssa_name (type, NULL, "reciptmp");
891 stmt1 = gimple_build_assign_with_ops (RDIV_EXPR, name, real_one,
892 gimple_assign_rhs2 (stmt));
894 stmt2 = gimple_build_assign_with_ops (MULT_EXPR, lhs, name,
895 gimple_assign_rhs1 (stmt));
897 /* Replace division stmt with reciprocal and multiply stmts.
898 The multiply stmt is not invariant, so update iterator
899 and avoid rescanning. */
900 gsi = *bsi;
901 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
902 gsi_replace (&gsi, stmt2, true);
904 /* Continue processing with invariant reciprocal statement. */
905 return stmt1;
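/* For example, with a loop-invariant divisor b_2 the division
       x_1 = a_3 / b_2;
   is rewritten into
       reciptmp_4 = 1.0 / b_2;    <- invariant, returned for further hoisting
       x_1 = a_3 * reciptmp_4;
   so only the cheap multiply remains inside the loop (SSA names here are
   illustrative).  */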
908 /* Check if the pattern at *BSI is a bittest of the form
909 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
911 static gimple
912 rewrite_bittest (gimple_stmt_iterator *bsi)
914 gimple stmt, use_stmt, stmt1, stmt2;
915 tree lhs, name, t, a, b;
916 use_operand_p use;
918 stmt = gsi_stmt (*bsi);
919 lhs = gimple_assign_lhs (stmt);
921 /* Verify that the single use of lhs is a comparison against zero. */
922 if (TREE_CODE (lhs) != SSA_NAME
923 || !single_imm_use (lhs, &use, &use_stmt)
924 || gimple_code (use_stmt) != GIMPLE_COND)
925 return stmt;
926 if (gimple_cond_lhs (use_stmt) != lhs
927 || (gimple_cond_code (use_stmt) != NE_EXPR
928 && gimple_cond_code (use_stmt) != EQ_EXPR)
929 || !integer_zerop (gimple_cond_rhs (use_stmt)))
930 return stmt;
932 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
933 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
934 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
935 return stmt;
937 /* There is a conversion in between possibly inserted by fold. */
938 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
940 t = gimple_assign_rhs1 (stmt1);
941 if (TREE_CODE (t) != SSA_NAME
942 || !has_single_use (t))
943 return stmt;
944 stmt1 = SSA_NAME_DEF_STMT (t);
945 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
946 return stmt;
949 /* Verify that B is loop invariant but A is not. Verify that with
950 all the stmt walking we are still in the same loop. */
951 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
952 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
953 return stmt;
955 a = gimple_assign_rhs1 (stmt1);
956 b = gimple_assign_rhs2 (stmt1);
958 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
959 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
961 gimple_stmt_iterator rsi;
963 /* 1 << B */
964 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
965 build_int_cst (TREE_TYPE (a), 1), b);
966 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
967 stmt1 = gimple_build_assign (name, t);
969 /* A & (1 << B) */
970 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
971 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
972 stmt2 = gimple_build_assign (name, t);
974 /* Replace the SSA_NAME we compare against zero. Adjust
975 the type of zero accordingly. */
976 SET_USE (use, name);
977 gimple_cond_set_rhs (use_stmt, build_int_cst_type (TREE_TYPE (name), 0));
979 /* Don't use gsi_replace here, none of the new assignments sets
980 the variable originally set in stmt. Move bsi to stmt1, and
981 then remove the original stmt, so that we get a chance to
982 retain debug info for it. */
983 rsi = *bsi;
984 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
985 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
986 gsi_remove (&rsi, true);
988 return stmt1;
991 return stmt;
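/* For example, with loop-invariant B_2 the sequence
       tmp_3 = A_1 >> B_2;
       tmp_4 = tmp_3 & 1;
       if (tmp_4 != 0) ...
   becomes
       shifttmp_5 = 1 << B_2;     <- invariant, returned for further hoisting
       shifttmp_6 = A_1 & shifttmp_5;
       if (shifttmp_6 != 0) ...
   so the shift by B_2 can be hoisted while only the AND stays in the loop
   (SSA names here are illustrative).  */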
994 /* For each statement determines the outermost loop in that it is invariant,
995    statements on whose motion it depends and the cost of the computation.
996    This information is stored to the LIM_DATA structure associated with
997    each statement.  */
998 class invariantness_dom_walker : public dom_walker
1000 public:
1001 invariantness_dom_walker (cdi_direction direction)
1002 : dom_walker (direction) {}
1004 virtual void before_dom_children (basic_block);
1007 /* Determine the outermost loops in that statements in basic block BB are
1008 invariant, and record them to the LIM_DATA associated with the statements.
1009 Callback for dom_walker. */
1011 void
1012 invariantness_dom_walker::before_dom_children (basic_block bb)
1014 enum move_pos pos;
1015 gimple_stmt_iterator bsi;
1016 gimple stmt;
1017 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1018 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
1019 struct lim_aux_data *lim_data;
1021 if (!loop_outer (bb->loop_father))
1022 return;
1024 if (dump_file && (dump_flags & TDF_DETAILS))
1025 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1026 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1028   /* Look at PHI nodes, but only if there are at most two.
1029 ??? We could relax this further by post-processing the inserted
1030 code and transforming adjacent cond-exprs with the same predicate
1031 to control flow again. */
1032 bsi = gsi_start_phis (bb);
1033 if (!gsi_end_p (bsi)
1034 && ((gsi_next (&bsi), gsi_end_p (bsi))
1035 || (gsi_next (&bsi), gsi_end_p (bsi))))
1036 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1038 stmt = gsi_stmt (bsi);
1040 pos = movement_possibility (stmt);
1041 if (pos == MOVE_IMPOSSIBLE)
1042 continue;
1044 lim_data = init_lim_data (stmt);
1045 lim_data->always_executed_in = outermost;
1047 if (!determine_max_movement (stmt, false))
1049 lim_data->max_loop = NULL;
1050 continue;
1053 if (dump_file && (dump_flags & TDF_DETAILS))
1055 print_gimple_stmt (dump_file, stmt, 2, 0);
1056 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1057 loop_depth (lim_data->max_loop),
1058 lim_data->cost);
1061 if (lim_data->cost >= LIM_EXPENSIVE)
1062 set_profitable_level (stmt);
1065 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1067 stmt = gsi_stmt (bsi);
1069 pos = movement_possibility (stmt);
1070 if (pos == MOVE_IMPOSSIBLE)
1072 if (nonpure_call_p (stmt))
1074 maybe_never = true;
1075 outermost = NULL;
1077 /* Make sure to note always_executed_in for stores to make
1078 store-motion work. */
1079 else if (stmt_makes_single_store (stmt))
1081 struct lim_aux_data *lim_data = init_lim_data (stmt);
1082 lim_data->always_executed_in = outermost;
1084 continue;
1087 if (is_gimple_assign (stmt)
1088 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1089 == GIMPLE_BINARY_RHS))
1091 tree op0 = gimple_assign_rhs1 (stmt);
1092 tree op1 = gimple_assign_rhs2 (stmt);
1093 struct loop *ol1 = outermost_invariant_loop (op1,
1094 loop_containing_stmt (stmt));
1096 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1097 to be hoisted out of loop, saving expensive divide. */
1098 if (pos == MOVE_POSSIBLE
1099 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1100 && flag_unsafe_math_optimizations
1101 && !flag_trapping_math
1102 && ol1 != NULL
1103 && outermost_invariant_loop (op0, ol1) == NULL)
1104 stmt = rewrite_reciprocal (&bsi);
1106 /* If the shift count is invariant, convert (A >> B) & 1 to
1107 A & (1 << B) allowing the bit mask to be hoisted out of the loop
1108 saving an expensive shift. */
1109 if (pos == MOVE_POSSIBLE
1110 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1111 && integer_onep (op1)
1112 && TREE_CODE (op0) == SSA_NAME
1113 && has_single_use (op0))
1114 stmt = rewrite_bittest (&bsi);
1117 lim_data = init_lim_data (stmt);
1118 lim_data->always_executed_in = outermost;
1120 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1121 continue;
1123 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1125 lim_data->max_loop = NULL;
1126 continue;
1129 if (dump_file && (dump_flags & TDF_DETAILS))
1131 print_gimple_stmt (dump_file, stmt, 2, 0);
1132 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1133 loop_depth (lim_data->max_loop),
1134 lim_data->cost);
1137 if (lim_data->cost >= LIM_EXPENSIVE)
1138 set_profitable_level (stmt);
1142 class move_computations_dom_walker : public dom_walker
1144 public:
1145 move_computations_dom_walker (cdi_direction direction)
1146 : dom_walker (direction), todo_ (0) {}
1148 virtual void before_dom_children (basic_block);
1150 unsigned int todo_;
1153 /* Hoist the statements in basic block BB out of the loops prescribed by
1154 data stored in LIM_DATA structures associated with each statement. Callback
1155 for walk_dominator_tree. */
1157 void
1158 move_computations_dom_walker::before_dom_children (basic_block bb)
1160 struct loop *level;
1161 gimple_stmt_iterator bsi;
1162 gimple stmt;
1163 unsigned cost = 0;
1164 struct lim_aux_data *lim_data;
1166 if (!loop_outer (bb->loop_father))
1167 return;
1169 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1171 gimple new_stmt;
1172 stmt = gsi_stmt (bsi);
1174 lim_data = get_lim_data (stmt);
1175 if (lim_data == NULL)
1177 gsi_next (&bsi);
1178 continue;
1181 cost = lim_data->cost;
1182 level = lim_data->tgt_loop;
1183 clear_lim_data (stmt);
1185 if (!level)
1187 gsi_next (&bsi);
1188 continue;
1191 if (dump_file && (dump_flags & TDF_DETAILS))
1193 fprintf (dump_file, "Moving PHI node\n");
1194 print_gimple_stmt (dump_file, stmt, 0, 0);
1195 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1196 cost, level->num);
1199 if (gimple_phi_num_args (stmt) == 1)
1201 tree arg = PHI_ARG_DEF (stmt, 0);
1202 new_stmt = gimple_build_assign_with_ops (TREE_CODE (arg),
1203 gimple_phi_result (stmt),
1204 arg, NULL_TREE);
1206 else
1208 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1209 gimple cond = gsi_stmt (gsi_last_bb (dom));
1210 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1211 /* Get the PHI arguments corresponding to the true and false
1212 edges of COND. */
1213 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1214 gcc_assert (arg0 && arg1);
1215 t = build2 (gimple_cond_code (cond), boolean_type_node,
1216 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1217 new_stmt = gimple_build_assign_with_ops (COND_EXPR,
1218 gimple_phi_result (stmt),
1219 t, arg0, arg1);
1220 todo_ |= TODO_cleanup_cfg;
1222 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1223 remove_phi_node (&bsi, false);
1226 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1228 edge e;
1230 stmt = gsi_stmt (bsi);
1232 lim_data = get_lim_data (stmt);
1233 if (lim_data == NULL)
1235 gsi_next (&bsi);
1236 continue;
1239 cost = lim_data->cost;
1240 level = lim_data->tgt_loop;
1241 clear_lim_data (stmt);
1243 if (!level)
1245 gsi_next (&bsi);
1246 continue;
1249 /* We do not really want to move conditionals out of the loop; we just
1250 placed it here to force its operands to be moved if necessary. */
1251 if (gimple_code (stmt) == GIMPLE_COND)
1252 continue;
1254 if (dump_file && (dump_flags & TDF_DETAILS))
1256 fprintf (dump_file, "Moving statement\n");
1257 print_gimple_stmt (dump_file, stmt, 0, 0);
1258 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1259 cost, level->num);
1262 e = loop_preheader_edge (level);
1263 gcc_assert (!gimple_vdef (stmt));
1264 if (gimple_vuse (stmt))
1266 /* The new VUSE is the one from the virtual PHI in the loop
1267 header or the one already present. */
1268 gimple_stmt_iterator gsi2;
1269 for (gsi2 = gsi_start_phis (e->dest);
1270 !gsi_end_p (gsi2); gsi_next (&gsi2))
1272 gimple phi = gsi_stmt (gsi2);
1273 if (virtual_operand_p (gimple_phi_result (phi)))
1275 gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1276 break;
1280 gsi_remove (&bsi, false);
1281 /* In case this is a stmt that is not unconditionally executed
1282 when the target loop header is executed and the stmt may
1283 invoke undefined integer or pointer overflow rewrite it to
1284 unsigned arithmetic. */
1285 if (is_gimple_assign (stmt)
1286 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1287 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1288 && arith_code_with_undefined_signed_overflow
1289 (gimple_assign_rhs_code (stmt))
1290 && (!ALWAYS_EXECUTED_IN (bb)
1291 || !(ALWAYS_EXECUTED_IN (bb) == level
1292 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1293 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1294 else
1295 gsi_insert_on_edge (e, stmt);
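      /* For example, if a hoisted statement like
             i_1 = j_2 + 1;   (signed int, overflow undefined)
         is not executed on every iteration of the target loop, it is emitted
         on the preheader edge as the equivalent unsigned computation
             tmp_3 = (unsigned int) j_2;
             tmp_4 = tmp_3 + 1;
             i_1 = (int) tmp_4;
         so that speculative execution cannot introduce undefined overflow
         (temporaries here are illustrative).  */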
1299 /* Hoist the statements out of the loops prescribed by data stored in
1300    LIM_DATA structures associated with each statement.  */
1302 static unsigned int
1303 move_computations (void)
1305 move_computations_dom_walker walker (CDI_DOMINATORS);
1306 walker.walk (cfun->cfg->x_entry_block_ptr);
1308 gsi_commit_edge_inserts ();
1309 if (need_ssa_update_p (cfun))
1310 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1312 return walker.todo_;
1315 /* Checks whether the statement defining variable *INDEX can be hoisted
1316 out of the loop passed in DATA. Callback for for_each_index. */
1318 static bool
1319 may_move_till (tree ref, tree *index, void *data)
1321 struct loop *loop = (struct loop *) data, *max_loop;
1323 /* If REF is an array reference, check also that the step and the lower
1324 bound is invariant in LOOP. */
1325 if (TREE_CODE (ref) == ARRAY_REF)
1327 tree step = TREE_OPERAND (ref, 3);
1328 tree lbound = TREE_OPERAND (ref, 2);
1330 max_loop = outermost_invariant_loop (step, loop);
1331 if (!max_loop)
1332 return false;
1334 max_loop = outermost_invariant_loop (lbound, loop);
1335 if (!max_loop)
1336 return false;
1339 max_loop = outermost_invariant_loop (*index, loop);
1340 if (!max_loop)
1341 return false;
1343 return true;
1346 /* If OP is SSA NAME, force the statement that defines it to be
1347 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
1349 static void
1350 force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1352 gimple stmt;
1354 if (!op
1355 || is_gimple_min_invariant (op))
1356 return;
1358 gcc_assert (TREE_CODE (op) == SSA_NAME);
1360 stmt = SSA_NAME_DEF_STMT (op);
1361 if (gimple_nop_p (stmt))
1362 return;
1364 set_level (stmt, orig_loop, loop);
1367 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1368 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1369 for_each_index. */
1371 struct fmt_data
1373 struct loop *loop;
1374 struct loop *orig_loop;
1377 static bool
1378 force_move_till (tree ref, tree *index, void *data)
1380 struct fmt_data *fmt_data = (struct fmt_data *) data;
1382 if (TREE_CODE (ref) == ARRAY_REF)
1384 tree step = TREE_OPERAND (ref, 3);
1385 tree lbound = TREE_OPERAND (ref, 2);
1387 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1388 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1391 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1393 return true;
1396 /* A function to free the mem_ref object OBJ. */
1398 static void
1399 memref_free (struct mem_ref *mem)
1401 mem->accesses_in_loop.release ();
1404 /* Allocates and returns a memory reference description for MEM whose hash
1405 value is HASH and id is ID. */
1407 static mem_ref_p
1408 mem_ref_alloc (tree mem, unsigned hash, unsigned id)
1410 mem_ref_p ref = XOBNEW (&mem_ref_obstack, struct mem_ref);
1411 ao_ref_init (&ref->mem, mem);
1412 ref->id = id;
1413 ref->hash = hash;
1414 ref->stored = NULL;
1415 bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
1416 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1417 ref->accesses_in_loop.create (1);
1419 return ref;
1422 /* Records memory reference location *LOC in LOOP to the memory reference
1423 description REF. The reference occurs in statement STMT. */
1425 static void
1426 record_mem_ref_loc (mem_ref_p ref, gimple stmt, tree *loc)
1428 mem_ref_loc aref;
1429 aref.stmt = stmt;
1430 aref.ref = loc;
1431 ref->accesses_in_loop.safe_push (aref);
1434 /* Set the LOOP bit in REF stored bitmap and allocate that if
1435 necessary. Return whether a bit was changed. */
1437 static bool
1438 set_ref_stored_in_loop (mem_ref_p ref, struct loop *loop)
1440 if (!ref->stored)
1441 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1442 return bitmap_set_bit (ref->stored, loop->num);
1445 /* Marks reference REF as stored in LOOP. */
1447 static void
1448 mark_ref_stored (mem_ref_p ref, struct loop *loop)
1450 while (loop != current_loops->tree_root
1451 && set_ref_stored_in_loop (ref, loop))
1452 loop = loop_outer (loop);
1455 /* Gathers memory references in statement STMT in LOOP, storing the
1456 information about them in the memory_accesses structure. Marks
1457 the vops accessed through unrecognized statements there as
1458 well. */
1460 static void
1461 gather_mem_refs_stmt (struct loop *loop, gimple stmt)
1463 tree *mem = NULL;
1464 hashval_t hash;
1465 mem_ref **slot;
1466 mem_ref_p ref;
1467 bool is_stored;
1468 unsigned id;
1470 if (!gimple_vuse (stmt))
1471 return;
1473 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1474 if (!mem)
1476 /* We use the shared mem_ref for all unanalyzable refs. */
1477 id = UNANALYZABLE_MEM_ID;
1478 ref = memory_accesses.refs_list[id];
1479 if (dump_file && (dump_flags & TDF_DETAILS))
1481 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1482 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1484 is_stored = gimple_vdef (stmt);
1486 else
1488 hash = iterative_hash_expr (*mem, 0);
1489 slot = memory_accesses.refs->find_slot_with_hash (*mem, hash, INSERT);
1490 if (*slot)
1492 ref = (mem_ref_p) *slot;
1493 id = ref->id;
1495 else
1497 id = memory_accesses.refs_list.length ();
1498 ref = mem_ref_alloc (*mem, hash, id);
1499 memory_accesses.refs_list.safe_push (ref);
1500 *slot = ref;
1502 if (dump_file && (dump_flags & TDF_DETAILS))
1504 fprintf (dump_file, "Memory reference %u: ", id);
1505 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1506 fprintf (dump_file, "\n");
1510 record_mem_ref_loc (ref, stmt, mem);
1512 bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
1513 if (is_stored)
1515 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1516 mark_ref_stored (ref, loop);
1518 return;
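/* For example, a call that reads or clobbers memory, or a reference that
   simple_mem_ref_in_stmt does not recognize, is recorded against the shared
   reference with id UNANALYZABLE_MEM_ID (0), which the dependence checks
   later treat conservatively (never independent of anything).  */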
1521 static unsigned *bb_loop_postorder;
1523 /* qsort sort function to sort blocks after their loop fathers postorder. */
1525 static int
1526 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
1528 basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
1529 basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
1530 struct loop *loop1 = bb1->loop_father;
1531 struct loop *loop2 = bb2->loop_father;
1532 if (loop1->num == loop2->num)
1533 return 0;
1534 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1537 /* qsort sort function to sort ref locs after their loop fathers postorder. */
1539 static int
1540 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
1542 mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
1543 mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
1544 struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1545 struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1546 if (loop1->num == loop2->num)
1547 return 0;
1548 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1551 /* Gathers memory references in loops. */
1553 static void
1554 analyze_memory_references (void)
1556 gimple_stmt_iterator bsi;
1557 basic_block bb, *bbs;
1558 struct loop *loop, *outer;
1559 unsigned i, n;
1561 /* Collect all basic-blocks in loops and sort them after their
1562 loops postorder. */
1563 i = 0;
1564 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1565 FOR_EACH_BB_FN (bb, cfun)
1566 if (bb->loop_father != current_loops->tree_root)
1567 bbs[i++] = bb;
1568 n = i;
1569 qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);
1571 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1572 That results in better locality for all the bitmaps. */
1573 for (i = 0; i < n; ++i)
1575 basic_block bb = bbs[i];
1576 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1577 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1580 /* Sort the location list of gathered memory references after their
1581 loop postorder number. */
1582 mem_ref *ref;
1583 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1584 ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);
1586 free (bbs);
1587 // free (bb_loop_postorder);
1589 /* Propagate the information about accessed memory references up
1590 the loop hierarchy. */
1591 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1593 /* Finalize the overall touched references (including subloops). */
1594 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1595 &memory_accesses.refs_stored_in_loop[loop->num]);
1597 /* Propagate the information about accessed memory references up
1598 the loop hierarchy. */
1599 outer = loop_outer (loop);
1600 if (outer == current_loops->tree_root)
1601 continue;
1603 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1604 &memory_accesses.all_refs_stored_in_loop[loop->num]);
1608 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1609 tree_to_aff_combination_expand. */
1611 static bool
1612 mem_refs_may_alias_p (mem_ref_p mem1, mem_ref_p mem2,
1613 struct pointer_map_t **ttae_cache)
1615 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1616 object and their offset differ in such a way that the locations cannot
1617 overlap, then they cannot alias. */
1618 widest_int size1, size2;
1619 aff_tree off1, off2;
1621 /* Perform basic offset and type-based disambiguation. */
1622 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
1623 return false;
1625 /* The expansion of addresses may be a bit expensive, thus we only do
1626 the check at -O2 and higher optimization levels. */
1627 if (optimize < 2)
1628 return true;
1630 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1631 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1632 aff_combination_expand (&off1, ttae_cache);
1633 aff_combination_expand (&off2, ttae_cache);
1634 aff_combination_scale (&off1, -1);
1635 aff_combination_add (&off2, &off1);
1637 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1638 return false;
1640 return true;
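/* As an illustration of the affine-offset check: two accesses a[4] and a[5]
   to the same array expand from the same base with offsets that differ by a
   full element, so the accessed ranges cannot overlap and the references are
   reported as not aliasing.  Offsets that cannot be proven disjoint leave
   the conservative "may alias" answer.  */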
1643 /* Compare function for bsearch searching for reference locations
1644 in a loop. */
1646 static int
1647 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
1649 struct loop *loop = (struct loop *)const_cast<void *>(loop_);
1650 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1651 struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1652 if (loop->num == loc_loop->num
1653 || flow_loop_nested_p (loop, loc_loop))
1654 return 0;
1655 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1656 ? -1 : 1);
1659 /* Iterates over all locations of REF in LOOP and its subloops calling
1660 fn.operator() with the location as argument. When that operator
1661 returns true the iteration is stopped and true is returned.
1662 Otherwise false is returned. */
1664 template <typename FN>
1665 static bool
1666 for_all_locs_in_loop (struct loop *loop, mem_ref_p ref, FN fn)
1668 unsigned i;
1669 mem_ref_loc_p loc;
1671 /* Search for the cluster of locs in the accesses_in_loop vector
1672 which is sorted after postorder index of the loop father. */
1673 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
1674 if (!loc)
1675 return false;
1677 /* We have found one location inside loop or its sub-loops. Iterate
1678 both forward and backward to cover the whole cluster. */
1679 i = loc - ref->accesses_in_loop.address ();
1680 while (i > 0)
1682 --i;
1683 mem_ref_loc_p l = &ref->accesses_in_loop[i];
1684 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1685 break;
1686 if (fn (l))
1687 return true;
1689 for (i = loc - ref->accesses_in_loop.address ();
1690 i < ref->accesses_in_loop.length (); ++i)
1692 mem_ref_loc_p l = &ref->accesses_in_loop[i];
1693 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1694 break;
1695 if (fn (l))
1696 return true;
1699 return false;
1702 /* Rewrites location LOC by TMP_VAR. */
1704 struct rewrite_mem_ref_loc
1706 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1707 bool operator () (mem_ref_loc_p loc);
1708 tree tmp_var;
1711 bool
1712 rewrite_mem_ref_loc::operator () (mem_ref_loc_p loc)
1714 *loc->ref = tmp_var;
1715 update_stmt (loc->stmt);
1716 return false;
1719 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1721 static void
1722 rewrite_mem_refs (struct loop *loop, mem_ref_p ref, tree tmp_var)
1724 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1727 /* Stores the first reference location in LOCP. */
1729 struct first_mem_ref_loc_1
1731 first_mem_ref_loc_1 (mem_ref_loc_p *locp_) : locp (locp_) {}
1732 bool operator () (mem_ref_loc_p loc);
1733 mem_ref_loc_p *locp;
1736 bool
1737 first_mem_ref_loc_1::operator () (mem_ref_loc_p loc)
1739 *locp = loc;
1740 return true;
1743 /* Returns the first reference location to REF in LOOP. */
1745 static mem_ref_loc_p
1746 first_mem_ref_loc (struct loop *loop, mem_ref_p ref)
1748 mem_ref_loc_p locp = NULL;
1749 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1750 return locp;
1753 struct prev_flag_edges {
1754 /* Edge to insert new flag comparison code. */
1755 edge append_cond_position;
1757 /* Edge for fall through from previous flag comparison. */
1758 edge last_cond_fallthru;
1761 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1762 MEM along edge EX.
1764 The store is only done if MEM has changed. We do this so no
1765 changes to MEM occur on code paths that did not originally store
1766 into it.
1768 The common case for execute_sm will transform:
1770 for (...) {
1771 if (foo)
1772 stuff;
1773 else
1774 MEM = TMP_VAR;
1777 into:
1779 lsm = MEM;
1780 for (...) {
1781 if (foo)
1782 stuff;
1783 else
1784 lsm = TMP_VAR;
1786 MEM = lsm;
1788 This function will generate:
1790 lsm = MEM;
1792 lsm_flag = false;
1794 for (...) {
1795 if (foo)
1796 stuff;
1797 else {
1798 lsm = TMP_VAR;
1799 lsm_flag = true;
1802 if (lsm_flag) <--
1803 MEM = lsm; <--
1806 static void
1807 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag)
1809 basic_block new_bb, then_bb, old_dest;
1810 bool loop_has_only_one_exit;
1811 edge then_old_edge, orig_ex = ex;
1812 gimple_stmt_iterator gsi;
1813 gimple stmt;
1814 struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1815 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1817 /* ?? Insert store after previous store if applicable. See note
1818 below. */
1819 if (prev_edges)
1820 ex = prev_edges->append_cond_position;
1822 loop_has_only_one_exit = single_pred_p (ex->dest);
1824 if (loop_has_only_one_exit)
1825 ex = split_block_after_labels (ex->dest);
1827 old_dest = ex->dest;
1828 new_bb = split_edge (ex);
1829 then_bb = create_empty_bb (new_bb);
1830 if (irr)
1831 then_bb->flags = BB_IRREDUCIBLE_LOOP;
1832 add_bb_to_loop (then_bb, new_bb->loop_father);
1834 gsi = gsi_start_bb (new_bb);
1835 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1836 NULL_TREE, NULL_TREE);
1837 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1839 gsi = gsi_start_bb (then_bb);
1840 /* Insert actual store. */
1841 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1842 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1844 make_edge (new_bb, then_bb,
1845 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1846 make_edge (new_bb, old_dest,
1847 EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1848 then_old_edge = make_edge (then_bb, old_dest,
1849 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1851 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
1853 if (prev_edges)
1855 basic_block prevbb = prev_edges->last_cond_fallthru->src;
1856 redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
1857 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
1858 set_immediate_dominator (CDI_DOMINATORS, old_dest,
1859 recompute_dominator (CDI_DOMINATORS, old_dest));
1862 /* ?? Because stores may alias, they must happen in the exact
1863 sequence they originally happened. Save the position right after
1864 the (_lsm) store we just created so we can continue appending after
1865 it and maintain the original order. */
1867 struct prev_flag_edges *p;
1869 if (orig_ex->aux)
1870 orig_ex->aux = NULL;
1871 alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
1872 p = (struct prev_flag_edges *) orig_ex->aux;
1873 p->append_cond_position = then_old_edge;
1874 p->last_cond_fallthru = find_edge (new_bb, old_dest);
1875 orig_ex->aux = (void *) p;
1878 if (!loop_has_only_one_exit)
1879 for (gsi = gsi_start_phis (old_dest); !gsi_end_p (gsi); gsi_next (&gsi))
1881 gimple phi = gsi_stmt (gsi);
1882 unsigned i;
1884 for (i = 0; i < gimple_phi_num_args (phi); i++)
1885 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
1887 tree arg = gimple_phi_arg_def (phi, i);
1888 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
1889 update_stmt (phi);
1892 /* Remove the original fall through edge. This was the
1893 single_succ_edge (new_bb). */
1894 EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU;
1897 /* When REF is set on the location, set flag indicating the store. */
1899 struct sm_set_flag_if_changed
1901 sm_set_flag_if_changed (tree flag_) : flag (flag_) {}
1902 bool operator () (mem_ref_loc_p loc);
1903 tree flag;
1906 bool
1907 sm_set_flag_if_changed::operator () (mem_ref_loc_p loc)
1909 /* Only set the flag for writes. */
1910 if (is_gimple_assign (loc->stmt)
1911 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
1913 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
1914 gimple stmt = gimple_build_assign (flag, boolean_true_node);
1915 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1917 return false;
1920 /* Helper function for execute_sm. On every location where REF is
1921 set, set an appropriate flag indicating the store. */
1923 static tree
1924 execute_sm_if_changed_flag_set (struct loop *loop, mem_ref_p ref)
1926 tree flag;
1927 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
1928 flag = create_tmp_reg (boolean_type_node, str);
1929 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag));
1930 return flag;
1933 /* Executes store motion of memory reference REF from LOOP.
1934 Exits from the LOOP are stored in EXITS. The initialization of the
1935 temporary variable is put to the preheader of the loop, and assignments
1936 to the reference from the temporary variable are emitted to exits. */
static void
execute_sm (struct loop *loop, vec<edge> exits, mem_ref_p ref)
{
  tree tmp_var, store_flag = NULL_TREE;
  unsigned i;
  gimple load;
  struct fmt_data fmt_data;
  edge ex;
  struct lim_aux_data *lim_data;
  bool multi_threaded_model_p = false;
  gimple_stmt_iterator gsi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Executing store motion of ");
      print_generic_expr (dump_file, ref->mem.ref, 0);
      fprintf (dump_file, " from loop %d\n", loop->num);
    }

  tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
                            get_lsm_tmp_name (ref->mem.ref, ~0));

  fmt_data.loop = loop;
  fmt_data.orig_loop = loop;
  for_each_index (&ref->mem.ref, force_move_till, &fmt_data);

  if (bb_in_transaction (loop_preheader_edge (loop)->src)
      || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
    multi_threaded_model_p = true;

  if (multi_threaded_model_p)
    store_flag = execute_sm_if_changed_flag_set (loop, ref);

  rewrite_mem_refs (loop, ref, tmp_var);

  /* Emit the load code on a random exit edge or into the latch if
     the loop does not exit, so that we are sure it will be processed
     by move_computations after all dependencies.  */
  gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);

  /* FIXME/TODO: For the multi-threaded variant, we could avoid this
     load altogether, since the store is predicated by a flag.  We
     could do the load only if it was originally in the loop.  */
  load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
  lim_data = init_lim_data (load);
  lim_data->max_loop = loop;
  lim_data->tgt_loop = loop;
  gsi_insert_before (&gsi, load, GSI_SAME_STMT);

  if (multi_threaded_model_p)
    {
      load = gimple_build_assign (store_flag, boolean_false_node);
      lim_data = init_lim_data (load);
      lim_data->max_loop = loop;
      lim_data->tgt_loop = loop;
      gsi_insert_before (&gsi, load, GSI_SAME_STMT);
    }

  /* Sink the store to every exit from the loop.  */
  FOR_EACH_VEC_ELT (exits, i, ex)
    if (!multi_threaded_model_p)
      {
        gimple store;
        store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
        gsi_insert_on_edge (ex, store);
      }
    else
      execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag);
}
/* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
   edges of the LOOP.  */

static void
hoist_memory_references (struct loop *loop, bitmap mem_refs,
                         vec<edge> exits)
{
  mem_ref_p ref;
  unsigned i;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      execute_sm (loop, exits, ref);
    }
}
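/* Functor for for_all_locs_in_loop, used by ref_always_accessed_p below:
   returns true for an access location that is always executed when LOOP
   is entered, and, if STORED_P, additionally requires the location to be
   a store to the reference.  */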
struct ref_always_accessed
{
  ref_always_accessed (struct loop *loop_, bool stored_p_)
      : loop (loop_), stored_p (stored_p_) {}
  bool operator () (mem_ref_loc_p loc);
  struct loop *loop;
  bool stored_p;
};

bool
ref_always_accessed::operator () (mem_ref_loc_p loc)
{
  struct loop *must_exec;

  if (!get_lim_data (loc->stmt))
    return false;

  /* If we require an always executed store make sure the statement
     stores to the reference.  */
  if (stored_p)
    {
      tree lhs = gimple_get_lhs (loc->stmt);
      if (!lhs
          || lhs != *loc->ref)
        return false;
    }

  must_exec = get_lim_data (loc->stmt)->always_executed_in;
  if (!must_exec)
    return false;

  if (must_exec == loop
      || flow_loop_nested_p (must_exec, loop))
    return true;

  return false;
}
/* Returns true if REF is always accessed in LOOP.  If STORED_P is true,
   make sure REF is always stored to in LOOP.  */

static bool
ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
{
  return for_all_locs_in_loop (loop, ref,
                               ref_always_accessed (loop, stored_p));
}
/* Returns true if REF1 and REF2 are independent.  */

static bool
refs_independent_p (mem_ref_p ref1, mem_ref_p ref2)
{
  if (ref1 == ref2)
    return true;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Querying dependency of refs %u and %u: ",
             ref1->id, ref2->id);

  if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "dependent.\n");
      return false;
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "independent.\n");
      return true;
    }
}
/* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
   and its super-loops.  */

static void
record_dep_loop (struct loop *loop, mem_ref_p ref, bool stored_p)
{
  /* We can propagate dependent-in-loop bits up the loop
     hierarchy to all outer loops.  */
  while (loop != current_loops->tree_root
         && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
    loop = loop_outer (loop);
}
/* Returns true if REF is independent of all other memory references in
   LOOP.  */

static bool
ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref, bool stored_p)
{
  bitmap refs_to_check;
  unsigned i;
  bitmap_iterator bi;
  mem_ref_p aref;
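
  /* If REF itself is stored in LOOP, it can conflict with any other
     access in the loop, so it must be checked against all of them;
     otherwise a conflict can only come from a store.  */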
  if (stored_p)
    refs_to_check = &memory_accesses.refs_in_loop[loop->num];
  else
    refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];

  if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
    return false;

  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
    {
      aref = memory_accesses.refs_list[i];
      if (!refs_independent_p (ref, aref))
        return false;
    }

  return true;
}
/* Returns true if REF is independent of all other memory references in
   LOOP.  Wrapper over ref_indep_loop_p_1, caching its results.  */

static bool
ref_indep_loop_p_2 (struct loop *loop, mem_ref_p ref, bool stored_p)
{
  stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));

  if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
    return true;
  if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
    return false;
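
  /* REF is independent in LOOP only if it is also independent in all
     inner loops, so query those first.  */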
  struct loop *inner = loop->inner;
  while (inner)
    {
      if (!ref_indep_loop_p_2 (inner, ref, stored_p))
        return false;
      inner = inner->next;
    }

  bool indep_p = ref_indep_loop_p_1 (loop, ref, stored_p);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
             ref->id, loop->num, indep_p ? "independent" : "dependent");

  /* Record the computed result in the cache.  */
  if (indep_p)
    {
      if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
          && stored_p)
        {
          /* If it's independent of all refs then it's independent of
             stores, too.  */
          bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
        }
    }
  else
    {
      record_dep_loop (loop, ref, stored_p);
      if (!stored_p)
        {
          /* If it's dependent on stores it's dependent on
             all refs, too.  */
          record_dep_loop (loop, ref, true);
        }
    }

  return indep_p;
}
/* Returns true if REF is independent of all other memory references in
   LOOP.  */

static bool
ref_indep_loop_p (struct loop *loop, mem_ref_p ref)
{
  gcc_checking_assert (MEM_ANALYZABLE (ref));

  return ref_indep_loop_p_2 (loop, ref, false);
}
/* Returns true if we can perform store motion of REF from LOOP.  */

static bool
can_sm_ref_p (struct loop *loop, mem_ref_p ref)
{
  tree base;

  /* Can't hoist unanalyzable refs.  */
  if (!MEM_ANALYZABLE (ref))
    return false;

  /* It should be movable.  */
  if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
      || TREE_THIS_VOLATILE (ref->mem.ref)
      || !for_each_index (&ref->mem.ref, may_move_till, loop))
    return false;

  /* If it can throw, fail; we do not properly update EH info.  */
  if (tree_could_throw_p (ref->mem.ref))
    return false;

  /* If it can trap, it must be always executed in LOOP.
     Readonly memory locations may trap when storing to them, but
     tree_could_trap_p is a predicate for rvalues, so check that
     explicitly.  */
  base = get_base_address (ref->mem.ref);
  if ((tree_could_trap_p (ref->mem.ref)
       || (DECL_P (base) && TREE_READONLY (base)))
      && !ref_always_accessed_p (loop, ref, true))
    return false;

  /* And it must be independent of all other memory references
     in LOOP.  */
  if (!ref_indep_loop_p (loop, ref))
    return false;

  return true;
}
/* Marks in REFS_TO_SM the references in LOOP for which store motion should
   be performed.  SM_EXECUTED is the set of references for which store
   motion was already performed in one of the outer loops.  */

static void
find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
{
  bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
  unsigned i;
  bitmap_iterator bi;
  mem_ref_p ref;

  EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      if (can_sm_ref_p (loop, ref))
        bitmap_set_bit (refs_to_sm, i);
    }
}
/* Checks whether LOOP (with exits stored in EXITS array) is suitable
   for a store motion optimization (i.e. whether we can insert statements
   on its exits).  */

static bool
loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
                      vec<edge> exits)
{
  unsigned i;
  edge ex;

  FOR_EACH_VEC_ELT (exits, i, ex)
    if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
      return false;

  return true;
}
/* Try to perform store motion for all memory references modified inside
   LOOP.  SM_EXECUTED is the bitmap of the memory references for which
   store motion was already executed in one of the outer loops.  */

static void
store_motion_loop (struct loop *loop, bitmap sm_executed)
{
  vec<edge> exits = get_loop_exit_edges (loop);
  struct loop *subloop;
  bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);

  if (loop_suitable_for_sm (loop, exits))
    {
      find_refs_for_sm (loop, sm_executed, sm_in_loop);
      hoist_memory_references (loop, sm_in_loop, exits);
    }
  exits.release ();

  bitmap_ior_into (sm_executed, sm_in_loop);
  for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
    store_motion_loop (subloop, sm_executed);
  bitmap_and_compl_into (sm_executed, sm_in_loop);
  BITMAP_FREE (sm_in_loop);
}
/* Try to perform store motion for all memory references modified inside
   loops.  */

static void
store_motion (void)
{
  struct loop *loop;
  bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);

  for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
    store_motion_loop (loop, sm_executed);

  BITMAP_FREE (sm_executed);
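  /* The stores sunk to the exit edges were only queued by
     gsi_insert_on_edge; commit them now (this may split edges).  */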
  gsi_commit_edge_inserts ();
}
/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
   for each such basic block bb records the outermost loop for which
   execution of its header implies execution of bb.  CONTAINS_CALL is
   the bitmap of blocks that contain a nonpure call.  */
static void
fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call)
{
  basic_block bb = NULL, *bbs, last = NULL;
  unsigned i;
  edge e;
  struct loop *inn_loop = loop;

  if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
    {
      bbs = get_loop_body_in_dom_order (loop);

      for (i = 0; i < loop->num_nodes; i++)
        {
          edge_iterator ei;
          bb = bbs[i];

          if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
            last = bb;

          if (bitmap_bit_p (contains_call, bb->index))
            break;

          FOR_EACH_EDGE (e, ei, bb->succs)
            if (!flow_bb_inside_loop_p (loop, e->dest))
              break;
          if (e)
            break;

          /* A loop might be infinite (TODO use simple loop analysis
             to disprove this if possible).  */
          if (bb->flags & BB_IRREDUCIBLE_LOOP)
            break;

          if (!flow_bb_inside_loop_p (inn_loop, bb))
            break;

          if (bb->loop_father->header == bb)
            {
              if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
                break;

              /* In a loop that is always entered we may proceed anyway.
                 But record that we entered it and stop once we leave it.  */
              inn_loop = bb->loop_father;
            }
        }
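
      /* Mark the blocks on the immediate-dominator chain from LAST up to
         the loop header as always executed in LOOP.  */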
      while (1)
        {
          SET_ALWAYS_EXECUTED_IN (last, loop);
          if (last == loop->header)
            break;
          last = get_immediate_dominator (CDI_DOMINATORS, last);
        }

      free (bbs);
    }

  for (loop = loop->inner; loop; loop = loop->next)
    fill_always_executed_in_1 (loop, contains_call);
}
/* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
   for each such basic block bb records the outermost loop for which
   execution of its header implies execution of bb.  */
static void
fill_always_executed_in (void)
{
  sbitmap contains_call = sbitmap_alloc (last_basic_block_for_fn (cfun));
  basic_block bb;
  struct loop *loop;

  bitmap_clear (contains_call);
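  /* Record which blocks contain a nonpure call;
     fill_always_executed_in_1 stops scanning at such a block because
     the call may not return.  */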
  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          if (nonpure_call_p (gsi_stmt (gsi)))
            break;
        }

      if (!gsi_end_p (gsi))
        bitmap_set_bit (contains_call, bb->index);
    }

  for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
    fill_always_executed_in_1 (loop, contains_call);

  sbitmap_free (contains_call);
}
/* Compute the global information needed by the loop invariant motion pass.  */

static void
tree_ssa_lim_initialize (void)
{
  struct loop *loop;
  unsigned i;

  bitmap_obstack_initialize (&lim_bitmap_obstack);
  gcc_obstack_init (&mem_ref_obstack);
  lim_aux_data_map = new hash_map<gimple, lim_aux_data *>;

  if (flag_tm)
    compute_transaction_bits ();

  alloc_aux_for_edges (0);

  memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
  memory_accesses.refs_list.create (100);
  /* Allocate a special, unanalyzable mem-ref with ID zero.  */
  memory_accesses.refs_list.quick_push
    (mem_ref_alloc (error_mark_node, 0, UNANALYZABLE_MEM_ID));

  memory_accesses.refs_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
  memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
  memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));

  for (i = 0; i < number_of_loops (cfun); i++)
    {
      bitmap_initialize (&memory_accesses.refs_in_loop[i],
                         &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
                         &lim_bitmap_obstack);
      bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
                         &lim_bitmap_obstack);
    }

  memory_accesses.ttae_cache = NULL;

  /* Initialize bb_loop_postorder with a mapping from loop->num to
     its postorder index.  */
  i = 0;
  bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    bb_loop_postorder[loop->num] = i++;
}
/* Cleans up after the invariant motion pass.  */

static void
tree_ssa_lim_finalize (void)
{
  basic_block bb;
  unsigned i;
  mem_ref_p ref;

  free_aux_for_edges ();

  FOR_EACH_BB_FN (bb, cfun)
    SET_ALWAYS_EXECUTED_IN (bb, NULL);

  bitmap_obstack_release (&lim_bitmap_obstack);
  delete lim_aux_data_map;

  delete memory_accesses.refs;
  memory_accesses.refs = NULL;

  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    memref_free (ref);
  memory_accesses.refs_list.release ();
  obstack_free (&mem_ref_obstack, NULL);

  memory_accesses.refs_in_loop.release ();
  memory_accesses.refs_stored_in_loop.release ();
  memory_accesses.all_refs_stored_in_loop.release ();

  if (memory_accesses.ttae_cache)
    free_affine_expand_cache (&memory_accesses.ttae_cache);

  free (bb_loop_postorder);
}
/* Moves invariants from loops.  Only "expensive" invariants are moved out --
   i.e. those that are likely to be a win regardless of the register
   pressure.  */

unsigned int
tree_ssa_lim (void)
{
  unsigned int todo;

  tree_ssa_lim_initialize ();

  /* Gathers information about memory accesses in the loops.  */
  analyze_memory_references ();

  /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
  fill_always_executed_in ();

  /* For each statement determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
  invariantness_dom_walker (CDI_DOMINATORS)
    .walk (cfun->cfg->x_entry_block_ptr);

  /* Execute store motion.  Force the necessary invariants to be moved
     out of the loops as well.  */
  store_motion ();

  /* Move the expressions that are expensive enough.  */
  todo = move_computations ();

  tree_ssa_lim_finalize ();

  return todo;
}
/* Loop invariant motion pass.  */

namespace {

const pass_data pass_data_lim =
{
  GIMPLE_PASS, /* type */
  "lim", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LIM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lim : public gimple_opt_pass
{
public:
  pass_lim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lim, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lim (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_loop_im != 0; }
  virtual unsigned int execute (function *);

}; // class pass_lim

unsigned int
pass_lim::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return tree_ssa_lim ();
}

} // anon namespace

gimple_opt_pass *
make_pass_lim (gcc::context *ctxt)
{
  return new pass_lim (ctxt);
}