gcc/tree-ssa-loop-im.c
1 /* Loop invariant motion.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "alias.h"
25 #include "symtab.h"
26 #include "tree.h"
27 #include "fold-const.h"
28 #include "tm_p.h"
29 #include "predict.h"
30 #include "hard-reg-set.h"
31 #include "function.h"
32 #include "dominance.h"
33 #include "cfg.h"
34 #include "cfganal.h"
35 #include "basic-block.h"
36 #include "gimple-pretty-print.h"
37 #include "tree-ssa-alias.h"
38 #include "internal-fn.h"
39 #include "tree-eh.h"
40 #include "gimple-expr.h"
41 #include "gimple.h"
42 #include "gimplify.h"
43 #include "gimple-iterator.h"
44 #include "gimple-ssa.h"
45 #include "tree-cfg.h"
46 #include "tree-phinodes.h"
47 #include "ssa-iterators.h"
48 #include "stringpool.h"
49 #include "tree-ssanames.h"
50 #include "tree-ssa-loop-manip.h"
51 #include "tree-ssa-loop.h"
52 #include "tree-into-ssa.h"
53 #include "cfgloop.h"
54 #include "domwalk.h"
55 #include "params.h"
56 #include "tree-pass.h"
57 #include "flags.h"
58 #include "tree-affine.h"
59 #include "tree-ssa-propagate.h"
60 #include "trans-mem.h"
61 #include "gimple-fold.h"
63 /* TODO: Support for predicated code motion. I.e.
65 while (1)
67 if (cond)
69 a = inv;
70 something;
74 Where COND and INV are invariants, but evaluating INV may trap or be
75 invalid for some other reason if !COND. This may be transformed to
77 if (cond)
78 a = inv;
79 while (1)
81 if (cond)
82 something;
83 } */
85 /* The auxiliary data kept for each statement. */
87 struct lim_aux_data
89 struct loop *max_loop; /* The outermost loop in that the statement
90 is invariant. */
92 struct loop *tgt_loop; /* The loop out of that we want to move the
93 invariant. */
95 struct loop *always_executed_in;
96 /* The outermost loop for that we are sure
97 the statement is executed if the loop
98 is entered. */
100 unsigned cost; /* Cost of the computation performed by the
101 statement. */
103 vec<gimple> depends; /* Vector of statements that must be also
104 hoisted out of the loop when this statement
105 is hoisted; i.e. those that define the
106 operands of the statement and are inside of
107 the MAX_LOOP loop. */
110 /* Maps statements to their lim_aux_data. */
112 static hash_map<gimple, lim_aux_data *> *lim_aux_data_map;
114 /* Description of a memory reference location. */
116 typedef struct mem_ref_loc
118 tree *ref; /* The reference itself. */
119 gimple stmt; /* The statement in that it occurs. */
120 } *mem_ref_loc_p;
123 /* Description of a memory reference. */
125 typedef struct im_mem_ref
127 unsigned id; /* ID assigned to the memory reference
128 (its index in memory_accesses.refs_list) */
129 hashval_t hash; /* Its hash value. */
131 /* The memory access itself and associated caching of alias-oracle
132 query meta-data. */
133 ao_ref mem;
135 bitmap stored; /* The set of loops in that this memory location
136 is stored to. */
137 vec<mem_ref_loc> accesses_in_loop;
138 /* The locations of the accesses. Vector
139 indexed by the loop number. */
141 /* The following sets are computed on demand. We keep both set and
142 its complement, so that we know whether the information was
143 already computed or not. */
144 bitmap_head indep_loop; /* The set of loops in that the memory
145 reference is independent, meaning:
146 If it is stored in the loop, this store
147 is independent of all other loads and
148 stores.
149 If it is only loaded, then it is independent
150 of all stores in the loop. */
151 bitmap_head dep_loop; /* The complement of INDEP_LOOP. */
152 } *mem_ref_p;
154 /* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
155 to record (in)dependence against stores in the loop and its subloops, the
156 second to record (in)dependence against all references in the loop
157 and its subloops. */
158 #define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
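/* For example (illustrative arithmetic only): loop number 3 uses bits
   LOOP_DEP_BIT (3, 0) == 6 and LOOP_DEP_BIT (3, 1) == 7 in these bitmaps.  */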
160 /* Mem_ref hashtable helpers. */
162 struct mem_ref_hasher : typed_noop_remove <im_mem_ref>
164 typedef im_mem_ref *value_type;
165 typedef tree_node *compare_type;
166 static inline hashval_t hash (const im_mem_ref *);
167 static inline bool equal (const im_mem_ref *, const tree_node *);
170 /* A hash function for struct im_mem_ref object OBJ. */
172 inline hashval_t
173 mem_ref_hasher::hash (const im_mem_ref *mem)
175 return mem->hash;
178 /* An equality function for struct im_mem_ref object MEM1 with
179 memory reference OBJ2. */
181 inline bool
182 mem_ref_hasher::equal (const im_mem_ref *mem1, const tree_node *obj2)
184 return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
188 /* Description of memory accesses in loops. */
190 static struct
192 /* The hash table of memory references accessed in loops. */
193 hash_table<mem_ref_hasher> *refs;
195 /* The list of memory references. */
196 vec<mem_ref_p> refs_list;
198 /* The set of memory references accessed in each loop. */
199 vec<bitmap_head> refs_in_loop;
201 /* The set of memory references stored in each loop. */
202 vec<bitmap_head> refs_stored_in_loop;
204 /* The set of memory references stored in each loop, including subloops. */
205 vec<bitmap_head> all_refs_stored_in_loop;
207 /* Cache for expanding memory addresses. */
208 hash_map<tree, name_expansion *> *ttae_cache;
209 } memory_accesses;
211 /* Obstack for the bitmaps in the above data structures. */
212 static bitmap_obstack lim_bitmap_obstack;
213 static obstack mem_ref_obstack;
215 static bool ref_indep_loop_p (struct loop *, mem_ref_p);
217 /* Minimum cost of an expensive expression. */
218 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
220 /* The outermost loop for which execution of the header guarantees that the
221 block will be executed. */
222 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
223 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
225 /* ID of the shared unanalyzable mem. */
226 #define UNANALYZABLE_MEM_ID 0
228 /* Whether the reference was analyzable. */
229 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
231 static struct lim_aux_data *
232 init_lim_data (gimple stmt)
234 lim_aux_data *p = XCNEW (struct lim_aux_data);
235 lim_aux_data_map->put (stmt, p);
237 return p;
240 static struct lim_aux_data *
241 get_lim_data (gimple stmt)
243 lim_aux_data **p = lim_aux_data_map->get (stmt);
244 if (!p)
245 return NULL;
247 return *p;
250 /* Releases the memory occupied by DATA. */
252 static void
253 free_lim_aux_data (struct lim_aux_data *data)
255 data->depends.release ();
256 free (data);
259 static void
260 clear_lim_data (gimple stmt)
262 lim_aux_data **p = lim_aux_data_map->get (stmt);
263 if (!p)
264 return;
266 free_lim_aux_data (*p);
267 *p = NULL;
271 /* The possibilities of statement movement. */
272 enum move_pos
274 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
275 MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement
276 to become executed -- memory accesses, ... */
277 MOVE_POSSIBLE /* Unlimited movement. */
281 /* If it is possible to hoist the statement STMT unconditionally,
282 returns MOVE_POSSIBLE.
283 If it is possible to hoist the statement STMT, but we must avoid making
284 it executed if it would not be executed in the original program (e.g.
285 because it may trap), return MOVE_PRESERVE_EXECUTION.
286 Otherwise return MOVE_IMPOSSIBLE. */
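/* For example (a non-authoritative summary of the checks below): a statement
   with a virtual definition or volatile operands yields MOVE_IMPOSSIBLE, a
   pure or const call or a statement that may trap yields
   MOVE_PRESERVE_EXECUTION, and a simple arithmetic assignment to an SSA name
   yields MOVE_POSSIBLE.  */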
288 enum move_pos
289 movement_possibility (gimple stmt)
291 tree lhs;
292 enum move_pos ret = MOVE_POSSIBLE;
294 if (flag_unswitch_loops
295 && gimple_code (stmt) == GIMPLE_COND)
297 /* If we perform unswitching, force the operands of the invariant
298 condition to be moved out of the loop. */
299 return MOVE_POSSIBLE;
302 if (gimple_code (stmt) == GIMPLE_PHI
303 && gimple_phi_num_args (stmt) <= 2
304 && !virtual_operand_p (gimple_phi_result (stmt))
305 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
306 return MOVE_POSSIBLE;
308 if (gimple_get_lhs (stmt) == NULL_TREE)
309 return MOVE_IMPOSSIBLE;
311 if (gimple_vdef (stmt))
312 return MOVE_IMPOSSIBLE;
314 if (stmt_ends_bb_p (stmt)
315 || gimple_has_volatile_ops (stmt)
316 || gimple_has_side_effects (stmt)
317 || stmt_could_throw_p (stmt))
318 return MOVE_IMPOSSIBLE;
320 if (is_gimple_call (stmt))
322 /* While pure or const call is guaranteed to have no side effects, we
323 cannot move it arbitrarily. Consider code like
325 char *s = something ();
327 while (1)
329 if (s)
330 t = strlen (s);
331 else
332 t = 0;
335 Here the strlen call cannot be moved out of the loop, even though
336 s is invariant. In addition to possibly creating a call with
337 invalid arguments, moving out a function call that is not executed
338 may cause performance regressions in case the call is costly and
339 not executed at all. */
340 ret = MOVE_PRESERVE_EXECUTION;
341 lhs = gimple_call_lhs (stmt);
343 else if (is_gimple_assign (stmt))
344 lhs = gimple_assign_lhs (stmt);
345 else
346 return MOVE_IMPOSSIBLE;
348 if (TREE_CODE (lhs) == SSA_NAME
349 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
350 return MOVE_IMPOSSIBLE;
352 if (TREE_CODE (lhs) != SSA_NAME
353 || gimple_could_trap_p (stmt))
354 return MOVE_PRESERVE_EXECUTION;
356 /* Non local loads in a transaction cannot be hoisted out. Well,
357 unless the load happens on every path out of the loop, but we
358 don't take this into account yet. */
359 if (flag_tm
360 && gimple_in_transaction (stmt)
361 && gimple_assign_single_p (stmt))
363 tree rhs = gimple_assign_rhs1 (stmt);
364 if (DECL_P (rhs) && is_global_var (rhs))
366 if (dump_file)
368 fprintf (dump_file, "Cannot hoist conditional load of ");
369 print_generic_expr (dump_file, rhs, TDF_SLIM);
370 fprintf (dump_file, " because it is in a transaction.\n");
372 return MOVE_IMPOSSIBLE;
376 return ret;
379 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
380 loop to that we could move the expression using DEF if it did not have
381 other operands, i.e. the outermost loop enclosing LOOP in that the value
382 of DEF is invariant. */
384 static struct loop *
385 outermost_invariant_loop (tree def, struct loop *loop)
387 gimple def_stmt;
388 basic_block def_bb;
389 struct loop *max_loop;
390 struct lim_aux_data *lim_data;
392 if (!def)
393 return superloop_at_depth (loop, 1);
395 if (TREE_CODE (def) != SSA_NAME)
397 gcc_assert (is_gimple_min_invariant (def));
398 return superloop_at_depth (loop, 1);
401 def_stmt = SSA_NAME_DEF_STMT (def);
402 def_bb = gimple_bb (def_stmt);
403 if (!def_bb)
404 return superloop_at_depth (loop, 1);
406 max_loop = find_common_loop (loop, def_bb->loop_father);
408 lim_data = get_lim_data (def_stmt);
409 if (lim_data != NULL && lim_data->max_loop != NULL)
410 max_loop = find_common_loop (max_loop,
411 loop_outer (lim_data->max_loop));
412 if (max_loop == loop)
413 return NULL;
414 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
416 return max_loop;
419 /* DATA is a structure containing information associated with a statement
420 inside LOOP. DEF is one of the operands of this statement.
422 Find the outermost loop enclosing LOOP in that value of DEF is invariant
423 and record this in DATA->max_loop field. If DEF itself is defined inside
424 this loop as well (i.e. we need to hoist it out of the loop if we want
425 to hoist the statement represented by DATA), record the statement in that
426 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
427 add the cost of the computation of DEF to the DATA->cost.
429 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
431 static bool
432 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
433 bool add_cost)
435 gimple def_stmt = SSA_NAME_DEF_STMT (def);
436 basic_block def_bb = gimple_bb (def_stmt);
437 struct loop *max_loop;
438 struct lim_aux_data *def_data;
440 if (!def_bb)
441 return true;
443 max_loop = outermost_invariant_loop (def, loop);
444 if (!max_loop)
445 return false;
447 if (flow_loop_nested_p (data->max_loop, max_loop))
448 data->max_loop = max_loop;
450 def_data = get_lim_data (def_stmt);
451 if (!def_data)
452 return true;
454 if (add_cost
455 /* Only add the cost if the statement defining DEF is inside LOOP,
456 i.e. if it is likely that by moving the invariants dependent
457 on it, we will be able to avoid creating a new register for
458 it (since it will be only used in these dependent invariants). */
459 && def_bb->loop_father == loop)
460 data->cost += def_data->cost;
462 data->depends.safe_push (def_stmt);
464 return true;
467 /* Returns an estimate for a cost of statement STMT. The values here
468 are just ad-hoc constants, similar to costs for inlining. */
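/* For example (summarizing the cases below): divisions, multiplications,
   shifts and statements referencing memory count as LIM_EXPENSIVE, a plain
   SSA name copy or PAREN_EXPR counts as 0, and most other assignments
   count as 1.  */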
470 static unsigned
471 stmt_cost (gimple stmt)
473 /* Always try to create possibilities for unswitching. */
474 if (gimple_code (stmt) == GIMPLE_COND
475 || gimple_code (stmt) == GIMPLE_PHI)
476 return LIM_EXPENSIVE;
478 /* We should be hoisting calls if possible. */
479 if (is_gimple_call (stmt))
481 tree fndecl;
483 /* Unless the call is a builtin_constant_p; this always folds to a
484 constant, so moving it is useless. */
485 fndecl = gimple_call_fndecl (stmt);
486 if (fndecl
487 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
488 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
489 return 0;
491 return LIM_EXPENSIVE;
494 /* Hoisting memory references out should almost surely be a win. */
495 if (gimple_references_memory_p (stmt))
496 return LIM_EXPENSIVE;
498 if (gimple_code (stmt) != GIMPLE_ASSIGN)
499 return 1;
501 switch (gimple_assign_rhs_code (stmt))
503 case MULT_EXPR:
504 case WIDEN_MULT_EXPR:
505 case WIDEN_MULT_PLUS_EXPR:
506 case WIDEN_MULT_MINUS_EXPR:
507 case DOT_PROD_EXPR:
508 case FMA_EXPR:
509 case TRUNC_DIV_EXPR:
510 case CEIL_DIV_EXPR:
511 case FLOOR_DIV_EXPR:
512 case ROUND_DIV_EXPR:
513 case EXACT_DIV_EXPR:
514 case CEIL_MOD_EXPR:
515 case FLOOR_MOD_EXPR:
516 case ROUND_MOD_EXPR:
517 case TRUNC_MOD_EXPR:
518 case RDIV_EXPR:
519 /* Division and multiplication are usually expensive. */
520 return LIM_EXPENSIVE;
522 case LSHIFT_EXPR:
523 case RSHIFT_EXPR:
524 case WIDEN_LSHIFT_EXPR:
525 case LROTATE_EXPR:
526 case RROTATE_EXPR:
527 /* Shifts and rotates are usually expensive. */
528 return LIM_EXPENSIVE;
530 case CONSTRUCTOR:
531 /* Make vector construction cost proportional to the number
532 of elements. */
533 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
535 case SSA_NAME:
536 case PAREN_EXPR:
537 /* Whether or not something is wrapped inside a PAREN_EXPR
538 should not change move cost. Nor should an intermediate
539 unpropagated SSA name copy. */
540 return 0;
542 default:
543 return 1;
547 /* Finds the outermost loop between OUTER and LOOP in that the memory reference
548 REF is independent. If REF is not independent in LOOP, NULL is returned
549 instead. */
551 static struct loop *
552 outermost_indep_loop (struct loop *outer, struct loop *loop, mem_ref_p ref)
554 struct loop *aloop;
556 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
557 return NULL;
559 for (aloop = outer;
560 aloop != loop;
561 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
562 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
563 && ref_indep_loop_p (aloop, ref))
564 return aloop;
566 if (ref_indep_loop_p (loop, ref))
567 return loop;
568 else
569 return NULL;
572 /* If there is a simple load or store to a memory reference in STMT, returns
573 the location of the memory reference, and sets IS_STORE according to whether
574 it is a store or load. Otherwise, returns NULL. */
576 static tree *
577 simple_mem_ref_in_stmt (gimple stmt, bool *is_store)
579 tree *lhs, *rhs;
581 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
582 if (!gimple_assign_single_p (stmt))
583 return NULL;
585 lhs = gimple_assign_lhs_ptr (stmt);
586 rhs = gimple_assign_rhs1_ptr (stmt);
588 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
590 *is_store = false;
591 return rhs;
593 else if (gimple_vdef (stmt)
594 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
596 *is_store = true;
597 return lhs;
599 else
600 return NULL;
603 /* Returns the memory reference contained in STMT. */
605 static mem_ref_p
606 mem_ref_in_stmt (gimple stmt)
608 bool store;
609 tree *mem = simple_mem_ref_in_stmt (stmt, &store);
610 hashval_t hash;
611 mem_ref_p ref;
613 if (!mem)
614 return NULL;
615 gcc_assert (!store);
617 hash = iterative_hash_expr (*mem, 0);
618 ref = memory_accesses.refs->find_with_hash (*mem, hash);
620 gcc_assert (ref != NULL);
621 return ref;
624 /* From a controlling predicate in DOM determine the arguments from
625 the PHI node PHI that are chosen if the predicate evaluates to
626 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
627 they are non-NULL. Returns true if the arguments can be determined,
628 else return false. */
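/* Illustrative example (hypothetical blocks and SSA names):

     DOM:  if (a_1 < b_2)     true edge -> BB1, false edge -> BB2
     BB1:  ...
     BB2:  ...
     BB:   x_3 = PHI <x_4 (BB1), x_5 (BB2)>

   Here *TRUE_ARG_P would be set to x_4 and *FALSE_ARG_P to x_5.  */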
630 static bool
631 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
632 tree *true_arg_p, tree *false_arg_p)
634 basic_block bb = gimple_bb (phi);
635 edge true_edge, false_edge, tem;
636 tree arg0 = NULL_TREE, arg1 = NULL_TREE;
638 /* We have to verify that one edge into the PHI node is dominated
639 by the true edge of the predicate block and the other edge
640 dominated by the false edge. This ensures that the PHI argument
641 we are going to take is completely determined by the path we
642 take from the predicate block.
643 We can only use BB dominance checks below if the destinations of
644 the true/false edges are dominated by their edge, thus only
645 have a single predecessor. */
646 extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
647 tem = EDGE_PRED (bb, 0);
648 if (tem == true_edge
649 || (single_pred_p (true_edge->dest)
650 && (tem->src == true_edge->dest
651 || dominated_by_p (CDI_DOMINATORS,
652 tem->src, true_edge->dest))))
653 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
654 else if (tem == false_edge
655 || (single_pred_p (false_edge->dest)
656 && (tem->src == false_edge->dest
657 || dominated_by_p (CDI_DOMINATORS,
658 tem->src, false_edge->dest))))
659 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
660 else
661 return false;
662 tem = EDGE_PRED (bb, 1);
663 if (tem == true_edge
664 || (single_pred_p (true_edge->dest)
665 && (tem->src == true_edge->dest
666 || dominated_by_p (CDI_DOMINATORS,
667 tem->src, true_edge->dest))))
668 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
669 else if (tem == false_edge
670 || (single_pred_p (false_edge->dest)
671 && (tem->src == false_edge->dest
672 || dominated_by_p (CDI_DOMINATORS,
673 tem->src, false_edge->dest))))
674 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
675 else
676 return false;
677 if (!arg0 || !arg1)
678 return false;
680 if (true_arg_p)
681 *true_arg_p = arg0;
682 if (false_arg_p)
683 *false_arg_p = arg1;
685 return true;
688 /* Determine the outermost loop to that it is possible to hoist a statement
689 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
690 the outermost loop in that the value computed by STMT is invariant.
691 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
692 we preserve the fact whether STMT is executed. It also fills other related
693 information to LIM_DATA (STMT).
695 The function returns false if STMT cannot be hoisted outside of the loop it
696 is defined in, and true otherwise. */
698 static bool
699 determine_max_movement (gimple stmt, bool must_preserve_exec)
701 basic_block bb = gimple_bb (stmt);
702 struct loop *loop = bb->loop_father;
703 struct loop *level;
704 struct lim_aux_data *lim_data = get_lim_data (stmt);
705 tree val;
706 ssa_op_iter iter;
708 if (must_preserve_exec)
709 level = ALWAYS_EXECUTED_IN (bb);
710 else
711 level = superloop_at_depth (loop, 1);
712 lim_data->max_loop = level;
714 if (gphi *phi = dyn_cast <gphi *> (stmt))
716 use_operand_p use_p;
717 unsigned min_cost = UINT_MAX;
718 unsigned total_cost = 0;
719 struct lim_aux_data *def_data;
721 /* We will end up promoting dependencies to be unconditionally
722 evaluated. For this reason the PHI cost (and thus the
723 cost we remove from the loop by doing the invariant motion)
724 is that of the cheapest PHI argument dependency chain. */
725 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
727 val = USE_FROM_PTR (use_p);
729 if (TREE_CODE (val) != SSA_NAME)
731 /* Assign cost 1 to constants. */
732 min_cost = MIN (min_cost, 1);
733 total_cost += 1;
734 continue;
736 if (!add_dependency (val, lim_data, loop, false))
737 return false;
739 gimple def_stmt = SSA_NAME_DEF_STMT (val);
740 if (gimple_bb (def_stmt)
741 && gimple_bb (def_stmt)->loop_father == loop)
743 def_data = get_lim_data (def_stmt);
744 if (def_data)
746 min_cost = MIN (min_cost, def_data->cost);
747 total_cost += def_data->cost;
752 min_cost = MIN (min_cost, total_cost);
753 lim_data->cost += min_cost;
755 if (gimple_phi_num_args (phi) > 1)
757 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
758 gimple cond;
759 if (gsi_end_p (gsi_last_bb (dom)))
760 return false;
761 cond = gsi_stmt (gsi_last_bb (dom));
762 if (gimple_code (cond) != GIMPLE_COND)
763 return false;
764 /* Verify that this is an extended form of a diamond and
765 the PHI arguments are completely controlled by the
766 predicate in DOM. */
767 if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
768 return false;
770 /* Fold in dependencies and cost of the condition. */
771 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
773 if (!add_dependency (val, lim_data, loop, false))
774 return false;
775 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
776 if (def_data)
777 total_cost += def_data->cost;
780 /* We want to avoid unconditionally executing very expensive
781 operations. As costs for our dependencies cannot be
782 negative, just claim we are not invariant in this case.
783 We also are not sure whether the control-flow inside the
784 loop will vanish. */
785 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
786 && !(min_cost != 0
787 && total_cost / min_cost <= 2))
788 return false;
790 /* Assume that the control-flow in the loop will vanish.
791 ??? We should verify this and not artificially increase
792 the cost if that is not the case. */
793 lim_data->cost += stmt_cost (stmt);
796 return true;
798 else
799 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
800 if (!add_dependency (val, lim_data, loop, true))
801 return false;
803 if (gimple_vuse (stmt))
805 mem_ref_p ref = mem_ref_in_stmt (stmt);
807 if (ref)
809 lim_data->max_loop
810 = outermost_indep_loop (lim_data->max_loop, loop, ref);
811 if (!lim_data->max_loop)
812 return false;
814 else
816 if ((val = gimple_vuse (stmt)) != NULL_TREE)
818 if (!add_dependency (val, lim_data, loop, false))
819 return false;
824 lim_data->cost += stmt_cost (stmt);
826 return true;
829 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
830 and that one of the operands of this statement is computed by STMT.
831 Ensure that STMT (together with all the statements that define its
832 operands) is hoisted at least out of the loop LEVEL. */
834 static void
835 set_level (gimple stmt, struct loop *orig_loop, struct loop *level)
837 struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
838 struct lim_aux_data *lim_data;
839 gimple dep_stmt;
840 unsigned i;
842 stmt_loop = find_common_loop (orig_loop, stmt_loop);
843 lim_data = get_lim_data (stmt);
844 if (lim_data != NULL && lim_data->tgt_loop != NULL)
845 stmt_loop = find_common_loop (stmt_loop,
846 loop_outer (lim_data->tgt_loop));
847 if (flow_loop_nested_p (stmt_loop, level))
848 return;
850 gcc_assert (level == lim_data->max_loop
851 || flow_loop_nested_p (lim_data->max_loop, level));
853 lim_data->tgt_loop = level;
854 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
855 set_level (dep_stmt, orig_loop, level);
858 /* Determines an outermost loop from that we want to hoist the statement STMT.
859 For now we choose the outermost possible loop. TODO -- use profiling
860 information to set it more sanely. */
862 static void
863 set_profitable_level (gimple stmt)
865 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
868 /* Returns true if STMT is a call that has side effects. */
870 static bool
871 nonpure_call_p (gimple stmt)
873 if (gimple_code (stmt) != GIMPLE_CALL)
874 return false;
876 return gimple_has_side_effects (stmt);
879 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
881 static gimple
882 rewrite_reciprocal (gimple_stmt_iterator *bsi)
884 gassign *stmt, *stmt1, *stmt2;
885 tree name, lhs, type;
886 tree real_one;
887 gimple_stmt_iterator gsi;
889 stmt = as_a <gassign *> (gsi_stmt (*bsi));
890 lhs = gimple_assign_lhs (stmt);
891 type = TREE_TYPE (lhs);
893 real_one = build_one_cst (type);
895 name = make_temp_ssa_name (type, NULL, "reciptmp");
896 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
897 gimple_assign_rhs2 (stmt));
898 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
899 gimple_assign_rhs1 (stmt));
901 /* Replace division stmt with reciprocal and multiply stmts.
902 The multiply stmt is not invariant, so update iterator
903 and avoid rescanning. */
904 gsi = *bsi;
905 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
906 gsi_replace (&gsi, stmt2, true);
908 /* Continue processing with invariant reciprocal statement. */
909 return stmt1;
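/* Illustrative example of this rewrite (assuming b is loop invariant while
   a is not):

     x = a / b;

   becomes

     reciptmp = 1.0 / b;    <- invariant, can be hoisted out of the loop
     x = a * reciptmp;      <- stays in the loop  */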
912 /* Check if the pattern at *BSI is a bittest of the form
913 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
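/* Illustrative example of this rewrite (assuming B is loop invariant while
   A is not):

     tmp = A >> B;             shifttmp = 1 << B;   <- invariant, hoistable
     t = tmp & 1;      ==>     t = A & shifttmp;
     if (t != 0) ...           if (t != 0) ...

   The mask 1 << B can then be moved out of the loop.  */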
915 static gimple
916 rewrite_bittest (gimple_stmt_iterator *bsi)
918 gassign *stmt;
919 gimple stmt1;
920 gassign *stmt2;
921 gimple use_stmt;
922 gcond *cond_stmt;
923 tree lhs, name, t, a, b;
924 use_operand_p use;
926 stmt = as_a <gassign *> (gsi_stmt (*bsi));
927 lhs = gimple_assign_lhs (stmt);
929 /* Verify that the single use of lhs is a comparison against zero. */
930 if (TREE_CODE (lhs) != SSA_NAME
931 || !single_imm_use (lhs, &use, &use_stmt))
932 return stmt;
933 cond_stmt = dyn_cast <gcond *> (use_stmt);
934 if (!cond_stmt)
935 return stmt;
936 if (gimple_cond_lhs (cond_stmt) != lhs
937 || (gimple_cond_code (cond_stmt) != NE_EXPR
938 && gimple_cond_code (cond_stmt) != EQ_EXPR)
939 || !integer_zerop (gimple_cond_rhs (cond_stmt)))
940 return stmt;
942 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
943 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
944 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
945 return stmt;
947 /* There is a conversion in between possibly inserted by fold. */
948 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
950 t = gimple_assign_rhs1 (stmt1);
951 if (TREE_CODE (t) != SSA_NAME
952 || !has_single_use (t))
953 return stmt;
954 stmt1 = SSA_NAME_DEF_STMT (t);
955 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
956 return stmt;
959 /* Verify that B is loop invariant but A is not. Verify that with
960 all the stmt walking we are still in the same loop. */
961 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
962 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
963 return stmt;
965 a = gimple_assign_rhs1 (stmt1);
966 b = gimple_assign_rhs2 (stmt1);
968 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
969 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
971 gimple_stmt_iterator rsi;
973 /* 1 << B */
974 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
975 build_int_cst (TREE_TYPE (a), 1), b);
976 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
977 stmt1 = gimple_build_assign (name, t);
979 /* A & (1 << B) */
980 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
981 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
982 stmt2 = gimple_build_assign (name, t);
984 /* Replace the SSA_NAME we compare against zero. Adjust
985 the type of zero accordingly. */
986 SET_USE (use, name);
987 gimple_cond_set_rhs (cond_stmt,
988 build_int_cst_type (TREE_TYPE (name),
989 0));
991 /* Don't use gsi_replace here, none of the new assignments sets
992 the variable originally set in stmt. Move bsi to stmt1, and
993 then remove the original stmt, so that we get a chance to
994 retain debug info for it. */
995 rsi = *bsi;
996 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
997 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
998 gsi_remove (&rsi, true);
1000 return stmt1;
1003 return stmt;
1006 /* For each statement determines the outermost loop in that it is invariant,
1007 statements on whose motion it depends and the cost of the computation.
1008 This information is stored to the LIM_DATA structure associated with
1009 each statement. */
1010 class invariantness_dom_walker : public dom_walker
1012 public:
1013 invariantness_dom_walker (cdi_direction direction)
1014 : dom_walker (direction) {}
1016 virtual void before_dom_children (basic_block);
1019 /* Determine the outermost loops in that statements in basic block BB are
1020 invariant, and record them to the LIM_DATA associated with the statements.
1021 Callback for dom_walker. */
1023 void
1024 invariantness_dom_walker::before_dom_children (basic_block bb)
1026 enum move_pos pos;
1027 gimple_stmt_iterator bsi;
1028 gimple stmt;
1029 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1030 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
1031 struct lim_aux_data *lim_data;
1033 if (!loop_outer (bb->loop_father))
1034 return;
1036 if (dump_file && (dump_flags & TDF_DETAILS))
1037 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1038 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1040 /* Look at PHI nodes, but only if there are at most two.
1041 ??? We could relax this further by post-processing the inserted
1042 code and transforming adjacent cond-exprs with the same predicate
1043 to control flow again. */
1044 bsi = gsi_start_phis (bb);
1045 if (!gsi_end_p (bsi)
1046 && ((gsi_next (&bsi), gsi_end_p (bsi))
1047 || (gsi_next (&bsi), gsi_end_p (bsi))))
1048 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1050 stmt = gsi_stmt (bsi);
1052 pos = movement_possibility (stmt);
1053 if (pos == MOVE_IMPOSSIBLE)
1054 continue;
1056 lim_data = init_lim_data (stmt);
1057 lim_data->always_executed_in = outermost;
1059 if (!determine_max_movement (stmt, false))
1061 lim_data->max_loop = NULL;
1062 continue;
1065 if (dump_file && (dump_flags & TDF_DETAILS))
1067 print_gimple_stmt (dump_file, stmt, 2, 0);
1068 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1069 loop_depth (lim_data->max_loop),
1070 lim_data->cost);
1073 if (lim_data->cost >= LIM_EXPENSIVE)
1074 set_profitable_level (stmt);
1077 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1079 stmt = gsi_stmt (bsi);
1081 pos = movement_possibility (stmt);
1082 if (pos == MOVE_IMPOSSIBLE)
1084 if (nonpure_call_p (stmt))
1086 maybe_never = true;
1087 outermost = NULL;
1089 /* Make sure to note always_executed_in for stores to make
1090 store-motion work. */
1091 else if (stmt_makes_single_store (stmt))
1093 struct lim_aux_data *lim_data = init_lim_data (stmt);
1094 lim_data->always_executed_in = outermost;
1096 continue;
1099 if (is_gimple_assign (stmt)
1100 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1101 == GIMPLE_BINARY_RHS))
1103 tree op0 = gimple_assign_rhs1 (stmt);
1104 tree op1 = gimple_assign_rhs2 (stmt);
1105 struct loop *ol1 = outermost_invariant_loop (op1,
1106 loop_containing_stmt (stmt));
1108 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1109 to be hoisted out of loop, saving expensive divide. */
1110 if (pos == MOVE_POSSIBLE
1111 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1112 && flag_unsafe_math_optimizations
1113 && !flag_trapping_math
1114 && ol1 != NULL
1115 && outermost_invariant_loop (op0, ol1) == NULL)
1116 stmt = rewrite_reciprocal (&bsi);
1118 /* If the shift count is invariant, convert (A >> B) & 1 to
1119 A & (1 << B) allowing the bit mask to be hoisted out of the loop
1120 saving an expensive shift. */
1121 if (pos == MOVE_POSSIBLE
1122 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1123 && integer_onep (op1)
1124 && TREE_CODE (op0) == SSA_NAME
1125 && has_single_use (op0))
1126 stmt = rewrite_bittest (&bsi);
1129 lim_data = init_lim_data (stmt);
1130 lim_data->always_executed_in = outermost;
1132 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1133 continue;
1135 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1137 lim_data->max_loop = NULL;
1138 continue;
1141 if (dump_file && (dump_flags & TDF_DETAILS))
1143 print_gimple_stmt (dump_file, stmt, 2, 0);
1144 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1145 loop_depth (lim_data->max_loop),
1146 lim_data->cost);
1149 if (lim_data->cost >= LIM_EXPENSIVE)
1150 set_profitable_level (stmt);
1154 class move_computations_dom_walker : public dom_walker
1156 public:
1157 move_computations_dom_walker (cdi_direction direction)
1158 : dom_walker (direction), todo_ (0) {}
1160 virtual void before_dom_children (basic_block);
1162 unsigned int todo_;
1165 /* Hoist the statements in basic block BB out of the loops prescribed by
1166 data stored in LIM_DATA structures associated with each statement. Callback
1167 for walk_dominator_tree. */
1169 void
1170 move_computations_dom_walker::before_dom_children (basic_block bb)
1172 struct loop *level;
1173 unsigned cost = 0;
1174 struct lim_aux_data *lim_data;
1176 if (!loop_outer (bb->loop_father))
1177 return;
1179 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1181 gassign *new_stmt;
1182 gphi *stmt = bsi.phi ();
1184 lim_data = get_lim_data (stmt);
1185 if (lim_data == NULL)
1187 gsi_next (&bsi);
1188 continue;
1191 cost = lim_data->cost;
1192 level = lim_data->tgt_loop;
1193 clear_lim_data (stmt);
1195 if (!level)
1197 gsi_next (&bsi);
1198 continue;
1201 if (dump_file && (dump_flags & TDF_DETAILS))
1203 fprintf (dump_file, "Moving PHI node\n");
1204 print_gimple_stmt (dump_file, stmt, 0, 0);
1205 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1206 cost, level->num);
1209 if (gimple_phi_num_args (stmt) == 1)
1211 tree arg = PHI_ARG_DEF (stmt, 0);
1212 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1213 TREE_CODE (arg), arg);
1215 else
1217 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1218 gimple cond = gsi_stmt (gsi_last_bb (dom));
1219 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1220 /* Get the PHI arguments corresponding to the true and false
1221 edges of COND. */
1222 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1223 gcc_assert (arg0 && arg1);
1224 t = build2 (gimple_cond_code (cond), boolean_type_node,
1225 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1226 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1227 COND_EXPR, t, arg0, arg1);
1228 todo_ |= TODO_cleanup_cfg;
1230 if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1231 && (!ALWAYS_EXECUTED_IN (bb)
1232 || (ALWAYS_EXECUTED_IN (bb) != level
1233 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1235 tree lhs = gimple_assign_lhs (new_stmt);
1236 SSA_NAME_RANGE_INFO (lhs) = NULL;
1237 SSA_NAME_ANTI_RANGE_P (lhs) = 0;
1239 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1240 remove_phi_node (&bsi, false);
1243 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1245 edge e;
1247 gimple stmt = gsi_stmt (bsi);
1249 lim_data = get_lim_data (stmt);
1250 if (lim_data == NULL)
1252 gsi_next (&bsi);
1253 continue;
1256 cost = lim_data->cost;
1257 level = lim_data->tgt_loop;
1258 clear_lim_data (stmt);
1260 if (!level)
1262 gsi_next (&bsi);
1263 continue;
1266 /* We do not really want to move conditionals out of the loop; we just
1267 placed it here to force its operands to be moved if necessary. */
1268 if (gimple_code (stmt) == GIMPLE_COND)
1269 continue;
1271 if (dump_file && (dump_flags & TDF_DETAILS))
1273 fprintf (dump_file, "Moving statement\n");
1274 print_gimple_stmt (dump_file, stmt, 0, 0);
1275 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1276 cost, level->num);
1279 e = loop_preheader_edge (level);
1280 gcc_assert (!gimple_vdef (stmt));
1281 if (gimple_vuse (stmt))
1283 /* The new VUSE is the one from the virtual PHI in the loop
1284 header or the one already present. */
1285 gphi_iterator gsi2;
1286 for (gsi2 = gsi_start_phis (e->dest);
1287 !gsi_end_p (gsi2); gsi_next (&gsi2))
1289 gphi *phi = gsi2.phi ();
1290 if (virtual_operand_p (gimple_phi_result (phi)))
1292 gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1293 break;
1297 gsi_remove (&bsi, false);
1298 if (gimple_has_lhs (stmt)
1299 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1300 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1301 && (!ALWAYS_EXECUTED_IN (bb)
1302 || !(ALWAYS_EXECUTED_IN (bb) == level
1303 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1305 tree lhs = gimple_get_lhs (stmt);
1306 SSA_NAME_RANGE_INFO (lhs) = NULL;
1307 SSA_NAME_ANTI_RANGE_P (lhs) = 0;
1309 /* In case this is a stmt that is not unconditionally executed
1310 when the target loop header is executed and the stmt may
1311 invoke undefined integer or pointer overflow, rewrite it to
1312 unsigned arithmetic. */
1313 if (is_gimple_assign (stmt)
1314 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1315 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1316 && arith_code_with_undefined_signed_overflow
1317 (gimple_assign_rhs_code (stmt))
1318 && (!ALWAYS_EXECUTED_IN (bb)
1319 || !(ALWAYS_EXECUTED_IN (bb) == level
1320 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1321 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1322 else
1323 gsi_insert_on_edge (e, stmt);
1327 /* Hoist the statements out of the loops prescribed by data stored in
1328 LIM_DATA structures associated with each statement. */
1330 static unsigned int
1331 move_computations (void)
1333 move_computations_dom_walker walker (CDI_DOMINATORS);
1334 walker.walk (cfun->cfg->x_entry_block_ptr);
1336 gsi_commit_edge_inserts ();
1337 if (need_ssa_update_p (cfun))
1338 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1340 return walker.todo_;
1343 /* Checks whether the statement defining variable *INDEX can be hoisted
1344 out of the loop passed in DATA. Callback for for_each_index. */
1346 static bool
1347 may_move_till (tree ref, tree *index, void *data)
1349 struct loop *loop = (struct loop *) data, *max_loop;
1351 /* If REF is an array reference, check also that the step and the lower
1352 bound are invariant in LOOP. */
1353 if (TREE_CODE (ref) == ARRAY_REF)
1355 tree step = TREE_OPERAND (ref, 3);
1356 tree lbound = TREE_OPERAND (ref, 2);
1358 max_loop = outermost_invariant_loop (step, loop);
1359 if (!max_loop)
1360 return false;
1362 max_loop = outermost_invariant_loop (lbound, loop);
1363 if (!max_loop)
1364 return false;
1367 max_loop = outermost_invariant_loop (*index, loop);
1368 if (!max_loop)
1369 return false;
1371 return true;
1374 /* If OP is SSA NAME, force the statement that defines it to be
1375 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
1377 static void
1378 force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1380 gimple stmt;
1382 if (!op
1383 || is_gimple_min_invariant (op))
1384 return;
1386 gcc_assert (TREE_CODE (op) == SSA_NAME);
1388 stmt = SSA_NAME_DEF_STMT (op);
1389 if (gimple_nop_p (stmt))
1390 return;
1392 set_level (stmt, orig_loop, loop);
1395 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1396 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1397 for_each_index. */
1399 struct fmt_data
1401 struct loop *loop;
1402 struct loop *orig_loop;
1405 static bool
1406 force_move_till (tree ref, tree *index, void *data)
1408 struct fmt_data *fmt_data = (struct fmt_data *) data;
1410 if (TREE_CODE (ref) == ARRAY_REF)
1412 tree step = TREE_OPERAND (ref, 3);
1413 tree lbound = TREE_OPERAND (ref, 2);
1415 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1416 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1419 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1421 return true;
1424 /* A function to free the mem_ref object MEM. */
1426 static void
1427 memref_free (struct im_mem_ref *mem)
1429 mem->accesses_in_loop.release ();
1432 /* Allocates and returns a memory reference description for MEM whose hash
1433 value is HASH and id is ID. */
1435 static mem_ref_p
1436 mem_ref_alloc (tree mem, unsigned hash, unsigned id)
1438 mem_ref_p ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
1439 ao_ref_init (&ref->mem, mem);
1440 ref->id = id;
1441 ref->hash = hash;
1442 ref->stored = NULL;
1443 bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
1444 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1445 ref->accesses_in_loop.create (1);
1447 return ref;
1450 /* Records memory reference location *LOC in LOOP to the memory reference
1451 description REF. The reference occurs in statement STMT. */
1453 static void
1454 record_mem_ref_loc (mem_ref_p ref, gimple stmt, tree *loc)
1456 mem_ref_loc aref;
1457 aref.stmt = stmt;
1458 aref.ref = loc;
1459 ref->accesses_in_loop.safe_push (aref);
1462 /* Set the LOOP bit in REF's stored bitmap, allocating it if
1463 necessary. Return whether a bit was changed. */
1465 static bool
1466 set_ref_stored_in_loop (mem_ref_p ref, struct loop *loop)
1468 if (!ref->stored)
1469 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1470 return bitmap_set_bit (ref->stored, loop->num);
1473 /* Marks reference REF as stored in LOOP. */
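/* For example, if REF is stored in an innermost loop, the walk below also
   records it as stored in every enclosing loop, stopping early once the
   corresponding bit is already set.  */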
1475 static void
1476 mark_ref_stored (mem_ref_p ref, struct loop *loop)
1478 while (loop != current_loops->tree_root
1479 && set_ref_stored_in_loop (ref, loop))
1480 loop = loop_outer (loop);
1483 /* Gathers memory references in statement STMT in LOOP, storing the
1484 information about them in the memory_accesses structure. Marks
1485 the vops accessed through unrecognized statements there as
1486 well. */
1488 static void
1489 gather_mem_refs_stmt (struct loop *loop, gimple stmt)
1491 tree *mem = NULL;
1492 hashval_t hash;
1493 im_mem_ref **slot;
1494 mem_ref_p ref;
1495 bool is_stored;
1496 unsigned id;
1498 if (!gimple_vuse (stmt))
1499 return;
1501 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1502 if (!mem)
1504 /* We use the shared mem_ref for all unanalyzable refs. */
1505 id = UNANALYZABLE_MEM_ID;
1506 ref = memory_accesses.refs_list[id];
1507 if (dump_file && (dump_flags & TDF_DETAILS))
1509 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1510 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1512 is_stored = gimple_vdef (stmt);
1514 else
1516 hash = iterative_hash_expr (*mem, 0);
1517 slot = memory_accesses.refs->find_slot_with_hash (*mem, hash, INSERT);
1518 if (*slot)
1520 ref = (mem_ref_p) *slot;
1521 id = ref->id;
1523 else
1525 id = memory_accesses.refs_list.length ();
1526 ref = mem_ref_alloc (*mem, hash, id);
1527 memory_accesses.refs_list.safe_push (ref);
1528 *slot = ref;
1530 if (dump_file && (dump_flags & TDF_DETAILS))
1532 fprintf (dump_file, "Memory reference %u: ", id);
1533 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1534 fprintf (dump_file, "\n");
1538 record_mem_ref_loc (ref, stmt, mem);
1540 bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
1541 if (is_stored)
1543 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1544 mark_ref_stored (ref, loop);
1546 return;
1549 static unsigned *bb_loop_postorder;
1551 /* qsort sort function to sort blocks after their loop fathers postorder. */
1553 static int
1554 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
1556 basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
1557 basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
1558 struct loop *loop1 = bb1->loop_father;
1559 struct loop *loop2 = bb2->loop_father;
1560 if (loop1->num == loop2->num)
1561 return 0;
1562 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1565 /* qsort sort function to sort ref locs after their loop fathers postorder. */
1567 static int
1568 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
1570 mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
1571 mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
1572 struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1573 struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1574 if (loop1->num == loop2->num)
1575 return 0;
1576 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1579 /* Gathers memory references in loops. */
1581 static void
1582 analyze_memory_references (void)
1584 gimple_stmt_iterator bsi;
1585 basic_block bb, *bbs;
1586 struct loop *loop, *outer;
1587 unsigned i, n;
1589 /* Collect all basic-blocks in loops and sort them after their
1590 loops postorder. */
1591 i = 0;
1592 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1593 FOR_EACH_BB_FN (bb, cfun)
1594 if (bb->loop_father != current_loops->tree_root)
1595 bbs[i++] = bb;
1596 n = i;
1597 qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);
1599 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1600 That results in better locality for all the bitmaps. */
1601 for (i = 0; i < n; ++i)
1603 basic_block bb = bbs[i];
1604 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1605 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1608 /* Sort the location list of gathered memory references after their
1609 loop postorder number. */
1610 im_mem_ref *ref;
1611 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1612 ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);
1614 free (bbs);
1615 // free (bb_loop_postorder);
1617 /* Propagate the information about accessed memory references up
1618 the loop hierarchy. */
1619 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1621 /* Finalize the overall touched references (including subloops). */
1622 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1623 &memory_accesses.refs_stored_in_loop[loop->num]);
1625 /* Propagate the information about accessed memory references up
1626 the loop hierarchy. */
1627 outer = loop_outer (loop);
1628 if (outer == current_loops->tree_root)
1629 continue;
1631 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1632 &memory_accesses.all_refs_stored_in_loop[loop->num]);
1636 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1637 tree_to_aff_combination_expand. */
1639 static bool
1640 mem_refs_may_alias_p (mem_ref_p mem1, mem_ref_p mem2,
1641 hash_map<tree, name_expansion *> **ttae_cache)
1643 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1644 object and their offset differ in such a way that the locations cannot
1645 overlap, then they cannot alias. */
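/* Illustrative example: for accesses like p[i] and p[i + 1] the expanded
   affine offsets share the same base and differ by exactly one element,
   so the overlap test below can show that the two locations never alias.  */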
1646 widest_int size1, size2;
1647 aff_tree off1, off2;
1649 /* Perform basic offset and type-based disambiguation. */
1650 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
1651 return false;
1653 /* The expansion of addresses may be a bit expensive, thus we only do
1654 the check at -O2 and higher optimization levels. */
1655 if (optimize < 2)
1656 return true;
1658 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1659 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1660 aff_combination_expand (&off1, ttae_cache);
1661 aff_combination_expand (&off2, ttae_cache);
1662 aff_combination_scale (&off1, -1);
1663 aff_combination_add (&off2, &off1);
1665 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1666 return false;
1668 return true;
1671 /* Compare function for bsearch searching for reference locations
1672 in a loop. */
1674 static int
1675 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
1677 struct loop *loop = (struct loop *)const_cast<void *>(loop_);
1678 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1679 struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1680 if (loop->num == loc_loop->num
1681 || flow_loop_nested_p (loop, loc_loop))
1682 return 0;
1683 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1684 ? -1 : 1);
1687 /* Iterates over all locations of REF in LOOP and its subloops calling
1688 fn.operator() with the location as argument. When that operator
1689 returns true the iteration is stopped and true is returned.
1690 Otherwise false is returned. */
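/* For example, rewrite_mem_refs below uses this as

     for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));

   to rewrite every location of REF in LOOP by TMP_VAR.  */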
1692 template <typename FN>
1693 static bool
1694 for_all_locs_in_loop (struct loop *loop, mem_ref_p ref, FN fn)
1696 unsigned i;
1697 mem_ref_loc_p loc;
1699 /* Search for the cluster of locs in the accesses_in_loop vector
1700 which is sorted after postorder index of the loop father. */
1701 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
1702 if (!loc)
1703 return false;
1705 /* We have found one location inside loop or its sub-loops. Iterate
1706 both forward and backward to cover the whole cluster. */
1707 i = loc - ref->accesses_in_loop.address ();
1708 while (i > 0)
1710 --i;
1711 mem_ref_loc_p l = &ref->accesses_in_loop[i];
1712 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1713 break;
1714 if (fn (l))
1715 return true;
1717 for (i = loc - ref->accesses_in_loop.address ();
1718 i < ref->accesses_in_loop.length (); ++i)
1720 mem_ref_loc_p l = &ref->accesses_in_loop[i];
1721 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1722 break;
1723 if (fn (l))
1724 return true;
1727 return false;
1730 /* Rewrites location LOC by TMP_VAR. */
1732 struct rewrite_mem_ref_loc
1734 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1735 bool operator () (mem_ref_loc_p loc);
1736 tree tmp_var;
1739 bool
1740 rewrite_mem_ref_loc::operator () (mem_ref_loc_p loc)
1742 *loc->ref = tmp_var;
1743 update_stmt (loc->stmt);
1744 return false;
1747 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1749 static void
1750 rewrite_mem_refs (struct loop *loop, mem_ref_p ref, tree tmp_var)
1752 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1755 /* Stores the first reference location in LOCP. */
1757 struct first_mem_ref_loc_1
1759 first_mem_ref_loc_1 (mem_ref_loc_p *locp_) : locp (locp_) {}
1760 bool operator () (mem_ref_loc_p loc);
1761 mem_ref_loc_p *locp;
1764 bool
1765 first_mem_ref_loc_1::operator () (mem_ref_loc_p loc)
1767 *locp = loc;
1768 return true;
1771 /* Returns the first reference location to REF in LOOP. */
1773 static mem_ref_loc_p
1774 first_mem_ref_loc (struct loop *loop, mem_ref_p ref)
1776 mem_ref_loc_p locp = NULL;
1777 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1778 return locp;
1781 struct prev_flag_edges {
1782 /* Edge to insert new flag comparison code. */
1783 edge append_cond_position;
1785 /* Edge for fall through from previous flag comparison. */
1786 edge last_cond_fallthru;
1789 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1790 MEM along edge EX.
1792 The store is only done if MEM has changed. We do this so no
1793 changes to MEM occur on code paths that did not originally store
1794 into it.
1796 The common case for execute_sm will transform:
1798 for (...) {
1799 if (foo)
1800 stuff;
1801 else
1802 MEM = TMP_VAR;
1805 into:
1807 lsm = MEM;
1808 for (...) {
1809 if (foo)
1810 stuff;
1811 else
1812 lsm = TMP_VAR;
1814 MEM = lsm;
1816 This function will generate:
1818 lsm = MEM;
1820 lsm_flag = false;
1822 for (...) {
1823 if (foo)
1824 stuff;
1825 else {
1826 lsm = TMP_VAR;
1827 lsm_flag = true;
1830 if (lsm_flag) <--
1831 MEM = lsm; <--
1834 static void
1835 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag)
1837 basic_block new_bb, then_bb, old_dest;
1838 bool loop_has_only_one_exit;
1839 edge then_old_edge, orig_ex = ex;
1840 gimple_stmt_iterator gsi;
1841 gimple stmt;
1842 struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1843 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1845 /* ?? Insert store after previous store if applicable. See note
1846 below. */
1847 if (prev_edges)
1848 ex = prev_edges->append_cond_position;
1850 loop_has_only_one_exit = single_pred_p (ex->dest);
1852 if (loop_has_only_one_exit)
1853 ex = split_block_after_labels (ex->dest);
1855 old_dest = ex->dest;
1856 new_bb = split_edge (ex);
1857 then_bb = create_empty_bb (new_bb);
1858 if (irr)
1859 then_bb->flags = BB_IRREDUCIBLE_LOOP;
1860 add_bb_to_loop (then_bb, new_bb->loop_father);
1862 gsi = gsi_start_bb (new_bb);
1863 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1864 NULL_TREE, NULL_TREE);
1865 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1867 gsi = gsi_start_bb (then_bb);
1868 /* Insert actual store. */
1869 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1870 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1872 make_edge (new_bb, then_bb,
1873 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1874 make_edge (new_bb, old_dest,
1875 EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1876 then_old_edge = make_edge (then_bb, old_dest,
1877 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1879 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
1881 if (prev_edges)
1883 basic_block prevbb = prev_edges->last_cond_fallthru->src;
1884 redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
1885 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
1886 set_immediate_dominator (CDI_DOMINATORS, old_dest,
1887 recompute_dominator (CDI_DOMINATORS, old_dest));
1890 /* ?? Because stores may alias, they must happen in the exact
1891 sequence they originally happened. Save the position right after
1892 the (_lsm) store we just created so we can continue appending after
1893 it and maintain the original order. */
1895 struct prev_flag_edges *p;
1897 if (orig_ex->aux)
1898 orig_ex->aux = NULL;
1899 alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
1900 p = (struct prev_flag_edges *) orig_ex->aux;
1901 p->append_cond_position = then_old_edge;
1902 p->last_cond_fallthru = find_edge (new_bb, old_dest);
1903 orig_ex->aux = (void *) p;
1906 if (!loop_has_only_one_exit)
1907 for (gphi_iterator gpi = gsi_start_phis (old_dest);
1908 !gsi_end_p (gpi); gsi_next (&gpi))
1910 gphi *phi = gpi.phi ();
1911 unsigned i;
1913 for (i = 0; i < gimple_phi_num_args (phi); i++)
1914 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
1916 tree arg = gimple_phi_arg_def (phi, i);
1917 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
1918 update_stmt (phi);
1921 /* Remove the original fall through edge. This was the
1922 single_succ_edge (new_bb). */
1923 EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU;
1926 /* When REF is set on the location, set flag indicating the store. */
1928 struct sm_set_flag_if_changed
1930 sm_set_flag_if_changed (tree flag_) : flag (flag_) {}
1931 bool operator () (mem_ref_loc_p loc);
1932 tree flag;
1935 bool
1936 sm_set_flag_if_changed::operator () (mem_ref_loc_p loc)
1938 /* Only set the flag for writes. */
1939 if (is_gimple_assign (loc->stmt)
1940 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
1942 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
1943 gimple stmt = gimple_build_assign (flag, boolean_true_node);
1944 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1946 return false;
1949 /* Helper function for execute_sm. On every location where REF is
1950 set, set an appropriate flag indicating the store. */
1952 static tree
1953 execute_sm_if_changed_flag_set (struct loop *loop, mem_ref_p ref)
1955 tree flag;
1956 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
1957 flag = create_tmp_reg (boolean_type_node, str);
1958 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag));
1959 return flag;
1962 /* Executes store motion of memory reference REF from LOOP.
1963 Exits from the LOOP are stored in EXITS. The initialization of the
1964 temporary variable is put to the preheader of the loop, and assignments
1965 to the reference from the temporary variable are emitted to exits. */
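/* Illustrative sketch of the single-threaded model (the flag-based
   multi-threaded variant is documented above execute_sm_if_changed),
   with lsm standing for the created temporary:

     for (...) { ... = MEM; ...; MEM = x_1; }

   becomes

     lsm = MEM;                                 <- loop preheader
     for (...) { ... = lsm; ...; lsm = x_1; }
     MEM = lsm;                                 <- on each exit edge  */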
1967 static void
1968 execute_sm (struct loop *loop, vec<edge> exits, mem_ref_p ref)
1970 tree tmp_var, store_flag = NULL_TREE;
1971 unsigned i;
1972 gassign *load;
1973 struct fmt_data fmt_data;
1974 edge ex;
1975 struct lim_aux_data *lim_data;
1976 bool multi_threaded_model_p = false;
1977 gimple_stmt_iterator gsi;
1979 if (dump_file && (dump_flags & TDF_DETAILS))
1981 fprintf (dump_file, "Executing store motion of ");
1982 print_generic_expr (dump_file, ref->mem.ref, 0);
1983 fprintf (dump_file, " from loop %d\n", loop->num);
1986 tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
1987 get_lsm_tmp_name (ref->mem.ref, ~0));
1989 fmt_data.loop = loop;
1990 fmt_data.orig_loop = loop;
1991 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
1993 if (bb_in_transaction (loop_preheader_edge (loop)->src)
1994 || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
1995 multi_threaded_model_p = true;
1997 if (multi_threaded_model_p)
1998 store_flag = execute_sm_if_changed_flag_set (loop, ref);
2000 rewrite_mem_refs (loop, ref, tmp_var);
2002 /* Emit the load code on a random exit edge or into the latch if
2003 the loop does not exit, so that we are sure it will be processed
2004 by move_computations after all dependencies. */
2005 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2007 /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2008 load altogether, since the store is predicated by a flag. We
2009 could do the load only if it was originally in the loop. */
2010 load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
2011 lim_data = init_lim_data (load);
2012 lim_data->max_loop = loop;
2013 lim_data->tgt_loop = loop;
2014 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2016 if (multi_threaded_model_p)
2018 load = gimple_build_assign (store_flag, boolean_false_node);
2019 lim_data = init_lim_data (load);
2020 lim_data->max_loop = loop;
2021 lim_data->tgt_loop = loop;
2022 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2025 /* Sink the store to every exit from the loop. */
2026 FOR_EACH_VEC_ELT (exits, i, ex)
2027 if (!multi_threaded_model_p)
2029 gassign *store;
2030 store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
2031 gsi_insert_on_edge (ex, store);
2033 else
2034 execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag);
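/* An editorial sketch of what execute_sm produces for a reference MEM,
   assuming a single exit and the single-threaded model:

     for (...)                        lsm = MEM;    <- load, hoisted by
       {                              for (...)        move_computations
         ... = MEM;          ==>        {
         MEM = ...;                       ... = lsm;
       }                                  lsm = ...;
                                        }
                                      MEM = lsm;    <- store on the exit edge

   With the multi-threaded model (or inside a transaction) the exit store is
   instead emitted by execute_sm_if_changed and guarded by the _flag
   variable, so MEM is only written back if the loop really stored to it.  */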
2037 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2038 edges of the LOOP. */
2040 static void
2041 hoist_memory_references (struct loop *loop, bitmap mem_refs,
2042 vec<edge> exits)
2044 mem_ref_p ref;
2045 unsigned i;
2046 bitmap_iterator bi;
2048 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2050 ref = memory_accesses.refs_list[i];
2051 execute_sm (loop, exits, ref);
2055 struct ref_always_accessed
2057 ref_always_accessed (struct loop *loop_, bool stored_p_)
2058 : loop (loop_), stored_p (stored_p_) {}
2059 bool operator () (mem_ref_loc_p loc);
2060 struct loop *loop;
2061 bool stored_p;
2064 bool
2065 ref_always_accessed::operator () (mem_ref_loc_p loc)
2067 struct loop *must_exec;
2069 if (!get_lim_data (loc->stmt))
2070 return false;
2072 /* If we require an always executed store, make sure the statement
2073 stores to the reference. */
2074 if (stored_p)
2076 tree lhs = gimple_get_lhs (loc->stmt);
2077 if (!lhs
2078 || lhs != *loc->ref)
2079 return false;
2082 must_exec = get_lim_data (loc->stmt)->always_executed_in;
2083 if (!must_exec)
2084 return false;
2086 if (must_exec == loop
2087 || flow_loop_nested_p (must_exec, loop))
2088 return true;
2090 return false;
2093 /* Returns true if REF is always accessed in LOOP. If STORED_P is true,
2094 make sure REF is always stored to in LOOP. */
2096 static bool
2097 ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
2099 return for_all_locs_in_loop (loop, ref,
2100 ref_always_accessed (loop, stored_p));
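/* Editorial note: a store that is only conditionally executed, e.g.

     for (...)
       {
         if (cond)
           MEM = ...;
       }

   is not "always accessed", so ref_always_accessed_p (loop, ref, true)
   returns false for it; can_sm_ref_p uses this to reject store motion of
   references whose store may trap unless the store is unconditional.  */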
2103 /* Returns true if REF1 and REF2 are independent. */
2105 static bool
2106 refs_independent_p (mem_ref_p ref1, mem_ref_p ref2)
2108 if (ref1 == ref2)
2109 return true;
2111 if (dump_file && (dump_flags & TDF_DETAILS))
2112 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2113 ref1->id, ref2->id);
2115 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
2117 if (dump_file && (dump_flags & TDF_DETAILS))
2118 fprintf (dump_file, "dependent.\n");
2119 return false;
2121 else
2123 if (dump_file && (dump_flags & TDF_DETAILS))
2124 fprintf (dump_file, "independent.\n");
2125 return true;
2129 /* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
2130 and its super-loops. */
2132 static void
2133 record_dep_loop (struct loop *loop, mem_ref_p ref, bool stored_p)
2135 /* We can propagate dependent-in-loop bits up the loop
2136 hierarchy to all outer loops. */
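/* Editorial note: bitmap_set_bit returns false when the bit was already
set, which terminates the walk early; in that case the outer loops were
already marked by the earlier call that set this bit. */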
2137 while (loop != current_loops->tree_root
2138 && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2139 loop = loop_outer (loop);
2142 /* Returns true if REF is independent of all other memory references in
2143 LOOP. */
2145 static bool
2146 ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref, bool stored_p)
2148 bitmap refs_to_check;
2149 unsigned i;
2150 bitmap_iterator bi;
2151 mem_ref_p aref;
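/* Editorial note: when STORED_P, REF has to be treated as a store, so it
can conflict with any reference in LOOP; otherwise it is a pure load and
can only conflict with the references that are stored in LOOP. */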
2153 if (stored_p)
2154 refs_to_check = &memory_accesses.refs_in_loop[loop->num];
2155 else
2156 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2158 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
2159 return false;
2161 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2163 aref = memory_accesses.refs_list[i];
2164 if (!refs_independent_p (ref, aref))
2165 return false;
2168 return true;
2171 /* Returns true if REF is independent of all other memory references in
2172 LOOP. Wrapper over ref_indep_loop_p_1, caching its results. */
2174 static bool
2175 ref_indep_loop_p_2 (struct loop *loop, mem_ref_p ref, bool stored_p)
2177 stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
2179 if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2180 return true;
2181 if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2182 return false;
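/* Editorial note: REF can only be independent in LOOP if it is also
independent in every inner loop; check (and cache) those first. */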
2184 struct loop *inner = loop->inner;
2185 while (inner)
2187 if (!ref_indep_loop_p_2 (inner, ref, stored_p))
2188 return false;
2189 inner = inner->next;
2192 bool indep_p = ref_indep_loop_p_1 (loop, ref, stored_p);
2194 if (dump_file && (dump_flags & TDF_DETAILS))
2195 fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
2196 ref->id, loop->num, indep_p ? "independent" : "dependent");
2198 /* Record the computed result in the cache. */
2199 if (indep_p)
2201 if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
2202 && stored_p)
2204 /* If it's independent of all refs then it's independent
2205 of stores, too. */
2206 bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
2209 else
2211 record_dep_loop (loop, ref, stored_p);
2212 if (!stored_p)
2214 /* If it's dependent on stores, it's dependent on
2215 all refs, too. */
2216 record_dep_loop (loop, ref, true);
2220 return indep_p;
2223 /* Returns true if REF is independent of all other memory references in
2224 LOOP. */
2226 static bool
2227 ref_indep_loop_p (struct loop *loop, mem_ref_p ref)
2229 gcc_checking_assert (MEM_ANALYZABLE (ref));
2231 return ref_indep_loop_p_2 (loop, ref, false);
2234 /* Returns true if we can perform store motion of REF from LOOP. */
2236 static bool
2237 can_sm_ref_p (struct loop *loop, mem_ref_p ref)
2239 tree base;
2241 /* Can't hoist unanalyzable refs. */
2242 if (!MEM_ANALYZABLE (ref))
2243 return false;
2245 /* It should be movable. */
2246 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2247 || TREE_THIS_VOLATILE (ref->mem.ref)
2248 || !for_each_index (&ref->mem.ref, may_move_till, loop))
2249 return false;
2251 /* If it can throw, fail; we do not properly update EH info. */
2252 if (tree_could_throw_p (ref->mem.ref))
2253 return false;
2255 /* If it can trap, it must always be executed in LOOP.
2256 Readonly memory locations may trap when storing to them, but
2257 tree_could_trap_p is a predicate for rvalues, so check that
2258 explicitly. */
2259 base = get_base_address (ref->mem.ref);
2260 if ((tree_could_trap_p (ref->mem.ref)
2261 || (DECL_P (base) && TREE_READONLY (base)))
2262 && !ref_always_accessed_p (loop, ref, true))
2263 return false;
2265 /* And it must be independent on all other memory references
2266 in LOOP. */
2267 if (!ref_indep_loop_p (loop, ref))
2268 return false;
2270 return true;
2273 /* Marks the references in LOOP for which store motion should be performed
2274 in REFS_TO_SM. SM_EXECUTED is the set of references for which store
2275 motion was performed in one of the outer loops. */
2277 static void
2278 find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2280 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2281 unsigned i;
2282 bitmap_iterator bi;
2283 mem_ref_p ref;
2285 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2287 ref = memory_accesses.refs_list[i];
2288 if (can_sm_ref_p (loop, ref))
2289 bitmap_set_bit (refs_to_sm, i);
2293 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2294 for a store motion optimization (i.e. whether we can insert statements
2295 on its exits). */
2297 static bool
2298 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
2299 vec<edge> exits)
2301 unsigned i;
2302 edge ex;
2304 FOR_EACH_VEC_ELT (exits, i, ex)
2305 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2306 return false;
2308 return true;
2311 /* Try to perform store motion for all memory references modified inside
2312 LOOP. SM_EXECUTED is the bitmap of the memory references for which
2313 store motion was executed in one of the outer loops. */
2315 static void
2316 store_motion_loop (struct loop *loop, bitmap sm_executed)
2318 vec<edge> exits = get_loop_exit_edges (loop);
2319 struct loop *subloop;
2320 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2322 if (loop_suitable_for_sm (loop, exits))
2324 find_refs_for_sm (loop, sm_executed, sm_in_loop);
2325 hoist_memory_references (loop, sm_in_loop, exits);
2327 exits.release ();
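/* Editorial note: references for which store motion was applied in this
loop have already been rewritten to use the new temporary, so they need
not be considered again in the subloops; pass them down in SM_EXECUTED
and restore the bitmap afterwards for the caller. */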
2329 bitmap_ior_into (sm_executed, sm_in_loop);
2330 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2331 store_motion_loop (subloop, sm_executed);
2332 bitmap_and_compl_into (sm_executed, sm_in_loop);
2333 BITMAP_FREE (sm_in_loop);
2336 /* Try to perform store motion for all memory references modified inside
2337 loops. */
2339 static void
2340 store_motion (void)
2342 struct loop *loop;
2343 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
2345 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2346 store_motion_loop (loop, sm_executed);
2348 BITMAP_FREE (sm_executed);
2349 gsi_commit_edge_inserts ();
2352 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2353 for each such basic block bb records the outermost loop for which execution
2354 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
2355 blocks that contain a nonpure call. */
2357 static void
2358 fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call)
2360 basic_block bb = NULL, *bbs, last = NULL;
2361 unsigned i;
2362 edge e;
2363 struct loop *inn_loop = loop;
2365 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2367 bbs = get_loop_body_in_dom_order (loop);
2369 for (i = 0; i < loop->num_nodes; i++)
2371 edge_iterator ei;
2372 bb = bbs[i];
2374 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2375 last = bb;
2377 if (bitmap_bit_p (contains_call, bb->index))
2378 break;
2380 FOR_EACH_EDGE (e, ei, bb->succs)
2381 if (!flow_bb_inside_loop_p (loop, e->dest))
2382 break;
2383 if (e)
2384 break;
2386 /* A loop might be infinite (TODO use simple loop analysis
2387 to disprove this if possible). */
2388 if (bb->flags & BB_IRREDUCIBLE_LOOP)
2389 break;
2391 if (!flow_bb_inside_loop_p (inn_loop, bb))
2392 break;
2394 if (bb->loop_father->header == bb)
2396 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2397 break;
2399 /* In a loop that is always entered we may proceed anyway.
2400 But record that we entered it and stop once we leave it. */
2401 inn_loop = bb->loop_father;
2405 while (1)
2407 SET_ALWAYS_EXECUTED_IN (last, loop);
2408 if (last == loop->header)
2409 break;
2410 last = get_immediate_dominator (CDI_DOMINATORS, last);
2413 free (bbs);
2416 for (loop = loop->inner; loop; loop = loop->next)
2417 fill_always_executed_in_1 (loop, contains_call);
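/* An editorial example of the walk above: in a loop of the form

     header:  i < n ?
     body1:   unconditionally executed, no call
     if (cond)
       body2: conditionally executed
     body3:   joins the branches, contains a call to an impure function
     body4:   unconditionally executed
     latch:

   BODY2 is never marked because it does not dominate the latch, and BODY4
   is not marked because the scan stops at the call in BODY3; HEADER, BODY1
   and BODY3 get ALWAYS_EXECUTED_IN set to LOOP.  */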
2420 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
2421 for each such basic block bb records the outermost loop for which execution
2422 of its header implies execution of bb. */
2424 static void
2425 fill_always_executed_in (void)
2427 sbitmap contains_call = sbitmap_alloc (last_basic_block_for_fn (cfun));
2428 basic_block bb;
2429 struct loop *loop;
2431 bitmap_clear (contains_call);
2432 FOR_EACH_BB_FN (bb, cfun)
2434 gimple_stmt_iterator gsi;
2435 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2437 if (nonpure_call_p (gsi_stmt (gsi)))
2438 break;
2441 if (!gsi_end_p (gsi))
2442 bitmap_set_bit (contains_call, bb->index);
2445 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
2446 fill_always_executed_in_1 (loop, contains_call);
2448 sbitmap_free (contains_call);
2452 /* Compute the global information needed by the loop invariant motion pass. */
2454 static void
2455 tree_ssa_lim_initialize (void)
2457 struct loop *loop;
2458 unsigned i;
2460 bitmap_obstack_initialize (&lim_bitmap_obstack);
2461 gcc_obstack_init (&mem_ref_obstack);
2462 lim_aux_data_map = new hash_map<gimple, lim_aux_data *>;
2464 if (flag_tm)
2465 compute_transaction_bits ();
2467 alloc_aux_for_edges (0);
2469 memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
2470 memory_accesses.refs_list.create (100);
2471 /* Allocate a special, unanalyzable mem-ref with ID zero. */
2472 memory_accesses.refs_list.quick_push
2473 (mem_ref_alloc (error_mark_node, 0, UNANALYZABLE_MEM_ID));
2475 memory_accesses.refs_in_loop.create (number_of_loops (cfun));
2476 memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
2477 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
2478 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2479 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
2480 memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2482 for (i = 0; i < number_of_loops (cfun); i++)
2484 bitmap_initialize (&memory_accesses.refs_in_loop[i],
2485 &lim_bitmap_obstack);
2486 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
2487 &lim_bitmap_obstack);
2488 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
2489 &lim_bitmap_obstack);
2492 memory_accesses.ttae_cache = NULL;
2494 /* Initialize bb_loop_postorder with a mapping from loop->num to
2495 its postorder index. */
2496 i = 0;
2497 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
2498 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2499 bb_loop_postorder[loop->num] = i++;
2502 /* Cleans up after the invariant motion pass. */
2504 static void
2505 tree_ssa_lim_finalize (void)
2507 basic_block bb;
2508 unsigned i;
2509 mem_ref_p ref;
2511 free_aux_for_edges ();
2513 FOR_EACH_BB_FN (bb, cfun)
2514 SET_ALWAYS_EXECUTED_IN (bb, NULL);
2516 bitmap_obstack_release (&lim_bitmap_obstack);
2517 delete lim_aux_data_map;
2519 delete memory_accesses.refs;
2520 memory_accesses.refs = NULL;
2522 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
2523 memref_free (ref);
2524 memory_accesses.refs_list.release ();
2525 obstack_free (&mem_ref_obstack, NULL);
2527 memory_accesses.refs_in_loop.release ();
2528 memory_accesses.refs_stored_in_loop.release ();
2529 memory_accesses.all_refs_stored_in_loop.release ();
2531 if (memory_accesses.ttae_cache)
2532 free_affine_expand_cache (&memory_accesses.ttae_cache);
2534 free (bb_loop_postorder);
2537 /* Moves invariants from loops. Only "expensive" invariants are moved out --
2538 i.e. those that are likely to be a win regardless of the register pressure. */
2540 unsigned int
2541 tree_ssa_lim (void)
2543 unsigned int todo;
2545 tree_ssa_lim_initialize ();
2547 /* Gathers information about memory accesses in the loops. */
2548 analyze_memory_references ();
2550 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
2551 fill_always_executed_in ();
2553 /* For each statement determine the outermost loop in which it is
2554 invariant and the cost of computing the invariant. */
2555 invariantness_dom_walker (CDI_DOMINATORS)
2556 .walk (cfun->cfg->x_entry_block_ptr);
2558 /* Execute store motion. Force the necessary invariants to be moved
2559 out of the loops as well. */
2560 store_motion ();
2562 /* Move the expressions that are expensive enough. */
2563 todo = move_computations ();
2565 tree_ssa_lim_finalize ();
2567 return todo;
2570 /* Loop invariant motion pass. */
2572 namespace {
2574 const pass_data pass_data_lim =
2576 GIMPLE_PASS, /* type */
2577 "lim", /* name */
2578 OPTGROUP_LOOP, /* optinfo_flags */
2579 TV_LIM, /* tv_id */
2580 PROP_cfg, /* properties_required */
2581 0, /* properties_provided */
2582 0, /* properties_destroyed */
2583 0, /* todo_flags_start */
2584 0, /* todo_flags_finish */
2587 class pass_lim : public gimple_opt_pass
2589 public:
2590 pass_lim (gcc::context *ctxt)
2591 : gimple_opt_pass (pass_data_lim, ctxt)
2594 /* opt_pass methods: */
2595 opt_pass * clone () { return new pass_lim (m_ctxt); }
2596 virtual bool gate (function *) { return flag_tree_loop_im != 0; }
2597 virtual unsigned int execute (function *);
2599 }; // class pass_lim
2601 unsigned int
2602 pass_lim::execute (function *fun)
2604 if (number_of_loops (fun) <= 1)
2605 return 0;
2607 return tree_ssa_lim ();
2610 } // anon namespace
2612 gimple_opt_pass *
2613 make_pass_lim (gcc::context *ctxt)
2615 return new pass_lim (ctxt);