gcc/tree-ssa-loop-im.c
1 /* Loop invariant motion.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "alias.h"
25 #include "symtab.h"
26 #include "tree.h"
27 #include "fold-const.h"
28 #include "tm_p.h"
29 #include "predict.h"
30 #include "hard-reg-set.h"
31 #include "function.h"
32 #include "dominance.h"
33 #include "cfg.h"
34 #include "cfganal.h"
35 #include "basic-block.h"
36 #include "gimple-pretty-print.h"
37 #include "tree-ssa-alias.h"
38 #include "internal-fn.h"
39 #include "tree-eh.h"
40 #include "gimple-expr.h"
41 #include "gimple.h"
42 #include "gimplify.h"
43 #include "gimple-iterator.h"
44 #include "gimple-ssa.h"
45 #include "tree-cfg.h"
46 #include "tree-phinodes.h"
47 #include "ssa-iterators.h"
48 #include "stringpool.h"
49 #include "tree-ssanames.h"
50 #include "tree-ssa-loop-manip.h"
51 #include "tree-ssa-loop.h"
52 #include "tree-into-ssa.h"
53 #include "cfgloop.h"
54 #include "domwalk.h"
55 #include "params.h"
56 #include "tree-pass.h"
57 #include "flags.h"
58 #include "tree-affine.h"
59 #include "tree-ssa-propagate.h"
60 #include "trans-mem.h"
61 #include "gimple-fold.h"
63 /* TODO: Support for predicated code motion. I.e.
65 while (1)
67 if (cond)
69 a = inv;
70 something;
74 Where COND and INV are invariants, but evaluating INV may trap or be
 75    invalid for some other reason if !COND.  This may be transformed to
77 if (cond)
78 a = inv;
79 while (1)
81 if (cond)
82 something;
83 } */
85 /* The auxiliary data kept for each statement. */
87 struct lim_aux_data
 89   struct loop *max_loop;	/* The outermost loop in which the statement
 90 				   is invariant.  */
 92   struct loop *tgt_loop;	/* The loop out of which we want to move the
 93 				   invariant.  */
 95   struct loop *always_executed_in;
 96 				/* The outermost loop for which we are sure
 97 				   the statement is executed whenever the loop
 98 				   is entered.  */
100 unsigned cost; /* Cost of the computation performed by the
101 statement. */
 103   vec<gimple> depends;		/* Vector of statements that must also be
104 hoisted out of the loop when this statement
105 is hoisted; i.e. those that define the
106 operands of the statement and are inside of
107 the MAX_LOOP loop. */
110 /* Maps statements to their lim_aux_data. */
112 static hash_map<gimple, lim_aux_data *> *lim_aux_data_map;
114 /* Description of a memory reference location. */
116 typedef struct mem_ref_loc
118 tree *ref; /* The reference itself. */
 119   gimple stmt;			/* The statement in which it occurs.  */
120 } *mem_ref_loc_p;
123 /* Description of a memory reference. */
125 typedef struct im_mem_ref
127 unsigned id; /* ID assigned to the memory reference
128 (its index in memory_accesses.refs_list) */
129 hashval_t hash; /* Its hash value. */
131 /* The memory access itself and associated caching of alias-oracle
132 query meta-data. */
133 ao_ref mem;
 135   bitmap stored;		/* The set of loops in which this memory location
 136 				   is stored to.  */
137 vec<mem_ref_loc> accesses_in_loop;
138 /* The locations of the accesses. Vector
139 indexed by the loop number. */
141 /* The following sets are computed on demand. We keep both set and
142 its complement, so that we know whether the information was
143 already computed or not. */
 144   bitmap_head indep_loop;	/* The set of loops in which the memory
 145 				   reference is independent, meaning:
 146 				   If it is stored in the loop, this store
 147 				     is independent of all other loads and
 148 				     stores.
 149 				   If it is only loaded, then it is independent
 150 				     of all stores in the loop.  */
151 bitmap_head dep_loop; /* The complement of INDEP_LOOP. */
152 } *mem_ref_p;
154 /* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
155 to record (in)dependence against stores in the loop and its subloops, the
156 second to record (in)dependence against all references in the loop
157 and its subloops. */
158 #define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
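/* Illustrative example (added for clarity, not part of the original sources):
   for loop number 3, LOOP_DEP_BIT (3, false) == 6 and LOOP_DEP_BIT (3, true) == 7,
   so the two bits describing loop 3 occupy positions 6 and 7 of the
   indep_loop/dep_loop bitmaps.  */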
160 /* Mem_ref hashtable helpers. */
162 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
164 typedef tree_node *compare_type;
165 static inline hashval_t hash (const im_mem_ref *);
166 static inline bool equal (const im_mem_ref *, const tree_node *);
169 /* A hash function for struct im_mem_ref object OBJ. */
171 inline hashval_t
172 mem_ref_hasher::hash (const im_mem_ref *mem)
174 return mem->hash;
177 /* An equality function for struct im_mem_ref object MEM1 with
178 memory reference OBJ2. */
180 inline bool
181 mem_ref_hasher::equal (const im_mem_ref *mem1, const tree_node *obj2)
183 return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
187 /* Description of memory accesses in loops. */
189 static struct
191 /* The hash table of memory references accessed in loops. */
192 hash_table<mem_ref_hasher> *refs;
194 /* The list of memory references. */
195 vec<mem_ref_p> refs_list;
197 /* The set of memory references accessed in each loop. */
198 vec<bitmap_head> refs_in_loop;
200 /* The set of memory references stored in each loop. */
201 vec<bitmap_head> refs_stored_in_loop;
 203   /* The set of memory references stored in each loop, including subloops.  */
204 vec<bitmap_head> all_refs_stored_in_loop;
206 /* Cache for expanding memory addresses. */
207 hash_map<tree, name_expansion *> *ttae_cache;
208 } memory_accesses;
210 /* Obstack for the bitmaps in the above data structures. */
211 static bitmap_obstack lim_bitmap_obstack;
212 static obstack mem_ref_obstack;
214 static bool ref_indep_loop_p (struct loop *, mem_ref_p);
216 /* Minimum cost of an expensive expression. */
217 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
219 /* The outermost loop for which execution of the header guarantees that the
220 block will be executed. */
221 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
222 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
224 /* ID of the shared unanalyzable mem. */
225 #define UNANALYZABLE_MEM_ID 0
227 /* Whether the reference was analyzable. */
228 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
230 static struct lim_aux_data *
231 init_lim_data (gimple stmt)
233 lim_aux_data *p = XCNEW (struct lim_aux_data);
234 lim_aux_data_map->put (stmt, p);
236 return p;
239 static struct lim_aux_data *
240 get_lim_data (gimple stmt)
242 lim_aux_data **p = lim_aux_data_map->get (stmt);
243 if (!p)
244 return NULL;
246 return *p;
249 /* Releases the memory occupied by DATA. */
251 static void
252 free_lim_aux_data (struct lim_aux_data *data)
254 data->depends.release ();
255 free (data);
258 static void
259 clear_lim_data (gimple stmt)
261 lim_aux_data **p = lim_aux_data_map->get (stmt);
262 if (!p)
263 return;
265 free_lim_aux_data (*p);
266 *p = NULL;
270 /* The possibilities of statement movement. */
271 enum move_pos
273 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
 274   MOVE_PRESERVE_EXECUTION,	/* Must not cause a non-executed statement
 275 				   to become executed -- memory accesses, ... */
276 MOVE_POSSIBLE /* Unlimited movement. */
280 /* If it is possible to hoist the statement STMT unconditionally,
281 returns MOVE_POSSIBLE.
282 If it is possible to hoist the statement STMT, but we must avoid making
283 it executed if it would not be executed in the original program (e.g.
284 because it may trap), return MOVE_PRESERVE_EXECUTION.
285 Otherwise return MOVE_IMPOSSIBLE. */
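/* For illustration, assuming the checks performed below: a plain SSA
   arithmetic statement such as "x_1 = a_2 + b_3" is MOVE_POSSIBLE, a load
   that may trap such as "x_1 = *p_2" is MOVE_PRESERVE_EXECUTION, and a
   statement with a virtual definition (a store) is MOVE_IMPOSSIBLE.  */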
287 enum move_pos
288 movement_possibility (gimple stmt)
290 tree lhs;
291 enum move_pos ret = MOVE_POSSIBLE;
293 if (flag_unswitch_loops
294 && gimple_code (stmt) == GIMPLE_COND)
296 /* If we perform unswitching, force the operands of the invariant
297 condition to be moved out of the loop. */
298 return MOVE_POSSIBLE;
301 if (gimple_code (stmt) == GIMPLE_PHI
302 && gimple_phi_num_args (stmt) <= 2
303 && !virtual_operand_p (gimple_phi_result (stmt))
304 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
305 return MOVE_POSSIBLE;
307 if (gimple_get_lhs (stmt) == NULL_TREE)
308 return MOVE_IMPOSSIBLE;
310 if (gimple_vdef (stmt))
311 return MOVE_IMPOSSIBLE;
313 if (stmt_ends_bb_p (stmt)
314 || gimple_has_volatile_ops (stmt)
315 || gimple_has_side_effects (stmt)
316 || stmt_could_throw_p (stmt))
317 return MOVE_IMPOSSIBLE;
319 if (is_gimple_call (stmt))
 321       /* While a pure or const call is guaranteed to have no side effects, we
322 cannot move it arbitrarily. Consider code like
324 char *s = something ();
326 while (1)
328 if (s)
329 t = strlen (s);
330 else
331 t = 0;
334 Here the strlen call cannot be moved out of the loop, even though
335 s is invariant. In addition to possibly creating a call with
336 invalid arguments, moving out a function call that is not executed
337 may cause performance regressions in case the call is costly and
338 not executed at all. */
339 ret = MOVE_PRESERVE_EXECUTION;
340 lhs = gimple_call_lhs (stmt);
342 else if (is_gimple_assign (stmt))
343 lhs = gimple_assign_lhs (stmt);
344 else
345 return MOVE_IMPOSSIBLE;
347 if (TREE_CODE (lhs) == SSA_NAME
348 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
349 return MOVE_IMPOSSIBLE;
351 if (TREE_CODE (lhs) != SSA_NAME
352 || gimple_could_trap_p (stmt))
353 return MOVE_PRESERVE_EXECUTION;
355 /* Non local loads in a transaction cannot be hoisted out. Well,
356 unless the load happens on every path out of the loop, but we
357 don't take this into account yet. */
358 if (flag_tm
359 && gimple_in_transaction (stmt)
360 && gimple_assign_single_p (stmt))
362 tree rhs = gimple_assign_rhs1 (stmt);
363 if (DECL_P (rhs) && is_global_var (rhs))
365 if (dump_file)
367 fprintf (dump_file, "Cannot hoist conditional load of ");
368 print_generic_expr (dump_file, rhs, TDF_SLIM);
369 fprintf (dump_file, " because it is in a transaction.\n");
371 return MOVE_IMPOSSIBLE;
375 return ret;
 378 /* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
 379    loop to which we could move the expression using DEF if it did not have
 380    other operands, i.e. the outermost loop enclosing LOOP in which the value
 381    of DEF is invariant.  */
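/* For example (a clarifying note, not from the original sources): if DEF is
   a constant or is defined outside every loop, the outermost ancestor of
   LOOP (depth 1) is returned; if DEF is defined inside LOOP and cannot
   itself be hoisted out of it, NULL is returned because the expression
   cannot be moved at all.  */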
383 static struct loop *
384 outermost_invariant_loop (tree def, struct loop *loop)
386 gimple def_stmt;
387 basic_block def_bb;
388 struct loop *max_loop;
389 struct lim_aux_data *lim_data;
391 if (!def)
392 return superloop_at_depth (loop, 1);
394 if (TREE_CODE (def) != SSA_NAME)
396 gcc_assert (is_gimple_min_invariant (def));
397 return superloop_at_depth (loop, 1);
400 def_stmt = SSA_NAME_DEF_STMT (def);
401 def_bb = gimple_bb (def_stmt);
402 if (!def_bb)
403 return superloop_at_depth (loop, 1);
405 max_loop = find_common_loop (loop, def_bb->loop_father);
407 lim_data = get_lim_data (def_stmt);
408 if (lim_data != NULL && lim_data->max_loop != NULL)
409 max_loop = find_common_loop (max_loop,
410 loop_outer (lim_data->max_loop));
411 if (max_loop == loop)
412 return NULL;
413 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
415 return max_loop;
418 /* DATA is a structure containing information associated with a statement
419 inside LOOP. DEF is one of the operands of this statement.
 421    Find the outermost loop enclosing LOOP in which the value of DEF is invariant
 422    and record this in the DATA->max_loop field.  If DEF itself is defined inside
 423    this loop as well (i.e. we need to hoist it out of the loop if we want
 424    to hoist the statement represented by DATA), record the statement that
 425    defines DEF in the DATA->depends list.  Additionally, if ADD_COST is true,
426 add the cost of the computation of DEF to the DATA->cost.
428 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
430 static bool
431 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
432 bool add_cost)
434 gimple def_stmt = SSA_NAME_DEF_STMT (def);
435 basic_block def_bb = gimple_bb (def_stmt);
436 struct loop *max_loop;
437 struct lim_aux_data *def_data;
439 if (!def_bb)
440 return true;
442 max_loop = outermost_invariant_loop (def, loop);
443 if (!max_loop)
444 return false;
446 if (flow_loop_nested_p (data->max_loop, max_loop))
447 data->max_loop = max_loop;
449 def_data = get_lim_data (def_stmt);
450 if (!def_data)
451 return true;
453 if (add_cost
454 /* Only add the cost if the statement defining DEF is inside LOOP,
455 i.e. if it is likely that by moving the invariants dependent
456 on it, we will be able to avoid creating a new register for
457 it (since it will be only used in these dependent invariants). */
458 && def_bb->loop_father == loop)
459 data->cost += def_data->cost;
461 data->depends.safe_push (def_stmt);
463 return true;
 466 /* Returns an estimate of the cost of statement STMT.  The values here
467 are just ad-hoc constants, similar to costs for inlining. */
469 static unsigned
470 stmt_cost (gimple stmt)
472 /* Always try to create possibilities for unswitching. */
473 if (gimple_code (stmt) == GIMPLE_COND
474 || gimple_code (stmt) == GIMPLE_PHI)
475 return LIM_EXPENSIVE;
477 /* We should be hoisting calls if possible. */
478 if (is_gimple_call (stmt))
480 tree fndecl;
482 /* Unless the call is a builtin_constant_p; this always folds to a
483 constant, so moving it is useless. */
484 fndecl = gimple_call_fndecl (stmt);
485 if (fndecl
486 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
487 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
488 return 0;
490 return LIM_EXPENSIVE;
493 /* Hoisting memory references out should almost surely be a win. */
494 if (gimple_references_memory_p (stmt))
495 return LIM_EXPENSIVE;
497 if (gimple_code (stmt) != GIMPLE_ASSIGN)
498 return 1;
500 switch (gimple_assign_rhs_code (stmt))
502 case MULT_EXPR:
503 case WIDEN_MULT_EXPR:
504 case WIDEN_MULT_PLUS_EXPR:
505 case WIDEN_MULT_MINUS_EXPR:
506 case DOT_PROD_EXPR:
507 case FMA_EXPR:
508 case TRUNC_DIV_EXPR:
509 case CEIL_DIV_EXPR:
510 case FLOOR_DIV_EXPR:
511 case ROUND_DIV_EXPR:
512 case EXACT_DIV_EXPR:
513 case CEIL_MOD_EXPR:
514 case FLOOR_MOD_EXPR:
515 case ROUND_MOD_EXPR:
516 case TRUNC_MOD_EXPR:
517 case RDIV_EXPR:
518 /* Division and multiplication are usually expensive. */
519 return LIM_EXPENSIVE;
521 case LSHIFT_EXPR:
522 case RSHIFT_EXPR:
523 case WIDEN_LSHIFT_EXPR:
524 case LROTATE_EXPR:
525 case RROTATE_EXPR:
526 /* Shifts and rotates are usually expensive. */
527 return LIM_EXPENSIVE;
529 case CONSTRUCTOR:
530 /* Make vector construction cost proportional to the number
531 of elements. */
532 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
534 case SSA_NAME:
535 case PAREN_EXPR:
536 /* Whether or not something is wrapped inside a PAREN_EXPR
537 should not change move cost. Nor should an intermediate
538 unpropagated SSA name copy. */
539 return 0;
541 default:
542 return 1;
 546 /* Finds the outermost loop between OUTER and LOOP in which the memory reference
547 REF is independent. If REF is not independent in LOOP, NULL is returned
548 instead. */
550 static struct loop *
551 outermost_indep_loop (struct loop *outer, struct loop *loop, mem_ref_p ref)
553 struct loop *aloop;
555 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
556 return NULL;
558 for (aloop = outer;
559 aloop != loop;
560 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
561 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
562 && ref_indep_loop_p (aloop, ref))
563 return aloop;
565 if (ref_indep_loop_p (loop, ref))
566 return loop;
567 else
568 return NULL;
571 /* If there is a simple load or store to a memory reference in STMT, returns
572 the location of the memory reference, and sets IS_STORE according to whether
573 it is a store or load. Otherwise, returns NULL. */
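/* For illustration (assumed examples, not from the original sources): for
   "x_1 = MEM[p]" the address of the rhs is returned and *IS_STORE is set to
   false; for "MEM[p] = x_1" the address of the lhs is returned and *IS_STORE
   is set to true; anything more complex yields NULL.  */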
575 static tree *
576 simple_mem_ref_in_stmt (gimple stmt, bool *is_store)
578 tree *lhs, *rhs;
580 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
581 if (!gimple_assign_single_p (stmt))
582 return NULL;
584 lhs = gimple_assign_lhs_ptr (stmt);
585 rhs = gimple_assign_rhs1_ptr (stmt);
587 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
589 *is_store = false;
590 return rhs;
592 else if (gimple_vdef (stmt)
593 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
595 *is_store = true;
596 return lhs;
598 else
599 return NULL;
602 /* Returns the memory reference contained in STMT. */
604 static mem_ref_p
605 mem_ref_in_stmt (gimple stmt)
607 bool store;
608 tree *mem = simple_mem_ref_in_stmt (stmt, &store);
609 hashval_t hash;
610 mem_ref_p ref;
612 if (!mem)
613 return NULL;
614 gcc_assert (!store);
616 hash = iterative_hash_expr (*mem, 0);
617 ref = memory_accesses.refs->find_with_hash (*mem, hash);
619 gcc_assert (ref != NULL);
620 return ref;
623 /* From a controlling predicate in DOM determine the arguments from
624 the PHI node PHI that are chosen if the predicate evaluates to
625 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
626 they are non-NULL. Returns true if the arguments can be determined,
627 else return false. */
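/* A sketch of the intended situation (added for illustration):

     if (cond)    <-- last statement of DOM
       |    \
      ...    ...
       |    /
     x = PHI <a, b>

   where a flows in along the path taken when cond is true and b along the
   path taken when cond is false; *TRUE_ARG_P is then set to a and
   *FALSE_ARG_P to b.  */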
629 static bool
630 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
631 tree *true_arg_p, tree *false_arg_p)
633 basic_block bb = gimple_bb (phi);
634 edge true_edge, false_edge, tem;
635 tree arg0 = NULL_TREE, arg1 = NULL_TREE;
637 /* We have to verify that one edge into the PHI node is dominated
638 by the true edge of the predicate block and the other edge
639 dominated by the false edge. This ensures that the PHI argument
640 we are going to take is completely determined by the path we
641 take from the predicate block.
642 We can only use BB dominance checks below if the destination of
643 the true/false edges are dominated by their edge, thus only
644 have a single predecessor. */
645 extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
646 tem = EDGE_PRED (bb, 0);
647 if (tem == true_edge
648 || (single_pred_p (true_edge->dest)
649 && (tem->src == true_edge->dest
650 || dominated_by_p (CDI_DOMINATORS,
651 tem->src, true_edge->dest))))
652 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
653 else if (tem == false_edge
654 || (single_pred_p (false_edge->dest)
655 && (tem->src == false_edge->dest
656 || dominated_by_p (CDI_DOMINATORS,
657 tem->src, false_edge->dest))))
658 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
659 else
660 return false;
661 tem = EDGE_PRED (bb, 1);
662 if (tem == true_edge
663 || (single_pred_p (true_edge->dest)
664 && (tem->src == true_edge->dest
665 || dominated_by_p (CDI_DOMINATORS,
666 tem->src, true_edge->dest))))
667 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
668 else if (tem == false_edge
669 || (single_pred_p (false_edge->dest)
670 && (tem->src == false_edge->dest
671 || dominated_by_p (CDI_DOMINATORS,
672 tem->src, false_edge->dest))))
673 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
674 else
675 return false;
676 if (!arg0 || !arg1)
677 return false;
679 if (true_arg_p)
680 *true_arg_p = arg0;
681 if (false_arg_p)
682 *false_arg_p = arg1;
684 return true;
 687 /* Determine the outermost loop to which it is possible to hoist a statement
 688    STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
 689    the outermost loop in which the value computed by STMT is invariant.
 690    If MUST_PRESERVE_EXEC is true, additionally choose a loop such that
 691    we preserve whether STMT is executed.  It also fills other related
692 information to LIM_DATA (STMT).
694 The function returns false if STMT cannot be hoisted outside of the loop it
695 is defined in, and true otherwise. */
697 static bool
698 determine_max_movement (gimple stmt, bool must_preserve_exec)
700 basic_block bb = gimple_bb (stmt);
701 struct loop *loop = bb->loop_father;
702 struct loop *level;
703 struct lim_aux_data *lim_data = get_lim_data (stmt);
704 tree val;
705 ssa_op_iter iter;
707 if (must_preserve_exec)
708 level = ALWAYS_EXECUTED_IN (bb);
709 else
710 level = superloop_at_depth (loop, 1);
711 lim_data->max_loop = level;
713 if (gphi *phi = dyn_cast <gphi *> (stmt))
715 use_operand_p use_p;
716 unsigned min_cost = UINT_MAX;
717 unsigned total_cost = 0;
718 struct lim_aux_data *def_data;
720 /* We will end up promoting dependencies to be unconditionally
721 evaluated. For this reason the PHI cost (and thus the
722 cost we remove from the loop by doing the invariant motion)
723 is that of the cheapest PHI argument dependency chain. */
724 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
726 val = USE_FROM_PTR (use_p);
728 if (TREE_CODE (val) != SSA_NAME)
730 /* Assign const 1 to constants. */
731 min_cost = MIN (min_cost, 1);
732 total_cost += 1;
733 continue;
735 if (!add_dependency (val, lim_data, loop, false))
736 return false;
738 gimple def_stmt = SSA_NAME_DEF_STMT (val);
739 if (gimple_bb (def_stmt)
740 && gimple_bb (def_stmt)->loop_father == loop)
742 def_data = get_lim_data (def_stmt);
743 if (def_data)
745 min_cost = MIN (min_cost, def_data->cost);
746 total_cost += def_data->cost;
751 min_cost = MIN (min_cost, total_cost);
752 lim_data->cost += min_cost;
754 if (gimple_phi_num_args (phi) > 1)
756 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
757 gimple cond;
758 if (gsi_end_p (gsi_last_bb (dom)))
759 return false;
760 cond = gsi_stmt (gsi_last_bb (dom));
761 if (gimple_code (cond) != GIMPLE_COND)
762 return false;
763 /* Verify that this is an extended form of a diamond and
764 the PHI arguments are completely controlled by the
765 predicate in DOM. */
766 if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
767 return false;
769 /* Fold in dependencies and cost of the condition. */
770 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
772 if (!add_dependency (val, lim_data, loop, false))
773 return false;
774 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
775 if (def_data)
776 total_cost += def_data->cost;
779 /* We want to avoid unconditionally executing very expensive
780 operations. As costs for our dependencies cannot be
 781 	 negative, just claim we are not invariant for this case.
782 We also are not sure whether the control-flow inside the
783 loop will vanish. */
784 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
785 && !(min_cost != 0
786 && total_cost / min_cost <= 2))
787 return false;
789 /* Assume that the control-flow in the loop will vanish.
790 ??? We should verify this and not artificially increase
791 the cost if that is not the case. */
792 lim_data->cost += stmt_cost (stmt);
795 return true;
797 else
798 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
799 if (!add_dependency (val, lim_data, loop, true))
800 return false;
802 if (gimple_vuse (stmt))
804 mem_ref_p ref = mem_ref_in_stmt (stmt);
806 if (ref)
808 lim_data->max_loop
809 = outermost_indep_loop (lim_data->max_loop, loop, ref);
810 if (!lim_data->max_loop)
811 return false;
813 else
815 if ((val = gimple_vuse (stmt)) != NULL_TREE)
817 if (!add_dependency (val, lim_data, loop, false))
818 return false;
823 lim_data->cost += stmt_cost (stmt);
825 return true;
828 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
829 and that one of the operands of this statement is computed by STMT.
830 Ensure that STMT (together with all the statements that define its
831 operands) is hoisted at least out of the loop LEVEL. */
833 static void
834 set_level (gimple stmt, struct loop *orig_loop, struct loop *level)
836 struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
837 struct lim_aux_data *lim_data;
838 gimple dep_stmt;
839 unsigned i;
841 stmt_loop = find_common_loop (orig_loop, stmt_loop);
842 lim_data = get_lim_data (stmt);
843 if (lim_data != NULL && lim_data->tgt_loop != NULL)
844 stmt_loop = find_common_loop (stmt_loop,
845 loop_outer (lim_data->tgt_loop));
846 if (flow_loop_nested_p (stmt_loop, level))
847 return;
849 gcc_assert (level == lim_data->max_loop
850 || flow_loop_nested_p (lim_data->max_loop, level));
852 lim_data->tgt_loop = level;
853 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
854 set_level (dep_stmt, orig_loop, level);
 857 /* Determines an outermost loop from which we want to hoist the statement STMT.
 858    For now we choose the outermost possible loop.  TODO -- use profiling
859 information to set it more sanely. */
861 static void
862 set_profitable_level (gimple stmt)
864 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
867 /* Returns true if STMT is a call that has side effects. */
869 static bool
870 nonpure_call_p (gimple stmt)
872 if (gimple_code (stmt) != GIMPLE_CALL)
873 return false;
875 return gimple_has_side_effects (stmt);
878 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
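/* An illustrative example (assumed, not taken from the original sources):
   within a loop, "x = a / b" with B loop invariant is rewritten to

     reciptmp = 1 / b;
     x = a * reciptmp;

   so that the expensive division, now invariant on its own, can be hoisted
   out of the loop by the code motion below.  */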
880 static gimple
881 rewrite_reciprocal (gimple_stmt_iterator *bsi)
883 gassign *stmt, *stmt1, *stmt2;
884 tree name, lhs, type;
885 tree real_one;
886 gimple_stmt_iterator gsi;
888 stmt = as_a <gassign *> (gsi_stmt (*bsi));
889 lhs = gimple_assign_lhs (stmt);
890 type = TREE_TYPE (lhs);
892 real_one = build_one_cst (type);
894 name = make_temp_ssa_name (type, NULL, "reciptmp");
895 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
896 gimple_assign_rhs2 (stmt));
897 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
898 gimple_assign_rhs1 (stmt));
900 /* Replace division stmt with reciprocal and multiply stmts.
901 The multiply stmt is not invariant, so update iterator
902 and avoid rescanning. */
903 gsi = *bsi;
904 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
905 gsi_replace (&gsi, stmt2, true);
907 /* Continue processing with invariant reciprocal statement. */
908 return stmt1;
911 /* Check if the pattern at *BSI is a bittest of the form
912 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
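/* For illustration (an assumed example, not from the original sources):
   inside a loop with invariant B but varying A,

     tmp = A >> B;
     if ((tmp & 1) != 0) ...

   is rewritten to

     mask = 1 << B;
     if ((A & mask) != 0) ...

   so that the computation of the mask, which depends only on B, can be
   hoisted out of the loop.  */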
914 static gimple
915 rewrite_bittest (gimple_stmt_iterator *bsi)
917 gassign *stmt;
918 gimple stmt1;
919 gassign *stmt2;
920 gimple use_stmt;
921 gcond *cond_stmt;
922 tree lhs, name, t, a, b;
923 use_operand_p use;
925 stmt = as_a <gassign *> (gsi_stmt (*bsi));
926 lhs = gimple_assign_lhs (stmt);
928 /* Verify that the single use of lhs is a comparison against zero. */
929 if (TREE_CODE (lhs) != SSA_NAME
930 || !single_imm_use (lhs, &use, &use_stmt))
931 return stmt;
932 cond_stmt = dyn_cast <gcond *> (use_stmt);
933 if (!cond_stmt)
934 return stmt;
935 if (gimple_cond_lhs (cond_stmt) != lhs
936 || (gimple_cond_code (cond_stmt) != NE_EXPR
937 && gimple_cond_code (cond_stmt) != EQ_EXPR)
938 || !integer_zerop (gimple_cond_rhs (cond_stmt)))
939 return stmt;
941 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
942 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
943 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
944 return stmt;
946 /* There is a conversion in between possibly inserted by fold. */
947 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
949 t = gimple_assign_rhs1 (stmt1);
950 if (TREE_CODE (t) != SSA_NAME
951 || !has_single_use (t))
952 return stmt;
953 stmt1 = SSA_NAME_DEF_STMT (t);
954 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
955 return stmt;
958 /* Verify that B is loop invariant but A is not. Verify that with
959 all the stmt walking we are still in the same loop. */
960 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
961 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
962 return stmt;
964 a = gimple_assign_rhs1 (stmt1);
965 b = gimple_assign_rhs2 (stmt1);
967 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
968 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
970 gimple_stmt_iterator rsi;
972 /* 1 << B */
973 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
974 build_int_cst (TREE_TYPE (a), 1), b);
975 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
976 stmt1 = gimple_build_assign (name, t);
978 /* A & (1 << B) */
979 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
980 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
981 stmt2 = gimple_build_assign (name, t);
983 /* Replace the SSA_NAME we compare against zero. Adjust
984 the type of zero accordingly. */
985 SET_USE (use, name);
986 gimple_cond_set_rhs (cond_stmt,
987 build_int_cst_type (TREE_TYPE (name),
988 0));
990 /* Don't use gsi_replace here, none of the new assignments sets
991 the variable originally set in stmt. Move bsi to stmt1, and
992 then remove the original stmt, so that we get a chance to
993 retain debug info for it. */
994 rsi = *bsi;
995 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
996 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
997 gsi_remove (&rsi, true);
999 return stmt1;
1002 return stmt;
 1005 /* For each statement determines the outermost loop in which it is invariant,
 1006    the statements on whose motion it depends and the cost of the computation.
 1007    This information is stored to the LIM_DATA structure associated with
 1008    each statement.  */
1009 class invariantness_dom_walker : public dom_walker
1011 public:
1012 invariantness_dom_walker (cdi_direction direction)
1013 : dom_walker (direction) {}
1015 virtual void before_dom_children (basic_block);
 1018 /* Determine the outermost loops in which statements in basic block BB are
1019 invariant, and record them to the LIM_DATA associated with the statements.
1020 Callback for dom_walker. */
1022 void
1023 invariantness_dom_walker::before_dom_children (basic_block bb)
1025 enum move_pos pos;
1026 gimple_stmt_iterator bsi;
1027 gimple stmt;
1028 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1029 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
1030 struct lim_aux_data *lim_data;
1032 if (!loop_outer (bb->loop_father))
1033 return;
1035 if (dump_file && (dump_flags & TDF_DETAILS))
1036 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1037 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
 1039   /* Look at PHI nodes, but only if there are at most two.
1040 ??? We could relax this further by post-processing the inserted
1041 code and transforming adjacent cond-exprs with the same predicate
1042 to control flow again. */
1043 bsi = gsi_start_phis (bb);
1044 if (!gsi_end_p (bsi)
1045 && ((gsi_next (&bsi), gsi_end_p (bsi))
1046 || (gsi_next (&bsi), gsi_end_p (bsi))))
1047 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1049 stmt = gsi_stmt (bsi);
1051 pos = movement_possibility (stmt);
1052 if (pos == MOVE_IMPOSSIBLE)
1053 continue;
1055 lim_data = init_lim_data (stmt);
1056 lim_data->always_executed_in = outermost;
1058 if (!determine_max_movement (stmt, false))
1060 lim_data->max_loop = NULL;
1061 continue;
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1066 print_gimple_stmt (dump_file, stmt, 2, 0);
1067 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1068 loop_depth (lim_data->max_loop),
1069 lim_data->cost);
1072 if (lim_data->cost >= LIM_EXPENSIVE)
1073 set_profitable_level (stmt);
1076 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1078 stmt = gsi_stmt (bsi);
1080 pos = movement_possibility (stmt);
1081 if (pos == MOVE_IMPOSSIBLE)
1083 if (nonpure_call_p (stmt))
1085 maybe_never = true;
1086 outermost = NULL;
1088 /* Make sure to note always_executed_in for stores to make
1089 store-motion work. */
1090 else if (stmt_makes_single_store (stmt))
1092 struct lim_aux_data *lim_data = init_lim_data (stmt);
1093 lim_data->always_executed_in = outermost;
1095 continue;
1098 if (is_gimple_assign (stmt)
1099 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1100 == GIMPLE_BINARY_RHS))
1102 tree op0 = gimple_assign_rhs1 (stmt);
1103 tree op1 = gimple_assign_rhs2 (stmt);
1104 struct loop *ol1 = outermost_invariant_loop (op1,
1105 loop_containing_stmt (stmt));
1107 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1108 to be hoisted out of loop, saving expensive divide. */
1109 if (pos == MOVE_POSSIBLE
1110 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1111 && flag_unsafe_math_optimizations
1112 && !flag_trapping_math
1113 && ol1 != NULL
1114 && outermost_invariant_loop (op0, ol1) == NULL)
1115 stmt = rewrite_reciprocal (&bsi);
1117 /* If the shift count is invariant, convert (A >> B) & 1 to
1118 A & (1 << B) allowing the bit mask to be hoisted out of the loop
1119 saving an expensive shift. */
1120 if (pos == MOVE_POSSIBLE
1121 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1122 && integer_onep (op1)
1123 && TREE_CODE (op0) == SSA_NAME
1124 && has_single_use (op0))
1125 stmt = rewrite_bittest (&bsi);
1128 lim_data = init_lim_data (stmt);
1129 lim_data->always_executed_in = outermost;
1131 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1132 continue;
1134 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1136 lim_data->max_loop = NULL;
1137 continue;
1140 if (dump_file && (dump_flags & TDF_DETAILS))
1142 print_gimple_stmt (dump_file, stmt, 2, 0);
1143 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1144 loop_depth (lim_data->max_loop),
1145 lim_data->cost);
1148 if (lim_data->cost >= LIM_EXPENSIVE)
1149 set_profitable_level (stmt);
1153 class move_computations_dom_walker : public dom_walker
1155 public:
1156 move_computations_dom_walker (cdi_direction direction)
1157 : dom_walker (direction), todo_ (0) {}
1159 virtual void before_dom_children (basic_block);
1161 unsigned int todo_;
1164 /* Hoist the statements in basic block BB out of the loops prescribed by
1165 data stored in LIM_DATA structures associated with each statement. Callback
1166 for walk_dominator_tree. */
1168 void
1169 move_computations_dom_walker::before_dom_children (basic_block bb)
1171 struct loop *level;
1172 unsigned cost = 0;
1173 struct lim_aux_data *lim_data;
1175 if (!loop_outer (bb->loop_father))
1176 return;
1178 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1180 gassign *new_stmt;
1181 gphi *stmt = bsi.phi ();
1183 lim_data = get_lim_data (stmt);
1184 if (lim_data == NULL)
1186 gsi_next (&bsi);
1187 continue;
1190 cost = lim_data->cost;
1191 level = lim_data->tgt_loop;
1192 clear_lim_data (stmt);
1194 if (!level)
1196 gsi_next (&bsi);
1197 continue;
1200 if (dump_file && (dump_flags & TDF_DETAILS))
1202 fprintf (dump_file, "Moving PHI node\n");
1203 print_gimple_stmt (dump_file, stmt, 0, 0);
1204 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1205 cost, level->num);
1208 if (gimple_phi_num_args (stmt) == 1)
1210 tree arg = PHI_ARG_DEF (stmt, 0);
1211 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1212 TREE_CODE (arg), arg);
1214 else
1216 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1217 gimple cond = gsi_stmt (gsi_last_bb (dom));
1218 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1219 /* Get the PHI arguments corresponding to the true and false
1220 edges of COND. */
1221 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1222 gcc_assert (arg0 && arg1);
1223 t = build2 (gimple_cond_code (cond), boolean_type_node,
1224 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1225 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1226 COND_EXPR, t, arg0, arg1);
1227 todo_ |= TODO_cleanup_cfg;
1229 if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1230 && (!ALWAYS_EXECUTED_IN (bb)
1231 || (ALWAYS_EXECUTED_IN (bb) != level
1232 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1234 tree lhs = gimple_assign_lhs (new_stmt);
1235 SSA_NAME_RANGE_INFO (lhs) = NULL;
1236 SSA_NAME_ANTI_RANGE_P (lhs) = 0;
1238 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1239 remove_phi_node (&bsi, false);
1242 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1244 edge e;
1246 gimple stmt = gsi_stmt (bsi);
1248 lim_data = get_lim_data (stmt);
1249 if (lim_data == NULL)
1251 gsi_next (&bsi);
1252 continue;
1255 cost = lim_data->cost;
1256 level = lim_data->tgt_loop;
1257 clear_lim_data (stmt);
1259 if (!level)
1261 gsi_next (&bsi);
1262 continue;
1265 /* We do not really want to move conditionals out of the loop; we just
1266 placed it here to force its operands to be moved if necessary. */
1267 if (gimple_code (stmt) == GIMPLE_COND)
1268 continue;
1270 if (dump_file && (dump_flags & TDF_DETAILS))
1272 fprintf (dump_file, "Moving statement\n");
1273 print_gimple_stmt (dump_file, stmt, 0, 0);
1274 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1275 cost, level->num);
1278 e = loop_preheader_edge (level);
1279 gcc_assert (!gimple_vdef (stmt));
1280 if (gimple_vuse (stmt))
1282 /* The new VUSE is the one from the virtual PHI in the loop
1283 header or the one already present. */
1284 gphi_iterator gsi2;
1285 for (gsi2 = gsi_start_phis (e->dest);
1286 !gsi_end_p (gsi2); gsi_next (&gsi2))
1288 gphi *phi = gsi2.phi ();
1289 if (virtual_operand_p (gimple_phi_result (phi)))
1291 gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1292 break;
1296 gsi_remove (&bsi, false);
1297 if (gimple_has_lhs (stmt)
1298 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1299 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1300 && (!ALWAYS_EXECUTED_IN (bb)
1301 || !(ALWAYS_EXECUTED_IN (bb) == level
1302 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1304 tree lhs = gimple_get_lhs (stmt);
1305 SSA_NAME_RANGE_INFO (lhs) = NULL;
1306 SSA_NAME_ANTI_RANGE_P (lhs) = 0;
1308 /* In case this is a stmt that is not unconditionally executed
1309 when the target loop header is executed and the stmt may
1310 invoke undefined integer or pointer overflow rewrite it to
1311 unsigned arithmetic. */
1312 if (is_gimple_assign (stmt)
1313 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1314 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1315 && arith_code_with_undefined_signed_overflow
1316 (gimple_assign_rhs_code (stmt))
1317 && (!ALWAYS_EXECUTED_IN (bb)
1318 || !(ALWAYS_EXECUTED_IN (bb) == level
1319 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1320 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1321 else
1322 gsi_insert_on_edge (e, stmt);
1326 /* Hoist the statements out of the loops prescribed by data stored in
 1327    LIM_DATA structures associated with each statement.  */
1329 static unsigned int
1330 move_computations (void)
1332 move_computations_dom_walker walker (CDI_DOMINATORS);
1333 walker.walk (cfun->cfg->x_entry_block_ptr);
1335 gsi_commit_edge_inserts ();
1336 if (need_ssa_update_p (cfun))
1337 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1339 return walker.todo_;
1342 /* Checks whether the statement defining variable *INDEX can be hoisted
1343 out of the loop passed in DATA. Callback for for_each_index. */
1345 static bool
1346 may_move_till (tree ref, tree *index, void *data)
1348 struct loop *loop = (struct loop *) data, *max_loop;
1350 /* If REF is an array reference, check also that the step and the lower
 1351      bound are invariant in LOOP.  */
1352 if (TREE_CODE (ref) == ARRAY_REF)
1354 tree step = TREE_OPERAND (ref, 3);
1355 tree lbound = TREE_OPERAND (ref, 2);
1357 max_loop = outermost_invariant_loop (step, loop);
1358 if (!max_loop)
1359 return false;
1361 max_loop = outermost_invariant_loop (lbound, loop);
1362 if (!max_loop)
1363 return false;
1366 max_loop = outermost_invariant_loop (*index, loop);
1367 if (!max_loop)
1368 return false;
1370 return true;
 1373 /* If OP is an SSA_NAME, force the statement that defines it to be
 1374    moved out of the LOOP.  ORIG_LOOP is the loop in which EXPR is used.  */
1376 static void
1377 force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1379 gimple stmt;
1381 if (!op
1382 || is_gimple_min_invariant (op))
1383 return;
1385 gcc_assert (TREE_CODE (op) == SSA_NAME);
1387 stmt = SSA_NAME_DEF_STMT (op);
1388 if (gimple_nop_p (stmt))
1389 return;
1391 set_level (stmt, orig_loop, loop);
1394 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1395 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1396 for_each_index. */
1398 struct fmt_data
1400 struct loop *loop;
1401 struct loop *orig_loop;
1404 static bool
1405 force_move_till (tree ref, tree *index, void *data)
1407 struct fmt_data *fmt_data = (struct fmt_data *) data;
1409 if (TREE_CODE (ref) == ARRAY_REF)
1411 tree step = TREE_OPERAND (ref, 3);
1412 tree lbound = TREE_OPERAND (ref, 2);
1414 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1415 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1418 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1420 return true;
1423 /* A function to free the mem_ref object OBJ. */
1425 static void
1426 memref_free (struct im_mem_ref *mem)
1428 mem->accesses_in_loop.release ();
1431 /* Allocates and returns a memory reference description for MEM whose hash
1432 value is HASH and id is ID. */
1434 static mem_ref_p
1435 mem_ref_alloc (tree mem, unsigned hash, unsigned id)
1437 mem_ref_p ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
1438 ao_ref_init (&ref->mem, mem);
1439 ref->id = id;
1440 ref->hash = hash;
1441 ref->stored = NULL;
1442 bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
1443 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1444 ref->accesses_in_loop.create (1);
1446 return ref;
1449 /* Records memory reference location *LOC in LOOP to the memory reference
1450 description REF. The reference occurs in statement STMT. */
1452 static void
1453 record_mem_ref_loc (mem_ref_p ref, gimple stmt, tree *loc)
1455 mem_ref_loc aref;
1456 aref.stmt = stmt;
1457 aref.ref = loc;
1458 ref->accesses_in_loop.safe_push (aref);
 1461 /* Set the LOOP bit in REF's stored bitmap, allocating the bitmap if
 1462    necessary.  Return whether a bit was changed.  */
1464 static bool
1465 set_ref_stored_in_loop (mem_ref_p ref, struct loop *loop)
1467 if (!ref->stored)
1468 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1469 return bitmap_set_bit (ref->stored, loop->num);
1472 /* Marks reference REF as stored in LOOP. */
1474 static void
1475 mark_ref_stored (mem_ref_p ref, struct loop *loop)
1477 while (loop != current_loops->tree_root
1478 && set_ref_stored_in_loop (ref, loop))
1479 loop = loop_outer (loop);
1482 /* Gathers memory references in statement STMT in LOOP, storing the
1483 information about them in the memory_accesses structure. Marks
1484 the vops accessed through unrecognized statements there as
1485 well. */
1487 static void
1488 gather_mem_refs_stmt (struct loop *loop, gimple stmt)
1490 tree *mem = NULL;
1491 hashval_t hash;
1492 im_mem_ref **slot;
1493 mem_ref_p ref;
1494 bool is_stored;
1495 unsigned id;
1497 if (!gimple_vuse (stmt))
1498 return;
1500 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1501 if (!mem)
1503 /* We use the shared mem_ref for all unanalyzable refs. */
1504 id = UNANALYZABLE_MEM_ID;
1505 ref = memory_accesses.refs_list[id];
1506 if (dump_file && (dump_flags & TDF_DETAILS))
1508 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1509 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1511 is_stored = gimple_vdef (stmt);
1513 else
1515 hash = iterative_hash_expr (*mem, 0);
1516 slot = memory_accesses.refs->find_slot_with_hash (*mem, hash, INSERT);
1517 if (*slot)
1519 ref = (mem_ref_p) *slot;
1520 id = ref->id;
1522 else
1524 id = memory_accesses.refs_list.length ();
1525 ref = mem_ref_alloc (*mem, hash, id);
1526 memory_accesses.refs_list.safe_push (ref);
1527 *slot = ref;
1529 if (dump_file && (dump_flags & TDF_DETAILS))
1531 fprintf (dump_file, "Memory reference %u: ", id);
1532 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1533 fprintf (dump_file, "\n");
1537 record_mem_ref_loc (ref, stmt, mem);
1539 bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
1540 if (is_stored)
1542 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1543 mark_ref_stored (ref, loop);
1545 return;
1548 static unsigned *bb_loop_postorder;
 1550 /* qsort comparison function to sort blocks by their loop fathers' postorder.  */
1552 static int
1553 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
1555 basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
1556 basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
1557 struct loop *loop1 = bb1->loop_father;
1558 struct loop *loop2 = bb2->loop_father;
1559 if (loop1->num == loop2->num)
1560 return 0;
1561 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
 1564 /* qsort comparison function to sort ref locs by their loop fathers' postorder.  */
1566 static int
1567 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
1569 mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
1570 mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
1571 struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1572 struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1573 if (loop1->num == loop2->num)
1574 return 0;
1575 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1578 /* Gathers memory references in loops. */
1580 static void
1581 analyze_memory_references (void)
1583 gimple_stmt_iterator bsi;
1584 basic_block bb, *bbs;
1585 struct loop *loop, *outer;
1586 unsigned i, n;
 1588   /* Collect all basic-blocks in loops and sort them by their
 1589      loops' postorder.  */
1590 i = 0;
1591 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1592 FOR_EACH_BB_FN (bb, cfun)
1593 if (bb->loop_father != current_loops->tree_root)
1594 bbs[i++] = bb;
1595 n = i;
1596 qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);
1598 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1599 That results in better locality for all the bitmaps. */
1600 for (i = 0; i < n; ++i)
1602 basic_block bb = bbs[i];
1603 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1604 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
 1607   /* Sort the location list of gathered memory references by their
 1608      loop postorder number.  */
1609 im_mem_ref *ref;
1610 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1611 ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);
1613 free (bbs);
1614 // free (bb_loop_postorder);
1616 /* Propagate the information about accessed memory references up
1617 the loop hierarchy. */
1618 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1620 /* Finalize the overall touched references (including subloops). */
1621 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1622 &memory_accesses.refs_stored_in_loop[loop->num]);
1624 /* Propagate the information about accessed memory references up
1625 the loop hierarchy. */
1626 outer = loop_outer (loop);
1627 if (outer == current_loops->tree_root)
1628 continue;
1630 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1631 &memory_accesses.all_refs_stored_in_loop[loop->num]);
1635 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1636 tree_to_aff_combination_expand. */
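/* A simple illustration (assumed, not from the original sources): for
   references a[i] and a[i + 1] with the same base object, the expanded
   affine offsets differ by exactly one element size, so with known access
   sizes the two locations cannot overlap and false is returned, even when
   the alias oracle alone could not disambiguate them.  */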
1638 static bool
1639 mem_refs_may_alias_p (mem_ref_p mem1, mem_ref_p mem2,
1640 hash_map<tree, name_expansion *> **ttae_cache)
1642 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1643 object and their offset differ in such a way that the locations cannot
1644 overlap, then they cannot alias. */
1645 widest_int size1, size2;
1646 aff_tree off1, off2;
1648 /* Perform basic offset and type-based disambiguation. */
1649 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
1650 return false;
1652 /* The expansion of addresses may be a bit expensive, thus we only do
1653 the check at -O2 and higher optimization levels. */
1654 if (optimize < 2)
1655 return true;
1657 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1658 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1659 aff_combination_expand (&off1, ttae_cache);
1660 aff_combination_expand (&off2, ttae_cache);
1661 aff_combination_scale (&off1, -1);
1662 aff_combination_add (&off2, &off1);
1664 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1665 return false;
1667 return true;
1670 /* Compare function for bsearch searching for reference locations
1671 in a loop. */
1673 static int
1674 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
1676 struct loop *loop = (struct loop *)const_cast<void *>(loop_);
1677 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1678 struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1679 if (loop->num == loc_loop->num
1680 || flow_loop_nested_p (loop, loc_loop))
1681 return 0;
1682 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1683 ? -1 : 1);
1686 /* Iterates over all locations of REF in LOOP and its subloops calling
1687 fn.operator() with the location as argument. When that operator
1688 returns true the iteration is stopped and true is returned.
1689 Otherwise false is returned. */
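/* A minimal usage sketch (for illustration only): any object providing a
   "bool operator () (mem_ref_loc_p)" can be passed as FN, e.g.

     for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));

   which is how rewrite_mem_refs below visits every location of REF.  */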
1691 template <typename FN>
1692 static bool
1693 for_all_locs_in_loop (struct loop *loop, mem_ref_p ref, FN fn)
1695 unsigned i;
1696 mem_ref_loc_p loc;
1698 /* Search for the cluster of locs in the accesses_in_loop vector
 1699      which is sorted by the postorder index of the loop father.  */
1700 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
1701 if (!loc)
1702 return false;
1704 /* We have found one location inside loop or its sub-loops. Iterate
1705 both forward and backward to cover the whole cluster. */
1706 i = loc - ref->accesses_in_loop.address ();
1707 while (i > 0)
1709 --i;
1710 mem_ref_loc_p l = &ref->accesses_in_loop[i];
1711 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1712 break;
1713 if (fn (l))
1714 return true;
1716 for (i = loc - ref->accesses_in_loop.address ();
1717 i < ref->accesses_in_loop.length (); ++i)
1719 mem_ref_loc_p l = &ref->accesses_in_loop[i];
1720 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1721 break;
1722 if (fn (l))
1723 return true;
1726 return false;
1729 /* Rewrites location LOC by TMP_VAR. */
1731 struct rewrite_mem_ref_loc
1733 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1734 bool operator () (mem_ref_loc_p loc);
1735 tree tmp_var;
1738 bool
1739 rewrite_mem_ref_loc::operator () (mem_ref_loc_p loc)
1741 *loc->ref = tmp_var;
1742 update_stmt (loc->stmt);
1743 return false;
1746 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1748 static void
1749 rewrite_mem_refs (struct loop *loop, mem_ref_p ref, tree tmp_var)
1751 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1754 /* Stores the first reference location in LOCP. */
1756 struct first_mem_ref_loc_1
1758 first_mem_ref_loc_1 (mem_ref_loc_p *locp_) : locp (locp_) {}
1759 bool operator () (mem_ref_loc_p loc);
1760 mem_ref_loc_p *locp;
1763 bool
1764 first_mem_ref_loc_1::operator () (mem_ref_loc_p loc)
1766 *locp = loc;
1767 return true;
1770 /* Returns the first reference location to REF in LOOP. */
1772 static mem_ref_loc_p
1773 first_mem_ref_loc (struct loop *loop, mem_ref_p ref)
1775 mem_ref_loc_p locp = NULL;
1776 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1777 return locp;
1780 struct prev_flag_edges {
1781 /* Edge to insert new flag comparison code. */
1782 edge append_cond_position;
1784 /* Edge for fall through from previous flag comparison. */
1785 edge last_cond_fallthru;
1788 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1789 MEM along edge EX.
1791 The store is only done if MEM has changed. We do this so no
1792 changes to MEM occur on code paths that did not originally store
1793 into it.
1795 The common case for execute_sm will transform:
1797 for (...) {
1798 if (foo)
1799 stuff;
1800 else
1801 MEM = TMP_VAR;
1804 into:
1806 lsm = MEM;
1807 for (...) {
1808 if (foo)
1809 stuff;
1810 else
1811 lsm = TMP_VAR;
1813 MEM = lsm;
1815 This function will generate:
1817 lsm = MEM;
1819 lsm_flag = false;
1821 for (...) {
1822 if (foo)
1823 stuff;
1824 else {
1825 lsm = TMP_VAR;
1826 lsm_flag = true;
1829 if (lsm_flag) <--
1830 MEM = lsm; <--
1833 static void
1834 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag)
1836 basic_block new_bb, then_bb, old_dest;
1837 bool loop_has_only_one_exit;
1838 edge then_old_edge, orig_ex = ex;
1839 gimple_stmt_iterator gsi;
1840 gimple stmt;
1841 struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1842 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1844 /* ?? Insert store after previous store if applicable. See note
1845 below. */
1846 if (prev_edges)
1847 ex = prev_edges->append_cond_position;
1849 loop_has_only_one_exit = single_pred_p (ex->dest);
1851 if (loop_has_only_one_exit)
1852 ex = split_block_after_labels (ex->dest);
1854 old_dest = ex->dest;
1855 new_bb = split_edge (ex);
1856 then_bb = create_empty_bb (new_bb);
1857 if (irr)
1858 then_bb->flags = BB_IRREDUCIBLE_LOOP;
1859 add_bb_to_loop (then_bb, new_bb->loop_father);
1861 gsi = gsi_start_bb (new_bb);
1862 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1863 NULL_TREE, NULL_TREE);
1864 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1866 gsi = gsi_start_bb (then_bb);
1867 /* Insert actual store. */
1868 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1869 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1871 make_edge (new_bb, then_bb,
1872 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1873 make_edge (new_bb, old_dest,
1874 EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1875 then_old_edge = make_edge (then_bb, old_dest,
1876 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1878 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
1880 if (prev_edges)
1882 basic_block prevbb = prev_edges->last_cond_fallthru->src;
1883 redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
1884 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
1885 set_immediate_dominator (CDI_DOMINATORS, old_dest,
1886 recompute_dominator (CDI_DOMINATORS, old_dest));
1889 /* ?? Because stores may alias, they must happen in the exact
1890 sequence they originally happened. Save the position right after
1891 the (_lsm) store we just created so we can continue appending after
1892 it and maintain the original order. */
1894 struct prev_flag_edges *p;
1896 if (orig_ex->aux)
1897 orig_ex->aux = NULL;
1898 alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
1899 p = (struct prev_flag_edges *) orig_ex->aux;
1900 p->append_cond_position = then_old_edge;
1901 p->last_cond_fallthru = find_edge (new_bb, old_dest);
1902 orig_ex->aux = (void *) p;
1905 if (!loop_has_only_one_exit)
1906 for (gphi_iterator gpi = gsi_start_phis (old_dest);
1907 !gsi_end_p (gpi); gsi_next (&gpi))
1909 gphi *phi = gpi.phi ();
1910 unsigned i;
1912 for (i = 0; i < gimple_phi_num_args (phi); i++)
1913 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
1915 tree arg = gimple_phi_arg_def (phi, i);
1916 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
1917 update_stmt (phi);
1920 /* Remove the original fall through edge. This was the
1921 single_succ_edge (new_bb). */
1922 EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU;
 1925 /* When REF is written at the location, set the flag indicating the store.  */
1927 struct sm_set_flag_if_changed
1929 sm_set_flag_if_changed (tree flag_) : flag (flag_) {}
1930 bool operator () (mem_ref_loc_p loc);
1931 tree flag;
1934 bool
1935 sm_set_flag_if_changed::operator () (mem_ref_loc_p loc)
1937 /* Only set the flag for writes. */
1938 if (is_gimple_assign (loc->stmt)
1939 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
1941 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
1942 gimple stmt = gimple_build_assign (flag, boolean_true_node);
1943 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1945 return false;
1948 /* Helper function for execute_sm. On every location where REF is
1949 set, set an appropriate flag indicating the store. */
1951 static tree
1952 execute_sm_if_changed_flag_set (struct loop *loop, mem_ref_p ref)
1954 tree flag;
1955 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
1956 flag = create_tmp_reg (boolean_type_node, str);
1957 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag));
1958 return flag;
1961 /* Executes store motion of memory reference REF from LOOP.
1962 Exits from the LOOP are stored in EXITS. The initialization of the
 1963    temporary variable is put in the preheader of the loop, and assignments
1964 to the reference from the temporary variable are emitted to exits. */
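/* In the simplest (single-threaded) case this is the classic store motion
   transformation; an illustrative sketch (assumed, not from the original
   sources):

     for (...)
       MEM = f (MEM);

   becomes

     lsm_tmp = MEM;
     for (...)
       lsm_tmp = f (lsm_tmp);
     MEM = lsm_tmp;

   with the final store emitted on every exit edge of the loop.  */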
1966 static void
1967 execute_sm (struct loop *loop, vec<edge> exits, mem_ref_p ref)
1969 tree tmp_var, store_flag = NULL_TREE;
1970 unsigned i;
1971 gassign *load;
1972 struct fmt_data fmt_data;
1973 edge ex;
1974 struct lim_aux_data *lim_data;
1975 bool multi_threaded_model_p = false;
1976 gimple_stmt_iterator gsi;
1978 if (dump_file && (dump_flags & TDF_DETAILS))
1980 fprintf (dump_file, "Executing store motion of ");
1981 print_generic_expr (dump_file, ref->mem.ref, 0);
1982 fprintf (dump_file, " from loop %d\n", loop->num);
1985 tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
1986 get_lsm_tmp_name (ref->mem.ref, ~0));
1988 fmt_data.loop = loop;
1989 fmt_data.orig_loop = loop;
1990 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
1992 if (bb_in_transaction (loop_preheader_edge (loop)->src)
1993 || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
1994 multi_threaded_model_p = true;
1996 if (multi_threaded_model_p)
1997 store_flag = execute_sm_if_changed_flag_set (loop, ref);
1999 rewrite_mem_refs (loop, ref, tmp_var);
2001 /* Emit the load just before the first occurrence of the reference
2002 in the loop; the lim_data set up below makes move_computations
2003 hoist it out of the loop after all of its dependencies. */
2004 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2006 /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2007 load altogether, since the store is predicated by a flag. We
2008 could do the load only if it was originally in the loop. */
2009 load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
2010 lim_data = init_lim_data (load);
2011 lim_data->max_loop = loop;
2012 lim_data->tgt_loop = loop;
2013 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2015 if (multi_threaded_model_p)
2017 load = gimple_build_assign (store_flag, boolean_false_node);
2018 lim_data = init_lim_data (load);
2019 lim_data->max_loop = loop;
2020 lim_data->tgt_loop = loop;
2021 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2024 /* Sink the store to every exit from the loop. */
2025 FOR_EACH_VEC_ELT (exits, i, ex)
2026 if (!multi_threaded_model_p)
2028 gassign *store;
2029 store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
2030 gsi_insert_on_edge (ex, store);
2032 else
2033 execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag);
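/* Rough summary of the transformation performed above (variable names
   are illustrative): in the single-threaded model a reference MEM is
   rewritten as

     lsm_tmp = MEM;            <- load, later moved to the preheader
     loop { ... lsm_tmp ... }  <- in-loop accesses use the temporary
     MEM = lsm_tmp;            <- store emitted on every exit edge

   while in the multi-threaded model the exit stores are guarded by the
   flag set up by execute_sm_if_changed_flag_set and emitted via
   execute_sm_if_changed.  */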
2036 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2037 edges of the LOOP. */
2039 static void
2040 hoist_memory_references (struct loop *loop, bitmap mem_refs,
2041 vec<edge> exits)
2043 mem_ref_p ref;
2044 unsigned i;
2045 bitmap_iterator bi;
2047 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2049 ref = memory_accesses.refs_list[i];
2050 execute_sm (loop, exits, ref);
2054 struct ref_always_accessed
2056 ref_always_accessed (struct loop *loop_, bool stored_p_)
2057 : loop (loop_), stored_p (stored_p_) {}
2058 bool operator () (mem_ref_loc_p loc);
2059 struct loop *loop;
2060 bool stored_p;
2063 bool
2064 ref_always_accessed::operator () (mem_ref_loc_p loc)
2066 struct loop *must_exec;
2068 if (!get_lim_data (loc->stmt))
2069 return false;
2071 /* If we require an always executed store, make sure the statement
2072 stores to the reference. */
2073 if (stored_p)
2075 tree lhs = gimple_get_lhs (loc->stmt);
2076 if (!lhs
2077 || lhs != *loc->ref)
2078 return false;
2081 must_exec = get_lim_data (loc->stmt)->always_executed_in;
2082 if (!must_exec)
2083 return false;
2085 if (must_exec == loop
2086 || flow_loop_nested_p (must_exec, loop))
2087 return true;
2089 return false;
2092 /* Returns true if REF is always accessed in LOOP. If STORED_P is true,
2093 make sure REF is always stored to in LOOP. */
2095 static bool
2096 ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
2098 return for_all_locs_in_loop (loop, ref,
2099 ref_always_accessed (loop, stored_p));
2102 /* Returns true if REF1 and REF2 are independent. */
2104 static bool
2105 refs_independent_p (mem_ref_p ref1, mem_ref_p ref2)
2107 if (ref1 == ref2)
2108 return true;
2110 if (dump_file && (dump_flags & TDF_DETAILS))
2111 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2112 ref1->id, ref2->id);
2114 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
2116 if (dump_file && (dump_flags & TDF_DETAILS))
2117 fprintf (dump_file, "dependent.\n");
2118 return false;
2120 else
2122 if (dump_file && (dump_flags & TDF_DETAILS))
2123 fprintf (dump_file, "independent.\n");
2124 return true;
2128 /* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
2129 and its super-loops. */
2131 static void
2132 record_dep_loop (struct loop *loop, mem_ref_p ref, bool stored_p)
2134 /* We can propagate dependent-in-loop bits up the loop
2135 hierarchy to all outer loops. */
2136 while (loop != current_loops->tree_root
2137 && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2138 loop = loop_outer (loop);
2141 /* Returns true if REF is independent of all other memory references in
2142 LOOP. */
2144 static bool
2145 ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref, bool stored_p)
2147 bitmap refs_to_check;
2148 unsigned i;
2149 bitmap_iterator bi;
2150 mem_ref_p aref;
2152 if (stored_p)
2153 refs_to_check = &memory_accesses.refs_in_loop[loop->num];
2154 else
2155 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2157 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
2158 return false;
2160 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2162 aref = memory_accesses.refs_list[i];
2163 if (!refs_independent_p (ref, aref))
2164 return false;
2167 return true;
2170 /* Returns true if REF is independent of all other memory references in
2171 LOOP. Wrapper over ref_indep_loop_p_1, caching its results. */
2173 static bool
2174 ref_indep_loop_p_2 (struct loop *loop, mem_ref_p ref, bool stored_p)
2176 stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
2178 if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2179 return true;
2180 if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2181 return false;
2183 struct loop *inner = loop->inner;
2184 while (inner)
2186 if (!ref_indep_loop_p_2 (inner, ref, stored_p))
2187 return false;
2188 inner = inner->next;
2191 bool indep_p = ref_indep_loop_p_1 (loop, ref, stored_p);
2193 if (dump_file && (dump_flags & TDF_DETAILS))
2194 fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
2195 ref->id, loop->num, indep_p ? "independent" : "dependent");
2197 /* Record the computed result in the cache. */
2198 if (indep_p)
2200 if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
2201 && stored_p)
2203 /* If it's independent of all refs then it's independent
2204 of stores, too. */
2205 bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
2208 else
2210 record_dep_loop (loop, ref, stored_p);
2211 if (!stored_p)
2213 /* If it's dependent on stores it's dependent on
2214 all refs, too. */
2215 record_dep_loop (loop, ref, true);
2219 return indep_p;
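/* Informal note on the two query variants: a reference that is only
   loaded in LOOP (STORED_P false) merely has to be independent of the
   stores in LOOP, while a reference that is also stored in LOOP
   (STORED_P true, or forced above by REF->stored) has to be
   independent of all other references in LOOP.  */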
2222 /* Returns true if REF is independent of all other memory references in
2223 LOOP. */
2225 static bool
2226 ref_indep_loop_p (struct loop *loop, mem_ref_p ref)
2228 gcc_checking_assert (MEM_ANALYZABLE (ref));
2230 return ref_indep_loop_p_2 (loop, ref, false);
2233 /* Returns true if we can perform store motion of REF from LOOP. */
2235 static bool
2236 can_sm_ref_p (struct loop *loop, mem_ref_p ref)
2238 tree base;
2240 /* Can't hoist unanalyzable refs. */
2241 if (!MEM_ANALYZABLE (ref))
2242 return false;
2244 /* It should be movable. */
2245 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2246 || TREE_THIS_VOLATILE (ref->mem.ref)
2247 || !for_each_index (&ref->mem.ref, may_move_till, loop))
2248 return false;
2250 /* If it can throw, fail; we do not properly update EH info. */
2251 if (tree_could_throw_p (ref->mem.ref))
2252 return false;
2254 /* If it can trap, it must always be executed in LOOP.
2255 Readonly memory locations may trap when storing to them, but
2256 tree_could_trap_p is a predicate for rvalues, so check that
2257 explicitly. */
2258 base = get_base_address (ref->mem.ref);
2259 if ((tree_could_trap_p (ref->mem.ref)
2260 || (DECL_P (base) && TREE_READONLY (base)))
2261 && !ref_always_accessed_p (loop, ref, true))
2262 return false;
2264 /* And it must be independent of all other memory references
2265 in LOOP. */
2266 if (!ref_indep_loop_p (loop, ref))
2267 return false;
2269 return true;
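/* Illustrative example (not taken from any testcase): in

     for (i = 0; i < n; i++)
       if (cond[i])
         *p = f (i);

   the store to *p happens only conditionally; if it may trap, or P
   points to read-only memory, the ref_always_accessed_p check above
   rejects it, so store motion does not introduce an unconditional
   store on the loop exits.  */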
2272 /* Marks in REFS_TO_SM the references in LOOP for which store motion
2273 should be performed. SM_EXECUTED is the set of references for which
2274 store motion was already performed in one of the outer loops. */
2276 static void
2277 find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2279 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2280 unsigned i;
2281 bitmap_iterator bi;
2282 mem_ref_p ref;
2284 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2286 ref = memory_accesses.refs_list[i];
2287 if (can_sm_ref_p (loop, ref))
2288 bitmap_set_bit (refs_to_sm, i);
2292 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2293 for a store motion optimization (i.e. whether we can insert statements
2294 on its exits). */
2296 static bool
2297 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
2298 vec<edge> exits)
2300 unsigned i;
2301 edge ex;
2303 FOR_EACH_VEC_ELT (exits, i, ex)
2304 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2305 return false;
2307 return true;
2310 /* Try to perform store motion for all memory references modified inside
2311 LOOP. SM_EXECUTED is the bitmap of the memory references for which
2312 store motion was already executed in one of the outer loops. */
2314 static void
2315 store_motion_loop (struct loop *loop, bitmap sm_executed)
2317 vec<edge> exits = get_loop_exit_edges (loop);
2318 struct loop *subloop;
2319 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2321 if (loop_suitable_for_sm (loop, exits))
2323 find_refs_for_sm (loop, sm_executed, sm_in_loop);
2324 hoist_memory_references (loop, sm_in_loop, exits);
2326 exits.release ();
2328 bitmap_ior_into (sm_executed, sm_in_loop);
2329 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2330 store_motion_loop (subloop, sm_executed);
2331 bitmap_and_compl_into (sm_executed, sm_in_loop);
2332 BITMAP_FREE (sm_in_loop);
2335 /* Try to perform store motion for all memory references modified inside
2336 loops. */
2338 static void
2339 store_motion (void)
2341 struct loop *loop;
2342 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
2344 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2345 store_motion_loop (loop, sm_executed);
2347 BITMAP_FREE (sm_executed);
2348 gsi_commit_edge_inserts ();
2351 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2352 for each such basic block bb records the outermost loop for which execution
2353 of that loop's header implies execution of bb. CONTAINS_CALL is the bitmap of
2354 blocks that contain a nonpure call. */
2356 static void
2357 fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call)
2359 basic_block bb = NULL, *bbs, last = NULL;
2360 unsigned i;
2361 edge e;
2362 struct loop *inn_loop = loop;
2364 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2366 bbs = get_loop_body_in_dom_order (loop);
2368 for (i = 0; i < loop->num_nodes; i++)
2370 edge_iterator ei;
2371 bb = bbs[i];
2373 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2374 last = bb;
2376 if (bitmap_bit_p (contains_call, bb->index))
2377 break;
2379 FOR_EACH_EDGE (e, ei, bb->succs)
2380 if (!flow_bb_inside_loop_p (loop, e->dest))
2381 break;
2382 if (e)
2383 break;
2385 /* A loop might be infinite (TODO use simple loop analysis
2386 to disprove this if possible). */
2387 if (bb->flags & BB_IRREDUCIBLE_LOOP)
2388 break;
2390 if (!flow_bb_inside_loop_p (inn_loop, bb))
2391 break;
2393 if (bb->loop_father->header == bb)
2395 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2396 break;
2398 /* In a loop that is always entered we may proceed anyway.
2399 But record that we entered it and stop once we leave it. */
2400 inn_loop = bb->loop_father;
2404 while (1)
2406 SET_ALWAYS_EXECUTED_IN (last, loop);
2407 if (last == loop->header)
2408 break;
2409 last = get_immediate_dominator (CDI_DOMINATORS, last);
2412 free (bbs);
2415 for (loop = loop->inner; loop; loop = loop->next)
2416 fill_always_executed_in_1 (loop, contains_call);
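/* Small worked example (assumed source, single nonpure call):

     while (1)
       {
         a = ...;
         foo ();         <- CONTAINS_CALL block, scanning stops here
         if (cond)
           b = ...;      <- not marked
       }

   Only the blocks dominating the latch that are scanned up to and
   including the block with the call get ALWAYS_EXECUTED_IN set to this
   loop; later blocks are skipped because a nonpure call need not
   return.  */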
2419 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
2420 for each such basic block bb records the outermost loop for which execution
2421 of that loop's header implies execution of bb. */
2423 static void
2424 fill_always_executed_in (void)
2426 sbitmap contains_call = sbitmap_alloc (last_basic_block_for_fn (cfun));
2427 basic_block bb;
2428 struct loop *loop;
2430 bitmap_clear (contains_call);
2431 FOR_EACH_BB_FN (bb, cfun)
2433 gimple_stmt_iterator gsi;
2434 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2436 if (nonpure_call_p (gsi_stmt (gsi)))
2437 break;
2440 if (!gsi_end_p (gsi))
2441 bitmap_set_bit (contains_call, bb->index);
2444 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
2445 fill_always_executed_in_1 (loop, contains_call);
2447 sbitmap_free (contains_call);
2451 /* Compute the global information needed by the loop invariant motion pass. */
2453 static void
2454 tree_ssa_lim_initialize (void)
2456 struct loop *loop;
2457 unsigned i;
2459 bitmap_obstack_initialize (&lim_bitmap_obstack);
2460 gcc_obstack_init (&mem_ref_obstack);
2461 lim_aux_data_map = new hash_map<gimple, lim_aux_data *>;
2463 if (flag_tm)
2464 compute_transaction_bits ();
2466 alloc_aux_for_edges (0);
2468 memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
2469 memory_accesses.refs_list.create (100);
2470 /* Allocate a special, unanalyzable mem-ref with ID zero. */
2471 memory_accesses.refs_list.quick_push
2472 (mem_ref_alloc (error_mark_node, 0, UNANALYZABLE_MEM_ID));
2474 memory_accesses.refs_in_loop.create (number_of_loops (cfun));
2475 memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
2476 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
2477 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2478 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
2479 memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2481 for (i = 0; i < number_of_loops (cfun); i++)
2483 bitmap_initialize (&memory_accesses.refs_in_loop[i],
2484 &lim_bitmap_obstack);
2485 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
2486 &lim_bitmap_obstack);
2487 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
2488 &lim_bitmap_obstack);
2491 memory_accesses.ttae_cache = NULL;
2493 /* Initialize bb_loop_postorder with a mapping from loop->num to
2494 its postorder index. */
2495 i = 0;
2496 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
2497 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2498 bb_loop_postorder[loop->num] = i++;
2501 /* Cleans up after the invariant motion pass. */
2503 static void
2504 tree_ssa_lim_finalize (void)
2506 basic_block bb;
2507 unsigned i;
2508 mem_ref_p ref;
2510 free_aux_for_edges ();
2512 FOR_EACH_BB_FN (bb, cfun)
2513 SET_ALWAYS_EXECUTED_IN (bb, NULL);
2515 bitmap_obstack_release (&lim_bitmap_obstack);
2516 delete lim_aux_data_map;
2518 delete memory_accesses.refs;
2519 memory_accesses.refs = NULL;
2521 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
2522 memref_free (ref);
2523 memory_accesses.refs_list.release ();
2524 obstack_free (&mem_ref_obstack, NULL);
2526 memory_accesses.refs_in_loop.release ();
2527 memory_accesses.refs_stored_in_loop.release ();
2528 memory_accesses.all_refs_stored_in_loop.release ();
2530 if (memory_accesses.ttae_cache)
2531 free_affine_expand_cache (&memory_accesses.ttae_cache);
2533 free (bb_loop_postorder);
2536 /* Moves invariants from loops. Only "expensive" invariants are moved out --
2537 i.e. those that are likely to be a win regardless of the register pressure. */
2539 unsigned int
2540 tree_ssa_lim (void)
2542 unsigned int todo;
2544 tree_ssa_lim_initialize ();
2546 /* Gathers information about memory accesses in the loops. */
2547 analyze_memory_references ();
2549 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
2550 fill_always_executed_in ();
2552 /* For each statement determine the outermost loop in which it is
2553 invariant and the cost of computing the invariant. */
2554 invariantness_dom_walker (CDI_DOMINATORS)
2555 .walk (cfun->cfg->x_entry_block_ptr);
2557 /* Execute store motion. Force the necessary invariants to be moved
2558 out of the loops as well. */
2559 store_motion ();
2561 /* Move the expressions that are expensive enough. */
2562 todo = move_computations ();
2564 tree_ssa_lim_finalize ();
2566 return todo;
2569 /* Loop invariant motion pass. */
2571 namespace {
2573 const pass_data pass_data_lim =
2575 GIMPLE_PASS, /* type */
2576 "lim", /* name */
2577 OPTGROUP_LOOP, /* optinfo_flags */
2578 TV_LIM, /* tv_id */
2579 PROP_cfg, /* properties_required */
2580 0, /* properties_provided */
2581 0, /* properties_destroyed */
2582 0, /* todo_flags_start */
2583 0, /* todo_flags_finish */
2586 class pass_lim : public gimple_opt_pass
2588 public:
2589 pass_lim (gcc::context *ctxt)
2590 : gimple_opt_pass (pass_data_lim, ctxt)
2593 /* opt_pass methods: */
2594 opt_pass * clone () { return new pass_lim (m_ctxt); }
2595 virtual bool gate (function *) { return flag_tree_loop_im != 0; }
2596 virtual unsigned int execute (function *);
2598 }; // class pass_lim
2600 unsigned int
2601 pass_lim::execute (function *fun)
2603 if (number_of_loops (fun) <= 1)
2604 return 0;
2606 return tree_ssa_lim ();
2609 } // anon namespace
2611 gimple_opt_pass *
2612 make_pass_lim (gcc::context *ctxt)
2614 return new pass_lim (ctxt);