/* Loop invariant motion.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "domwalk.h"
#include "params.h"
#include "tree-pass.h"
#include "flags.h"
#include "hashtab.h"
#include "tree-affine.h"
#include "pointer-set.h"
#include "tree-ssa-propagate.h"
/* TODO:  Support for predicated code motion.  I.e.

   while (1)
     {
       if (cond)
         {
           a = inv;
           something;
         }
     }

   Where COND and INV are invariants, but evaluating INV may trap or be
   invalid for some other reason if !COND.  This may be transformed to

   if (cond)
     a = inv;
   while (1)
     {
       if (cond)
         something;
     }  */
/* A type for the list of statements that have to be moved in order to be able
   to hoist an invariant computation.  */

struct depend
{
  gimple stmt;
  struct depend *next;
};

/* The auxiliary data kept for each statement.  */

struct lim_aux_data
{
  struct loop *max_loop;        /* The outermost loop in that the statement
                                   is invariant.  */

  struct loop *tgt_loop;        /* The loop out of that we want to move the
                                   invariant.  */

  struct loop *always_executed_in;
                                /* The outermost loop for that we are sure
                                   the statement is executed if the loop
                                   is entered.  */

  unsigned cost;                /* Cost of the computation performed by the
                                   statement.  */

  struct depend *depends;       /* List of statements that must be also hoisted
                                   out of the loop when this statement is
                                   hoisted; i.e. those that define the operands
                                   of the statement and are inside of the
                                   MAX_LOOP loop.  */
};

/* Maps statements to their lim_aux_data.  */

static struct pointer_map_t *lim_aux_data_map;
/* Description of a memory reference location.  */

typedef struct mem_ref_loc
{
  tree *ref;                    /* The reference itself.  */
  gimple stmt;                  /* The statement in that it occurs.  */
} *mem_ref_loc_p;

/* The list of memory reference locations in a loop.  */

typedef struct mem_ref_locs
{
  vec<mem_ref_loc_p> locs;
} *mem_ref_locs_p;

/* Description of a memory reference.  */

typedef struct mem_ref
{
  tree mem;                     /* The memory itself.  */
  unsigned id;                  /* ID assigned to the memory reference
                                   (its index in memory_accesses.refs_list)  */
  hashval_t hash;               /* Its hash value.  */
  bitmap stored;                /* The set of loops in that this memory location
                                   is stored to.  */
  vec<mem_ref_locs_p> accesses_in_loop;
                                /* The locations of the accesses.  Vector
                                   indexed by the loop number.  */

  /* The following sets are computed on demand.  We keep both set and
     its complement, so that we know whether the information was
     already computed or not.  */
  bitmap indep_loop;            /* The set of loops in that the memory
                                   reference is independent, meaning:
                                   If it is stored in the loop, this store
                                     is independent on all other loads and
                                     stores.
                                   If it is only loaded, then it is independent
                                     on all stores in the loop.  */
  bitmap dep_loop;              /* The complement of INDEP_LOOP.  */

  bitmap indep_ref;             /* The set of memory references on that
                                   this reference is independent.  */
  bitmap dep_ref;               /* The complement of INDEP_REF.  */
} *mem_ref_p;
/* Description of memory accesses in loops.  */

static struct
{
  /* The hash table of memory references accessed in loops.  */
  htab_t refs;

  /* The list of memory references.  */
  vec<mem_ref_p> refs_list;

  /* The set of memory references accessed in each loop.  */
  vec<bitmap> refs_in_loop;

  /* The set of memory references accessed in each loop, including
     subloops.  */
  vec<bitmap> all_refs_in_loop;

  /* The set of memory references stored in each loop, including
     subloops.  */
  vec<bitmap> all_refs_stored_in_loop;

  /* Cache for expanding memory addresses.  */
  struct pointer_map_t *ttae_cache;
} memory_accesses;

/* Obstack for the bitmaps in the above data structures.  */
static bitmap_obstack lim_bitmap_obstack;
static bool ref_indep_loop_p (struct loop *, mem_ref_p);

/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  */
#define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))

/* Whether the reference was analyzable.  */
#define MEM_ANALYZABLE(REF) ((REF)->mem != error_mark_node)
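/* Allocates a zero-initialized lim_aux_data structure for STMT, records it
   in LIM_AUX_DATA_MAP and returns it.  */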
static struct lim_aux_data *
init_lim_data (gimple stmt)
{
  void **p = pointer_map_insert (lim_aux_data_map, stmt);

  *p = XCNEW (struct lim_aux_data);
  return (struct lim_aux_data *) *p;
}
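/* Returns the lim_aux_data recorded for STMT, or NULL if there is none.  */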
static struct lim_aux_data *
get_lim_data (gimple stmt)
{
  void **p = pointer_map_contains (lim_aux_data_map, stmt);
  if (!p)
    return NULL;

  return (struct lim_aux_data *) *p;
}
/* Releases the memory occupied by DATA.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  struct depend *dep, *next;

  for (dep = data->depends; dep; dep = next)
    {
      next = dep->next;
      free (dep);
    }
  free (data);
}
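/* Releases the lim_aux_data recorded for STMT, if any, and clears the
   corresponding entry in LIM_AUX_DATA_MAP.  */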
static void
clear_lim_data (gimple stmt)
{
  void **p = pointer_map_contains (lim_aux_data_map, stmt);
  if (!p)
    return;

  free_lim_aux_data ((struct lim_aux_data *) *p);
  *p = NULL;
}
/* Calls CBCK for each index in memory reference ADDR_P.  There are two
   kinds of situations handled; in each of these cases, the memory reference
   and DATA are passed to the callback:

   Access to an array: ARRAY_{RANGE_}REF (base, index).  In this case we also
   pass the pointer to the index to the callback.

   Pointer dereference: INDIRECT_REF (addr).  In this case we also pass the
   pointer to addr to the callback.

   If the callback returns false, the whole search stops and false is returned.
   Otherwise the function returns true after traversing through the whole
   reference *ADDR_P.  */
bool
for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
{
  tree *nxt, *idx;

  for (; ; addr_p = nxt)
    {
      switch (TREE_CODE (*addr_p))
        {
        case SSA_NAME:
          return cbck (*addr_p, addr_p, data);

        case MEM_REF:
          nxt = &TREE_OPERAND (*addr_p, 0);
          return cbck (*addr_p, nxt, data);

        case BIT_FIELD_REF:
        case VIEW_CONVERT_EXPR:
        case REALPART_EXPR:
        case IMAGPART_EXPR:
          nxt = &TREE_OPERAND (*addr_p, 0);
          break;

        case COMPONENT_REF:
          /* If the component has varying offset, it behaves like index
             as well.  */
          idx = &TREE_OPERAND (*addr_p, 2);
          if (*idx
              && !cbck (*addr_p, idx, data))
            return false;

          nxt = &TREE_OPERAND (*addr_p, 0);
          break;

        case ARRAY_REF:
        case ARRAY_RANGE_REF:
          nxt = &TREE_OPERAND (*addr_p, 0);
          if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
            return false;
          break;

        case VAR_DECL:
        case PARM_DECL:
        case CONST_DECL:
        case STRING_CST:
        case RESULT_DECL:
        case VECTOR_CST:
        case COMPLEX_CST:
        case INTEGER_CST:
        case REAL_CST:
        case FIXED_CST:
        case CONSTRUCTOR:
          return true;

        case ADDR_EXPR:
          gcc_assert (is_gimple_min_invariant (*addr_p));
          return true;

        case TARGET_MEM_REF:
          idx = &TMR_BASE (*addr_p);
          if (*idx
              && !cbck (*addr_p, idx, data))
            return false;
          idx = &TMR_INDEX (*addr_p);
          if (*idx
              && !cbck (*addr_p, idx, data))
            return false;
          idx = &TMR_INDEX2 (*addr_p);
          if (*idx
              && !cbck (*addr_p, idx, data))
            return false;
          return true;

        default:
          gcc_unreachable ();
        }
    }
}
/* If it is possible to hoist the statement STMT unconditionally,
   returns MOVE_POSSIBLE.
   If it is possible to hoist the statement STMT, but we must avoid making
   it executed if it would not be executed in the original program (e.g.
   because it may trap), return MOVE_PRESERVE_EXECUTION.
   Otherwise return MOVE_IMPOSSIBLE.  */

enum move_pos
movement_possibility (gimple stmt)
{
  tree lhs;
  enum move_pos ret = MOVE_POSSIBLE;

  if (flag_unswitch_loops
      && gimple_code (stmt) == GIMPLE_COND)
    {
      /* If we perform unswitching, force the operands of the invariant
         condition to be moved out of the loop.  */
      return MOVE_POSSIBLE;
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_phi_num_args (stmt) <= 2
      && !virtual_operand_p (gimple_phi_result (stmt))
      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
    return MOVE_POSSIBLE;

  if (gimple_get_lhs (stmt) == NULL_TREE)
    return MOVE_IMPOSSIBLE;

  if (gimple_vdef (stmt))
    return MOVE_IMPOSSIBLE;

  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || gimple_has_side_effects (stmt)
      || stmt_could_throw_p (stmt))
    return MOVE_IMPOSSIBLE;

  if (is_gimple_call (stmt))
    {
      /* While a pure or const call is guaranteed to have no side effects, we
         cannot move it arbitrarily.  Consider code like

         char *s = something ();

         while (1)
           {
             if (s)
               t = strlen (s);
             else
               t = 0;
           }

         Here the strlen call cannot be moved out of the loop, even though
         s is invariant.  In addition to possibly creating a call with
         invalid arguments, moving out a function call that is not executed
         may cause performance regressions in case the call is costly and
         not executed at all.  */
      ret = MOVE_PRESERVE_EXECUTION;
      lhs = gimple_call_lhs (stmt);
    }
  else if (is_gimple_assign (stmt))
    lhs = gimple_assign_lhs (stmt);
  else
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) != SSA_NAME
      || gimple_could_trap_p (stmt))
    return MOVE_PRESERVE_EXECUTION;

  /* Non-local loads in a transaction cannot be hoisted out.  Well,
     unless the load happens on every path out of the loop, but we
     don't take this into account yet.  */
  if (flag_tm
      && gimple_in_transaction (stmt)
      && gimple_assign_single_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);
      if (DECL_P (rhs) && is_global_var (rhs))
        {
          if (dump_file)
            {
              fprintf (dump_file, "Cannot hoist conditional load of ");
              print_generic_expr (dump_file, rhs, TDF_SLIM);
              fprintf (dump_file, " because it is in a transaction.\n");
            }
          return MOVE_IMPOSSIBLE;
        }
    }

  return ret;
}
/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to that we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in that the value
   of DEF is invariant.  */

static struct loop *
outermost_invariant_loop (tree def, struct loop *loop)
{
  gimple def_stmt;
  basic_block def_bb;
  struct loop *max_loop;
  struct lim_aux_data *lim_data;

  if (!def)
    return superloop_at_depth (loop, 1);

  if (TREE_CODE (def) != SSA_NAME)
    {
      gcc_assert (is_gimple_min_invariant (def));
      return superloop_at_depth (loop, 1);
    }

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = gimple_bb (def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  lim_data = get_lim_data (def_stmt);
  if (lim_data != NULL && lim_data->max_loop != NULL)
    max_loop = find_common_loop (max_loop,
                                 loop_outer (lim_data->max_loop));
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);

  return max_loop;
}
/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in that value of DEF is invariant
   and record this in DATA->max_loop field.  If DEF itself is defined inside
   this loop as well (i.e. we need to hoist it out of the loop if we want
   to hoist the statement represented by DATA), record the statement in that
   DEF is defined to the DATA->depends list.  Additionally if ADD_COST is true,
   add the cost of the computation of DEF to the DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */

static bool
add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
                bool add_cost)
{
  gimple def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = gimple_bb (def_stmt);
  struct loop *max_loop;
  struct depend *dep;
  struct lim_aux_data *def_data;

  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  def_data = get_lim_data (def_stmt);
  if (!def_data)
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
         i.e. if it is likely that by moving the invariants dependent
         on it, we will be able to avoid creating a new register for
         it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += def_data->cost;

  dep = XNEW (struct depend);
  dep->stmt = def_stmt;
  dep->next = data->depends;
  data->depends = dep;

  return true;
}
/* Returns an estimate for a cost of statement STMT.  The values here
   are just ad-hoc constants, similar to costs for inlining.  */

static unsigned
stmt_cost (gimple stmt)
{
  /* Always try to create possibilities for unswitching.  */
  if (gimple_code (stmt) == GIMPLE_COND
      || gimple_code (stmt) == GIMPLE_PHI)
    return LIM_EXPENSIVE;

  /* We should be hoisting calls if possible.  */
  if (is_gimple_call (stmt))
    {
      tree fndecl;

      /* Unless the call is a builtin_constant_p; this always folds to a
         constant, so moving it is useless.  */
      fndecl = gimple_call_fndecl (stmt);
      if (fndecl
          && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
          && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
        return 0;

      return LIM_EXPENSIVE;
    }

  /* Hoisting memory references out should almost surely be a win.  */
  if (gimple_references_memory_p (stmt))
    return LIM_EXPENSIVE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return 1;

  switch (gimple_assign_rhs_code (stmt))
    {
    case MULT_EXPR:
    case WIDEN_MULT_EXPR:
    case WIDEN_MULT_PLUS_EXPR:
    case WIDEN_MULT_MINUS_EXPR:
    case DOT_PROD_EXPR:
    case FMA_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case TRUNC_MOD_EXPR:
    case RDIV_EXPR:
      /* Division and multiplication are usually expensive.  */
      return LIM_EXPENSIVE;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case WIDEN_LSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      /* Shifts and rotates are usually expensive.  */
      return LIM_EXPENSIVE;

    case CONSTRUCTOR:
      /* Make vector construction cost proportional to the number
         of elements.  */
      return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));

    case SSA_NAME:
    case PAREN_EXPR:
      /* Whether or not something is wrapped inside a PAREN_EXPR
         should not change move cost.  Nor should an intermediate
         unpropagated SSA name copy.  */
      return 0;

    default:
      return 1;
    }
}
/* Finds the outermost loop between OUTER and LOOP in that the memory reference
   REF is independent.  If REF is not independent in LOOP, NULL is returned
   instead.  */

static struct loop *
outermost_indep_loop (struct loop *outer, struct loop *loop, mem_ref_p ref)
{
  struct loop *aloop;

  if (bitmap_bit_p (ref->stored, loop->num))
    return NULL;

  for (aloop = outer;
       aloop != loop;
       aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
    if (!bitmap_bit_p (ref->stored, aloop->num)
        && ref_indep_loop_p (aloop, ref))
      return aloop;

  if (ref_indep_loop_p (loop, ref))
    return loop;
  else
    return NULL;
}
/* If there is a simple load or store to a memory reference in STMT, returns
   the location of the memory reference, and sets IS_STORE according to whether
   it is a store or load.  Otherwise, returns NULL.  */

static tree *
simple_mem_ref_in_stmt (gimple stmt, bool *is_store)
{
  tree *lhs, *rhs;

  /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
  if (!gimple_assign_single_p (stmt))
    return NULL;

  lhs = gimple_assign_lhs_ptr (stmt);
  rhs = gimple_assign_rhs1_ptr (stmt);

  if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
    {
      *is_store = false;
      return rhs;
    }
  else if (gimple_vdef (stmt)
           && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
    {
      *is_store = true;
      return lhs;
    }
  else
    return NULL;
}
/* Returns the memory reference contained in STMT.  */

static mem_ref_p
mem_ref_in_stmt (gimple stmt)
{
  bool store;
  tree *mem = simple_mem_ref_in_stmt (stmt, &store);
  hashval_t hash;
  mem_ref_p ref;

  if (!mem)
    return NULL;
  gcc_assert (!store);

  hash = iterative_hash_expr (*mem, 0);
  ref = (mem_ref_p) htab_find_with_hash (memory_accesses.refs, *mem, hash);

  gcc_assert (ref != NULL);
  return ref;
}
677 /* From a controlling predicate in DOM determine the arguments from
678 the PHI node PHI that are chosen if the predicate evaluates to
679 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
680 they are non-NULL. Returns true if the arguments can be determined,
681 else return false. */
683 static bool
684 extract_true_false_args_from_phi (basic_block dom, gimple phi,
685 tree *true_arg_p, tree *false_arg_p)
687 basic_block bb = gimple_bb (phi);
688 edge true_edge, false_edge, tem;
689 tree arg0 = NULL_TREE, arg1 = NULL_TREE;
691 /* We have to verify that one edge into the PHI node is dominated
692 by the true edge of the predicate block and the other edge
693 dominated by the false edge. This ensures that the PHI argument
694 we are going to take is completely determined by the path we
695 take from the predicate block.
696 We can only use BB dominance checks below if the destination of
697 the true/false edges are dominated by their edge, thus only
698 have a single predecessor. */
699 extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
700 tem = EDGE_PRED (bb, 0);
701 if (tem == true_edge
702 || (single_pred_p (true_edge->dest)
703 && (tem->src == true_edge->dest
704 || dominated_by_p (CDI_DOMINATORS,
705 tem->src, true_edge->dest))))
706 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
707 else if (tem == false_edge
708 || (single_pred_p (false_edge->dest)
709 && (tem->src == false_edge->dest
710 || dominated_by_p (CDI_DOMINATORS,
711 tem->src, false_edge->dest))))
712 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
713 else
714 return false;
715 tem = EDGE_PRED (bb, 1);
716 if (tem == true_edge
717 || (single_pred_p (true_edge->dest)
718 && (tem->src == true_edge->dest
719 || dominated_by_p (CDI_DOMINATORS,
720 tem->src, true_edge->dest))))
721 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
722 else if (tem == false_edge
723 || (single_pred_p (false_edge->dest)
724 && (tem->src == false_edge->dest
725 || dominated_by_p (CDI_DOMINATORS,
726 tem->src, false_edge->dest))))
727 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
728 else
729 return false;
730 if (!arg0 || !arg1)
731 return false;
733 if (true_arg_p)
734 *true_arg_p = arg0;
735 if (false_arg_p)
736 *false_arg_p = arg1;
738 return true;
741 /* Determine the outermost loop to that it is possible to hoist a statement
742 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
743 the outermost loop in that the value computed by STMT is invariant.
744 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
745 we preserve the fact whether STMT is executed. It also fills other related
746 information to LIM_DATA (STMT).
748 The function returns false if STMT cannot be hoisted outside of the loop it
749 is defined in, and true otherwise. */
751 static bool
752 determine_max_movement (gimple stmt, bool must_preserve_exec)
754 basic_block bb = gimple_bb (stmt);
755 struct loop *loop = bb->loop_father;
756 struct loop *level;
757 struct lim_aux_data *lim_data = get_lim_data (stmt);
758 tree val;
759 ssa_op_iter iter;
761 if (must_preserve_exec)
762 level = ALWAYS_EXECUTED_IN (bb);
763 else
764 level = superloop_at_depth (loop, 1);
765 lim_data->max_loop = level;
767 if (gimple_code (stmt) == GIMPLE_PHI)
769 use_operand_p use_p;
770 unsigned min_cost = UINT_MAX;
771 unsigned total_cost = 0;
772 struct lim_aux_data *def_data;
774 /* We will end up promoting dependencies to be unconditionally
775 evaluated. For this reason the PHI cost (and thus the
776 cost we remove from the loop by doing the invariant motion)
777 is that of the cheapest PHI argument dependency chain. */
778 FOR_EACH_PHI_ARG (use_p, stmt, iter, SSA_OP_USE)
780 val = USE_FROM_PTR (use_p);
781 if (TREE_CODE (val) != SSA_NAME)
782 continue;
783 if (!add_dependency (val, lim_data, loop, false))
784 return false;
785 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
786 if (def_data)
788 min_cost = MIN (min_cost, def_data->cost);
789 total_cost += def_data->cost;
793 lim_data->cost += min_cost;
795 if (gimple_phi_num_args (stmt) > 1)
797 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
798 gimple cond;
799 if (gsi_end_p (gsi_last_bb (dom)))
800 return false;
801 cond = gsi_stmt (gsi_last_bb (dom));
802 if (gimple_code (cond) != GIMPLE_COND)
803 return false;
804 /* Verify that this is an extended form of a diamond and
805 the PHI arguments are completely controlled by the
806 predicate in DOM. */
807 if (!extract_true_false_args_from_phi (dom, stmt, NULL, NULL))
808 return false;
810 /* Fold in dependencies and cost of the condition. */
811 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
813 if (!add_dependency (val, lim_data, loop, false))
814 return false;
815 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
816 if (def_data)
817 total_cost += def_data->cost;
820 /* We want to avoid unconditionally executing very expensive
821 operations. As costs for our dependencies cannot be
822 negative just claim we are not invariant for this case.
823 We also are not sure whether the control-flow inside the
824 loop will vanish. */
825 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
826 && !(min_cost != 0
827 && total_cost / min_cost <= 2))
828 return false;
830 /* Assume that the control-flow in the loop will vanish.
831 ??? We should verify this and not artificially increase
832 the cost if that is not the case. */
833 lim_data->cost += stmt_cost (stmt);
836 return true;
838 else
839 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
840 if (!add_dependency (val, lim_data, loop, true))
841 return false;
843 if (gimple_vuse (stmt))
845 mem_ref_p ref = mem_ref_in_stmt (stmt);
847 if (ref)
849 lim_data->max_loop
850 = outermost_indep_loop (lim_data->max_loop, loop, ref);
851 if (!lim_data->max_loop)
852 return false;
854 else
856 if ((val = gimple_vuse (stmt)) != NULL_TREE)
858 if (!add_dependency (val, lim_data, loop, false))
859 return false;
864 lim_data->cost += stmt_cost (stmt);
866 return true;
869 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
870 and that one of the operands of this statement is computed by STMT.
871 Ensure that STMT (together with all the statements that define its
872 operands) is hoisted at least out of the loop LEVEL. */
874 static void
875 set_level (gimple stmt, struct loop *orig_loop, struct loop *level)
877 struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
878 struct depend *dep;
879 struct lim_aux_data *lim_data;
881 stmt_loop = find_common_loop (orig_loop, stmt_loop);
882 lim_data = get_lim_data (stmt);
883 if (lim_data != NULL && lim_data->tgt_loop != NULL)
884 stmt_loop = find_common_loop (stmt_loop,
885 loop_outer (lim_data->tgt_loop));
886 if (flow_loop_nested_p (stmt_loop, level))
887 return;
889 gcc_assert (level == lim_data->max_loop
890 || flow_loop_nested_p (lim_data->max_loop, level));
892 lim_data->tgt_loop = level;
893 for (dep = lim_data->depends; dep; dep = dep->next)
894 set_level (dep->stmt, orig_loop, level);
897 /* Determines an outermost loop from that we want to hoist the statement STMT.
898 For now we choose the outermost possible loop. TODO -- use profiling
899 information to set it more sanely. */
901 static void
902 set_profitable_level (gimple stmt)
904 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
907 /* Returns true if STMT is a call that has side effects. */
909 static bool
910 nonpure_call_p (gimple stmt)
912 if (gimple_code (stmt) != GIMPLE_CALL)
913 return false;
915 return gimple_has_side_effects (stmt);
918 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
920 static gimple
921 rewrite_reciprocal (gimple_stmt_iterator *bsi)
923 gimple stmt, stmt1, stmt2;
924 tree name, lhs, type;
925 tree real_one;
926 gimple_stmt_iterator gsi;
928 stmt = gsi_stmt (*bsi);
929 lhs = gimple_assign_lhs (stmt);
930 type = TREE_TYPE (lhs);
932 real_one = build_one_cst (type);
934 name = make_temp_ssa_name (type, NULL, "reciptmp");
935 stmt1 = gimple_build_assign_with_ops (RDIV_EXPR, name, real_one,
936 gimple_assign_rhs2 (stmt));
938 stmt2 = gimple_build_assign_with_ops (MULT_EXPR, lhs, name,
939 gimple_assign_rhs1 (stmt));
941 /* Replace division stmt with reciprocal and multiply stmts.
942 The multiply stmt is not invariant, so update iterator
943 and avoid rescanning. */
944 gsi = *bsi;
945 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
946 gsi_replace (&gsi, stmt2, true);
948 /* Continue processing with invariant reciprocal statement. */
949 return stmt1;
952 /* Check if the pattern at *BSI is a bittest of the form
953 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
955 static gimple
956 rewrite_bittest (gimple_stmt_iterator *bsi)
958 gimple stmt, use_stmt, stmt1, stmt2;
959 tree lhs, name, t, a, b;
960 use_operand_p use;
962 stmt = gsi_stmt (*bsi);
963 lhs = gimple_assign_lhs (stmt);
965 /* Verify that the single use of lhs is a comparison against zero. */
966 if (TREE_CODE (lhs) != SSA_NAME
967 || !single_imm_use (lhs, &use, &use_stmt)
968 || gimple_code (use_stmt) != GIMPLE_COND)
969 return stmt;
970 if (gimple_cond_lhs (use_stmt) != lhs
971 || (gimple_cond_code (use_stmt) != NE_EXPR
972 && gimple_cond_code (use_stmt) != EQ_EXPR)
973 || !integer_zerop (gimple_cond_rhs (use_stmt)))
974 return stmt;
976 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
977 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
978 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
979 return stmt;
981 /* There is a conversion in between possibly inserted by fold. */
982 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
984 t = gimple_assign_rhs1 (stmt1);
985 if (TREE_CODE (t) != SSA_NAME
986 || !has_single_use (t))
987 return stmt;
988 stmt1 = SSA_NAME_DEF_STMT (t);
989 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
990 return stmt;
993 /* Verify that B is loop invariant but A is not. Verify that with
994 all the stmt walking we are still in the same loop. */
995 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
996 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
997 return stmt;
999 a = gimple_assign_rhs1 (stmt1);
1000 b = gimple_assign_rhs2 (stmt1);
1002 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
1003 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
1005 gimple_stmt_iterator rsi;
1007 /* 1 << B */
1008 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
1009 build_int_cst (TREE_TYPE (a), 1), b);
1010 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1011 stmt1 = gimple_build_assign (name, t);
1013 /* A & (1 << B) */
1014 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
1015 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1016 stmt2 = gimple_build_assign (name, t);
1018 /* Replace the SSA_NAME we compare against zero. Adjust
1019 the type of zero accordingly. */
1020 SET_USE (use, name);
1021 gimple_cond_set_rhs (use_stmt, build_int_cst_type (TREE_TYPE (name), 0));
1023 /* Don't use gsi_replace here, none of the new assignments sets
1024 the variable originally set in stmt. Move bsi to stmt1, and
1025 then remove the original stmt, so that we get a chance to
1026 retain debug info for it. */
1027 rsi = *bsi;
1028 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1029 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
1030 gsi_remove (&rsi, true);
1032 return stmt1;
1035 return stmt;
1039 /* Determine the outermost loops in that statements in basic block BB are
1040 invariant, and record them to the LIM_DATA associated with the statements.
1041 Callback for walk_dominator_tree. */
1043 static void
1044 determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
1045 basic_block bb)
1047 enum move_pos pos;
1048 gimple_stmt_iterator bsi;
1049 gimple stmt;
1050 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1051 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
1052 struct lim_aux_data *lim_data;
1054 if (!loop_outer (bb->loop_father))
1055 return;
1057 if (dump_file && (dump_flags & TDF_DETAILS))
1058 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1059 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1061 /* Look at PHI nodes, but only if there are at most two.
1062 ??? We could relax this further by post-processing the inserted
1063 code and transforming adjacent cond-exprs with the same predicate
1064 to control flow again. */
1065 bsi = gsi_start_phis (bb);
1066 if (!gsi_end_p (bsi)
1067 && ((gsi_next (&bsi), gsi_end_p (bsi))
1068 || (gsi_next (&bsi), gsi_end_p (bsi))))
1069 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1071 stmt = gsi_stmt (bsi);
1073 pos = movement_possibility (stmt);
1074 if (pos == MOVE_IMPOSSIBLE)
1075 continue;
1077 lim_data = init_lim_data (stmt);
1078 lim_data->always_executed_in = outermost;
1080 if (!determine_max_movement (stmt, false))
1082 lim_data->max_loop = NULL;
1083 continue;
1086 if (dump_file && (dump_flags & TDF_DETAILS))
1088 print_gimple_stmt (dump_file, stmt, 2, 0);
1089 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1090 loop_depth (lim_data->max_loop),
1091 lim_data->cost);
1094 if (lim_data->cost >= LIM_EXPENSIVE)
1095 set_profitable_level (stmt);
1098 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1100 stmt = gsi_stmt (bsi);
1102 pos = movement_possibility (stmt);
1103 if (pos == MOVE_IMPOSSIBLE)
1105 if (nonpure_call_p (stmt))
1107 maybe_never = true;
1108 outermost = NULL;
1110 /* Make sure to note always_executed_in for stores to make
1111 store-motion work. */
1112 else if (stmt_makes_single_store (stmt))
1114 struct lim_aux_data *lim_data = init_lim_data (stmt);
1115 lim_data->always_executed_in = outermost;
1117 continue;
1120 if (is_gimple_assign (stmt)
1121 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1122 == GIMPLE_BINARY_RHS))
1124 tree op0 = gimple_assign_rhs1 (stmt);
1125 tree op1 = gimple_assign_rhs2 (stmt);
1126 struct loop *ol1 = outermost_invariant_loop (op1,
1127 loop_containing_stmt (stmt));
1129 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1130 to be hoisted out of loop, saving expensive divide. */
1131 if (pos == MOVE_POSSIBLE
1132 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1133 && flag_unsafe_math_optimizations
1134 && !flag_trapping_math
1135 && ol1 != NULL
1136 && outermost_invariant_loop (op0, ol1) == NULL)
1137 stmt = rewrite_reciprocal (&bsi);
1139 /* If the shift count is invariant, convert (A >> B) & 1 to
1140 A & (1 << B) allowing the bit mask to be hoisted out of the loop
1141 saving an expensive shift. */
1142 if (pos == MOVE_POSSIBLE
1143 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1144 && integer_onep (op1)
1145 && TREE_CODE (op0) == SSA_NAME
1146 && has_single_use (op0))
1147 stmt = rewrite_bittest (&bsi);
1150 lim_data = init_lim_data (stmt);
1151 lim_data->always_executed_in = outermost;
1153 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1154 continue;
1156 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1158 lim_data->max_loop = NULL;
1159 continue;
1162 if (dump_file && (dump_flags & TDF_DETAILS))
1164 print_gimple_stmt (dump_file, stmt, 2, 0);
1165 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1166 loop_depth (lim_data->max_loop),
1167 lim_data->cost);
1170 if (lim_data->cost >= LIM_EXPENSIVE)
1171 set_profitable_level (stmt);
1175 /* For each statement determines the outermost loop in that it is invariant,
1176 statements on whose motion it depends and the cost of the computation.
1177 This information is stored to the LIM_DATA structure associated with
1178 each statement. */
1180 static void
1181 determine_invariantness (void)
1183 struct dom_walk_data walk_data;
1185 memset (&walk_data, 0, sizeof (struct dom_walk_data));
1186 walk_data.dom_direction = CDI_DOMINATORS;
1187 walk_data.before_dom_children = determine_invariantness_stmt;
1189 init_walk_dominator_tree (&walk_data);
1190 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
1191 fini_walk_dominator_tree (&walk_data);
1194 /* Hoist the statements in basic block BB out of the loops prescribed by
1195 data stored in LIM_DATA structures associated with each statement. Callback
1196 for walk_dominator_tree. */
1198 static void
1199 move_computations_stmt (struct dom_walk_data *dw_data,
1200 basic_block bb)
1202 struct loop *level;
1203 gimple_stmt_iterator bsi;
1204 gimple stmt;
1205 unsigned cost = 0;
1206 struct lim_aux_data *lim_data;
1208 if (!loop_outer (bb->loop_father))
1209 return;
1211 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1213 gimple new_stmt;
1214 stmt = gsi_stmt (bsi);
1216 lim_data = get_lim_data (stmt);
1217 if (lim_data == NULL)
1219 gsi_next (&bsi);
1220 continue;
1223 cost = lim_data->cost;
1224 level = lim_data->tgt_loop;
1225 clear_lim_data (stmt);
1227 if (!level)
1229 gsi_next (&bsi);
1230 continue;
1233 if (dump_file && (dump_flags & TDF_DETAILS))
1235 fprintf (dump_file, "Moving PHI node\n");
1236 print_gimple_stmt (dump_file, stmt, 0, 0);
1237 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1238 cost, level->num);
1241 if (gimple_phi_num_args (stmt) == 1)
1243 tree arg = PHI_ARG_DEF (stmt, 0);
1244 new_stmt = gimple_build_assign_with_ops (TREE_CODE (arg),
1245 gimple_phi_result (stmt),
1246 arg, NULL_TREE);
1247 SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
1249 else
1251 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1252 gimple cond = gsi_stmt (gsi_last_bb (dom));
1253 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1254 /* Get the PHI arguments corresponding to the true and false
1255 edges of COND. */
1256 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1257 gcc_assert (arg0 && arg1);
1258 t = build2 (gimple_cond_code (cond), boolean_type_node,
1259 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1260 new_stmt = gimple_build_assign_with_ops (COND_EXPR,
1261 gimple_phi_result (stmt),
1262 t, arg0, arg1);
1263 SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
1264 *((unsigned int *)(dw_data->global_data)) |= TODO_cleanup_cfg;
1266 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1267 remove_phi_node (&bsi, false);
1270 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1272 edge e;
1274 stmt = gsi_stmt (bsi);
1276 lim_data = get_lim_data (stmt);
1277 if (lim_data == NULL)
1279 gsi_next (&bsi);
1280 continue;
1283 cost = lim_data->cost;
1284 level = lim_data->tgt_loop;
1285 clear_lim_data (stmt);
1287 if (!level)
1289 gsi_next (&bsi);
1290 continue;
1293 /* We do not really want to move conditionals out of the loop; we just
1294 placed it here to force its operands to be moved if necessary. */
1295 if (gimple_code (stmt) == GIMPLE_COND)
1296 continue;
1298 if (dump_file && (dump_flags & TDF_DETAILS))
1300 fprintf (dump_file, "Moving statement\n");
1301 print_gimple_stmt (dump_file, stmt, 0, 0);
1302 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1303 cost, level->num);
1306 e = loop_preheader_edge (level);
1307 gcc_assert (!gimple_vdef (stmt));
1308 if (gimple_vuse (stmt))
1310 /* The new VUSE is the one from the virtual PHI in the loop
1311 header or the one already present. */
1312 gimple_stmt_iterator gsi2;
1313 for (gsi2 = gsi_start_phis (e->dest);
1314 !gsi_end_p (gsi2); gsi_next (&gsi2))
1316 gimple phi = gsi_stmt (gsi2);
1317 if (virtual_operand_p (gimple_phi_result (phi)))
1319 gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1320 break;
1324 gsi_remove (&bsi, false);
1325 gsi_insert_on_edge (e, stmt);
1329 /* Hoist the statements out of the loops prescribed by data stored in
1330 LIM_DATA structures associated with each statement.  */
1332 static unsigned int
1333 move_computations (void)
1335 struct dom_walk_data walk_data;
1336 unsigned int todo = 0;
1338 memset (&walk_data, 0, sizeof (struct dom_walk_data));
1339 walk_data.global_data = &todo;
1340 walk_data.dom_direction = CDI_DOMINATORS;
1341 walk_data.before_dom_children = move_computations_stmt;
1343 init_walk_dominator_tree (&walk_data);
1344 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
1345 fini_walk_dominator_tree (&walk_data);
1347 gsi_commit_edge_inserts ();
1348 if (need_ssa_update_p (cfun))
1349 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1351 return todo;
1354 /* Checks whether the statement defining variable *INDEX can be hoisted
1355 out of the loop passed in DATA. Callback for for_each_index. */
1357 static bool
1358 may_move_till (tree ref, tree *index, void *data)
1360 struct loop *loop = (struct loop *) data, *max_loop;
1362 /* If REF is an array reference, check also that the step and the lower
1363 bound is invariant in LOOP. */
1364 if (TREE_CODE (ref) == ARRAY_REF)
1366 tree step = TREE_OPERAND (ref, 3);
1367 tree lbound = TREE_OPERAND (ref, 2);
1369 max_loop = outermost_invariant_loop (step, loop);
1370 if (!max_loop)
1371 return false;
1373 max_loop = outermost_invariant_loop (lbound, loop);
1374 if (!max_loop)
1375 return false;
1378 max_loop = outermost_invariant_loop (*index, loop);
1379 if (!max_loop)
1380 return false;
1382 return true;
1385 /* If OP is SSA NAME, force the statement that defines it to be
1386 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
1388 static void
1389 force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1391 gimple stmt;
1393 if (!op
1394 || is_gimple_min_invariant (op))
1395 return;
1397 gcc_assert (TREE_CODE (op) == SSA_NAME);
1399 stmt = SSA_NAME_DEF_STMT (op);
1400 if (gimple_nop_p (stmt))
1401 return;
1403 set_level (stmt, orig_loop, loop);
1406 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1407 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1408 for_each_index. */
1410 struct fmt_data
1412 struct loop *loop;
1413 struct loop *orig_loop;
1416 static bool
1417 force_move_till (tree ref, tree *index, void *data)
1419 struct fmt_data *fmt_data = (struct fmt_data *) data;
1421 if (TREE_CODE (ref) == ARRAY_REF)
1423 tree step = TREE_OPERAND (ref, 3);
1424 tree lbound = TREE_OPERAND (ref, 2);
1426 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1427 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1430 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1432 return true;
1435 /* A hash function for struct mem_ref object OBJ. */
1437 static hashval_t
1438 memref_hash (const void *obj)
1440 const struct mem_ref *const mem = (const struct mem_ref *) obj;
1442 return mem->hash;
1445 /* An equality function for struct mem_ref object OBJ1 with
1446 memory reference OBJ2. */
1448 static int
1449 memref_eq (const void *obj1, const void *obj2)
1451 const struct mem_ref *const mem1 = (const struct mem_ref *) obj1;
1453 return operand_equal_p (mem1->mem, (const_tree) obj2, 0);
1456 /* Releases list of memory reference locations ACCS. */
1458 static void
1459 free_mem_ref_locs (mem_ref_locs_p accs)
1461 unsigned i;
1462 mem_ref_loc_p loc;
1464 if (!accs)
1465 return;
1467 FOR_EACH_VEC_ELT (accs->locs, i, loc)
1468 free (loc);
1469 accs->locs.release ();
1470 free (accs);
1473 /* A function to free the mem_ref object OBJ. */
1475 static void
1476 memref_free (struct mem_ref *mem)
1478 unsigned i;
1479 mem_ref_locs_p accs;
1481 FOR_EACH_VEC_ELT (mem->accesses_in_loop, i, accs)
1482 free_mem_ref_locs (accs);
1483 mem->accesses_in_loop.release ();
1485 free (mem);
1488 /* Allocates and returns a memory reference description for MEM whose hash
1489 value is HASH and id is ID. */
1491 static mem_ref_p
1492 mem_ref_alloc (tree mem, unsigned hash, unsigned id)
1494 mem_ref_p ref = XNEW (struct mem_ref);
1495 ref->mem = mem;
1496 ref->id = id;
1497 ref->hash = hash;
1498 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1499 ref->indep_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
1500 ref->dep_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
1501 ref->indep_ref = BITMAP_ALLOC (&lim_bitmap_obstack);
1502 ref->dep_ref = BITMAP_ALLOC (&lim_bitmap_obstack);
1503 ref->accesses_in_loop.create (0);
1505 return ref;
1508 /* Allocates and returns the new list of locations. */
1510 static mem_ref_locs_p
1511 mem_ref_locs_alloc (void)
1513 mem_ref_locs_p accs = XNEW (struct mem_ref_locs);
1514 accs->locs.create (0);
1515 return accs;
1518 /* Records memory reference location *LOC in LOOP to the memory reference
1519 description REF. The reference occurs in statement STMT. */
1521 static void
1522 record_mem_ref_loc (mem_ref_p ref, struct loop *loop, gimple stmt, tree *loc)
1524 mem_ref_loc_p aref = XNEW (struct mem_ref_loc);
1525 mem_ref_locs_p accs;
1526 bitmap ril = memory_accesses.refs_in_loop[loop->num];
1528 if (ref->accesses_in_loop.length ()
1529 <= (unsigned) loop->num)
1530 ref->accesses_in_loop.safe_grow_cleared (loop->num + 1);
1531 accs = ref->accesses_in_loop[loop->num];
1532 if (!accs)
1534 accs = mem_ref_locs_alloc ();
1535 ref->accesses_in_loop[loop->num] = accs;
1538 aref->stmt = stmt;
1539 aref->ref = loc;
1541 accs->locs.safe_push (aref);
1542 bitmap_set_bit (ril, ref->id);
1545 /* Marks reference REF as stored in LOOP. */
1547 static void
1548 mark_ref_stored (mem_ref_p ref, struct loop *loop)
1550 for (;
1551 loop != current_loops->tree_root
1552 && !bitmap_bit_p (ref->stored, loop->num);
1553 loop = loop_outer (loop))
1554 bitmap_set_bit (ref->stored, loop->num);
1557 /* Gathers memory references in statement STMT in LOOP, storing the
1558 information about them in the memory_accesses structure. Marks
1559 the vops accessed through unrecognized statements there as
1560 well. */
1562 static void
1563 gather_mem_refs_stmt (struct loop *loop, gimple stmt)
1565 tree *mem = NULL;
1566 hashval_t hash;
1567 PTR *slot;
1568 mem_ref_p ref;
1569 bool is_stored;
1570 unsigned id;
1572 if (!gimple_vuse (stmt))
1573 return;
1575 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1576 if (!mem)
1578 id = memory_accesses.refs_list.length ();
1579 ref = mem_ref_alloc (error_mark_node, 0, id);
1580 memory_accesses.refs_list.safe_push (ref);
1581 if (dump_file && (dump_flags & TDF_DETAILS))
1583 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1584 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1586 if (gimple_vdef (stmt))
1587 mark_ref_stored (ref, loop);
1588 record_mem_ref_loc (ref, loop, stmt, mem);
1589 return;
1592 hash = iterative_hash_expr (*mem, 0);
1593 slot = htab_find_slot_with_hash (memory_accesses.refs, *mem, hash, INSERT);
1595 if (*slot)
1597 ref = (mem_ref_p) *slot;
1598 id = ref->id;
1600 else
1602 id = memory_accesses.refs_list.length ();
1603 ref = mem_ref_alloc (*mem, hash, id);
1604 memory_accesses.refs_list.safe_push (ref);
1605 *slot = ref;
1607 if (dump_file && (dump_flags & TDF_DETAILS))
1609 fprintf (dump_file, "Memory reference %u: ", id);
1610 print_generic_expr (dump_file, ref->mem, TDF_SLIM);
1611 fprintf (dump_file, "\n");
1615 if (is_stored)
1616 mark_ref_stored (ref, loop);
1618 record_mem_ref_loc (ref, loop, stmt, mem);
1619 return;
1622 /* Gathers memory references in loops. */
1624 static void
1625 gather_mem_refs_in_loops (void)
1627 gimple_stmt_iterator bsi;
1628 basic_block bb;
1629 struct loop *loop;
1630 loop_iterator li;
1631 bitmap lrefs, alrefs, alrefso;
1633 FOR_EACH_BB (bb)
1635 loop = bb->loop_father;
1636 if (loop == current_loops->tree_root)
1637 continue;
1639 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1640 gather_mem_refs_stmt (loop, gsi_stmt (bsi));
1643 /* Propagate the information about accessed memory references up
1644 the loop hierarchy. */
1645 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
1647 lrefs = memory_accesses.refs_in_loop[loop->num];
1648 alrefs = memory_accesses.all_refs_in_loop[loop->num];
1649 bitmap_ior_into (alrefs, lrefs);
1651 if (loop_outer (loop) == current_loops->tree_root)
1652 continue;
1654 alrefso = memory_accesses.all_refs_in_loop[loop_outer (loop)->num];
1655 bitmap_ior_into (alrefso, alrefs);
1659 /* Create a mapping from virtual operands to references that touch them
1660 in LOOP. */
1662 static void
1663 create_vop_ref_mapping_loop (struct loop *loop)
1665 bitmap refs = memory_accesses.refs_in_loop[loop->num];
1666 struct loop *sloop;
1667 bitmap_iterator bi;
1668 unsigned i;
1669 mem_ref_p ref;
1671 EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
1673 ref = memory_accesses.refs_list[i];
1674 for (sloop = loop; sloop != current_loops->tree_root;
1675 sloop = loop_outer (sloop))
1676 if (bitmap_bit_p (ref->stored, loop->num))
1678 bitmap refs_stored
1679 = memory_accesses.all_refs_stored_in_loop[sloop->num];
1680 bitmap_set_bit (refs_stored, ref->id);
1685 /* For each non-clobbered virtual operand and each loop, record the memory
1686 references in this loop that touch the operand. */
1688 static void
1689 create_vop_ref_mapping (void)
1691 loop_iterator li;
1692 struct loop *loop;
1694 FOR_EACH_LOOP (li, loop, 0)
1696 create_vop_ref_mapping_loop (loop);
1700 /* Gathers information about memory accesses in the loops. */
1702 static void
1703 analyze_memory_references (void)
1705 unsigned i;
1706 bitmap empty;
1708 memory_accesses.refs = htab_create (100, memref_hash, memref_eq, NULL);
1709 memory_accesses.refs_list.create (0);
1710 memory_accesses.refs_in_loop.create (number_of_loops ());
1711 memory_accesses.all_refs_in_loop.create (number_of_loops ());
1712 memory_accesses.all_refs_stored_in_loop.create (number_of_loops ());
1714 for (i = 0; i < number_of_loops (); i++)
1716 empty = BITMAP_ALLOC (&lim_bitmap_obstack);
1717 memory_accesses.refs_in_loop.quick_push (empty);
1718 empty = BITMAP_ALLOC (&lim_bitmap_obstack);
1719 memory_accesses.all_refs_in_loop.quick_push (empty);
1720 empty = BITMAP_ALLOC (&lim_bitmap_obstack);
1721 memory_accesses.all_refs_stored_in_loop.quick_push (empty);
1724 memory_accesses.ttae_cache = NULL;
1726 gather_mem_refs_in_loops ();
1727 create_vop_ref_mapping ();
1730 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1731 tree_to_aff_combination_expand. */
1733 static bool
1734 mem_refs_may_alias_p (tree mem1, tree mem2, struct pointer_map_t **ttae_cache)
1736 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1737 object and their offset differ in such a way that the locations cannot
1738 overlap, then they cannot alias. */
1739 double_int size1, size2;
1740 aff_tree off1, off2;
1742 /* Perform basic offset and type-based disambiguation. */
1743 if (!refs_may_alias_p (mem1, mem2))
1744 return false;
1746 /* The expansion of addresses may be a bit expensive, thus we only do
1747 the check at -O2 and higher optimization levels. */
1748 if (optimize < 2)
1749 return true;
1751 get_inner_reference_aff (mem1, &off1, &size1);
1752 get_inner_reference_aff (mem2, &off2, &size2);
1753 aff_combination_expand (&off1, ttae_cache);
1754 aff_combination_expand (&off2, ttae_cache);
1755 aff_combination_scale (&off1, double_int_minus_one);
1756 aff_combination_add (&off2, &off1);
1758 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1759 return false;
1761 return true;
1764 /* Rewrites location LOC by TMP_VAR. */
1766 static void
1767 rewrite_mem_ref_loc (mem_ref_loc_p loc, tree tmp_var)
1769 *loc->ref = tmp_var;
1770 update_stmt (loc->stmt);
1773 /* Adds all locations of REF in LOOP and its subloops to LOCS. */
1775 static void
1776 get_all_locs_in_loop (struct loop *loop, mem_ref_p ref,
1777 vec<mem_ref_loc_p> *locs)
1779 mem_ref_locs_p accs;
1780 unsigned i;
1781 mem_ref_loc_p loc;
1782 bitmap refs = memory_accesses.all_refs_in_loop[loop->num];
1783 struct loop *subloop;
1785 if (!bitmap_bit_p (refs, ref->id))
1786 return;
1788 if (ref->accesses_in_loop.length ()
1789 > (unsigned) loop->num)
1791 accs = ref->accesses_in_loop[loop->num];
1792 if (accs)
1794 FOR_EACH_VEC_ELT (accs->locs, i, loc)
1795 locs->safe_push (loc);
1799 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
1800 get_all_locs_in_loop (subloop, ref, locs);
1803 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1805 static void
1806 rewrite_mem_refs (struct loop *loop, mem_ref_p ref, tree tmp_var)
1808 unsigned i;
1809 mem_ref_loc_p loc;
1810 vec<mem_ref_loc_p> locs = vNULL;
1812 get_all_locs_in_loop (loop, ref, &locs);
1813 FOR_EACH_VEC_ELT (locs, i, loc)
1814 rewrite_mem_ref_loc (loc, tmp_var);
1815 locs.release ();
1818 /* The name and the length of the currently generated variable
1819 for lsm. */
1820 #define MAX_LSM_NAME_LENGTH 40
1821 static char lsm_tmp_name[MAX_LSM_NAME_LENGTH + 1];
1822 static int lsm_tmp_name_length;
1824 /* Adds S to lsm_tmp_name. */
1826 static void
1827 lsm_tmp_name_add (const char *s)
1829 int l = strlen (s) + lsm_tmp_name_length;
1830 if (l > MAX_LSM_NAME_LENGTH)
1831 return;
1833 strcpy (lsm_tmp_name + lsm_tmp_name_length, s);
1834 lsm_tmp_name_length = l;
1837 /* Stores the name for temporary variable that replaces REF to
1838 lsm_tmp_name. */
1840 static void
1841 gen_lsm_tmp_name (tree ref)
1843 const char *name;
1845 switch (TREE_CODE (ref))
1847 case MEM_REF:
1848 case TARGET_MEM_REF:
1849 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1850 lsm_tmp_name_add ("_");
1851 break;
1853 case ADDR_EXPR:
1854 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1855 break;
1857 case BIT_FIELD_REF:
1858 case VIEW_CONVERT_EXPR:
1859 case ARRAY_RANGE_REF:
1860 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1861 break;
1863 case REALPART_EXPR:
1864 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1865 lsm_tmp_name_add ("_RE");
1866 break;
1868 case IMAGPART_EXPR:
1869 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1870 lsm_tmp_name_add ("_IM");
1871 break;
1873 case COMPONENT_REF:
1874 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1875 lsm_tmp_name_add ("_");
1876 name = get_name (TREE_OPERAND (ref, 1));
1877 if (!name)
1878 name = "F";
1879 lsm_tmp_name_add (name);
1880 break;
1882 case ARRAY_REF:
1883 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1884 lsm_tmp_name_add ("_I");
1885 break;
1887 case SSA_NAME:
1888 case VAR_DECL:
1889 case PARM_DECL:
1890 name = get_name (ref);
1891 if (!name)
1892 name = "D";
1893 lsm_tmp_name_add (name);
1894 break;
1896 case STRING_CST:
1897 lsm_tmp_name_add ("S");
1898 break;
1900 case RESULT_DECL:
1901 lsm_tmp_name_add ("R");
1902 break;
1904 case INTEGER_CST:
1905 /* Nothing. */
1906 break;
1908 default:
1909 gcc_unreachable ();
1913 /* Determines name for temporary variable that replaces REF.
1914 The name is accumulated into the lsm_tmp_name variable.
1915 N is added to the name of the temporary. */
1917 char *
1918 get_lsm_tmp_name (tree ref, unsigned n)
1920 char ns[2];
1922 lsm_tmp_name_length = 0;
1923 gen_lsm_tmp_name (ref);
1924 lsm_tmp_name_add ("_lsm");
1925 if (n < 10)
1927 ns[0] = '0' + n;
1928 ns[1] = 0;
1929 lsm_tmp_name_add (ns);
1931 return lsm_tmp_name;
1934 struct prev_flag_edges {
1935 /* Edge to insert new flag comparison code. */
1936 edge append_cond_position;
1938 /* Edge for fall through from previous flag comparison. */
1939 edge last_cond_fallthru;
1942 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1943 MEM along edge EX.
1945 The store is only done if MEM has changed. We do this so no
1946 changes to MEM occur on code paths that did not originally store
1947 into it.
1949 The common case for execute_sm will transform:
1951 for (...) {
1952 if (foo)
1953 stuff;
1954 else
1955 MEM = TMP_VAR;
1958 into:
1960 lsm = MEM;
1961 for (...) {
1962 if (foo)
1963 stuff;
1964 else
1965 lsm = TMP_VAR;
1967 MEM = lsm;
1969 This function will generate:
1971 lsm = MEM;
1973 lsm_flag = false;
1975 for (...) {
1976 if (foo)
1977 stuff;
1978 else {
1979 lsm = TMP_VAR;
1980 lsm_flag = true;
1983 if (lsm_flag) <--
1984 MEM = lsm; <--
1987 static void
1988 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag)
1990 basic_block new_bb, then_bb, old_dest;
1991 bool loop_has_only_one_exit;
1992 edge then_old_edge, orig_ex = ex;
1993 gimple_stmt_iterator gsi;
1994 gimple stmt;
1995 struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1997 /* ?? Insert store after previous store if applicable. See note
1998 below. */
1999 if (prev_edges)
2000 ex = prev_edges->append_cond_position;
2002 loop_has_only_one_exit = single_pred_p (ex->dest);
2004 if (loop_has_only_one_exit)
2005 ex = split_block_after_labels (ex->dest);
2007 old_dest = ex->dest;
2008 new_bb = split_edge (ex);
2009 then_bb = create_empty_bb (new_bb);
2010 if (current_loops && new_bb->loop_father)
2011 add_bb_to_loop (then_bb, new_bb->loop_father);
2013 gsi = gsi_start_bb (new_bb);
2014 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2015 NULL_TREE, NULL_TREE);
2016 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2018 gsi = gsi_start_bb (then_bb);
2019 /* Insert actual store. */
2020 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2021 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2023 make_edge (new_bb, then_bb, EDGE_TRUE_VALUE);
2024 make_edge (new_bb, old_dest, EDGE_FALSE_VALUE);
2025 then_old_edge = make_edge (then_bb, old_dest, EDGE_FALLTHRU);
2027 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2029 if (prev_edges)
2031 basic_block prevbb = prev_edges->last_cond_fallthru->src;
2032 redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
2033 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2034 set_immediate_dominator (CDI_DOMINATORS, old_dest,
2035 recompute_dominator (CDI_DOMINATORS, old_dest));
2038 /* ?? Because stores may alias, they must happen in the exact
2039 sequence they originally happened. Save the position right after
2040 the (_lsm) store we just created so we can continue appending after
2041 it and maintain the original order. */
2043 struct prev_flag_edges *p;
2045 if (orig_ex->aux)
2046 orig_ex->aux = NULL;
2047 alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
2048 p = (struct prev_flag_edges *) orig_ex->aux;
2049 p->append_cond_position = then_old_edge;
2050 p->last_cond_fallthru = find_edge (new_bb, old_dest);
2051 orig_ex->aux = (void *) p;
2054 if (!loop_has_only_one_exit)
2055 for (gsi = gsi_start_phis (old_dest); !gsi_end_p (gsi); gsi_next (&gsi))
2057 gimple phi = gsi_stmt (gsi);
2058 unsigned i;
2060 for (i = 0; i < gimple_phi_num_args (phi); i++)
2061 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2063 tree arg = gimple_phi_arg_def (phi, i);
2064 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2065 update_stmt (phi);
2068 /* Remove the original fall through edge. This was the
2069 single_succ_edge (new_bb). */
2070 EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU;
2073 /* Helper function for execute_sm. On every location where REF is
2074 set, set an appropriate flag indicating the store. */
2076 static tree
2077 execute_sm_if_changed_flag_set (struct loop *loop, mem_ref_p ref)
2079 unsigned i;
2080 mem_ref_loc_p loc;
2081 tree flag;
2082 vec<mem_ref_loc_p> locs = vNULL;
2083 char *str = get_lsm_tmp_name (ref->mem, ~0);
2085 lsm_tmp_name_add ("_flag");
2086 flag = create_tmp_reg (boolean_type_node, str);
2087 get_all_locs_in_loop (loop, ref, &locs);
2088 FOR_EACH_VEC_ELT (locs, i, loc)
2090 gimple_stmt_iterator gsi;
2091 gimple stmt;
2093 /* Only set the flag for writes. */
2094 if (is_gimple_assign (loc->stmt)
2095 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2097 gsi = gsi_for_stmt (loc->stmt);
2098 stmt = gimple_build_assign (flag, boolean_true_node);
2099 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2102 locs.release ();
2103 return flag;
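/* Illustrative sketch with hypothetical names: for a candidate *p stored
   under a condition, this function leaves the loop body roughly as

     if (cond)
       {
         *p = val;
         p_lsm_flag = true;        <-- inserted by this function
       }

   The store itself is only rewritten to the temporary later, by
   rewrite_mem_refs called from execute_sm.  */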
2106 /* Executes store motion of memory reference REF from LOOP.
2107 Exits from the LOOP are stored in EXITS. The initialization of the
2108 temporary variable is put in the preheader of the loop, and assignments
2109 to the reference from the temporary variable are emitted to exits. */
2111 static void
2112 execute_sm (struct loop *loop, vec<edge> exits, mem_ref_p ref)
2114 tree tmp_var, store_flag;
2115 unsigned i;
2116 gimple load;
2117 struct fmt_data fmt_data;
2118 edge ex, latch_edge;
2119 struct lim_aux_data *lim_data;
2120 bool multi_threaded_model_p = false;
2122 if (dump_file && (dump_flags & TDF_DETAILS))
2124 fprintf (dump_file, "Executing store motion of ");
2125 print_generic_expr (dump_file, ref->mem, 0);
2126 fprintf (dump_file, " from loop %d\n", loop->num);
2129 tmp_var = create_tmp_reg (TREE_TYPE (ref->mem),
2130 get_lsm_tmp_name (ref->mem, ~0));
2132 fmt_data.loop = loop;
2133 fmt_data.orig_loop = loop;
2134 for_each_index (&ref->mem, force_move_till, &fmt_data);
2136 if (block_in_transaction (loop_preheader_edge (loop)->src)
2137 || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
2138 multi_threaded_model_p = true;
2140 if (multi_threaded_model_p)
2141 store_flag = execute_sm_if_changed_flag_set (loop, ref);
2143 rewrite_mem_refs (loop, ref, tmp_var);
2145 /* Emit the load code into the latch, so that we are sure it will
2146 be processed after all dependencies. */
2147 latch_edge = loop_latch_edge (loop);
2149 /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2150 load altogether, since the store is predicated by a flag. We
2151 could do the load only if it was originally in the loop. */
2152 load = gimple_build_assign (tmp_var, unshare_expr (ref->mem));
2153 lim_data = init_lim_data (load);
2154 lim_data->max_loop = loop;
2155 lim_data->tgt_loop = loop;
2156 gsi_insert_on_edge (latch_edge, load);
2158 if (multi_threaded_model_p)
2160 load = gimple_build_assign (store_flag, boolean_false_node);
2161 lim_data = init_lim_data (load);
2162 lim_data->max_loop = loop;
2163 lim_data->tgt_loop = loop;
2164 gsi_insert_on_edge (latch_edge, load);
2167 /* Sink the store to every exit from the loop. */
2168 FOR_EACH_VEC_ELT (exits, i, ex)
2169 if (!multi_threaded_model_p)
2171 gimple store;
2172 store = gimple_build_assign (unshare_expr (ref->mem), tmp_var);
2173 gsi_insert_on_edge (ex, store);
2175 else
2176 execute_sm_if_changed (ex, ref->mem, tmp_var, store_flag);
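/* Illustrative sketch of the variant chosen when store data races are
   allowed and the preheader is not in a transaction (hypothetical names):

     g_lsm = g;
     for (...)
       if (cond)
         g_lsm = val;
     g = g_lsm;                 <-- unconditional store on the exit

   The final store is executed even on paths where the original loop never
   stored to g, which is why the flag-based form from execute_sm_if_changed
   is used instead when such races must be avoided.  */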
2179 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2180 edges of the LOOP. */
2182 static void
2183 hoist_memory_references (struct loop *loop, bitmap mem_refs,
2184 vec<edge> exits)
2186 mem_ref_p ref;
2187 unsigned i;
2188 bitmap_iterator bi;
2190 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2192 ref = memory_accesses.refs_list[i];
2193 execute_sm (loop, exits, ref);
2197 /* Returns true if REF is always accessed in LOOP. If STORED_P is true,
2198 make sure REF is always stored to in LOOP. */
2200 static bool
2201 ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
2203 vec<mem_ref_loc_p> locs = vNULL;
2204 unsigned i;
2205 mem_ref_loc_p loc;
2206 bool ret = false;
2207 struct loop *must_exec;
2208 tree base;
2210 base = get_base_address (ref->mem);
2211 if (INDIRECT_REF_P (base)
2212 || TREE_CODE (base) == MEM_REF)
2213 base = TREE_OPERAND (base, 0);
2215 get_all_locs_in_loop (loop, ref, &locs);
2216 FOR_EACH_VEC_ELT (locs, i, loc)
2218 if (!get_lim_data (loc->stmt))
2219 continue;
2221 /* If we require an always executed store, make sure the statement
2222 stores to the reference. */
2223 if (stored_p)
2225 tree lhs;
2226 if (!gimple_get_lhs (loc->stmt))
2227 continue;
2228 lhs = get_base_address (gimple_get_lhs (loc->stmt));
2229 if (!lhs)
2230 continue;
2231 if (INDIRECT_REF_P (lhs)
2232 || TREE_CODE (lhs) == MEM_REF)
2233 lhs = TREE_OPERAND (lhs, 0);
2234 if (lhs != base)
2235 continue;
2238 must_exec = get_lim_data (loc->stmt)->always_executed_in;
2239 if (!must_exec)
2240 continue;
2242 if (must_exec == loop
2243 || flow_loop_nested_p (must_exec, loop))
2245 ret = true;
2246 break;
2249 locs.release ();
2251 return ret;
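/* Illustrative sketch: in

     for (...)
       *p = f ();               <-- stored whenever the loop is entered

   the store to *p is always executed, so even a potentially trapping or
   read-only *p may be store-motioned, whereas in

     for (...)
       if (cond)
         *p = f ();

   it is not, and can_sm_ref_p below rejects such trapping references.  */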
2254 /* Returns true if REF1 and REF2 are independent. */
2256 static bool
2257 refs_independent_p (mem_ref_p ref1, mem_ref_p ref2)
2259 if (ref1 == ref2
2260 || bitmap_bit_p (ref1->indep_ref, ref2->id))
2261 return true;
2262 if (bitmap_bit_p (ref1->dep_ref, ref2->id))
2263 return false;
2264 if (!MEM_ANALYZABLE (ref1)
2265 || !MEM_ANALYZABLE (ref2))
2266 return false;
2268 if (dump_file && (dump_flags & TDF_DETAILS))
2269 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2270 ref1->id, ref2->id);
2272 if (mem_refs_may_alias_p (ref1->mem, ref2->mem,
2273 &memory_accesses.ttae_cache))
2275 bitmap_set_bit (ref1->dep_ref, ref2->id);
2276 bitmap_set_bit (ref2->dep_ref, ref1->id);
2277 if (dump_file && (dump_flags & TDF_DETAILS))
2278 fprintf (dump_file, "dependent.\n");
2279 return false;
2281 else
2283 bitmap_set_bit (ref1->indep_ref, ref2->id);
2284 bitmap_set_bit (ref2->indep_ref, ref1->id);
2285 if (dump_file && (dump_flags & TDF_DETAILS))
2286 fprintf (dump_file, "independent.\n");
2287 return true;
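/* Illustrative sketch: for two distinct file-scope variables

     int x, y;
     for (...)
       { x = ...; ... = y; }

   mem_refs_may_alias_p reports no alias, so the pair is cached as
   independent in both indep_ref bitmaps and later queries return at once;
   accesses through pointers that may point to the same object are instead
   cached as dependent.  */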
2291 /* Records whether REF is independent in LOOP (according
2292 to INDEP). */
2294 static void
2295 record_indep_loop (struct loop *loop, mem_ref_p ref, bool indep)
2297 if (indep)
2298 bitmap_set_bit (ref->indep_loop, loop->num);
2299 else
2300 bitmap_set_bit (ref->dep_loop, loop->num);
2303 /* Returns true if REF is independent of all other memory references in
2304 LOOP. */
2306 static bool
2307 ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref)
2309 bitmap refs_to_check;
2310 unsigned i;
2311 bitmap_iterator bi;
2312 bool ret = true, stored = bitmap_bit_p (ref->stored, loop->num);
2313 mem_ref_p aref;
2315 if (stored)
2316 refs_to_check = memory_accesses.all_refs_in_loop[loop->num];
2317 else
2318 refs_to_check = memory_accesses.all_refs_stored_in_loop[loop->num];
2320 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2322 aref = memory_accesses.refs_list[i];
2323 if (!MEM_ANALYZABLE (aref)
2324 || !refs_independent_p (ref, aref))
2326 ret = false;
2327 record_indep_loop (loop, aref, false);
2328 break;
2332 return ret;
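/* Illustrative sketch of why the set of references to check differs:
   a reference that is only read in the loop, e.g. *q in

     for (...)
       { *p = ...; ... = *q; }

   only has to be independent of the stores (here *p), since other loads
   cannot invalidate it; a stored reference such as *p has to be checked
   against every other reference in the loop.  */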
2335 /* Returns true if REF is independent of all other memory references in
2336 LOOP. Wrapper over ref_indep_loop_p_1, caching its results. */
2338 static bool
2339 ref_indep_loop_p (struct loop *loop, mem_ref_p ref)
2341 bool ret;
2343 if (bitmap_bit_p (ref->indep_loop, loop->num))
2344 return true;
2345 if (bitmap_bit_p (ref->dep_loop, loop->num))
2346 return false;
2348 ret = ref_indep_loop_p_1 (loop, ref);
2350 if (dump_file && (dump_flags & TDF_DETAILS))
2351 fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
2352 ref->id, loop->num, ret ? "independent" : "dependent");
2354 record_indep_loop (loop, ref, ret);
2356 return ret;
2359 /* Returns true if we can perform store motion of REF from LOOP. */
2361 static bool
2362 can_sm_ref_p (struct loop *loop, mem_ref_p ref)
2364 tree base;
2366 /* Can't hoist unanalyzable refs. */
2367 if (!MEM_ANALYZABLE (ref))
2368 return false;
2370 /* Unless the reference is stored in the loop, there is nothing to do. */
2371 if (!bitmap_bit_p (ref->stored, loop->num))
2372 return false;
2374 /* It should be movable. */
2375 if (!is_gimple_reg_type (TREE_TYPE (ref->mem))
2376 || TREE_THIS_VOLATILE (ref->mem)
2377 || !for_each_index (&ref->mem, may_move_till, loop))
2378 return false;
2380 /* If it can throw, fail; we do not properly update EH info. */
2381 if (tree_could_throw_p (ref->mem))
2382 return false;
2384 /* If it can trap, it must always be executed in LOOP.
2385 Readonly memory locations may trap when storing to them, but
2386 tree_could_trap_p is a predicate for rvalues, so check that
2387 explicitly. */
2388 base = get_base_address (ref->mem);
2389 if ((tree_could_trap_p (ref->mem)
2390 || (DECL_P (base) && TREE_READONLY (base)))
2391 && !ref_always_accessed_p (loop, ref, true))
2392 return false;
2394 /* And it must be independent of all other memory references
2395 in LOOP. */
2396 if (!ref_indep_loop_p (loop, ref))
2397 return false;
2399 return true;
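/* Illustrative sketch of references rejected above (hypothetical code):

     volatile int v;
     for (...)
       v = x;                   <-- volatile, never store-motioned

     for (...)
       if (c)
         p->f = x;              <-- may trap and is not always stored

   Both fail the checks in can_sm_ref_p and are left untouched.  */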
2402 /* Marks the references in LOOP for which store motion should be performed
2403 in REFS_TO_SM. SM_EXECUTED is the set of references for which store
2404 motion was performed in one of the outer loops. */
2406 static void
2407 find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2409 bitmap refs = memory_accesses.all_refs_in_loop[loop->num];
2410 unsigned i;
2411 bitmap_iterator bi;
2412 mem_ref_p ref;
2414 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2416 ref = memory_accesses.refs_list[i];
2417 if (can_sm_ref_p (loop, ref))
2418 bitmap_set_bit (refs_to_sm, i);
2422 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2423 for a store motion optimization (i.e. whether we can insert statements
2424 on its exits). */
2426 static bool
2427 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
2428 vec<edge> exits)
2430 unsigned i;
2431 edge ex;
2433 FOR_EACH_VEC_ELT (exits, i, ex)
2434 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2435 return false;
2437 return true;
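/* Illustrative sketch: a loop that can only be left through an exception,
   e.g.

     for (;;)
       foo ();                  <-- foo may throw

   has its sole exit on an EH edge, where no store can be inserted, so
   store motion is skipped for the whole loop.  */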
2440 /* Try to perform store motion for all memory references modified inside
2441 LOOP. SM_EXECUTED is the bitmap of the memory references for which
2442 store motion was executed in one of the outer loops. */
2444 static void
2445 store_motion_loop (struct loop *loop, bitmap sm_executed)
2447 vec<edge> exits = get_loop_exit_edges (loop);
2448 struct loop *subloop;
2449 bitmap sm_in_loop = BITMAP_ALLOC (NULL);
2451 if (loop_suitable_for_sm (loop, exits))
2453 find_refs_for_sm (loop, sm_executed, sm_in_loop);
2454 hoist_memory_references (loop, sm_in_loop, exits);
2456 exits.release ();
2458 bitmap_ior_into (sm_executed, sm_in_loop);
2459 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2460 store_motion_loop (subloop, sm_executed);
2461 bitmap_and_compl_into (sm_executed, sm_in_loop);
2462 BITMAP_FREE (sm_in_loop);
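/* Illustrative sketch: loops are processed outermost first, so in

     for (...)                  <-- g is store-motioned here
       for (...)
         g = ...;

   g enters SM_EXECUTED when the outer loop is handled, and the inner loop
   then skips it (via EXECUTE_IF_AND_COMPL_IN_BITMAP in find_refs_for_sm),
   avoiding a second, redundant transformation.  */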
2465 /* Try to perform store motion for all memory references modified inside
2466 loops. */
2468 static void
2469 store_motion (void)
2471 struct loop *loop;
2472 bitmap sm_executed = BITMAP_ALLOC (NULL);
2474 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2475 store_motion_loop (loop, sm_executed);
2477 BITMAP_FREE (sm_executed);
2478 gsi_commit_edge_inserts ();
2481 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2482 for each such basic block bb records the outermost loop for which execution
2483 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
2484 blocks that contain a nonpure call. */
2486 static void
2487 fill_always_executed_in (struct loop *loop, sbitmap contains_call)
2489 basic_block bb = NULL, *bbs, last = NULL;
2490 unsigned i;
2491 edge e;
2492 struct loop *inn_loop = loop;
2494 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2496 bbs = get_loop_body_in_dom_order (loop);
2498 for (i = 0; i < loop->num_nodes; i++)
2500 edge_iterator ei;
2501 bb = bbs[i];
2503 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2504 last = bb;
2506 if (bitmap_bit_p (contains_call, bb->index))
2507 break;
2509 FOR_EACH_EDGE (e, ei, bb->succs)
2510 if (!flow_bb_inside_loop_p (loop, e->dest))
2511 break;
2512 if (e)
2513 break;
2515 /* A loop might be infinite (TODO use simple loop analysis
2516 to disprove this if possible). */
2517 if (bb->flags & BB_IRREDUCIBLE_LOOP)
2518 break;
2520 if (!flow_bb_inside_loop_p (inn_loop, bb))
2521 break;
2523 if (bb->loop_father->header == bb)
2525 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2526 break;
2528 /* In a loop that is always entered we may proceed anyway.
2529 But record that we entered it and stop once we leave it. */
2530 inn_loop = bb->loop_father;
2534 while (1)
2536 SET_ALWAYS_EXECUTED_IN (last, loop);
2537 if (last == loop->header)
2538 break;
2539 last = get_immediate_dominator (CDI_DOMINATORS, last);
2542 free (bbs);
2545 for (loop = loop->inner; loop; loop = loop->next)
2546 fill_always_executed_in (loop, contains_call);
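/* Illustrative sketch: in

     while (1)
       {
         a = f ();              <-- in the header: always executed
         if (a)
           break;
         g ();                  <-- after a possible exit: not recorded
       }

   only the blocks up to and including the one containing the first
   possible exit (and dominating the latch) are marked as always executed
   in the loop.  */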
2549 /* Compute the global information needed by the loop invariant motion pass. */
2551 static void
2552 tree_ssa_lim_initialize (void)
2554 sbitmap contains_call = sbitmap_alloc (last_basic_block);
2555 gimple_stmt_iterator bsi;
2556 struct loop *loop;
2557 basic_block bb;
2559 bitmap_obstack_initialize (&lim_bitmap_obstack);
2561 bitmap_clear (contains_call);
2562 FOR_EACH_BB (bb)
2564 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2566 if (nonpure_call_p (gsi_stmt (bsi)))
2567 break;
2570 if (!gsi_end_p (bsi))
2571 bitmap_set_bit (contains_call, bb->index);
2574 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
2575 fill_always_executed_in (loop, contains_call);
2577 sbitmap_free (contains_call);
2579 lim_aux_data_map = pointer_map_create ();
2581 if (flag_tm)
2582 compute_transaction_bits ();
2584 alloc_aux_for_edges (0);
2587 /* Cleans up after the invariant motion pass. */
2589 static void
2590 tree_ssa_lim_finalize (void)
2592 basic_block bb;
2593 unsigned i;
2594 mem_ref_p ref;
2596 free_aux_for_edges ();
2598 FOR_EACH_BB (bb)
2599 SET_ALWAYS_EXECUTED_IN (bb, NULL);
2601 bitmap_obstack_release (&lim_bitmap_obstack);
2602 pointer_map_destroy (lim_aux_data_map);
2604 htab_delete (memory_accesses.refs);
2606 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
2607 memref_free (ref);
2608 memory_accesses.refs_list.release ();
2610 memory_accesses.refs_in_loop.release ();
2611 memory_accesses.all_refs_in_loop.release ();
2612 memory_accesses.all_refs_stored_in_loop.release ();
2614 if (memory_accesses.ttae_cache)
2615 free_affine_expand_cache (&memory_accesses.ttae_cache);
2618 /* Moves invariants from loops. Only "expensive" invariants are moved out --
2619 i.e. those that are likely to be a win regardless of the register pressure. */
2621 unsigned int
2622 tree_ssa_lim (void)
2624 unsigned int todo;
2626 tree_ssa_lim_initialize ();
2628 /* Gathers information about memory accesses in the loops. */
2629 analyze_memory_references ();
2631 /* For each statement determine the outermost loop in which it is
2632 invariant and the cost of computing the invariant. */
2633 determine_invariantness ();
2635 /* Execute store motion. Force the necessary invariants to be moved
2636 out of the loops as well. */
2637 store_motion ();
2639 /* Move the expressions that are expensive enough. */
2640 todo = move_computations ();
2642 tree_ssa_lim_finalize ();
2644 return todo;