simplify-rtx.c (simplify_rtx): Use simplify_subreg rather than simplify_gen_subreg.
[official-gcc.git] / gcc / tree-ssa-loop-im.c
blob5b8d6038bcf4967b6f6b9c3ad574746b654806f2
1 /* Loop invariant motion.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "hard-reg-set.h"
29 #include "basic-block.h"
30 #include "output.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "domwalk.h"
37 #include "params.h"
38 #include "tree-pass.h"
39 #include "flags.h"
40 #include "real.h"
41 #include "hashtab.h"
43 /* TODO: Support for predicated code motion. I.e.
45 while (1)
47 if (cond)
49 a = inv;
50 something;
54 Where COND and INV are is invariants, but evaluating INV may trap or be
55 invalid from some other reason if !COND. This may be transformed to
57 if (cond)
58 a = inv;
59 while (1)
61 if (cond)
62 something;
63 } */
65 /* A type for the list of statements that have to be moved in order to be able
66 to hoist an invariant computation. */
68 struct depend
70 tree stmt;
71 struct depend *next;
74 /* The auxiliary data kept for each statement. */
76 struct lim_aux_data
78 struct loop *max_loop; /* The outermost loop in that the statement
79 is invariant. */
81 struct loop *tgt_loop; /* The loop out of that we want to move the
82 invariant. */
84 struct loop *always_executed_in;
85 /* The outermost loop for that we are sure
86 the statement is executed if the loop
87 is entered. */
89 bool sm_done; /* True iff the store motion for a memory
90 reference in the statement has already
91 been executed. */
93 unsigned cost; /* Cost of the computation performed by the
94 statement. */
96 struct depend *depends; /* List of statements that must be also hoisted
97 out of the loop when this statement is
98 hoisted; i.e. those that define the operands
99 of the statement and are inside of the
100 MAX_LOOP loop. */
103 #define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
104 ? NULL \
105 : (struct lim_aux_data *) (stmt_ann (STMT)->common.aux))
107 /* Description of a memory reference location for store motion. */
109 struct mem_ref_loc
111 tree *ref; /* The reference itself. */
112 tree stmt; /* The statement in that it occurs. */
113 struct mem_ref_loc *next; /* Next use in the chain. */
116 /* Description of a memory reference for store motion. */
118 struct mem_ref
120 tree mem; /* The memory itself. */
121 hashval_t hash; /* Its hash value. */
122 bool is_stored; /* True if there is a store to the location
123 in the loop. */
124 struct mem_ref_loc *locs; /* The locations where it is found. */
125 bitmap vops; /* Vops corresponding to this memory
126 location. */
127 struct mem_ref *next; /* Next memory reference in the list.
128 Memory references are stored in a hash
129 table, but the hash function depends
130 on values of pointers. Thus we cannot use
131 htab_traverse, since then we would get
132 miscompares during bootstrap (although the
133 produced code would be correct). */
136 /* Minimum cost of an expensive expression. */
137 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
139 /* The outermost loop for that execution of the header guarantees that the
140 block will be executed. */
141 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
143 /* Calls CBCK for each index in memory reference ADDR_P. There are two
144 kinds situations handled; in each of these cases, the memory reference
145 and DATA are passed to the callback:
147 Access to an array: ARRAY_{RANGE_}REF (base, index). In this case we also
148 pass the pointer to the index to the callback.
150 Pointer dereference: INDIRECT_REF (addr). In this case we also pass the
151 pointer to addr to the callback.
153 If the callback returns false, the whole search stops and false is returned.
154 Otherwise the function returns true after traversing through the whole
155 reference *ADDR_P. */
157 bool
158 for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
160 tree *nxt, *idx;
162 for (; ; addr_p = nxt)
164 switch (TREE_CODE (*addr_p))
166 case SSA_NAME:
167 return cbck (*addr_p, addr_p, data);
169 case MISALIGNED_INDIRECT_REF:
170 case ALIGN_INDIRECT_REF:
171 case INDIRECT_REF:
172 nxt = &TREE_OPERAND (*addr_p, 0);
173 return cbck (*addr_p, nxt, data);
175 case BIT_FIELD_REF:
176 case VIEW_CONVERT_EXPR:
177 case REALPART_EXPR:
178 case IMAGPART_EXPR:
179 nxt = &TREE_OPERAND (*addr_p, 0);
180 break;
182 case COMPONENT_REF:
183 /* If the component has varying offset, it behaves like index
184 as well. */
185 idx = &TREE_OPERAND (*addr_p, 2);
186 if (*idx
187 && !cbck (*addr_p, idx, data))
188 return false;
190 nxt = &TREE_OPERAND (*addr_p, 0);
191 break;
193 case ARRAY_REF:
194 case ARRAY_RANGE_REF:
195 nxt = &TREE_OPERAND (*addr_p, 0);
196 if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
197 return false;
198 break;
200 case VAR_DECL:
201 case PARM_DECL:
202 case STRING_CST:
203 case RESULT_DECL:
204 case VECTOR_CST:
205 case COMPLEX_CST:
206 case INTEGER_CST:
207 case REAL_CST:
208 return true;
210 case TARGET_MEM_REF:
211 idx = &TMR_BASE (*addr_p);
212 if (*idx
213 && !cbck (*addr_p, idx, data))
214 return false;
215 idx = &TMR_INDEX (*addr_p);
216 if (*idx
217 && !cbck (*addr_p, idx, data))
218 return false;
219 return true;
221 default:
222 gcc_unreachable ();
227 /* If it is possible to hoist the statement STMT unconditionally,
228 returns MOVE_POSSIBLE.
229 If it is possible to hoist the statement STMT, but we must avoid making
230 it executed if it would not be executed in the original program (e.g.
231 because it may trap), return MOVE_PRESERVE_EXECUTION.
232 Otherwise return MOVE_IMPOSSIBLE. */
234 enum move_pos
235 movement_possibility (tree stmt)
237 tree lhs, rhs;
239 if (flag_unswitch_loops
240 && TREE_CODE (stmt) == COND_EXPR)
242 /* If we perform unswitching, force the operands of the invariant
243 condition to be moved out of the loop. */
244 return MOVE_POSSIBLE;
247 if (TREE_CODE (stmt) != MODIFY_EXPR)
248 return MOVE_IMPOSSIBLE;
250 if (stmt_ends_bb_p (stmt))
251 return MOVE_IMPOSSIBLE;
253 if (stmt_ann (stmt)->has_volatile_ops)
254 return MOVE_IMPOSSIBLE;
256 lhs = TREE_OPERAND (stmt, 0);
257 if (TREE_CODE (lhs) == SSA_NAME
258 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
259 return MOVE_IMPOSSIBLE;
261 rhs = TREE_OPERAND (stmt, 1);
263 if (TREE_SIDE_EFFECTS (rhs))
264 return MOVE_IMPOSSIBLE;
266 if (TREE_CODE (lhs) != SSA_NAME
267 || tree_could_trap_p (rhs))
268 return MOVE_PRESERVE_EXECUTION;
270 if (get_call_expr_in (stmt))
272 /* While pure or const call is guaranteed to have no side effects, we
273 cannot move it arbitrarily. Consider code like
275 char *s = something ();
277 while (1)
279 if (s)
280 t = strlen (s);
281 else
282 t = 0;
285 Here the strlen call cannot be moved out of the loop, even though
286 s is invariant. In addition to possibly creating a call with
287 invalid arguments, moving out a function call that is not executed
288 may cause performance regressions in case the call is costly and
289 not executed at all. */
290 return MOVE_PRESERVE_EXECUTION;
292 return MOVE_POSSIBLE;
295 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
296 loop to that we could move the expression using DEF if it did not have
297 other operands, i.e. the outermost loop enclosing LOOP in that the value
298 of DEF is invariant. */
300 static struct loop *
301 outermost_invariant_loop (tree def, struct loop *loop)
303 tree def_stmt;
304 basic_block def_bb;
305 struct loop *max_loop;
307 if (TREE_CODE (def) != SSA_NAME)
308 return superloop_at_depth (loop, 1);
310 def_stmt = SSA_NAME_DEF_STMT (def);
311 def_bb = bb_for_stmt (def_stmt);
312 if (!def_bb)
313 return superloop_at_depth (loop, 1);
315 max_loop = find_common_loop (loop, def_bb->loop_father);
317 if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop)
318 max_loop = find_common_loop (max_loop,
319 LIM_DATA (def_stmt)->max_loop->outer);
320 if (max_loop == loop)
321 return NULL;
322 max_loop = superloop_at_depth (loop, max_loop->depth + 1);
324 return max_loop;
327 /* Returns the outermost superloop of LOOP in that the expression EXPR is
328 invariant. */
330 static struct loop *
331 outermost_invariant_loop_expr (tree expr, struct loop *loop)
333 enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
334 unsigned i, nops;
335 struct loop *max_loop = superloop_at_depth (loop, 1), *aloop;
337 if (TREE_CODE (expr) == SSA_NAME
338 || TREE_CODE (expr) == INTEGER_CST
339 || is_gimple_min_invariant (expr))
340 return outermost_invariant_loop (expr, loop);
342 if (class != tcc_unary
343 && class != tcc_binary
344 && class != tcc_expression
345 && class != tcc_comparison)
346 return NULL;
348 nops = TREE_CODE_LENGTH (TREE_CODE (expr));
349 for (i = 0; i < nops; i++)
351 aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop);
352 if (!aloop)
353 return NULL;
355 if (flow_loop_nested_p (max_loop, aloop))
356 max_loop = aloop;
359 return max_loop;
362 /* DATA is a structure containing information associated with a statement
363 inside LOOP. DEF is one of the operands of this statement.
365 Find the outermost loop enclosing LOOP in that value of DEF is invariant
366 and record this in DATA->max_loop field. If DEF itself is defined inside
367 this loop as well (i.e. we need to hoist it out of the loop if we want
368 to hoist the statement represented by DATA), record the statement in that
369 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
370 add the cost of the computation of DEF to the DATA->cost.
372 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
374 static bool
375 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
376 bool add_cost)
378 tree def_stmt = SSA_NAME_DEF_STMT (def);
379 basic_block def_bb = bb_for_stmt (def_stmt);
380 struct loop *max_loop;
381 struct depend *dep;
383 if (!def_bb)
384 return true;
386 max_loop = outermost_invariant_loop (def, loop);
387 if (!max_loop)
388 return false;
390 if (flow_loop_nested_p (data->max_loop, max_loop))
391 data->max_loop = max_loop;
393 if (!LIM_DATA (def_stmt))
394 return true;
396 if (add_cost
397 /* Only add the cost if the statement defining DEF is inside LOOP,
398 i.e. if it is likely that by moving the invariants dependent
399 on it, we will be able to avoid creating a new register for
400 it (since it will be only used in these dependent invariants). */
401 && def_bb->loop_father == loop)
402 data->cost += LIM_DATA (def_stmt)->cost;
404 dep = XNEW (struct depend);
405 dep->stmt = def_stmt;
406 dep->next = data->depends;
407 data->depends = dep;
409 return true;
412 /* Returns an estimate for a cost of statement STMT. TODO -- the values here
413 are just ad-hoc constants. The estimates should be based on target-specific
414 values. */
416 static unsigned
417 stmt_cost (tree stmt)
419 tree rhs;
420 unsigned cost = 1;
422 /* Always try to create possibilities for unswitching. */
423 if (TREE_CODE (stmt) == COND_EXPR)
424 return LIM_EXPENSIVE;
426 rhs = TREE_OPERAND (stmt, 1);
428 /* Hoisting memory references out should almost surely be a win. */
429 if (stmt_references_memory_p (stmt))
430 cost += 20;
432 switch (TREE_CODE (rhs))
434 case CALL_EXPR:
435 /* We should be hoisting calls if possible. */
437 /* Unless the call is a builtin_constant_p; this always folds to a
438 constant, so moving it is useless. */
439 rhs = get_callee_fndecl (rhs);
440 if (DECL_BUILT_IN_CLASS (rhs) == BUILT_IN_NORMAL
441 && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P)
442 return 0;
444 cost += 20;
445 break;
447 case MULT_EXPR:
448 case TRUNC_DIV_EXPR:
449 case CEIL_DIV_EXPR:
450 case FLOOR_DIV_EXPR:
451 case ROUND_DIV_EXPR:
452 case EXACT_DIV_EXPR:
453 case CEIL_MOD_EXPR:
454 case FLOOR_MOD_EXPR:
455 case ROUND_MOD_EXPR:
456 case TRUNC_MOD_EXPR:
457 case RDIV_EXPR:
458 /* Division and multiplication are usually expensive. */
459 cost += 20;
460 break;
462 default:
463 break;
466 return cost;
469 /* Determine the outermost loop to that it is possible to hoist a statement
470 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
471 the outermost loop in that the value computed by STMT is invariant.
472 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
473 we preserve the fact whether STMT is executed. It also fills other related
474 information to LIM_DATA (STMT).
476 The function returns false if STMT cannot be hoisted outside of the loop it
477 is defined in, and true otherwise. */
479 static bool
480 determine_max_movement (tree stmt, bool must_preserve_exec)
482 basic_block bb = bb_for_stmt (stmt);
483 struct loop *loop = bb->loop_father;
484 struct loop *level;
485 struct lim_aux_data *lim_data = LIM_DATA (stmt);
486 tree val;
487 ssa_op_iter iter;
489 if (must_preserve_exec)
490 level = ALWAYS_EXECUTED_IN (bb);
491 else
492 level = superloop_at_depth (loop, 1);
493 lim_data->max_loop = level;
495 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
496 if (!add_dependency (val, lim_data, loop, true))
497 return false;
499 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
500 if (!add_dependency (val, lim_data, loop, false))
501 return false;
503 lim_data->cost += stmt_cost (stmt);
505 return true;
508 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
509 and that one of the operands of this statement is computed by STMT.
510 Ensure that STMT (together with all the statements that define its
511 operands) is hoisted at least out of the loop LEVEL. */
513 static void
514 set_level (tree stmt, struct loop *orig_loop, struct loop *level)
516 struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father;
517 struct depend *dep;
519 stmt_loop = find_common_loop (orig_loop, stmt_loop);
520 if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop)
521 stmt_loop = find_common_loop (stmt_loop,
522 LIM_DATA (stmt)->tgt_loop->outer);
523 if (flow_loop_nested_p (stmt_loop, level))
524 return;
526 gcc_assert (LIM_DATA (stmt));
527 gcc_assert (level == LIM_DATA (stmt)->max_loop
528 || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level));
530 LIM_DATA (stmt)->tgt_loop = level;
531 for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next)
532 set_level (dep->stmt, orig_loop, level);
535 /* Determines an outermost loop from that we want to hoist the statement STMT.
536 For now we chose the outermost possible loop. TODO -- use profiling
537 information to set it more sanely. */
539 static void
540 set_profitable_level (tree stmt)
542 set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop);
545 /* Returns true if STMT is not a pure call. */
547 static bool
548 nonpure_call_p (tree stmt)
550 tree call = get_call_expr_in (stmt);
552 if (!call)
553 return false;
555 return TREE_SIDE_EFFECTS (call) != 0;
558 /* Releases the memory occupied by DATA. */
560 static void
561 free_lim_aux_data (struct lim_aux_data *data)
563 struct depend *dep, *next;
565 for (dep = data->depends; dep; dep = next)
567 next = dep->next;
568 free (dep);
570 free (data);
573 /* Determine the outermost loops in that statements in basic block BB are
574 invariant, and record them to the LIM_DATA associated with the statements.
575 Callback for walk_dominator_tree. */
577 static void
578 determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
579 basic_block bb)
581 enum move_pos pos;
582 block_stmt_iterator bsi;
583 tree stmt, rhs;
584 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
585 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
587 if (!bb->loop_father->outer)
588 return;
590 if (dump_file && (dump_flags & TDF_DETAILS))
591 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
592 bb->index, bb->loop_father->num, bb->loop_father->depth);
594 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
596 stmt = bsi_stmt (bsi);
598 pos = movement_possibility (stmt);
599 if (pos == MOVE_IMPOSSIBLE)
601 if (nonpure_call_p (stmt))
603 maybe_never = true;
604 outermost = NULL;
606 continue;
609 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
610 to be hoisted out of loop, saving expensive divide. */
611 if (pos == MOVE_POSSIBLE
612 && (rhs = TREE_OPERAND (stmt, 1)) != NULL
613 && TREE_CODE (rhs) == RDIV_EXPR
614 && flag_unsafe_math_optimizations
615 && !flag_trapping_math
616 && outermost_invariant_loop_expr (TREE_OPERAND (rhs, 1),
617 loop_containing_stmt (stmt)) != NULL
618 && outermost_invariant_loop_expr (rhs,
619 loop_containing_stmt (stmt)) == NULL)
621 tree lhs, stmt1, stmt2, var, name;
623 lhs = TREE_OPERAND (stmt, 0);
625 /* stmt must be MODIFY_EXPR. */
626 var = create_tmp_var (TREE_TYPE (rhs), "reciptmp");
627 add_referenced_var (var);
629 stmt1 = build2 (MODIFY_EXPR, void_type_node, var,
630 build2 (RDIV_EXPR, TREE_TYPE (rhs),
631 build_real (TREE_TYPE (rhs), dconst1),
632 TREE_OPERAND (rhs, 1)));
633 name = make_ssa_name (var, stmt1);
634 TREE_OPERAND (stmt1, 0) = name;
635 stmt2 = build2 (MODIFY_EXPR, void_type_node, lhs,
636 build2 (MULT_EXPR, TREE_TYPE (rhs),
637 name, TREE_OPERAND (rhs, 0)));
639 /* Replace division stmt with reciprocal and multiply stmts.
640 The multiply stmt is not invariant, so update iterator
641 and avoid rescanning. */
642 bsi_replace (&bsi, stmt1, true);
643 bsi_insert_after (&bsi, stmt2, BSI_NEW_STMT);
644 SSA_NAME_DEF_STMT (lhs) = stmt2;
646 /* Continue processing with invariant reciprocal statement. */
647 stmt = stmt1;
650 stmt_ann (stmt)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
651 LIM_DATA (stmt)->always_executed_in = outermost;
653 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
654 continue;
656 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
658 LIM_DATA (stmt)->max_loop = NULL;
659 continue;
662 if (dump_file && (dump_flags & TDF_DETAILS))
664 print_generic_stmt_indented (dump_file, stmt, 0, 2);
665 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
666 LIM_DATA (stmt)->max_loop->depth,
667 LIM_DATA (stmt)->cost);
670 if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE)
671 set_profitable_level (stmt);
675 /* For each statement determines the outermost loop in that it is invariant,
676 statements on whose motion it depends and the cost of the computation.
677 This information is stored to the LIM_DATA structure associated with
678 each statement. */
680 static void
681 determine_invariantness (void)
683 struct dom_walk_data walk_data;
685 memset (&walk_data, 0, sizeof (struct dom_walk_data));
686 walk_data.before_dom_children_before_stmts = determine_invariantness_stmt;
688 init_walk_dominator_tree (&walk_data);
689 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
690 fini_walk_dominator_tree (&walk_data);
693 /* Hoist the statements in basic block BB out of the loops prescribed by
694 data stored in LIM_DATA structures associated with each statement. Callback
695 for walk_dominator_tree. */
697 static void
698 move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
699 basic_block bb)
701 struct loop *level;
702 block_stmt_iterator bsi;
703 tree stmt;
704 unsigned cost = 0;
706 if (!bb->loop_father->outer)
707 return;
709 for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
711 stmt = bsi_stmt (bsi);
713 if (!LIM_DATA (stmt))
715 bsi_next (&bsi);
716 continue;
719 cost = LIM_DATA (stmt)->cost;
720 level = LIM_DATA (stmt)->tgt_loop;
721 free_lim_aux_data (LIM_DATA (stmt));
722 stmt_ann (stmt)->common.aux = NULL;
724 if (!level)
726 bsi_next (&bsi);
727 continue;
730 /* We do not really want to move conditionals out of the loop; we just
731 placed it here to force its operands to be moved if necessary. */
732 if (TREE_CODE (stmt) == COND_EXPR)
733 continue;
735 if (dump_file && (dump_flags & TDF_DETAILS))
737 fprintf (dump_file, "Moving statement\n");
738 print_generic_stmt (dump_file, stmt, 0);
739 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
740 cost, level->num);
742 bsi_insert_on_edge (loop_preheader_edge (level), stmt);
743 bsi_remove (&bsi, false);
747 /* Hoist the statements out of the loops prescribed by data stored in
748 LIM_DATA structures associated with each statement.*/
750 static void
751 move_computations (void)
753 struct dom_walk_data walk_data;
755 memset (&walk_data, 0, sizeof (struct dom_walk_data));
756 walk_data.before_dom_children_before_stmts = move_computations_stmt;
758 init_walk_dominator_tree (&walk_data);
759 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
760 fini_walk_dominator_tree (&walk_data);
762 bsi_commit_edge_inserts ();
763 if (need_ssa_update_p ())
764 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
767 /* Checks whether the statement defining variable *INDEX can be hoisted
768 out of the loop passed in DATA. Callback for for_each_index. */
770 static bool
771 may_move_till (tree ref, tree *index, void *data)
773 struct loop *loop = data, *max_loop;
775 /* If REF is an array reference, check also that the step and the lower
776 bound is invariant in LOOP. */
777 if (TREE_CODE (ref) == ARRAY_REF)
779 tree step = array_ref_element_size (ref);
780 tree lbound = array_ref_low_bound (ref);
782 max_loop = outermost_invariant_loop_expr (step, loop);
783 if (!max_loop)
784 return false;
786 max_loop = outermost_invariant_loop_expr (lbound, loop);
787 if (!max_loop)
788 return false;
791 max_loop = outermost_invariant_loop (*index, loop);
792 if (!max_loop)
793 return false;
795 return true;
798 /* Forces statements defining (invariant) SSA names in expression EXPR to be
799 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
801 static void
802 force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop)
804 enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
805 unsigned i, nops;
807 if (TREE_CODE (expr) == SSA_NAME)
809 tree stmt = SSA_NAME_DEF_STMT (expr);
810 if (IS_EMPTY_STMT (stmt))
811 return;
813 set_level (stmt, orig_loop, loop);
814 return;
817 if (class != tcc_unary
818 && class != tcc_binary
819 && class != tcc_expression
820 && class != tcc_comparison)
821 return;
823 nops = TREE_CODE_LENGTH (TREE_CODE (expr));
824 for (i = 0; i < nops; i++)
825 force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop);
828 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
829 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
830 for_each_index. */
832 struct fmt_data
834 struct loop *loop;
835 struct loop *orig_loop;
838 static bool
839 force_move_till (tree ref, tree *index, void *data)
841 tree stmt;
842 struct fmt_data *fmt_data = data;
844 if (TREE_CODE (ref) == ARRAY_REF)
846 tree step = array_ref_element_size (ref);
847 tree lbound = array_ref_low_bound (ref);
849 force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop);
850 force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop);
853 if (TREE_CODE (*index) != SSA_NAME)
854 return true;
856 stmt = SSA_NAME_DEF_STMT (*index);
857 if (IS_EMPTY_STMT (stmt))
858 return true;
860 set_level (stmt, fmt_data->orig_loop, fmt_data->loop);
862 return true;
865 /* Records memory reference location *REF to the list MEM_REFS. The reference
866 occurs in statement STMT. */
868 static void
869 record_mem_ref_loc (struct mem_ref_loc **mem_refs, tree stmt, tree *ref)
871 struct mem_ref_loc *aref = XNEW (struct mem_ref_loc);
873 aref->stmt = stmt;
874 aref->ref = ref;
876 aref->next = *mem_refs;
877 *mem_refs = aref;
880 /* Releases list of memory reference locations MEM_REFS. */
882 static void
883 free_mem_ref_locs (struct mem_ref_loc *mem_refs)
885 struct mem_ref_loc *act;
887 while (mem_refs)
889 act = mem_refs;
890 mem_refs = mem_refs->next;
891 free (act);
895 /* Rewrites memory references in list MEM_REFS by variable TMP_VAR. */
897 static void
898 rewrite_mem_refs (tree tmp_var, struct mem_ref_loc *mem_refs)
900 tree var;
901 ssa_op_iter iter;
903 for (; mem_refs; mem_refs = mem_refs->next)
905 FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter, SSA_OP_ALL_VIRTUALS)
906 mark_sym_for_renaming (SSA_NAME_VAR (var));
908 *mem_refs->ref = tmp_var;
909 update_stmt (mem_refs->stmt);
913 /* The name and the length of the currently generated variable
914 for lsm. */
915 #define MAX_LSM_NAME_LENGTH 40
916 static char lsm_tmp_name[MAX_LSM_NAME_LENGTH + 1];
917 static int lsm_tmp_name_length;
919 /* Adds S to lsm_tmp_name. */
921 static void
922 lsm_tmp_name_add (const char *s)
924 int l = strlen (s) + lsm_tmp_name_length;
925 if (l > MAX_LSM_NAME_LENGTH)
926 return;
928 strcpy (lsm_tmp_name + lsm_tmp_name_length, s);
929 lsm_tmp_name_length = l;
932 /* Stores the name for temporary variable that replaces REF to
933 lsm_tmp_name. */
935 static void
936 gen_lsm_tmp_name (tree ref)
938 const char *name;
940 switch (TREE_CODE (ref))
942 case MISALIGNED_INDIRECT_REF:
943 case ALIGN_INDIRECT_REF:
944 case INDIRECT_REF:
945 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
946 lsm_tmp_name_add ("_");
947 break;
949 case BIT_FIELD_REF:
950 case VIEW_CONVERT_EXPR:
951 case ARRAY_RANGE_REF:
952 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
953 break;
955 case REALPART_EXPR:
956 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
957 lsm_tmp_name_add ("_RE");
958 break;
960 case IMAGPART_EXPR:
961 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
962 lsm_tmp_name_add ("_IM");
963 break;
965 case COMPONENT_REF:
966 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
967 lsm_tmp_name_add ("_");
968 name = get_name (TREE_OPERAND (ref, 1));
969 if (!name)
970 name = "F";
971 lsm_tmp_name_add ("_");
972 lsm_tmp_name_add (name);
974 case ARRAY_REF:
975 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
976 lsm_tmp_name_add ("_I");
977 break;
979 case SSA_NAME:
980 ref = SSA_NAME_VAR (ref);
981 /* Fallthru. */
983 case VAR_DECL:
984 case PARM_DECL:
985 name = get_name (ref);
986 if (!name)
987 name = "D";
988 lsm_tmp_name_add (name);
989 break;
991 case STRING_CST:
992 lsm_tmp_name_add ("S");
993 break;
995 case RESULT_DECL:
996 lsm_tmp_name_add ("R");
997 break;
999 default:
1000 gcc_unreachable ();
1004 /* Determines name for temporary variable that replaces REF.
1005 The name is accumulated into the lsm_tmp_name variable. */
1007 static char *
1008 get_lsm_tmp_name (tree ref)
1010 lsm_tmp_name_length = 0;
1011 gen_lsm_tmp_name (ref);
1012 lsm_tmp_name_add ("_lsm");
1013 return lsm_tmp_name;
1016 /* Records request for store motion of memory reference REF from LOOP.
1017 MEM_REFS is the list of occurrences of the reference REF inside LOOP;
1018 these references are rewritten by a new temporary variable.
1019 Exits from the LOOP are stored in EXITS. The initialization of the
1020 temporary variable is put to the preheader of the loop, and assignments
1021 to the reference from the temporary variable are emitted to exits. */
1023 static void
1024 schedule_sm (struct loop *loop, VEC (edge, heap) *exits, tree ref,
1025 struct mem_ref_loc *mem_refs)
1027 struct mem_ref_loc *aref;
1028 tree tmp_var;
1029 unsigned i;
1030 tree load, store;
1031 struct fmt_data fmt_data;
1032 edge ex;
1034 if (dump_file && (dump_flags & TDF_DETAILS))
1036 fprintf (dump_file, "Executing store motion of ");
1037 print_generic_expr (dump_file, ref, 0);
1038 fprintf (dump_file, " from loop %d\n", loop->num);
1041 tmp_var = make_rename_temp (TREE_TYPE (ref),
1042 get_lsm_tmp_name (ref));
1044 fmt_data.loop = loop;
1045 fmt_data.orig_loop = loop;
1046 for_each_index (&ref, force_move_till, &fmt_data);
1048 rewrite_mem_refs (tmp_var, mem_refs);
1049 for (aref = mem_refs; aref; aref = aref->next)
1050 if (LIM_DATA (aref->stmt))
1051 LIM_DATA (aref->stmt)->sm_done = true;
1053 /* Emit the load & stores. */
1054 load = build2 (MODIFY_EXPR, void_type_node, tmp_var, ref);
1055 get_stmt_ann (load)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
1056 LIM_DATA (load)->max_loop = loop;
1057 LIM_DATA (load)->tgt_loop = loop;
1059 /* Put this into the latch, so that we are sure it will be processed after
1060 all dependencies. */
1061 bsi_insert_on_edge (loop_latch_edge (loop), load);
1063 for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
1065 store = build2 (MODIFY_EXPR, void_type_node,
1066 unshare_expr (ref), tmp_var);
1067 bsi_insert_on_edge (ex, store);
1071 /* Check whether memory reference REF can be hoisted out of the LOOP. If this
1072 is true, prepare the statements that load the value of the memory reference
1073 to a temporary variable in the loop preheader, store it back on the loop
1074 exits, and replace all the references inside LOOP by this temporary variable.
1075 EXITS is the list of exits of LOOP. CLOBBERED_VOPS is the bitmap of virtual
1076 operands that are clobbered by a call or accessed through multiple references
1077 in loop. */
1079 static void
1080 determine_lsm_ref (struct loop *loop, VEC (edge, heap) *exits,
1081 bitmap clobbered_vops, struct mem_ref *ref)
1083 struct mem_ref_loc *aref;
1084 struct loop *must_exec;
1086 /* In case the memory is not stored to, there is nothing for SM to do. */
1087 if (!ref->is_stored)
1088 return;
1090 /* If the reference is aliased with any different ref, or killed by call
1091 in function, then fail. */
1092 if (bitmap_intersect_p (ref->vops, clobbered_vops))
1093 return;
1095 if (tree_could_trap_p (ref->mem))
1097 /* If the memory access is unsafe (i.e. it might trap), ensure that some
1098 of the statements in that it occurs is always executed when the loop
1099 is entered. This way we know that by moving the load from the
1100 reference out of the loop we will not cause the error that would not
1101 occur otherwise.
1103 TODO -- in fact we would like to check for anticipability of the
1104 reference, i.e. that on each path from loop entry to loop exit at
1105 least one of the statements containing the memory reference is
1106 executed. */
1108 for (aref = ref->locs; aref; aref = aref->next)
1110 if (!LIM_DATA (aref->stmt))
1111 continue;
1113 must_exec = LIM_DATA (aref->stmt)->always_executed_in;
1114 if (!must_exec)
1115 continue;
1117 if (must_exec == loop
1118 || flow_loop_nested_p (must_exec, loop))
1119 break;
1122 if (!aref)
1123 return;
1126 schedule_sm (loop, exits, ref->mem, ref->locs);
1129 /* Hoists memory references MEM_REFS out of LOOP. CLOBBERED_VOPS is the list
1130 of vops clobbered by call in loop or accessed by multiple memory references.
1131 EXITS is the list of exit edges of the LOOP. */
1133 static void
1134 hoist_memory_references (struct loop *loop, struct mem_ref *mem_refs,
1135 bitmap clobbered_vops, VEC (edge, heap) *exits)
1137 struct mem_ref *ref;
1139 for (ref = mem_refs; ref; ref = ref->next)
1140 determine_lsm_ref (loop, exits, clobbered_vops, ref);
1143 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
1144 for a store motion optimization (i.e. whether we can insert statement
1145 on its exits). */
1147 static bool
1148 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
1149 VEC (edge, heap) *exits)
1151 unsigned i;
1152 edge ex;
1154 for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
1155 if (ex->flags & EDGE_ABNORMAL)
1156 return false;
1158 return true;
1161 /* A hash function for struct mem_ref object OBJ. */
1163 static hashval_t
1164 memref_hash (const void *obj)
1166 const struct mem_ref *mem = obj;
1168 return mem->hash;
1171 /* An equality function for struct mem_ref object OBJ1 with
1172 memory reference OBJ2. */
1174 static int
1175 memref_eq (const void *obj1, const void *obj2)
1177 const struct mem_ref *mem1 = obj1;
1179 return operand_equal_p (mem1->mem, (tree) obj2, 0);
1182 /* Gathers memory references in statement STMT in LOOP, storing the
1183 information about them in MEM_REFS hash table. Note vops accessed through
1184 unrecognized statements in CLOBBERED_VOPS. The newly created references
1185 are also stored to MEM_REF_LIST. */
1187 static void
1188 gather_mem_refs_stmt (struct loop *loop, htab_t mem_refs,
1189 bitmap clobbered_vops, tree stmt,
1190 struct mem_ref **mem_ref_list)
1192 tree *lhs, *rhs, *mem = NULL;
1193 hashval_t hash;
1194 PTR *slot;
1195 struct mem_ref *ref = NULL;
1196 ssa_op_iter oi;
1197 tree vname;
1198 bool is_stored;
1200 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
1201 return;
1203 /* Recognize MEM = (SSA_NAME | invariant) and SSA_NAME = MEM patterns. */
1204 if (TREE_CODE (stmt) != MODIFY_EXPR)
1205 goto fail;
1207 lhs = &TREE_OPERAND (stmt, 0);
1208 rhs = &TREE_OPERAND (stmt, 1);
1210 if (TREE_CODE (*lhs) == SSA_NAME)
1212 if (!is_gimple_addressable (*rhs))
1213 goto fail;
1215 mem = rhs;
1216 is_stored = false;
1218 else if (TREE_CODE (*rhs) == SSA_NAME
1219 || is_gimple_min_invariant (*rhs))
1221 mem = lhs;
1222 is_stored = true;
1224 else
1225 goto fail;
1227 /* If we cannot create an SSA name for the result, give up. */
1228 if (!is_gimple_reg_type (TREE_TYPE (*mem))
1229 || TREE_THIS_VOLATILE (*mem))
1230 goto fail;
1232 /* If we cannot move the reference out of the loop, fail. */
1233 if (!for_each_index (mem, may_move_till, loop))
1234 goto fail;
1236 hash = iterative_hash_expr (*mem, 0);
1237 slot = htab_find_slot_with_hash (mem_refs, *mem, hash, INSERT);
1239 if (*slot)
1240 ref = *slot;
1241 else
1243 ref = XNEW (struct mem_ref);
1244 ref->mem = *mem;
1245 ref->hash = hash;
1246 ref->locs = NULL;
1247 ref->is_stored = false;
1248 ref->vops = BITMAP_ALLOC (NULL);
1249 ref->next = *mem_ref_list;
1250 *mem_ref_list = ref;
1251 *slot = ref;
1253 ref->is_stored |= is_stored;
1255 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1256 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1257 bitmap_set_bit (ref->vops, DECL_UID (SSA_NAME_VAR (vname)));
1258 record_mem_ref_loc (&ref->locs, stmt, mem);
1259 return;
1261 fail:
1262 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1263 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1264 bitmap_set_bit (clobbered_vops, DECL_UID (SSA_NAME_VAR (vname)));
1267 /* Gathers memory references in LOOP. Notes vops accessed through unrecognized
1268 statements in CLOBBERED_VOPS. The list of the references found by
1269 the function is returned. */
1271 static struct mem_ref *
1272 gather_mem_refs (struct loop *loop, bitmap clobbered_vops)
1274 basic_block *body = get_loop_body (loop);
1275 block_stmt_iterator bsi;
1276 unsigned i;
1277 struct mem_ref *mem_ref_list = NULL;
1278 htab_t mem_refs = htab_create (100, memref_hash, memref_eq, NULL);
1280 for (i = 0; i < loop->num_nodes; i++)
1282 for (bsi = bsi_start (body[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1283 gather_mem_refs_stmt (loop, mem_refs, clobbered_vops, bsi_stmt (bsi),
1284 &mem_ref_list);
1287 free (body);
1289 htab_delete (mem_refs);
1290 return mem_ref_list;
1293 /* Finds the vops accessed by more than one of the memory references described
1294 in MEM_REFS and marks them in CLOBBERED_VOPS. */
1296 static void
1297 find_more_ref_vops (struct mem_ref *mem_refs, bitmap clobbered_vops)
1299 bitmap_head tmp, all_vops;
1300 struct mem_ref *ref;
1302 bitmap_initialize (&tmp, &bitmap_default_obstack);
1303 bitmap_initialize (&all_vops, &bitmap_default_obstack);
1305 for (ref = mem_refs; ref; ref = ref->next)
1307 /* The vops that are already in all_vops are accessed by more than
1308 one memory reference. */
1309 bitmap_and (&tmp, &all_vops, ref->vops);
1310 bitmap_ior_into (clobbered_vops, &tmp);
1311 bitmap_clear (&tmp);
1313 bitmap_ior_into (&all_vops, ref->vops);
1316 bitmap_clear (&all_vops);
1319 /* Releases the memory occupied by REF. */
1321 static void
1322 free_mem_ref (struct mem_ref *ref)
1324 free_mem_ref_locs (ref->locs);
1325 BITMAP_FREE (ref->vops);
1326 free (ref);
1329 /* Releases the memory occupied by REFS. */
1331 static void
1332 free_mem_refs (struct mem_ref *refs)
1334 struct mem_ref *ref, *next;
1336 for (ref = refs; ref; ref = next)
1338 next = ref->next;
1339 free_mem_ref (ref);
1343 /* Try to perform store motion for all memory references modified inside
1344 LOOP. */
1346 static void
1347 determine_lsm_loop (struct loop *loop)
1349 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
1350 bitmap clobbered_vops;
1351 struct mem_ref *mem_refs;
1353 if (!loop_suitable_for_sm (loop, exits))
1355 VEC_free (edge, heap, exits);
1356 return;
1359 /* Find the memory references in LOOP. */
1360 clobbered_vops = BITMAP_ALLOC (NULL);
1361 mem_refs = gather_mem_refs (loop, clobbered_vops);
1363 /* Find the vops that are used for more than one reference. */
1364 find_more_ref_vops (mem_refs, clobbered_vops);
1366 /* Hoist all suitable memory references. */
1367 hoist_memory_references (loop, mem_refs, clobbered_vops, exits);
1369 free_mem_refs (mem_refs);
1370 VEC_free (edge, heap, exits);
1371 BITMAP_FREE (clobbered_vops);
1374 /* Try to perform store motion for all memory references modified inside
1375 loops. */
1377 static void
1378 determine_lsm (void)
1380 struct loop *loop;
1382 if (!current_loops->tree_root->inner)
1383 return;
1385 /* Pass the loops from the outermost and perform the store motion as
1386 suitable. */
1388 loop = current_loops->tree_root->inner;
1389 while (1)
1391 determine_lsm_loop (loop);
1393 if (loop->inner)
1395 loop = loop->inner;
1396 continue;
1398 while (!loop->next)
1400 loop = loop->outer;
1401 if (loop == current_loops->tree_root)
1403 bsi_commit_edge_inserts ();
1404 return;
1407 loop = loop->next;
1411 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
1412 for each such basic block bb records the outermost loop for that execution
1413 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
1414 blocks that contain a nonpure call. */
1416 static void
1417 fill_always_executed_in (struct loop *loop, sbitmap contains_call)
1419 basic_block bb = NULL, *bbs, last = NULL;
1420 unsigned i;
1421 edge e;
1422 struct loop *inn_loop = loop;
1424 if (!loop->header->aux)
1426 bbs = get_loop_body_in_dom_order (loop);
1428 for (i = 0; i < loop->num_nodes; i++)
1430 edge_iterator ei;
1431 bb = bbs[i];
1433 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1434 last = bb;
1436 if (TEST_BIT (contains_call, bb->index))
1437 break;
1439 FOR_EACH_EDGE (e, ei, bb->succs)
1440 if (!flow_bb_inside_loop_p (loop, e->dest))
1441 break;
1442 if (e)
1443 break;
1445 /* A loop might be infinite (TODO use simple loop analysis
1446 to disprove this if possible). */
1447 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1448 break;
1450 if (!flow_bb_inside_loop_p (inn_loop, bb))
1451 break;
1453 if (bb->loop_father->header == bb)
1455 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1456 break;
1458 /* In a loop that is always entered we may proceed anyway.
1459 But record that we entered it and stop once we leave it. */
1460 inn_loop = bb->loop_father;
1464 while (1)
1466 last->aux = loop;
1467 if (last == loop->header)
1468 break;
1469 last = get_immediate_dominator (CDI_DOMINATORS, last);
1472 free (bbs);
1475 for (loop = loop->inner; loop; loop = loop->next)
1476 fill_always_executed_in (loop, contains_call);
1479 /* Compute the global information needed by the loop invariant motion pass. */
1481 static void
1482 tree_ssa_lim_initialize (void)
1484 sbitmap contains_call = sbitmap_alloc (last_basic_block);
1485 block_stmt_iterator bsi;
1486 struct loop *loop;
1487 basic_block bb;
1489 sbitmap_zero (contains_call);
1490 FOR_EACH_BB (bb)
1492 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1494 if (nonpure_call_p (bsi_stmt (bsi)))
1495 break;
1498 if (!bsi_end_p (bsi))
1499 SET_BIT (contains_call, bb->index);
1502 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
1503 fill_always_executed_in (loop, contains_call);
1505 sbitmap_free (contains_call);
1508 /* Cleans up after the invariant motion pass. */
1510 static void
1511 tree_ssa_lim_finalize (void)
1513 basic_block bb;
1515 FOR_EACH_BB (bb)
1517 bb->aux = NULL;
1521 /* Moves invariants from loops. Only "expensive" invariants are moved out --
1522 i.e. those that are likely to be win regardless of the register pressure. */
1524 void
1525 tree_ssa_lim (void)
1527 tree_ssa_lim_initialize ();
1529 /* For each statement determine the outermost loop in that it is
1530 invariant and cost for computing the invariant. */
1531 determine_invariantness ();
1533 /* For each memory reference determine whether it is possible to hoist it
1534 out of the loop. Force the necessary invariants to be moved out of the
1535 loops as well. */
1536 determine_lsm ();
1538 /* Move the expressions that are expensive enough. */
1539 move_computations ();
1541 tree_ssa_lim_finalize ();