* config.sub: Add cases for the Renesas m32c. (This patch has been
[official-gcc.git] / gcc / tree-ssa-loop-im.c
blobdc9002f71fee6e8d8f39959dfa544a0abe4fab8f
1 /* Loop invariant motion.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "hard-reg-set.h"
29 #include "basic-block.h"
30 #include "output.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "domwalk.h"
37 #include "params.h"
38 #include "tree-pass.h"
39 #include "flags.h"
40 #include "real.h"
41 #include "hashtab.h"
43 /* TODO: Support for predicated code motion. I.e.
45 while (1)
47 if (cond)
49 a = inv;
50 something;
54 Where COND and INV are is invariants, but evaluating INV may trap or be
55 invalid from some other reason if !COND. This may be transformed to
57 if (cond)
58 a = inv;
59 while (1)
61 if (cond)
62 something;
63 } */
65 /* A type for the list of statements that have to be moved in order to be able
66 to hoist an invariant computation. */
68 struct depend
70 tree stmt;
71 struct depend *next;
74 /* The auxiliary data kept for each statement. */
76 struct lim_aux_data
78 struct loop *max_loop; /* The outermost loop in that the statement
79 is invariant. */
81 struct loop *tgt_loop; /* The loop out of that we want to move the
82 invariant. */
84 struct loop *always_executed_in;
85 /* The outermost loop for that we are sure
86 the statement is executed if the loop
87 is entered. */
89 bool sm_done; /* True iff the store motion for a memory
90 reference in the statement has already
91 been executed. */
93 unsigned cost; /* Cost of the computation performed by the
94 statement. */
96 struct depend *depends; /* List of statements that must be also hoisted
97 out of the loop when this statement is
98 hoisted; i.e. those that define the operands
99 of the statement and are inside of the
100 MAX_LOOP loop. */
103 #define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
104 ? NULL \
105 : (struct lim_aux_data *) (stmt_ann (STMT)->aux))
107 /* Description of a memory reference location for store motion. */
109 struct mem_ref_loc
111 tree *ref; /* The reference itself. */
112 tree stmt; /* The statement in that it occurs. */
113 struct mem_ref_loc *next; /* Next use in the chain. */
116 /* Description of a memory reference for store motion. */
118 struct mem_ref
120 tree mem; /* The memory itself. */
121 hashval_t hash; /* Its hash value. */
122 bool is_stored; /* True if there is a store to the location
123 in the loop. */
124 struct mem_ref_loc *locs; /* The locations where it is found. */
125 bitmap vops; /* Vops corresponding to this memory
126 location. */
127 struct mem_ref *next; /* Next memory reference in the list.
128 Memory references are stored in a hash
129 table, but the hash function depends
130 on values of pointers. Thus we cannot use
131 htab_traverse, since then we would get
132 miscompares during bootstrap (although the
133 produced code would be correct). */
136 /* Minimum cost of an expensive expression. */
137 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
139 /* The outermost loop for that execution of the header guarantees that the
140 block will be executed. */
141 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
143 /* Calls CBCK for each index in memory reference ADDR_P. There are two
144 kinds situations handled; in each of these cases, the memory reference
145 and DATA are passed to the callback:
147 Access to an array: ARRAY_{RANGE_}REF (base, index). In this case we also
148 pass the pointer to the index to the callback.
150 Pointer dereference: INDIRECT_REF (addr). In this case we also pass the
151 pointer to addr to the callback.
153 If the callback returns false, the whole search stops and false is returned.
154 Otherwise the function returns true after traversing through the whole
155 reference *ADDR_P. */
157 bool
158 for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
160 tree *nxt, *idx;
162 for (; ; addr_p = nxt)
164 switch (TREE_CODE (*addr_p))
166 case SSA_NAME:
167 return cbck (*addr_p, addr_p, data);
169 case MISALIGNED_INDIRECT_REF:
170 case ALIGN_INDIRECT_REF:
171 case INDIRECT_REF:
172 nxt = &TREE_OPERAND (*addr_p, 0);
173 return cbck (*addr_p, nxt, data);
175 case BIT_FIELD_REF:
176 case VIEW_CONVERT_EXPR:
177 case ARRAY_RANGE_REF:
178 case REALPART_EXPR:
179 case IMAGPART_EXPR:
180 nxt = &TREE_OPERAND (*addr_p, 0);
181 break;
183 case COMPONENT_REF:
184 /* If the component has varying offset, it behaves like index
185 as well. */
186 idx = &TREE_OPERAND (*addr_p, 2);
187 if (*idx
188 && !cbck (*addr_p, idx, data))
189 return false;
191 nxt = &TREE_OPERAND (*addr_p, 0);
192 break;
194 case ARRAY_REF:
195 nxt = &TREE_OPERAND (*addr_p, 0);
196 if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
197 return false;
198 break;
200 case VAR_DECL:
201 case PARM_DECL:
202 case STRING_CST:
203 case RESULT_DECL:
204 case VECTOR_CST:
205 return true;
207 default:
208 gcc_unreachable ();
213 /* If it is possible to hoist the statement STMT unconditionally,
214 returns MOVE_POSSIBLE.
215 If it is possible to hoist the statement STMT, but we must avoid making
216 it executed if it would not be executed in the original program (e.g.
217 because it may trap), return MOVE_PRESERVE_EXECUTION.
218 Otherwise return MOVE_IMPOSSIBLE. */
220 enum move_pos
221 movement_possibility (tree stmt)
223 tree lhs, rhs;
225 if (flag_unswitch_loops
226 && TREE_CODE (stmt) == COND_EXPR)
228 /* If we perform unswitching, force the operands of the invariant
229 condition to be moved out of the loop. */
230 return MOVE_POSSIBLE;
233 if (TREE_CODE (stmt) != MODIFY_EXPR)
234 return MOVE_IMPOSSIBLE;
236 if (stmt_ends_bb_p (stmt))
237 return MOVE_IMPOSSIBLE;
239 if (stmt_ann (stmt)->has_volatile_ops)
240 return MOVE_IMPOSSIBLE;
242 lhs = TREE_OPERAND (stmt, 0);
243 if (TREE_CODE (lhs) == SSA_NAME
244 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
245 return MOVE_IMPOSSIBLE;
247 rhs = TREE_OPERAND (stmt, 1);
249 if (TREE_SIDE_EFFECTS (rhs))
250 return MOVE_IMPOSSIBLE;
252 if (TREE_CODE (lhs) != SSA_NAME
253 || tree_could_trap_p (rhs))
254 return MOVE_PRESERVE_EXECUTION;
256 if (get_call_expr_in (stmt))
258 /* While pure or const call is guaranteed to have no side effects, we
259 cannot move it arbitrarily. Consider code like
261 char *s = something ();
263 while (1)
265 if (s)
266 t = strlen (s);
267 else
268 t = 0;
271 Here the strlen call cannot be moved out of the loop, even though
272 s is invariant. In addition to possibly creating a call with
273 invalid arguments, moving out a function call that is not executed
274 may cause performance regressions in case the call is costly and
275 not executed at all. */
276 return MOVE_PRESERVE_EXECUTION;
278 return MOVE_POSSIBLE;
281 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
282 loop to that we could move the expression using DEF if it did not have
283 other operands, i.e. the outermost loop enclosing LOOP in that the value
284 of DEF is invariant. */
286 static struct loop *
287 outermost_invariant_loop (tree def, struct loop *loop)
289 tree def_stmt;
290 basic_block def_bb;
291 struct loop *max_loop;
293 if (TREE_CODE (def) != SSA_NAME)
294 return superloop_at_depth (loop, 1);
296 def_stmt = SSA_NAME_DEF_STMT (def);
297 def_bb = bb_for_stmt (def_stmt);
298 if (!def_bb)
299 return superloop_at_depth (loop, 1);
301 max_loop = find_common_loop (loop, def_bb->loop_father);
303 if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop)
304 max_loop = find_common_loop (max_loop,
305 LIM_DATA (def_stmt)->max_loop->outer);
306 if (max_loop == loop)
307 return NULL;
308 max_loop = superloop_at_depth (loop, max_loop->depth + 1);
310 return max_loop;
313 /* Returns the outermost superloop of LOOP in that the expression EXPR is
314 invariant. */
316 static struct loop *
317 outermost_invariant_loop_expr (tree expr, struct loop *loop)
319 enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
320 unsigned i, nops;
321 struct loop *max_loop = superloop_at_depth (loop, 1), *aloop;
323 if (TREE_CODE (expr) == SSA_NAME
324 || TREE_CODE (expr) == INTEGER_CST
325 || is_gimple_min_invariant (expr))
326 return outermost_invariant_loop (expr, loop);
328 if (class != tcc_unary
329 && class != tcc_binary
330 && class != tcc_expression
331 && class != tcc_comparison)
332 return NULL;
334 nops = TREE_CODE_LENGTH (TREE_CODE (expr));
335 for (i = 0; i < nops; i++)
337 aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop);
338 if (!aloop)
339 return NULL;
341 if (flow_loop_nested_p (max_loop, aloop))
342 max_loop = aloop;
345 return max_loop;
348 /* DATA is a structure containing information associated with a statement
349 inside LOOP. DEF is one of the operands of this statement.
351 Find the outermost loop enclosing LOOP in that value of DEF is invariant
352 and record this in DATA->max_loop field. If DEF itself is defined inside
353 this loop as well (i.e. we need to hoist it out of the loop if we want
354 to hoist the statement represented by DATA), record the statement in that
355 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
356 add the cost of the computation of DEF to the DATA->cost.
358 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
360 static bool
361 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
362 bool add_cost)
364 tree def_stmt = SSA_NAME_DEF_STMT (def);
365 basic_block def_bb = bb_for_stmt (def_stmt);
366 struct loop *max_loop;
367 struct depend *dep;
369 if (!def_bb)
370 return true;
372 max_loop = outermost_invariant_loop (def, loop);
373 if (!max_loop)
374 return false;
376 if (flow_loop_nested_p (data->max_loop, max_loop))
377 data->max_loop = max_loop;
379 if (!LIM_DATA (def_stmt))
380 return true;
382 if (add_cost
383 /* Only add the cost if the statement defining DEF is inside LOOP,
384 i.e. if it is likely that by moving the invariants dependent
385 on it, we will be able to avoid creating a new register for
386 it (since it will be only used in these dependent invariants). */
387 && def_bb->loop_father == loop)
388 data->cost += LIM_DATA (def_stmt)->cost;
390 dep = xmalloc (sizeof (struct depend));
391 dep->stmt = def_stmt;
392 dep->next = data->depends;
393 data->depends = dep;
395 return true;
398 /* Returns an estimate for a cost of statement STMT. TODO -- the values here
399 are just ad-hoc constants. The estimates should be based on target-specific
400 values. */
402 static unsigned
403 stmt_cost (tree stmt)
405 tree rhs;
406 unsigned cost = 1;
408 /* Always try to create possibilities for unswitching. */
409 if (TREE_CODE (stmt) == COND_EXPR)
410 return LIM_EXPENSIVE;
412 rhs = TREE_OPERAND (stmt, 1);
414 /* Hoisting memory references out should almost surely be a win. */
415 if (stmt_references_memory_p (stmt))
416 cost += 20;
418 switch (TREE_CODE (rhs))
420 case CALL_EXPR:
421 /* We should be hoisting calls if possible. */
423 /* Unless the call is a builtin_constant_p; this always folds to a
424 constant, so moving it is useless. */
425 rhs = get_callee_fndecl (rhs);
426 if (DECL_BUILT_IN_CLASS (rhs) == BUILT_IN_NORMAL
427 && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P)
428 return 0;
430 cost += 20;
431 break;
433 case MULT_EXPR:
434 case TRUNC_DIV_EXPR:
435 case CEIL_DIV_EXPR:
436 case FLOOR_DIV_EXPR:
437 case ROUND_DIV_EXPR:
438 case EXACT_DIV_EXPR:
439 case CEIL_MOD_EXPR:
440 case FLOOR_MOD_EXPR:
441 case ROUND_MOD_EXPR:
442 case TRUNC_MOD_EXPR:
443 case RDIV_EXPR:
444 /* Division and multiplication are usually expensive. */
445 cost += 20;
446 break;
448 default:
449 break;
452 return cost;
455 /* Determine the outermost loop to that it is possible to hoist a statement
456 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
457 the outermost loop in that the value computed by STMT is invariant.
458 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
459 we preserve the fact whether STMT is executed. It also fills other related
460 information to LIM_DATA (STMT).
462 The function returns false if STMT cannot be hoisted outside of the loop it
463 is defined in, and true otherwise. */
465 static bool
466 determine_max_movement (tree stmt, bool must_preserve_exec)
468 basic_block bb = bb_for_stmt (stmt);
469 struct loop *loop = bb->loop_father;
470 struct loop *level;
471 struct lim_aux_data *lim_data = LIM_DATA (stmt);
472 tree val;
473 ssa_op_iter iter;
475 if (must_preserve_exec)
476 level = ALWAYS_EXECUTED_IN (bb);
477 else
478 level = superloop_at_depth (loop, 1);
479 lim_data->max_loop = level;
481 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
482 if (!add_dependency (val, lim_data, loop, true))
483 return false;
485 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
486 if (!add_dependency (val, lim_data, loop, false))
487 return false;
489 lim_data->cost += stmt_cost (stmt);
491 return true;
494 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
495 and that one of the operands of this statement is computed by STMT.
496 Ensure that STMT (together with all the statements that define its
497 operands) is hoisted at least out of the loop LEVEL. */
499 static void
500 set_level (tree stmt, struct loop *orig_loop, struct loop *level)
502 struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father;
503 struct depend *dep;
505 stmt_loop = find_common_loop (orig_loop, stmt_loop);
506 if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop)
507 stmt_loop = find_common_loop (stmt_loop,
508 LIM_DATA (stmt)->tgt_loop->outer);
509 if (flow_loop_nested_p (stmt_loop, level))
510 return;
512 gcc_assert (LIM_DATA (stmt));
513 gcc_assert (level == LIM_DATA (stmt)->max_loop
514 || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level));
516 LIM_DATA (stmt)->tgt_loop = level;
517 for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next)
518 set_level (dep->stmt, orig_loop, level);
521 /* Determines an outermost loop from that we want to hoist the statement STMT.
522 For now we chose the outermost possible loop. TODO -- use profiling
523 information to set it more sanely. */
525 static void
526 set_profitable_level (tree stmt)
528 set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop);
531 /* Returns true if STMT is not a pure call. */
533 static bool
534 nonpure_call_p (tree stmt)
536 tree call = get_call_expr_in (stmt);
538 if (!call)
539 return false;
541 return TREE_SIDE_EFFECTS (call) != 0;
544 /* Releases the memory occupied by DATA. */
546 static void
547 free_lim_aux_data (struct lim_aux_data *data)
549 struct depend *dep, *next;
551 for (dep = data->depends; dep; dep = next)
553 next = dep->next;
554 free (dep);
556 free (data);
559 /* Determine the outermost loops in that statements in basic block BB are
560 invariant, and record them to the LIM_DATA associated with the statements.
561 Callback for walk_dominator_tree. */
563 static void
564 determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
565 basic_block bb)
567 enum move_pos pos;
568 block_stmt_iterator bsi;
569 tree stmt, rhs;
570 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
571 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
573 if (!bb->loop_father->outer)
574 return;
576 if (dump_file && (dump_flags & TDF_DETAILS))
577 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
578 bb->index, bb->loop_father->num, bb->loop_father->depth);
580 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
582 stmt = bsi_stmt (bsi);
584 pos = movement_possibility (stmt);
585 if (pos == MOVE_IMPOSSIBLE)
587 if (nonpure_call_p (stmt))
589 maybe_never = true;
590 outermost = NULL;
592 continue;
595 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
596 to be hoisted out of loop, saving expensive divide. */
597 if (pos == MOVE_POSSIBLE
598 && (rhs = TREE_OPERAND (stmt, 1)) != NULL
599 && TREE_CODE (rhs) == RDIV_EXPR
600 && flag_unsafe_math_optimizations
601 && outermost_invariant_loop_expr (TREE_OPERAND (rhs, 1),
602 loop_containing_stmt (stmt)) != NULL
603 && outermost_invariant_loop_expr (rhs,
604 loop_containing_stmt (stmt)) == NULL)
606 tree lhs, stmt1, stmt2, var, name;
608 lhs = TREE_OPERAND (stmt, 0);
610 /* stmt must be MODIFY_EXPR. */
611 var = create_tmp_var (TREE_TYPE (rhs), "reciptmp");
612 add_referenced_tmp_var (var);
614 stmt1 = build2 (MODIFY_EXPR, void_type_node, var,
615 build2 (RDIV_EXPR, TREE_TYPE (rhs),
616 build_real (TREE_TYPE (rhs), dconst1),
617 TREE_OPERAND (rhs, 1)));
618 name = make_ssa_name (var, stmt1);
619 TREE_OPERAND (stmt1, 0) = name;
620 stmt2 = build2 (MODIFY_EXPR, void_type_node, lhs,
621 build2 (MULT_EXPR, TREE_TYPE (rhs),
622 name, TREE_OPERAND (rhs, 0)));
624 /* Replace division stmt with reciprocal and multiply stmts.
625 The multiply stmt is not invariant, so update iterator
626 and avoid rescanning. */
627 bsi_replace (&bsi, stmt1, true);
628 bsi_insert_after (&bsi, stmt2, BSI_NEW_STMT);
629 SSA_NAME_DEF_STMT (lhs) = stmt2;
631 /* Continue processing with invariant reciprocal statement. */
632 stmt = stmt1;
635 stmt_ann (stmt)->aux = xcalloc (1, sizeof (struct lim_aux_data));
636 LIM_DATA (stmt)->always_executed_in = outermost;
638 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
639 continue;
641 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
643 LIM_DATA (stmt)->max_loop = NULL;
644 continue;
647 if (dump_file && (dump_flags & TDF_DETAILS))
649 print_generic_stmt_indented (dump_file, stmt, 0, 2);
650 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
651 LIM_DATA (stmt)->max_loop->depth,
652 LIM_DATA (stmt)->cost);
655 if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE)
656 set_profitable_level (stmt);
660 /* For each statement determines the outermost loop in that it is invariant,
661 statements on whose motion it depends and the cost of the computation.
662 This information is stored to the LIM_DATA structure associated with
663 each statement. */
665 static void
666 determine_invariantness (void)
668 struct dom_walk_data walk_data;
670 memset (&walk_data, 0, sizeof (struct dom_walk_data));
671 walk_data.before_dom_children_before_stmts = determine_invariantness_stmt;
673 init_walk_dominator_tree (&walk_data);
674 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
675 fini_walk_dominator_tree (&walk_data);
678 /* Commits edge insertions and updates loop structures. */
680 void
681 loop_commit_inserts (void)
683 unsigned old_last_basic_block, i;
684 basic_block bb;
686 old_last_basic_block = last_basic_block;
687 bsi_commit_edge_inserts ();
688 for (i = old_last_basic_block; i < (unsigned) last_basic_block; i++)
690 bb = BASIC_BLOCK (i);
691 add_bb_to_loop (bb,
692 find_common_loop (single_pred (bb)->loop_father,
693 single_succ (bb)->loop_father));
697 /* Hoist the statements in basic block BB out of the loops prescribed by
698 data stored in LIM_DATA structures associated with each statement. Callback
699 for walk_dominator_tree. */
701 static void
702 move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
703 basic_block bb)
705 struct loop *level;
706 block_stmt_iterator bsi;
707 tree stmt;
708 unsigned cost = 0;
710 if (!bb->loop_father->outer)
711 return;
713 for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
715 stmt = bsi_stmt (bsi);
717 if (!LIM_DATA (stmt))
719 bsi_next (&bsi);
720 continue;
723 cost = LIM_DATA (stmt)->cost;
724 level = LIM_DATA (stmt)->tgt_loop;
725 free_lim_aux_data (LIM_DATA (stmt));
726 stmt_ann (stmt)->aux = NULL;
728 if (!level)
730 bsi_next (&bsi);
731 continue;
734 /* We do not really want to move conditionals out of the loop; we just
735 placed it here to force its operands to be moved if necessary. */
736 if (TREE_CODE (stmt) == COND_EXPR)
737 continue;
739 if (dump_file && (dump_flags & TDF_DETAILS))
741 fprintf (dump_file, "Moving statement\n");
742 print_generic_stmt (dump_file, stmt, 0);
743 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
744 cost, level->num);
746 bsi_insert_on_edge (loop_preheader_edge (level), stmt);
747 bsi_remove (&bsi);
751 /* Hoist the statements out of the loops prescribed by data stored in
752 LIM_DATA structures associated with each statement.*/
754 static void
755 move_computations (void)
757 struct dom_walk_data walk_data;
759 memset (&walk_data, 0, sizeof (struct dom_walk_data));
760 walk_data.before_dom_children_before_stmts = move_computations_stmt;
762 init_walk_dominator_tree (&walk_data);
763 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
764 fini_walk_dominator_tree (&walk_data);
766 loop_commit_inserts ();
767 if (need_ssa_update_p ())
768 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
771 /* Checks whether the statement defining variable *INDEX can be hoisted
772 out of the loop passed in DATA. Callback for for_each_index. */
774 static bool
775 may_move_till (tree ref, tree *index, void *data)
777 struct loop *loop = data, *max_loop;
779 /* If REF is an array reference, check also that the step and the lower
780 bound is invariant in LOOP. */
781 if (TREE_CODE (ref) == ARRAY_REF)
783 tree step = array_ref_element_size (ref);
784 tree lbound = array_ref_low_bound (ref);
786 max_loop = outermost_invariant_loop_expr (step, loop);
787 if (!max_loop)
788 return false;
790 max_loop = outermost_invariant_loop_expr (lbound, loop);
791 if (!max_loop)
792 return false;
795 max_loop = outermost_invariant_loop (*index, loop);
796 if (!max_loop)
797 return false;
799 return true;
802 /* Forces statements defining (invariant) SSA names in expression EXPR to be
803 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
805 static void
806 force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop)
808 enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
809 unsigned i, nops;
811 if (TREE_CODE (expr) == SSA_NAME)
813 tree stmt = SSA_NAME_DEF_STMT (expr);
814 if (IS_EMPTY_STMT (stmt))
815 return;
817 set_level (stmt, orig_loop, loop);
818 return;
821 if (class != tcc_unary
822 && class != tcc_binary
823 && class != tcc_expression
824 && class != tcc_comparison)
825 return;
827 nops = TREE_CODE_LENGTH (TREE_CODE (expr));
828 for (i = 0; i < nops; i++)
829 force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop);
832 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
833 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
834 for_each_index. */
836 struct fmt_data
838 struct loop *loop;
839 struct loop *orig_loop;
842 static bool
843 force_move_till (tree ref, tree *index, void *data)
845 tree stmt;
846 struct fmt_data *fmt_data = data;
848 if (TREE_CODE (ref) == ARRAY_REF)
850 tree step = array_ref_element_size (ref);
851 tree lbound = array_ref_low_bound (ref);
853 force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop);
854 force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop);
857 if (TREE_CODE (*index) != SSA_NAME)
858 return true;
860 stmt = SSA_NAME_DEF_STMT (*index);
861 if (IS_EMPTY_STMT (stmt))
862 return true;
864 set_level (stmt, fmt_data->orig_loop, fmt_data->loop);
866 return true;
869 /* Records memory reference location *REF to the list MEM_REFS. The reference
870 occurs in statement STMT. */
872 static void
873 record_mem_ref_loc (struct mem_ref_loc **mem_refs, tree stmt, tree *ref)
875 struct mem_ref_loc *aref = xmalloc (sizeof (struct mem_ref_loc));
877 aref->stmt = stmt;
878 aref->ref = ref;
880 aref->next = *mem_refs;
881 *mem_refs = aref;
884 /* Releases list of memory reference locations MEM_REFS. */
886 static void
887 free_mem_ref_locs (struct mem_ref_loc *mem_refs)
889 struct mem_ref_loc *act;
891 while (mem_refs)
893 act = mem_refs;
894 mem_refs = mem_refs->next;
895 free (act);
899 /* Rewrites memory references in list MEM_REFS by variable TMP_VAR. */
901 static void
902 rewrite_mem_refs (tree tmp_var, struct mem_ref_loc *mem_refs)
904 tree var;
905 ssa_op_iter iter;
907 for (; mem_refs; mem_refs = mem_refs->next)
909 FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter, SSA_OP_ALL_VIRTUALS)
910 mark_sym_for_renaming (SSA_NAME_VAR (var));
912 *mem_refs->ref = tmp_var;
913 update_stmt (mem_refs->stmt);
917 /* Records request for store motion of memory reference REF from LOOP.
918 MEM_REFS is the list of occurrences of the reference REF inside LOOP;
919 these references are rewritten by a new temporary variable.
920 Exits from the LOOP are stored in EXITS, there are N_EXITS of them.
921 The initialization of the temporary variable is put to the preheader
922 of the loop, and assignments to the reference from the temporary variable
923 are emitted to exits. */
925 static void
926 schedule_sm (struct loop *loop, edge *exits, unsigned n_exits, tree ref,
927 struct mem_ref_loc *mem_refs)
929 struct mem_ref_loc *aref;
930 tree tmp_var;
931 unsigned i;
932 tree load, store;
933 struct fmt_data fmt_data;
935 if (dump_file && (dump_flags & TDF_DETAILS))
937 fprintf (dump_file, "Executing store motion of ");
938 print_generic_expr (dump_file, ref, 0);
939 fprintf (dump_file, " from loop %d\n", loop->num);
942 tmp_var = make_rename_temp (TREE_TYPE (ref), "lsm_tmp");
944 fmt_data.loop = loop;
945 fmt_data.orig_loop = loop;
946 for_each_index (&ref, force_move_till, &fmt_data);
948 rewrite_mem_refs (tmp_var, mem_refs);
949 for (aref = mem_refs; aref; aref = aref->next)
950 if (LIM_DATA (aref->stmt))
951 LIM_DATA (aref->stmt)->sm_done = true;
953 /* Emit the load & stores. */
954 load = build (MODIFY_EXPR, void_type_node, tmp_var, ref);
955 get_stmt_ann (load)->aux = xcalloc (1, sizeof (struct lim_aux_data));
956 LIM_DATA (load)->max_loop = loop;
957 LIM_DATA (load)->tgt_loop = loop;
959 /* Put this into the latch, so that we are sure it will be processed after
960 all dependencies. */
961 bsi_insert_on_edge (loop_latch_edge (loop), load);
963 for (i = 0; i < n_exits; i++)
965 store = build (MODIFY_EXPR, void_type_node,
966 unshare_expr (ref), tmp_var);
967 bsi_insert_on_edge (exits[i], store);
971 /* Check whether memory reference REF can be hoisted out of the LOOP. If this
972 is true, prepare the statements that load the value of the memory reference
973 to a temporary variable in the loop preheader, store it back on the loop
974 exits, and replace all the references inside LOOP by this temporary variable.
975 LOOP has N_EXITS stored in EXITS. CLOBBERED_VOPS is the bitmap of virtual
976 operands that are clobbered by a call or accessed through multiple references
977 in loop. */
979 static void
980 determine_lsm_ref (struct loop *loop, edge *exits, unsigned n_exits,
981 bitmap clobbered_vops, struct mem_ref *ref)
983 struct mem_ref_loc *aref;
984 struct loop *must_exec;
986 /* In case the memory is not stored to, there is nothing for SM to do. */
987 if (!ref->is_stored)
988 return;
990 /* If the reference is aliased with any different ref, or killed by call
991 in function, then fail. */
992 if (bitmap_intersect_p (ref->vops, clobbered_vops))
993 return;
995 if (tree_could_trap_p (ref->mem))
997 /* If the memory access is unsafe (i.e. it might trap), ensure that some
998 of the statements in that it occurs is always executed when the loop
999 is entered. This way we know that by moving the load from the
1000 reference out of the loop we will not cause the error that would not
1001 occur otherwise.
1003 TODO -- in fact we would like to check for anticipability of the
1004 reference, i.e. that on each path from loop entry to loop exit at
1005 least one of the statements containing the memory reference is
1006 executed. */
1008 for (aref = ref->locs; aref; aref = aref->next)
1010 if (!LIM_DATA (aref->stmt))
1011 continue;
1013 must_exec = LIM_DATA (aref->stmt)->always_executed_in;
1014 if (!must_exec)
1015 continue;
1017 if (must_exec == loop
1018 || flow_loop_nested_p (must_exec, loop))
1019 break;
1022 if (!aref)
1023 return;
1026 schedule_sm (loop, exits, n_exits, ref->mem, ref->locs);
1029 /* Hoists memory references MEM_REFS out of LOOP. CLOBBERED_VOPS is the list
1030 of vops clobbered by call in loop or accessed by multiple memory references.
1031 EXITS is the list of N_EXITS exit edges of the LOOP. */
1033 static void
1034 hoist_memory_references (struct loop *loop, struct mem_ref *mem_refs,
1035 bitmap clobbered_vops, edge *exits, unsigned n_exits)
1037 struct mem_ref *ref;
1039 for (ref = mem_refs; ref; ref = ref->next)
1040 determine_lsm_ref (loop, exits, n_exits, clobbered_vops, ref);
1043 /* Checks whether LOOP (with N_EXITS exits stored in EXITS array) is suitable
1044 for a store motion optimization (i.e. whether we can insert statement
1045 on its exits). */
1047 static bool
1048 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED, edge *exits,
1049 unsigned n_exits)
1051 unsigned i;
1053 for (i = 0; i < n_exits; i++)
1054 if (exits[i]->flags & EDGE_ABNORMAL)
1055 return false;
1057 return true;
1060 /* A hash function for struct mem_ref object OBJ. */
1062 static hashval_t
1063 memref_hash (const void *obj)
1065 const struct mem_ref *mem = obj;
1067 return mem->hash;
1070 /* An equality function for struct mem_ref object OBJ1 with
1071 memory reference OBJ2. */
1073 static int
1074 memref_eq (const void *obj1, const void *obj2)
1076 const struct mem_ref *mem1 = obj1;
1078 return operand_equal_p (mem1->mem, (tree) obj2, 0);
1081 /* Gathers memory references in statement STMT in LOOP, storing the
1082 information about them in MEM_REFS hash table. Note vops accessed through
1083 unrecognized statements in CLOBBERED_VOPS. The newly created references
1084 are also stored to MEM_REF_LIST. */
1086 static void
1087 gather_mem_refs_stmt (struct loop *loop, htab_t mem_refs,
1088 bitmap clobbered_vops, tree stmt,
1089 struct mem_ref **mem_ref_list)
1091 tree *lhs, *rhs, *mem = NULL;
1092 hashval_t hash;
1093 PTR *slot;
1094 struct mem_ref *ref = NULL;
1095 ssa_op_iter oi;
1096 tree vname;
1097 bool is_stored;
1099 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
1100 return;
1102 /* Recognize MEM = (SSA_NAME | invariant) and SSA_NAME = MEM patterns. */
1103 if (TREE_CODE (stmt) != MODIFY_EXPR)
1104 goto fail;
1106 lhs = &TREE_OPERAND (stmt, 0);
1107 rhs = &TREE_OPERAND (stmt, 1);
1109 if (TREE_CODE (*lhs) == SSA_NAME)
1111 if (!is_gimple_addressable (*rhs))
1112 goto fail;
1114 mem = rhs;
1115 is_stored = false;
1117 else if (TREE_CODE (*rhs) == SSA_NAME
1118 || is_gimple_min_invariant (*rhs))
1120 mem = lhs;
1121 is_stored = true;
1123 else
1124 goto fail;
1126 /* If we cannot create an SSA name for the result, give up. */
1127 if (!is_gimple_reg_type (TREE_TYPE (*mem))
1128 || TREE_THIS_VOLATILE (*mem))
1129 goto fail;
1131 /* If we cannot move the reference out of the loop, fail. */
1132 if (!for_each_index (mem, may_move_till, loop))
1133 goto fail;
1135 hash = iterative_hash_expr (*mem, 0);
1136 slot = htab_find_slot_with_hash (mem_refs, *mem, hash, INSERT);
1138 if (*slot)
1139 ref = *slot;
1140 else
1142 ref = xmalloc (sizeof (struct mem_ref));
1143 ref->mem = *mem;
1144 ref->hash = hash;
1145 ref->locs = NULL;
1146 ref->is_stored = false;
1147 ref->vops = BITMAP_ALLOC (NULL);
1148 ref->next = *mem_ref_list;
1149 *mem_ref_list = ref;
1150 *slot = ref;
1152 ref->is_stored |= is_stored;
1154 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1155 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1157 bitmap_set_bit (ref->vops,
1158 var_ann (SSA_NAME_VAR (vname))->uid);
1160 record_mem_ref_loc (&ref->locs, stmt, mem);
1161 return;
1163 fail:
1164 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
1165 SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
1167 bitmap_set_bit (clobbered_vops,
1168 var_ann (SSA_NAME_VAR (vname))->uid);
1172 /* Gathers memory references in LOOP. Notes vops accessed through unrecognized
1173 statements in CLOBBERED_VOPS. The list of the references found by
1174 the function is returned. */
1176 static struct mem_ref *
1177 gather_mem_refs (struct loop *loop, bitmap clobbered_vops)
1179 basic_block *body = get_loop_body (loop);
1180 block_stmt_iterator bsi;
1181 unsigned i;
1182 struct mem_ref *mem_ref_list = NULL;
1183 htab_t mem_refs = htab_create (100, memref_hash, memref_eq, NULL);
1185 for (i = 0; i < loop->num_nodes; i++)
1187 for (bsi = bsi_start (body[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1188 gather_mem_refs_stmt (loop, mem_refs, clobbered_vops, bsi_stmt (bsi),
1189 &mem_ref_list);
1192 free (body);
1194 htab_delete (mem_refs);
1195 return mem_ref_list;
1198 /* Finds the vops accessed by more than one of the memory references described
1199 in MEM_REFS and marks them in CLOBBERED_VOPS. */
1201 static void
1202 find_more_ref_vops (struct mem_ref *mem_refs, bitmap clobbered_vops)
1204 bitmap_head tmp, all_vops;
1205 struct mem_ref *ref;
1207 bitmap_initialize (&tmp, &bitmap_default_obstack);
1208 bitmap_initialize (&all_vops, &bitmap_default_obstack);
1210 for (ref = mem_refs; ref; ref = ref->next)
1212 /* The vops that are already in all_vops are accessed by more than
1213 one memory reference. */
1214 bitmap_and (&tmp, &all_vops, ref->vops);
1215 bitmap_ior_into (clobbered_vops, &tmp);
1216 bitmap_clear (&tmp);
1218 bitmap_ior_into (&all_vops, ref->vops);
1221 bitmap_clear (&all_vops);
1224 /* Releases the memory occupied by REF. */
1226 static void
1227 free_mem_ref (struct mem_ref *ref)
1229 free_mem_ref_locs (ref->locs);
1230 BITMAP_FREE (ref->vops);
1231 free (ref);
1234 /* Releases the memory occupied by REFS. */
1236 static void
1237 free_mem_refs (struct mem_ref *refs)
1239 struct mem_ref *ref, *next;
1241 for (ref = refs; ref; ref = next)
1243 next = ref->next;
1244 free_mem_ref (ref);
1248 /* Try to perform store motion for all memory references modified inside
1249 LOOP. */
1251 static void
1252 determine_lsm_loop (struct loop *loop)
1254 unsigned n_exits;
1255 edge *exits = get_loop_exit_edges (loop, &n_exits);
1256 bitmap clobbered_vops;
1257 struct mem_ref *mem_refs;
1259 if (!loop_suitable_for_sm (loop, exits, n_exits))
1261 free (exits);
1262 return;
1265 /* Find the memory references in LOOP. */
1266 clobbered_vops = BITMAP_ALLOC (NULL);
1267 mem_refs = gather_mem_refs (loop, clobbered_vops);
1269 /* Find the vops that are used for more than one reference. */
1270 find_more_ref_vops (mem_refs, clobbered_vops);
1272 /* Hoist all suitable memory references. */
1273 hoist_memory_references (loop, mem_refs, clobbered_vops, exits, n_exits);
1275 free_mem_refs (mem_refs);
1276 free (exits);
1277 BITMAP_FREE (clobbered_vops);
1280 /* Try to perform store motion for all memory references modified inside
1281 any of LOOPS. */
1283 static void
1284 determine_lsm (struct loops *loops)
1286 struct loop *loop;
1288 if (!loops->tree_root->inner)
1289 return;
1291 /* Pass the loops from the outermost and perform the store motion as
1292 suitable. */
1294 loop = loops->tree_root->inner;
1295 while (1)
1297 determine_lsm_loop (loop);
1299 if (loop->inner)
1301 loop = loop->inner;
1302 continue;
1304 while (!loop->next)
1306 loop = loop->outer;
1307 if (loop == loops->tree_root)
1309 loop_commit_inserts ();
1310 return;
1313 loop = loop->next;
1317 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
1318 for each such basic block bb records the outermost loop for that execution
1319 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
1320 blocks that contain a nonpure call. */
1322 static void
1323 fill_always_executed_in (struct loop *loop, sbitmap contains_call)
1325 basic_block bb = NULL, *bbs, last = NULL;
1326 unsigned i;
1327 edge e;
1328 struct loop *inn_loop = loop;
1330 if (!loop->header->aux)
1332 bbs = get_loop_body_in_dom_order (loop);
1334 for (i = 0; i < loop->num_nodes; i++)
1336 edge_iterator ei;
1337 bb = bbs[i];
1339 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1340 last = bb;
1342 if (TEST_BIT (contains_call, bb->index))
1343 break;
1345 FOR_EACH_EDGE (e, ei, bb->succs)
1346 if (!flow_bb_inside_loop_p (loop, e->dest))
1347 break;
1348 if (e)
1349 break;
1351 /* A loop might be infinite (TODO use simple loop analysis
1352 to disprove this if possible). */
1353 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1354 break;
1356 if (!flow_bb_inside_loop_p (inn_loop, bb))
1357 break;
1359 if (bb->loop_father->header == bb)
1361 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1362 break;
1364 /* In a loop that is always entered we may proceed anyway.
1365 But record that we entered it and stop once we leave it. */
1366 inn_loop = bb->loop_father;
1370 while (1)
1372 last->aux = loop;
1373 if (last == loop->header)
1374 break;
1375 last = get_immediate_dominator (CDI_DOMINATORS, last);
1378 free (bbs);
1381 for (loop = loop->inner; loop; loop = loop->next)
1382 fill_always_executed_in (loop, contains_call);
1385 /* Compute the global information needed by the loop invariant motion pass.
1386 LOOPS is the loop tree. */
1388 static void
1389 tree_ssa_lim_initialize (struct loops *loops)
1391 sbitmap contains_call = sbitmap_alloc (last_basic_block);
1392 block_stmt_iterator bsi;
1393 struct loop *loop;
1394 basic_block bb;
1396 sbitmap_zero (contains_call);
1397 FOR_EACH_BB (bb)
1399 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1401 if (nonpure_call_p (bsi_stmt (bsi)))
1402 break;
1405 if (!bsi_end_p (bsi))
1406 SET_BIT (contains_call, bb->index);
1409 for (loop = loops->tree_root->inner; loop; loop = loop->next)
1410 fill_always_executed_in (loop, contains_call);
1412 sbitmap_free (contains_call);
1415 /* Cleans up after the invariant motion pass. */
1417 static void
1418 tree_ssa_lim_finalize (void)
1420 basic_block bb;
1422 FOR_EACH_BB (bb)
1424 bb->aux = NULL;
1428 /* Moves invariants from LOOPS. Only "expensive" invariants are moved out --
1429 i.e. those that are likely to be win regardless of the register pressure. */
1431 void
1432 tree_ssa_lim (struct loops *loops)
1434 tree_ssa_lim_initialize (loops);
1436 /* For each statement determine the outermost loop in that it is
1437 invariant and cost for computing the invariant. */
1438 determine_invariantness ();
1440 /* For each memory reference determine whether it is possible to hoist it
1441 out of the loop. Force the necessary invariants to be moved out of the
1442 loops as well. */
1443 determine_lsm (loops);
1445 /* Move the expressions that are expensive enough. */
1446 move_computations ();
1448 tree_ssa_lim_finalize ();