Mark ChangeLog
[official-gcc.git] / gcc / tree-ssa-loop-im.c
blob2404b05c81f6dd31c67046ed37dc7a37c78568bd
1 /* Loop invariant motion.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "hard-reg-set.h"
29 #include "basic-block.h"
30 #include "output.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "cfgloop.h"
36 #include "domwalk.h"
37 #include "params.h"
38 #include "tree-pass.h"
39 #include "flags.h"
41 /* TODO: Support for predicated code motion. I.e.
43 while (1)
45 if (cond)
47 a = inv;
48 something;
52 Where COND and INV are invariants, but evaluating INV may trap or be
53 invalid for some other reason if !COND. This may be transformed to
55 if (cond)
56 a = inv;
57 while (1)
59 if (cond)
60 something;
61 } */
63 /* A type for the list of statements that have to be moved in order to be able
64 to hoist an invariant computation. */
66 struct depend
/* The statement whose motion this one depends on (it defines an operand). */
68 tree stmt;
/* Next element of the singly-linked dependency list. */
69 struct depend *next;
72 /* The auxiliary data kept for each statement. */
74 struct lim_aux_data
76 struct loop *max_loop; /* The outermost loop in that the statement
77 is invariant. */
79 struct loop *tgt_loop; /* The loop out of that we want to move the
80 invariant. */
82 struct loop *always_executed_in;
83 /* The outermost loop for that we are sure
84 the statement is executed if the loop
85 is entered. */
87 bool sm_done; /* True iff the store motion for a memory
88 reference in the statement has already
89 been executed. */
91 unsigned cost; /* Cost of the computation performed by the
92 statement. */
94 struct depend *depends; /* List of statements that must be also hoisted
95 out of the loop when this statement is
96 hoisted; i.e. those that define the operands
97 of the statement and are inside of the
98 MAX_LOOP loop. */
/* Accessor for the lim_aux_data stored in the statement annotation's aux
   field.  PHI nodes carry no such data, so they yield NULL. */
101 #define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
102 ? NULL \
103 : (struct lim_aux_data *) (stmt_ann (STMT)->common.aux))
105 /* Description of a memory reference for store motion. */
107 struct mem_ref
109 tree *ref; /* The reference itself. */
110 tree stmt; /* The statement in that it occurs. */
111 struct mem_ref *next; /* Next use in the chain. */
114 /* Minimum cost of an expensive expression. */
115 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
117 /* The outermost loop for that execution of the header guarantees that the
118 block will be executed. */
/* NOTE(review): presumably filled into bb->aux by pass setup code outside
   this chunk -- confirm against the rest of the file. */
119 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
121 static unsigned max_stmt_uid; /* Maximal uid of a statement. Uids to phi
122 nodes are assigned using the versions of
123 ssa names they define. */
125 /* Returns uid of statement STMT. */
127 static unsigned
128 get_stmt_uid (tree stmt)
/* PHI nodes have no uid of their own; derive one from the SSA name version
   of the result, offset past all ordinary statement uids (max_stmt_uid). */
130 if (TREE_CODE (stmt) == PHI_NODE)
131 return SSA_NAME_VERSION (PHI_RESULT (stmt)) + max_stmt_uid;
133 return stmt_ann (stmt)->uid;
136 /* Calls CBCK for each index in memory reference ADDR_P. There are two
137 kinds situations handled; in each of these cases, the memory reference
138 and DATA are passed to the callback:
140 Access to an array: ARRAY_{RANGE_}REF (base, index). In this case we also
141 pass the pointer to the index to the callback.
143 Pointer dereference: INDIRECT_REF (addr). In this case we also pass the
144 pointer to addr to the callback.
146 If the callback returns false, the whole search stops and false is returned.
147 Otherwise the function returns true after traversing through the whole
148 reference *ADDR_P. */
150 bool
151 for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
153 tree *nxt, *idx;
/* Walk inward through the handled-component chain until a base object or
   a dereference terminates the traversal. */
155 for (; ; addr_p = nxt)
157 switch (TREE_CODE (*addr_p))
159 case SSA_NAME:
160 return cbck (*addr_p, addr_p, data);
162 case MISALIGNED_INDIRECT_REF:
163 case ALIGN_INDIRECT_REF:
164 case INDIRECT_REF:
165 nxt = &TREE_OPERAND (*addr_p, 0);
166 return cbck (*addr_p, nxt, data);
/* These wrappers carry no index of their own; just descend into the
   inner reference (operand 0). */
168 case BIT_FIELD_REF:
169 case VIEW_CONVERT_EXPR:
170 case ARRAY_RANGE_REF:
171 case REALPART_EXPR:
172 case IMAGPART_EXPR:
173 nxt = &TREE_OPERAND (*addr_p, 0);
174 break;
176 case COMPONENT_REF:
177 /* If the component has varying offset, it behaves like index
178 as well. */
179 idx = &TREE_OPERAND (*addr_p, 2);
180 if (*idx
181 && !cbck (*addr_p, idx, data))
182 return false;
184 nxt = &TREE_OPERAND (*addr_p, 0);
185 break;
187 case ARRAY_REF:
188 nxt = &TREE_OPERAND (*addr_p, 0);
189 if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
190 return false;
191 break;
/* Base objects and constants: no indices below this point. */
193 case VAR_DECL:
194 case PARM_DECL:
195 case STRING_CST:
196 case RESULT_DECL:
197 case VECTOR_CST:
198 case COMPLEX_CST:
199 case INTEGER_CST:
200 case REAL_CST:
201 return true;
203 default:
204 gcc_unreachable ();
209 /* If it is possible to hoist the statement STMT unconditionally,
210 returns MOVE_POSSIBLE.
211 If it is possible to hoist the statement STMT, but we must avoid making
212 it executed if it would not be executed in the original program (e.g.
213 because it may trap), return MOVE_PRESERVE_EXECUTION.
214 Otherwise return MOVE_IMPOSSIBLE. */
216 enum move_pos
217 movement_possibility (tree stmt)
219 tree lhs, rhs;
221 if (flag_unswitch_loops
222 && TREE_CODE (stmt) == COND_EXPR)
224 /* If we perform unswitching, force the operands of the invariant
225 condition to be moved out of the loop. */
226 get_stmt_operands (stmt);
228 return MOVE_POSSIBLE;
/* Only plain assignments are candidates beyond this point. */
231 if (TREE_CODE (stmt) != MODIFY_EXPR)
232 return MOVE_IMPOSSIBLE;
234 if (stmt_ends_bb_p (stmt))
235 return MOVE_IMPOSSIBLE;
237 get_stmt_operands (stmt);
239 if (stmt_ann (stmt)->has_volatile_ops)
240 return MOVE_IMPOSSIBLE;
242 lhs = TREE_OPERAND (stmt, 0);
243 if (TREE_CODE (lhs) == SSA_NAME
244 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
245 return MOVE_IMPOSSIBLE;
247 rhs = TREE_OPERAND (stmt, 1);
249 if (TREE_SIDE_EFFECTS (rhs))
250 return MOVE_IMPOSSIBLE;
/* Stores to memory (non-SSA lhs) and possibly trapping computations may
   only be moved if their execution is preserved. */
252 if (TREE_CODE (lhs) != SSA_NAME
253 || tree_could_trap_p (rhs))
254 return MOVE_PRESERVE_EXECUTION;
256 if (get_call_expr_in (stmt))
258 /* While pure or const call is guaranteed to have no side effects, we
259 cannot move it arbitrarily. Consider code like
261 char *s = something ();
263 while (1)
265 if (s)
266 t = strlen (s);
267 else
268 t = 0;
271 Here the strlen call cannot be moved out of the loop, even though
272 s is invariant. In addition to possibly creating a call with
273 invalid arguments, moving out a function call that is not executed
274 may cause performance regressions in case the call is costly and
275 not executed at all. */
276 return MOVE_PRESERVE_EXECUTION;
278 return MOVE_POSSIBLE;
281 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
282 loop to that we could move the expression using DEF if it did not have
283 other operands, i.e. the outermost loop enclosing LOOP in that the value
284 of DEF is invariant. */
286 static struct loop *
287 outermost_invariant_loop (tree def, struct loop *loop)
289 tree def_stmt;
290 basic_block def_bb;
291 struct loop *max_loop;
/* Constants and other non-SSA operands are invariant everywhere. */
293 if (TREE_CODE (def) != SSA_NAME)
294 return superloop_at_depth (loop, 1);
296 def_stmt = SSA_NAME_DEF_STMT (def);
297 def_bb = bb_for_stmt (def_stmt);
298 if (!def_bb)
299 return superloop_at_depth (loop, 1);
301 max_loop = find_common_loop (loop, def_bb->loop_father);
303 if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop)
304 max_loop = find_common_loop (max_loop,
305 LIM_DATA (def_stmt)->max_loop->outer);
/* DEF is defined in LOOP itself and cannot be hoisted out of it; the use
   is therefore not invariant in any superloop. */
306 if (max_loop == loop)
307 return NULL;
308 max_loop = superloop_at_depth (loop, max_loop->depth + 1);
310 return max_loop;
313 /* Returns the outermost superloop of LOOP in that the expression EXPR is
314 invariant. */
316 static struct loop *
317 outermost_invariant_loop_expr (tree expr, struct loop *loop)
319 enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
320 unsigned i, nops;
321 struct loop *max_loop = superloop_at_depth (loop, 1), *aloop;
323 if (TREE_CODE (expr) == SSA_NAME
324 || TREE_CODE (expr) == INTEGER_CST
325 || is_gimple_min_invariant (expr))
326 return outermost_invariant_loop (expr, loop);
/* Give up on expression classes whose operands we do not know how to
   walk recursively. */
328 if (class != tcc_unary
329 && class != tcc_binary
330 && class != tcc_expression
331 && class != tcc_comparison)
332 return NULL;
/* The result is the innermost of the levels found for the operands. */
334 nops = TREE_CODE_LENGTH (TREE_CODE (expr));
335 for (i = 0; i < nops; i++)
337 aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop);
338 if (!aloop)
339 return NULL;
341 if (flow_loop_nested_p (max_loop, aloop))
342 max_loop = aloop;
345 return max_loop;
348 /* DATA is a structure containing information associated with a statement
349 inside LOOP. DEF is one of the operands of this statement.
351 Find the outermost loop enclosing LOOP in that value of DEF is invariant
352 and record this in DATA->max_loop field. If DEF itself is defined inside
353 this loop as well (i.e. we need to hoist it out of the loop if we want
354 to hoist the statement represented by DATA), record the statement in that
355 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
356 add the cost of the computation of DEF to the DATA->cost.
358 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
360 static bool
361 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
362 bool add_cost)
364 tree def_stmt = SSA_NAME_DEF_STMT (def);
365 basic_block def_bb = bb_for_stmt (def_stmt);
366 struct loop *max_loop;
367 struct depend *dep;
/* DEF has no defining statement placed in a basic block (e.g. a default
   definition); it imposes no constraint on the motion. */
369 if (!def_bb)
370 return true;
372 max_loop = outermost_invariant_loop (def, loop);
373 if (!max_loop)
374 return false;
/* Tighten the hoisting level: we cannot move past the level of DEF. */
376 if (flow_loop_nested_p (data->max_loop, max_loop))
377 data->max_loop = max_loop;
379 if (!LIM_DATA (def_stmt))
380 return true;
382 if (add_cost
383 /* Only add the cost if the statement defining DEF is inside LOOP,
384 i.e. if it is likely that by moving the invariants dependent
385 on it, we will be able to avoid creating a new register for
386 it (since it will be only used in these dependent invariants). */
387 && def_bb->loop_father == loop)
388 data->cost += LIM_DATA (def_stmt)->cost;
390 dep = xmalloc (sizeof (struct depend));
391 dep->stmt = def_stmt;
392 dep->next = data->depends;
393 data->depends = dep;
395 return true;
398 /* Returns an estimate for a cost of statement STMT. TODO -- the values here
399 are just ad-hoc constants. The estimates should be based on target-specific
400 values. */
402 static unsigned
403 stmt_cost (tree stmt)
405 tree lhs, rhs;
406 unsigned cost = 1;
408 /* Always try to create possibilities for unswitching. */
409 if (TREE_CODE (stmt) == COND_EXPR)
410 return LIM_EXPENSIVE;
412 lhs = TREE_OPERAND (stmt, 0);
413 rhs = TREE_OPERAND (stmt, 1);
415 /* Hoisting memory references out should almost surely be a win. */
416 if (stmt_references_memory_p (stmt))
417 cost += 20;
419 switch (TREE_CODE (rhs))
421 case CALL_EXPR:
422 /* We should be hoisting calls if possible. */
424 /* Unless the call is a builtin_constant_p; this always folds to a
425 constant, so moving it is useless. */
426 rhs = get_callee_fndecl (rhs);
427 if (DECL_BUILT_IN_CLASS (rhs) == BUILT_IN_NORMAL
428 && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P)
429 return 0;
431 cost += 20;
432 break;
434 case MULT_EXPR:
435 case TRUNC_DIV_EXPR:
436 case CEIL_DIV_EXPR:
437 case FLOOR_DIV_EXPR:
438 case ROUND_DIV_EXPR:
439 case EXACT_DIV_EXPR:
440 case CEIL_MOD_EXPR:
441 case FLOOR_MOD_EXPR:
442 case ROUND_MOD_EXPR:
443 case TRUNC_MOD_EXPR:
444 /* Division and multiplication are usually expensive. */
445 cost += 20;
446 break;
448 default:
/* Cheap operations keep the base cost of 1 (plus the memory bonus). */
449 break;
452 return cost;
455 /* Determine the outermost loop to that it is possible to hoist a statement
456 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
457 the outermost loop in that the value computed by STMT is invariant.
458 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
459 we preserve the fact whether STMT is executed. It also fills other related
460 information to LIM_DATA (STMT).
462 The function returns false if STMT cannot be hoisted outside of the loop it
463 is defined in, and true otherwise. */
465 static bool
466 determine_max_movement (tree stmt, bool must_preserve_exec)
468 basic_block bb = bb_for_stmt (stmt);
469 struct loop *loop = bb->loop_father;
470 struct loop *level;
471 struct lim_aux_data *lim_data = LIM_DATA (stmt);
472 tree val;
473 ssa_op_iter iter;
/* When execution must be preserved, we may hoist at most to the level
   whose entry guarantees execution of BB. */
475 if (must_preserve_exec)
476 level = ALWAYS_EXECUTED_IN (bb);
477 else
478 level = superloop_at_depth (loop, 1);
479 lim_data->max_loop = level;
481 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
482 if (!add_dependency (val, lim_data, loop, true))
483 return false;
/* Virtual operands only restrict the level; they do not add cost
   (ADD_COST is false). */
485 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
486 if (!add_dependency (val, lim_data, loop, false))
487 return false;
489 lim_data->cost += stmt_cost (stmt);
491 return true;
494 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
495 and that one of the operands of this statement is computed by STMT.
496 Ensure that STMT (together with all the statements that define its
497 operands) is hoisted at least out of the loop LEVEL. */
499 static void
500 set_level (tree stmt, struct loop *orig_loop, struct loop *level)
502 struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father;
503 struct depend *dep;
505 stmt_loop = find_common_loop (orig_loop, stmt_loop);
506 if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop)
507 stmt_loop = find_common_loop (stmt_loop,
508 LIM_DATA (stmt)->tgt_loop->outer);
/* Already scheduled to be hoisted at least out of LEVEL; nothing to do. */
509 if (flow_loop_nested_p (stmt_loop, level))
510 return;
512 gcc_assert (LIM_DATA (stmt));
513 gcc_assert (level == LIM_DATA (stmt)->max_loop
514 || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level));
/* Propagate the requirement recursively to all dependencies. */
516 LIM_DATA (stmt)->tgt_loop = level;
517 for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next)
518 set_level (dep->stmt, orig_loop, level);
521 /* Determines an outermost loop from that we want to hoist the statement STMT.
522 For now we chose the outermost possible loop. TODO -- use profiling
523 information to set it more sanely. */
525 static void
526 set_profitable_level (tree stmt)
/* Hoist as far as the invariance analysis allows (max_loop). */
528 set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop);
531 /* Returns true if STMT is not a pure call. */
533 static bool
534 nonpure_call_p (tree stmt)
536 tree call = get_call_expr_in (stmt);
538 if (!call)
539 return false;
/* A call is treated as pure here iff its CALL_EXPR carries no
   TREE_SIDE_EFFECTS flag. */
541 return TREE_SIDE_EFFECTS (call) != 0;
544 /* Releases the memory occupied by DATA. */
546 static void
547 free_lim_aux_data (struct lim_aux_data *data)
549 struct depend *dep, *next;
/* Free the dependency list first, then the structure itself. */
551 for (dep = data->depends; dep; dep = next)
553 next = dep->next;
554 free (dep);
556 free (data);
559 /* Determine the outermost loops in that statements in basic block BB are
560 invariant, and record them to the LIM_DATA associated with the statements.
561 Callback for walk_dominator_tree. */
563 static void
564 determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
565 basic_block bb)
567 enum move_pos pos;
568 block_stmt_iterator bsi;
569 tree stmt;
/* MAYBE_NEVER: the statements in BB may not be executed on every entry of
   the loop (BB is not always executed). */
570 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
571 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
/* Nothing to do outside of loops. */
573 if (!bb->loop_father->outer)
574 return;
576 if (dump_file && (dump_flags & TDF_DETAILS))
577 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
578 bb->index, bb->loop_father->num, bb->loop_father->depth);
580 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
582 stmt = bsi_stmt (bsi);
584 pos = movement_possibility (stmt);
585 if (pos == MOVE_IMPOSSIBLE)
/* A nonpure call pins everything after it: later statements that need
   execution preserved cannot be hoisted past it. */
587 if (nonpure_call_p (stmt))
589 maybe_never = true;
590 outermost = NULL;
592 continue;
595 stmt_ann (stmt)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
596 LIM_DATA (stmt)->always_executed_in = outermost;
598 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
599 continue;
601 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
603 LIM_DATA (stmt)->max_loop = NULL;
604 continue;
607 if (dump_file && (dump_flags & TDF_DETAILS))
609 print_generic_stmt_indented (dump_file, stmt, 0, 2);
610 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
611 LIM_DATA (stmt)->max_loop->depth,
612 LIM_DATA (stmt)->cost);
615 if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE)
616 set_profitable_level (stmt);
620 /* For each statement determines the outermost loop in that it is invariant,
621 statements on whose motion it depends and the cost of the computation.
622 This information is stored to the LIM_DATA structure associated with
623 each statement. */
625 static void
626 determine_invariantness (void)
628 struct dom_walk_data walk_data;
/* Walk the dominator tree -- NOTE(review): presumably so that the LIM_DATA
   of operand definitions is computed before their uses; confirm. */
630 memset (&walk_data, 0, sizeof (struct dom_walk_data));
631 walk_data.before_dom_children_before_stmts = determine_invariantness_stmt;
633 init_walk_dominator_tree (&walk_data);
634 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
635 fini_walk_dominator_tree (&walk_data);
638 /* Commits edge insertions and updates loop structures. */
640 void
641 loop_commit_inserts (void)
643 unsigned old_last_basic_block, i;
644 basic_block bb;
646 old_last_basic_block = last_basic_block;
647 bsi_commit_edge_inserts ();
/* Blocks created by splitting the insertion edges appear at the end of the
   block array; attach each to the common loop of its neighbors. */
648 for (i = old_last_basic_block; i < (unsigned) last_basic_block; i++)
650 bb = BASIC_BLOCK (i);
651 add_bb_to_loop (bb,
652 find_common_loop (EDGE_SUCC (bb, 0)->dest->loop_father,
653 EDGE_PRED (bb, 0)->src->loop_father));
657 /* Hoist the statements in basic block BB out of the loops prescribed by
658 data stored in LIM_DATA structures associated with each statement. Callback
659 for walk_dominator_tree. */
661 static void
662 move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
663 basic_block bb)
665 struct loop *level;
666 block_stmt_iterator bsi;
667 tree stmt;
668 unsigned cost = 0;
670 if (!bb->loop_father->outer)
671 return;
673 for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
675 stmt = bsi_stmt (bsi);
677 if (!LIM_DATA (stmt))
679 bsi_next (&bsi);
680 continue;
/* Save cost/level and drop the aux data before the statement moves. */
683 cost = LIM_DATA (stmt)->cost;
684 level = LIM_DATA (stmt)->tgt_loop;
685 free_lim_aux_data (LIM_DATA (stmt));
686 stmt_ann (stmt)->common.aux = NULL;
688 if (!level)
690 bsi_next (&bsi);
691 continue;
694 /* We do not really want to move conditionals out of the loop; we just
695 placed it here to force its operands to be moved if necessary. */
696 if (TREE_CODE (stmt) == COND_EXPR)
697 continue;
699 if (dump_file && (dump_flags & TDF_DETAILS))
701 fprintf (dump_file, "Moving statement\n");
702 print_generic_stmt (dump_file, stmt, 0);
703 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
704 cost, level->num);
/* Queue the statement on the preheader edge; the insertion is committed
   later by loop_commit_inserts. */
706 bsi_insert_on_edge (loop_preheader_edge (level), stmt);
707 bsi_remove (&bsi);
711 /* Hoist the statements out of the loops prescribed by data stored in
712 LIM_DATA structures associated with each statement.*/
714 static void
715 move_computations (void)
717 struct dom_walk_data walk_data;
719 memset (&walk_data, 0, sizeof (struct dom_walk_data));
720 walk_data.before_dom_children_before_stmts = move_computations_stmt;
722 init_walk_dominator_tree (&walk_data);
723 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
724 fini_walk_dominator_tree (&walk_data);
726 loop_commit_inserts ();
/* Re-run SSA renaming for variables collected in vars_to_rename (filled
   e.g. by rewrite_mem_refs). */
727 rewrite_into_ssa (false);
728 if (!bitmap_empty_p (vars_to_rename))
730 /* The rewrite of ssa names may cause violation of loop closed ssa
731 form invariants. TODO -- avoid these rewrites completely.
732 Information in virtual phi nodes is sufficient for it. */
733 rewrite_into_loop_closed_ssa ();
735 bitmap_clear (vars_to_rename);
738 /* Checks whether the statement defining variable *INDEX can be hoisted
739 out of the loop passed in DATA. Callback for for_each_index. */
741 static bool
742 may_move_till (tree ref, tree *index, void *data)
/* DATA is the loop out of that we try to move, cast from the generic
   callback argument. */
744 struct loop *loop = data, *max_loop;
746 /* If REF is an array reference, check also that the step and the lower
747 bound is invariant in LOOP. */
748 if (TREE_CODE (ref) == ARRAY_REF)
750 tree step = array_ref_element_size (ref);
751 tree lbound = array_ref_low_bound (ref);
753 max_loop = outermost_invariant_loop_expr (step, loop);
754 if (!max_loop)
755 return false;
757 max_loop = outermost_invariant_loop_expr (lbound, loop);
758 if (!max_loop)
759 return false;
762 max_loop = outermost_invariant_loop (*index, loop);
763 if (!max_loop)
764 return false;
766 return true;
769 /* Forces statements defining (invariant) SSA names in expression EXPR to be
770 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
772 static void
773 force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop)
775 enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
776 unsigned i, nops;
778 if (TREE_CODE (expr) == SSA_NAME)
780 tree stmt = SSA_NAME_DEF_STMT (expr);
/* An empty defining statement (default definition) has nothing to hoist. */
781 if (IS_EMPTY_STMT (stmt))
782 return;
784 set_level (stmt, orig_loop, loop);
785 return;
788 if (class != tcc_unary
789 && class != tcc_binary
790 && class != tcc_expression
791 && class != tcc_comparison)
792 return;
/* Recurse into the operands of the expression. */
794 nops = TREE_CODE_LENGTH (TREE_CODE (expr));
795 for (i = 0; i < nops; i++)
796 force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop);
799 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
800 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
801 for_each_index. */
803 struct fmt_data
/* The loop out of that the invariants are moved. */
805 struct loop *loop;
/* The loop in that the reference is used. */
806 struct loop *orig_loop;
809 static bool
810 force_move_till (tree ref, tree *index, void *data)
812 tree stmt;
813 struct fmt_data *fmt_data = data;
/* For array references, the step and lower bound must be hoisted too. */
815 if (TREE_CODE (ref) == ARRAY_REF)
817 tree step = array_ref_element_size (ref);
818 tree lbound = array_ref_low_bound (ref);
820 force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop);
821 force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop);
824 if (TREE_CODE (*index) != SSA_NAME)
825 return true;
827 stmt = SSA_NAME_DEF_STMT (*index);
828 if (IS_EMPTY_STMT (stmt))
829 return true;
831 set_level (stmt, fmt_data->orig_loop, fmt_data->loop);
833 return true;
836 /* Records memory reference *REF (that occurs in statement STMT)
837 to the list MEM_REFS. */
839 static void
840 record_mem_ref (struct mem_ref **mem_refs, tree stmt, tree *ref)
842 struct mem_ref *aref = xmalloc (sizeof (struct mem_ref));
844 aref->stmt = stmt;
845 aref->ref = ref;
/* Prepend; the list is thus in reverse order of discovery. */
847 aref->next = *mem_refs;
848 *mem_refs = aref;
851 /* Releases list of memory references MEM_REFS. */
853 static void
854 free_mem_refs (struct mem_ref *mem_refs)
856 struct mem_ref *act;
/* Advance before freeing so the link is not read from freed memory. */
858 while (mem_refs)
860 act = mem_refs;
861 mem_refs = mem_refs->next;
862 free (act);
866 /* If VAR is defined in LOOP and the statement it is defined in does not belong
867 to the set SEEN, add the statement to QUEUE of length IN_QUEUE and
868 to the set SEEN. */
870 static void
871 maybe_queue_var (tree var, struct loop *loop,
872 sbitmap seen, tree *queue, unsigned *in_queue)
874 tree stmt = SSA_NAME_DEF_STMT (var);
875 basic_block def_bb = bb_for_stmt (stmt);
/* Skip definitions outside of LOOP and statements already processed. */
877 if (!def_bb
878 || !flow_bb_inside_loop_p (loop, def_bb)
879 || TEST_BIT (seen, get_stmt_uid (stmt)))
880 return;
882 SET_BIT (seen, get_stmt_uid (stmt));
883 queue[(*in_queue)++] = stmt;
886 /* If COMMON_REF is NULL, set COMMON_REF to *OP and return true.
887 Otherwise return true if the memory reference *OP is equal to COMMON_REF.
888 Record the reference OP to list MEM_REFS. STMT is the statement in that
889 the reference occurs. */
891 struct sra_data
/* List to that the occurrences of the reference are recorded. */
893 struct mem_ref **mem_refs;
/* The single reference all occurrences must be equal to. */
894 tree common_ref;
/* The statement currently being examined. */
895 tree stmt;
898 static bool
899 fem_single_reachable_address (tree *op, void *data)
901 struct sra_data *sra_data = data;
903 if (sra_data->common_ref
904 && !operand_equal_p (*op, sra_data->common_ref, 0))
905 return false;
906 sra_data->common_ref = *op;
908 record_mem_ref (sra_data->mem_refs, sra_data->stmt, op);
909 return true;
912 /* Runs CALLBACK for each operand of STMT that is a memory reference. DATA
913 is passed to the CALLBACK as well. The traversal stops if CALLBACK
914 returns false, for_each_memref then returns false as well. Otherwise
915 for_each_memref returns true. */
917 static bool
918 for_each_memref (tree stmt, bool (*callback)(tree *, void *), void *data)
920 tree *op;
/* A RETURN_EXPR may wrap a MODIFY_EXPR in its operand; unwrap it. */
922 if (TREE_CODE (stmt) == RETURN_EXPR)
923 stmt = TREE_OPERAND (stmt, 1);
925 if (TREE_CODE (stmt) == MODIFY_EXPR)
927 op = &TREE_OPERAND (stmt, 0);
928 if (TREE_CODE (*op) != SSA_NAME
929 && !callback (op, data))
930 return false;
932 op = &TREE_OPERAND (stmt, 1);
933 if (TREE_CODE (*op) != SSA_NAME
934 && is_gimple_lvalue (*op)
935 && !callback (op, data))
936 return false;
/* Continue with the rhs: it may be a call whose arguments contain
   further memory references. */
938 stmt = TREE_OPERAND (stmt, 1);
941 if (TREE_CODE (stmt) == WITH_SIZE_EXPR)
942 stmt = TREE_OPERAND (stmt, 0);
944 if (TREE_CODE (stmt) == CALL_EXPR)
946 tree args;
948 for (args = TREE_OPERAND (stmt, 1); args; args = TREE_CHAIN (args))
950 op = &TREE_VALUE (args);
952 if (TREE_CODE (*op) != SSA_NAME
953 && is_gimple_lvalue (*op)
954 && !callback (op, data))
955 return false;
959 return true;
962 /* Determine whether all memory references inside the LOOP that correspond
963 to virtual ssa names defined in statement STMT are equal.
964 If so, store the list of the references to MEM_REFS, and return one
965 of them. Otherwise store NULL to MEM_REFS and return NULL_TREE.
966 *SEEN_CALL_STMT is set to true if the virtual operands suggest
967 that the reference might be clobbered by a call inside the LOOP. */
969 static tree
970 single_reachable_address (struct loop *loop, tree stmt,
971 struct mem_ref **mem_refs,
972 bool *seen_call_stmt)
/* Uids of PHI nodes follow the ordinary statement uids (see get_stmt_uid),
   hence the queue and bitmap are sized by their sum. */
974 unsigned max_uid = max_stmt_uid + num_ssa_names;
975 tree *queue = xmalloc (sizeof (tree) * max_uid);
976 sbitmap seen = sbitmap_alloc (max_uid);
977 unsigned in_queue = 1;
978 dataflow_t df;
979 unsigned i, n;
980 struct sra_data sra_data;
981 tree call;
982 tree val;
983 ssa_op_iter iter;
985 sbitmap_zero (seen);
987 *mem_refs = NULL;
988 sra_data.mem_refs = mem_refs;
989 sra_data.common_ref = NULL_TREE;
991 queue[0] = stmt;
992 SET_BIT (seen, get_stmt_uid (stmt));
993 *seen_call_stmt = false;
/* Worklist traversal over the def-use web of the virtual operands. */
995 while (in_queue)
997 stmt = queue[--in_queue];
998 sra_data.stmt = stmt;
1000 if (LIM_DATA (stmt)
1001 && LIM_DATA (stmt)->sm_done)
1002 goto fail;
1004 switch (TREE_CODE (stmt))
1006 case MODIFY_EXPR:
1007 case CALL_EXPR:
1008 case RETURN_EXPR:
1009 if (!for_each_memref (stmt, fem_single_reachable_address,
1010 &sra_data))
1011 goto fail;
1013 /* If this is a function that may depend on the memory location,
1014 record the fact. We cannot directly refuse call clobbered
1015 operands here, since sra_data.common_ref did not have
1016 to be set yet. */
1017 call = get_call_expr_in (stmt);
1018 if (call
1019 && !(call_expr_flags (call) & ECF_CONST))
1020 *seen_call_stmt = true;
1022 /* Traverse also definitions of the VUSES (there may be other
1023 distinct from the one we used to get to this statement). */
1024 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES)
1025 maybe_queue_var (val, loop, seen, queue, &in_queue);
1027 break;
1029 case PHI_NODE:
1030 for (i = 0; i < (unsigned) PHI_NUM_ARGS (stmt); i++)
1031 if (TREE_CODE (PHI_ARG_DEF (stmt, i)) == SSA_NAME)
1032 maybe_queue_var (PHI_ARG_DEF (stmt, i), loop,
1033 seen, queue, &in_queue);
1034 break;
1036 default:
1037 goto fail;
1040 /* Find uses of virtual names. */
1041 df = get_immediate_uses (stmt);
1042 n = num_immediate_uses (df);
1044 for (i = 0; i < n; i++)
1046 stmt = immediate_use (df, i);
1048 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (stmt)))
1049 continue;
1051 if (TEST_BIT (seen, get_stmt_uid (stmt)))
1052 continue;
1053 SET_BIT (seen, get_stmt_uid (stmt));
1055 queue[in_queue++] = stmt;
1059 free (queue);
1060 sbitmap_free (seen);
1062 return sra_data.common_ref;
/* Conflicting references (or an unhandled statement) were found; discard
   everything gathered so far. */
1064 fail:
1065 free_mem_refs (*mem_refs);
1066 *mem_refs = NULL;
1067 free (queue);
1068 sbitmap_free (seen);
1070 return NULL;
1073 /* Rewrites memory references in list MEM_REFS by variable TMP_VAR. */
1075 static void
1076 rewrite_mem_refs (tree tmp_var, struct mem_ref *mem_refs)
1078 tree var;
1079 ssa_op_iter iter;
1081 for (; mem_refs; mem_refs = mem_refs->next)
/* All virtual operands of the rewritten statement must be renamed; mark
   their base variables in vars_to_rename. */
1083 FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter, SSA_OP_ALL_VIRTUALS)
1085 var = SSA_NAME_VAR (var);
1086 bitmap_set_bit (vars_to_rename, var_ann (var)->uid);
1089 *mem_refs->ref = tmp_var;
1090 modify_stmt (mem_refs->stmt);
1094 /* Records request for store motion of memory reference REF from LOOP.
1095 MEM_REFS is the list of occurrences of the reference REF inside LOOP;
1096 these references are rewritten by a new temporary variable.
1097 Exits from the LOOP are stored in EXITS, there are N_EXITS of them.
1098 The initialization of the temporary variable is put to the preheader
1099 of the loop, and assignments to the reference from the temporary variable
1100 are emitted to exits. */
1102 static void
1103 schedule_sm (struct loop *loop, edge *exits, unsigned n_exits, tree ref,
1104 struct mem_ref *mem_refs)
1106 struct mem_ref *aref;
1107 tree tmp_var;
1108 unsigned i;
1109 tree load, store;
1110 struct fmt_data fmt_data;
1112 if (dump_file && (dump_flags & TDF_DETAILS))
1114 fprintf (dump_file, "Executing store motion of ");
1115 print_generic_expr (dump_file, ref, 0);
1116 fprintf (dump_file, " from loop %d\n", loop->num);
1119 tmp_var = make_rename_temp (TREE_TYPE (ref), "lsm_tmp");
/* Hoist everything the address of REF depends on out of LOOP first. */
1121 fmt_data.loop = loop;
1122 fmt_data.orig_loop = loop;
1123 for_each_index (&ref, force_move_till, &fmt_data);
1125 rewrite_mem_refs (tmp_var, mem_refs);
1126 for (aref = mem_refs; aref; aref = aref->next)
1127 if (LIM_DATA (aref->stmt))
1128 LIM_DATA (aref->stmt)->sm_done = true;
1130 /* Emit the load & stores. */
1131 load = build (MODIFY_EXPR, void_type_node, tmp_var, ref);
1132 get_stmt_ann (load)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
1133 LIM_DATA (load)->max_loop = loop;
1134 LIM_DATA (load)->tgt_loop = loop;
1136 /* Put this into the latch, so that we are sure it will be processed after
1137 all dependencies. */
1138 bsi_insert_on_edge (loop_latch_edge (loop), load);
/* Store the temporary back to the reference on every exit edge. */
1140 for (i = 0; i < n_exits; i++)
1142 store = build (MODIFY_EXPR, void_type_node,
1143 unshare_expr (ref), tmp_var);
1144 bsi_insert_on_edge (exits[i], store);
1148 /* Returns true if REF may be clobbered by calls. */
1150 static bool
1151 is_call_clobbered_ref (tree ref)
1153 tree base;
/* No identifiable base -- conservatively assume it may be clobbered. */
1155 base = get_base_address (ref);
1156 if (!base)
1157 return true;
1159 if (DECL_P (base))
1160 return is_call_clobbered (base);
1162 if (INDIRECT_REF_P (base))
1164 /* Check whether the alias tags associated with the pointer
1165 are call clobbered. */
1166 tree ptr = TREE_OPERAND (base, 0);
1167 struct ptr_info_def *pi = SSA_NAME_PTR_INFO (ptr);
1168 tree nmt = (pi) ? pi->name_mem_tag : NULL_TREE;
1169 tree tmt = var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
1171 if ((nmt && is_call_clobbered (nmt))
1172 || (tmt && is_call_clobbered (tmt)))
1173 return true;
1175 return false;
/* The base must be either a declaration or a dereference. */
1178 gcc_unreachable ();
1181 /* Determine whether all memory references inside LOOP corresponding to the
1182 virtual ssa name REG are equal to each other, and whether the address of
1183 this common reference can be hoisted outside of the loop. If this is true,
1184 prepare the statements that load the value of the memory reference to a
1185 temporary variable in the loop preheader, store it back on the loop exits,
1186 and replace all the references inside LOOP by this temporary variable.
1187 LOOP has N_EXITS stored in EXITS. */
1189 static void
1190 determine_lsm_reg (struct loop *loop, edge *exits, unsigned n_exits, tree reg)
1192 tree ref;
1193 struct mem_ref *mem_refs, *aref;
1194 struct loop *must_exec;
1195 bool sees_call;
/* Only virtual ssa names are interesting for store motion. */
1197 if (is_gimple_reg (reg))
1198 return;
1200 ref = single_reachable_address (loop, SSA_NAME_DEF_STMT (reg), &mem_refs,
1201 &sees_call);
1202 if (!ref)
1203 return;
1205 /* If we cannot create a ssa name for the result, give up. */
1206 if (!is_gimple_reg_type (TREE_TYPE (ref))
1207 || TREE_THIS_VOLATILE (ref))
1208 goto fail;
1210 /* If there is a call that may use the location, give up as well. */
1211 if (sees_call
1212 && is_call_clobbered_ref (ref))
1213 goto fail;
1215 if (!for_each_index (&ref, may_move_till, loop))
1216 goto fail;
1218 if (tree_could_trap_p (ref))
1220 /* If the memory access is unsafe (i.e. it might trap), ensure that some
1221 of the statements in that it occurs is always executed when the loop
1222 is entered. This way we know that by moving the load from the
1223 reference out of the loop we will not cause the error that would not
1224 occur otherwise.
1226 TODO -- in fact we would like to check for anticipability of the
1227 reference, i.e. that on each path from loop entry to loop exit at
1228 least one of the statements containing the memory reference is
1229 executed. */
1231 for (aref = mem_refs; aref; aref = aref->next)
1233 if (!LIM_DATA (aref->stmt))
1234 continue;
1236 must_exec = LIM_DATA (aref->stmt)->always_executed_in;
1237 if (!must_exec)
1238 continue;
1240 if (must_exec == loop
1241 || flow_loop_nested_p (must_exec, loop))
1242 break;
/* No always-executed occurrence found; hoisting could introduce a trap. */
1245 if (!aref)
1246 goto fail;
1249 schedule_sm (loop, exits, n_exits, ref, mem_refs);
1251 fail: ;
1252 free_mem_refs (mem_refs);
1255 /* Checks whether LOOP (with N_EXITS exits stored in EXITS array) is suitable
1256 for a store motion optimization (i.e. whether we can insert statement
1257 on its exits). */
1259 static bool
1260 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED, edge *exits,
1261 unsigned n_exits)
1263 unsigned i;
1265 for (i = 0; i < n_exits; i++)
1266 if (exits[i]->flags & EDGE_ABNORMAL)
1267 return false;
1269 return true;
1272 /* Try to perform store motion for all memory references modified inside
1273 LOOP. */
1275 static void
1276 determine_lsm_loop (struct loop *loop)
1278 tree phi;
1279 unsigned n_exits;
1280 edge *exits = get_loop_exit_edges (loop, &n_exits);
1282 if (!loop_suitable_for_sm (loop, exits, n_exits))
1284 free (exits);
1285 return;
1288 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1289 determine_lsm_reg (loop, exits, n_exits, PHI_RESULT (phi));
1291 free (exits);
/* Try to perform store motion for all memory references modified inside
   any of LOOPS.  */

static void
determine_lsm (struct loops *loops)
{
  struct loop *loop;
  basic_block bb;

  /* Nothing to do when the function has no loops.  */
  if (!loops->tree_root->inner)
    return;

  /* Create a UID for each statement in the function.  Ordering of the
     UIDs is not important for this pass.  */
  max_stmt_uid = 0;
  FOR_EACH_BB (bb)
    {
      block_stmt_iterator bsi;

      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	stmt_ann (bsi_stmt (bsi))->uid = max_stmt_uid++;
    }

  compute_immediate_uses (TDFA_USE_VOPS, NULL);

  /* Pass the loops from the outermost.  For each virtual operand loop phi node
     check whether all the references inside the loop correspond to a single
     address, and if so, move them.  */

  loop = loops->tree_root->inner;
  while (1)
    {
      determine_lsm_loop (loop);

      /* Pre-order walk of the loop tree: descend into an inner loop
	 first...  */
      if (loop->inner)
	{
	  loop = loop->inner;
	  continue;
	}
      /* ...otherwise advance to the next sibling, climbing back towards
	 the root while there is none.  Reaching the root means every loop
	 has been processed; clean up and commit the edge insertions made
	 by schedule_sm before returning.  */
      while (!loop->next)
	{
	  loop = loop->outer;
	  if (loop == loops->tree_root)
	    {
	      free_df ();
	      loop_commit_inserts ();
	      return;
	    }
	}
      loop = loop->next;
    }
}
/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
   for each such basic block bb records the outermost loop for that execution
   of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
   blocks that contain a nonpure call.  The result is stored in bb->aux.  */

static void
fill_always_executed_in (struct loop *loop, sbitmap contains_call)
{
  basic_block bb = NULL, *bbs, last = NULL;
  unsigned i;
  edge e;
  struct loop *inn_loop = loop;

  /* header->aux already set means an outer invocation has recorded a
     stronger (outer) always-executed loop for this body; keep it.  */
  if (!loop->header->aux)
    {
      bbs = get_loop_body_in_dom_order (loop);

      for (i = 0; i < loop->num_nodes; i++)
	{
	  edge_iterator ei;
	  bb = bbs[i];

	  /* LAST tracks the deepest block (in dominator order) that
	     dominates the latch, i.e. is executed on every iteration.  */
	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	    last = bb;

	  /* Stop at nonpure calls -- NOTE(review): presumably because such
	     a call might not return, so later blocks are not guaranteed to
	     execute; confirm against nonpure_call_p.  */
	  if (TEST_BIT (contains_call, bb->index))
	    break;

	  /* Stop if the block may leave the loop; E is nonnull after the
	     loop iff an exit edge was found.  */
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    if (!flow_bb_inside_loop_p (loop, e->dest))
	      break;
	  if (e)
	    break;

	  /* A loop might be infinite (TODO use simple loop analysis
	     to disprove this if possible).  */
	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
	    break;

	  /* Once we left a previously entered inner loop, stop.  */
	  if (!flow_bb_inside_loop_p (inn_loop, bb))
	    break;

	  if (bb->loop_father->header == bb)
	    {
	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
		break;

	      /* In a loop that is always entered we may proceed anyway.
		 But record that we entered it and stop once we leave it.  */
	      inn_loop = bb->loop_father;
	    }
	}

      /* Mark LAST and all its dominators up to the header as always
	 executed in LOOP.  (LAST is never NULL: the header is bbs[0] and
	 dominates the latch.)  */
      while (1)
	{
	  last->aux = loop;
	  if (last == loop->header)
	    break;
	  last = get_immediate_dominator (CDI_DOMINATORS, last);
	}

      free (bbs);
    }

  /* Recurse into the subloops.  */
  for (loop = loop->inner; loop; loop = loop->next)
    fill_always_executed_in (loop, contains_call);
}
1415 /* Compute the global information needed by the loop invariant motion pass.
1416 LOOPS is the loop tree. */
1418 static void
1419 tree_ssa_lim_initialize (struct loops *loops)
1421 sbitmap contains_call = sbitmap_alloc (last_basic_block);
1422 block_stmt_iterator bsi;
1423 struct loop *loop;
1424 basic_block bb;
1426 sbitmap_zero (contains_call);
1427 FOR_EACH_BB (bb)
1429 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1431 if (nonpure_call_p (bsi_stmt (bsi)))
1432 break;
1435 if (!bsi_end_p (bsi))
1436 SET_BIT (contains_call, bb->index);
1439 for (loop = loops->tree_root->inner; loop; loop = loop->next)
1440 fill_always_executed_in (loop, contains_call);
1442 sbitmap_free (contains_call);
1445 /* Cleans up after the invariant motion pass. */
1447 static void
1448 tree_ssa_lim_finalize (void)
1450 basic_block bb;
1452 FOR_EACH_BB (bb)
1454 bb->aux = NULL;
/* Moves invariants from LOOPS.  Only "expensive" invariants are moved out --
   i.e. those that are likely to be win regardless of the register pressure.
   This is the entry point of the pass; it runs the analysis, store motion
   and code motion phases in order, then cleans up.  */

void
tree_ssa_lim (struct loops *loops)
{
  tree_ssa_lim_initialize (loops);

  /* For each statement determine the outermost loop in that it is
     invariant and cost for computing the invariant.  */
  determine_invariantness ();

  /* For each memory reference determine whether it is possible to hoist it
     out of the loop.  Force the necessary invariants to be moved out of the
     loops as well.  */
  determine_lsm (loops);

  /* Move the expressions that are expensive enough.  */
  move_computations ();

  tree_ssa_lim_finalize ();
}