/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

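/* For illustration only (a hedged sketch of the transformation, not code
   used by the pass): a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   addresses A through a[i], i.e. *(a + 4 * i) for 4-byte elements.  After
   strength reduction the pass may select a single pointer iv instead:

     for (p = a; p < a + n; p++)
       *p = 0;

   eliminating the original counter when it is used only for indexing and
   for the exit test.  */
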
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction
			   variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  unsigned cost;	/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

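/* Illustration (a hedged sketch, not part of the pass): costs combine both
   fields, so of two alternatives with equal runtime cost, e.g. {4, 1} and
   {4, 2}, the one with the lower complexity is preferred; infinite_cost
   marks an alternative that must never be chosen.  */
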
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};

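/* Illustration (a hedged sketch, not part of the pass):

     loop:
       ... uses of the iv ...
       ivtmp = ivtmp + step;    <-- IP_NORMAL: just before the exit test
       if (ivtmp < bound) goto loop;
     latch:                     <-- IP_END: increment placed here instead
       goto loop;

   IP_NORMAL makes the updated value available to the exit compare, while
   IP_END leaves the body alone and bumps the iv in the latch block.  */
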
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);
  gimple_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
    {
      if (gsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (gsi_stmt (bsi) == stmt)
	return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && integer_zerop (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	niter = desc.niter;
      else
	niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

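/* Illustration (a hedged sketch, not part of the pass): for

     int a[100];
     ... &a[i + 4] ...

   the ADDR_EXPR case strips the indexing via get_base_address and returns
   &a converted to ptr_type_node as the base object, while a pointer of
   unknown origin, e.g. one loaded from memory, falls through to the
   default case and serves as its own base object.  */
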
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

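/* Illustration (a hedged sketch, not part of the pass): for a loop whose
   header contains

     i_1 = PHI <0 (preheader), i_2 (latch)>
     ...
     i_2 = i_1 + 1;

   simple_iv recognizes i_1 as the affine iv {0, +, 1}, so this function
   returns the step 1; a phi whose value does not evolve, or evolves
   non-linearly, yields NULL_TREE instead.  */
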
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

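/* Illustration (a hedged sketch, not part of the pass): in

     n_1 = m_2 + 1;          <-- defined before the loop
     loop:
       ...
       i_3 = i_2 + n_1;

   expr_invariant_in_loop_p returns true for n_1, whose defining statement
   lies outside the loop, and false for i_2, whose phi node sits in the
   loop header.  Constants are trivially invariant.  */
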
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX We produce for a base of *D42 with iv->base being &x[0]
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

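/* Illustration (a hedged sketch, not part of the pass): for an access
   a[2 * i] where A holds 4-byte ints and i has step 1, the index iv has
   step 2 and the element size is 4, so the callback adds 2 * 4 = 8 bytes
   to DTA->step while rewriting the index to its initial value; the whole
   reference can later be rebuilt as base + 8 * iteration.  */
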
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}

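/* Illustration (a hedged sketch, not part of the pass): with TOP being the
   tree for x * 12 and BOT the tree for x, the MULT_EXPR case recurses on x
   (giving 1) and multiplies by 12, so MUL becomes 12.  With TOP = 8 and
   BOT = 4, the INTEGER_CST case divides exactly and stores 2; with TOP = 9
   and BOT = 4 the nonzero remainder makes it return false.  */
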
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      double_int mul;
      tree al = build_int_cst (TREE_TYPE (step),
			       GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);

      if (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0)
	return true;

      if (!constant_multiple_of (step, al, &mul))
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    *ref = fold_indirect_ref (*ref);
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

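/* Illustration (a hedged sketch, not part of the pass): for the address
   &a[i + 3] with 4-byte elements, strip_offset peels off the constant part
   of the index, returning &a[i] and storing 3 * 4 = 12 in *OFFSET, so the
   constant can later be folded into the addressing mode instead of being
   carried in the iv base.  */
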
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

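/* Illustration (a hedged sketch, not part of the pass): a candidate whose
   base has type int is rewritten in unsigned int, where wrap-around is well
   defined, and a candidate with a pointer base gets the corresponding
   unsigned integer type; an already unsigned type is returned unchanged.  */
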
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}

2222 /* Adds standard iv candidates. */
2224 static void
2225 add_standard_iv_candidates (struct ivopts_data *data)
2227 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2229 /* The same for a double-integer type if it is still fast enough. */
2230 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2231 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2235 /* Adds candidates based on the old induction variable IV. */
2237 static void
2238 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2240 gimple phi;
2241 tree def;
2242 struct iv_cand *cand;
2244 add_candidate (data, iv->base, iv->step, true, NULL);
2246 /* The same, but with initial value zero. */
2247 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2248 add_candidate (data, size_int (0), iv->step, true, NULL);
2249 else
2250 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2251 iv->step, true, NULL);
2253 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2254 if (gimple_code (phi) == GIMPLE_PHI)
2256 /* Additionally record the possibility of leaving the original iv
2257 untouched. */
2258 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2259 cand = add_candidate_1 (data,
2260 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2261 SSA_NAME_DEF_STMT (def));
2262 cand->var_before = iv->ssa_name;
2263 cand->var_after = def;
2267 /* Adds candidates based on the old induction variables. */
2269 static void
2270 add_old_ivs_candidates (struct ivopts_data *data)
2272 unsigned i;
2273 struct iv *iv;
2274 bitmap_iterator bi;
2276 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2278 iv = ver_info (data, i)->iv;
2279 if (iv && iv->biv_p && !integer_zerop (iv->step))
2280 add_old_iv_candidates (data, iv);
2284 /* Adds candidates based on the value of the induction variable IV and USE. */
2286 static void
2287 add_iv_value_candidates (struct ivopts_data *data,
2288 struct iv *iv, struct iv_use *use)
2290 unsigned HOST_WIDE_INT offset;
2291 tree base;
2292 tree basetype;
2294 add_candidate (data, iv->base, iv->step, false, use);
2296 /* The same, but with initial value zero. Make such a variable important,
2297 since it is generic enough that possibly many uses may be based
2298 on it. */
2299 basetype = TREE_TYPE (iv->base);
2300 if (POINTER_TYPE_P (basetype))
2301 basetype = sizetype;
2302 add_candidate (data, build_int_cst (basetype, 0),
2303 iv->step, true, use);
2305 /* Third, try removing the constant offset. Make sure to even
2306 add a candidate for &a[0] vs. (T *)&a. */
2307 base = strip_offset (iv->base, &offset);
2308 if (offset
2309 || base != iv->base)
2310 add_candidate (data, base, iv->step, false, use);
2313 /* Adds candidates based on the uses. */
2315 static void
2316 add_derived_ivs_candidates (struct ivopts_data *data)
2318 unsigned i;
2320 for (i = 0; i < n_iv_uses (data); i++)
2322 struct iv_use *use = iv_use (data, i);
2324 if (!use)
2325 continue;
2327 switch (use->type)
2329 case USE_NONLINEAR_EXPR:
2330 case USE_COMPARE:
2331 case USE_ADDRESS:
2332 /* Just add the ivs based on the value of the iv used here. */
2333 add_iv_value_candidates (data, use->iv, use);
2334 break;
2336 default:
2337 gcc_unreachable ();
2342 /* Record important candidates and add them to related_cands bitmaps
2343 if needed. */
2345 static void
2346 record_important_candidates (struct ivopts_data *data)
2348 unsigned i;
2349 struct iv_use *use;
2351 for (i = 0; i < n_iv_cands (data); i++)
2353 struct iv_cand *cand = iv_cand (data, i);
2355 if (cand->important)
2356 bitmap_set_bit (data->important_candidates, i);
2359 data->consider_all_candidates = (n_iv_cands (data)
2360 <= CONSIDER_ALL_CANDIDATES_BOUND);
2362 if (data->consider_all_candidates)
2364 /* We will not need "related_cands" bitmaps in this case,
2365 so release them to decrease peak memory consumption. */
2366 for (i = 0; i < n_iv_uses (data); i++)
2368 use = iv_use (data, i);
2369 BITMAP_FREE (use->related_cands);
2372 else
2374 /* Add important candidates to the related_cands bitmaps. */
2375 for (i = 0; i < n_iv_uses (data); i++)
2376 bitmap_ior_into (iv_use (data, i)->related_cands,
2377 data->important_candidates);
2381 /* Finds the candidates for the induction variables. */
2383 static void
2384 find_iv_candidates (struct ivopts_data *data)
2386 /* Add commonly used ivs. */
2387 add_standard_iv_candidates (data);
2389 /* Add old induction variables. */
2390 add_old_ivs_candidates (data);
2392 /* Add induction variables derived from uses. */
2393 add_derived_ivs_candidates (data);
2395 /* Record the important candidates. */
2396 record_important_candidates (data);
2399 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2400 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2401 we allocate a simple list to every use. */
2403 static void
2404 alloc_use_cost_map (struct ivopts_data *data)
2406 unsigned i, size, s, j;
2408 for (i = 0; i < n_iv_uses (data); i++)
2410 struct iv_use *use = iv_use (data, i);
2411 bitmap_iterator bi;
2413 if (data->consider_all_candidates)
2414 size = n_iv_cands (data);
2415 else
2417 s = 0;
2418 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2420 s++;
2423 /* Round up to a power of two, so that taking the modulus by it is fast. */
2424 for (size = 1; size < s; size <<= 1)
2425 continue;
2428 use->n_map_members = size;
2429 use->cost_map = XCNEWVEC (struct cost_pair, size);
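/* Illustrative example (editorial, invented values): a use with 5 related
   candidates gets n_map_members = 8, the smallest power of two not less
   than 5, so that a candidate id can be reduced into the table with a
   single mask, id & (8 - 1), instead of a division.  */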
2433 /* Returns description of computation cost of expression whose runtime
2434 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2436 static comp_cost
2437 new_cost (unsigned runtime, unsigned complexity)
2439 comp_cost cost;
2441 cost.cost = runtime;
2442 cost.complexity = complexity;
2444 return cost;
2447 /* Adds costs COST1 and COST2. */
2449 static comp_cost
2450 add_costs (comp_cost cost1, comp_cost cost2)
2452 cost1.cost += cost2.cost;
2453 cost1.complexity += cost2.complexity;
2455 return cost1;
2457 /* Subtracts cost COST2 from COST1. */
2459 static comp_cost
2460 sub_costs (comp_cost cost1, comp_cost cost2)
2462 cost1.cost -= cost2.cost;
2463 cost1.complexity -= cost2.complexity;
2465 return cost1;
2468 /* Returns a negative number if COST1 < COST2, a positive number if
2469 COST1 > COST2, and 0 if COST1 = COST2. */
2471 static int
2472 compare_costs (comp_cost cost1, comp_cost cost2)
2474 if (cost1.cost == cost2.cost)
2475 return cost1.complexity - cost2.complexity;
2477 return cost1.cost - cost2.cost;
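/* In other words, costs compare lexicographically: the runtime cost is
   decisive and complexity only breaks ties.  For instance (illustrative
   values), {cost 4, complexity 2} compares cheaper than {cost 5,
   complexity 0}, but more expensive than {cost 4, complexity 1}.  */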
2480 /* Returns true if COST is infinite. */
2482 static bool
2483 infinite_cost_p (comp_cost cost)
2485 return cost.cost == INFTY;
2488 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2489 on invariants DEPENDS_ON and that the value used in expressing it
2490 is VALUE. */
2492 static void
2493 set_use_iv_cost (struct ivopts_data *data,
2494 struct iv_use *use, struct iv_cand *cand,
2495 comp_cost cost, bitmap depends_on, tree value)
2497 unsigned i, s;
2499 if (infinite_cost_p (cost))
2501 BITMAP_FREE (depends_on);
2502 return;
2505 if (data->consider_all_candidates)
2507 use->cost_map[cand->id].cand = cand;
2508 use->cost_map[cand->id].cost = cost;
2509 use->cost_map[cand->id].depends_on = depends_on;
2510 use->cost_map[cand->id].value = value;
2511 return;
2514 /* n_map_members is a power of two, so this computes modulo. */
2515 s = cand->id & (use->n_map_members - 1);
2516 for (i = s; i < use->n_map_members; i++)
2517 if (!use->cost_map[i].cand)
2518 goto found;
2519 for (i = 0; i < s; i++)
2520 if (!use->cost_map[i].cand)
2521 goto found;
2523 gcc_unreachable ();
2525 found:
2526 use->cost_map[i].cand = cand;
2527 use->cost_map[i].cost = cost;
2528 use->cost_map[i].depends_on = depends_on;
2529 use->cost_map[i].value = value;
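/* The lookup scheme above is plain linear probing with wrap-around: start
   at the masked candidate id, scan to the end of the table, then continue
   from slot 0.  The table cannot overflow, since a use stores at most one
   entry per related candidate and n_map_members was allocated to be at
   least that large; hence the gcc_unreachable.  */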
2532 /* Gets cost of (USE, CANDIDATE) pair. */
2534 static struct cost_pair *
2535 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2536 struct iv_cand *cand)
2538 unsigned i, s;
2539 struct cost_pair *ret;
2541 if (!cand)
2542 return NULL;
2544 if (data->consider_all_candidates)
2546 ret = use->cost_map + cand->id;
2547 if (!ret->cand)
2548 return NULL;
2550 return ret;
2553 /* n_map_members is a power of two, so this computes modulo. */
2554 s = cand->id & (use->n_map_members - 1);
2555 for (i = s; i < use->n_map_members; i++)
2556 if (use->cost_map[i].cand == cand)
2557 return use->cost_map + i;
2559 for (i = 0; i < s; i++)
2560 if (use->cost_map[i].cand == cand)
2561 return use->cost_map + i;
2563 return NULL;
2566 /* Returns estimate on cost of computing SEQ. */
2568 static unsigned
2569 seq_cost (rtx seq, bool speed)
2571 unsigned cost = 0;
2572 rtx set;
2574 for (; seq; seq = NEXT_INSN (seq))
2576 set = single_set (seq);
2577 if (set)
2578 cost += rtx_cost (set, SET, speed);
2579 else
2580 cost++;
2583 return cost;
2586 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2587 static rtx
2588 produce_memory_decl_rtl (tree obj, int *regno)
2590 rtx x;
2592 gcc_assert (obj);
2593 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2595 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2596 x = gen_rtx_SYMBOL_REF (Pmode, name);
2597 SET_SYMBOL_REF_DECL (x, obj);
2598 x = gen_rtx_MEM (DECL_MODE (obj), x);
2599 targetm.encode_section_info (obj, x, true);
2601 else
2603 x = gen_raw_REG (Pmode, (*regno)++);
2604 x = gen_rtx_MEM (DECL_MODE (obj), x);
2607 return x;
2610 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2611 walk_tree. DATA contains the actual fake register number. */
2613 static tree
2614 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2616 tree obj = NULL_TREE;
2617 rtx x = NULL_RTX;
2618 int *regno = (int *) data;
2620 switch (TREE_CODE (*expr_p))
2622 case ADDR_EXPR:
2623 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2624 handled_component_p (*expr_p);
2625 expr_p = &TREE_OPERAND (*expr_p, 0))
2626 continue;
2627 obj = *expr_p;
2628 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2629 x = produce_memory_decl_rtl (obj, regno);
2630 break;
2632 case SSA_NAME:
2633 *ws = 0;
2634 obj = SSA_NAME_VAR (*expr_p);
2635 if (!DECL_RTL_SET_P (obj))
2636 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2637 break;
2639 case VAR_DECL:
2640 case PARM_DECL:
2641 case RESULT_DECL:
2642 *ws = 0;
2643 obj = *expr_p;
2645 if (DECL_RTL_SET_P (obj))
2646 break;
2648 if (DECL_MODE (obj) == BLKmode)
2649 x = produce_memory_decl_rtl (obj, regno);
2650 else
2651 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2653 break;
2655 default:
2656 break;
2659 if (x)
2661 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2662 SET_DECL_RTL (obj, x);
2665 return NULL_TREE;
2668 /* Determines cost of the computation of EXPR. */
2670 static unsigned
2671 computation_cost (tree expr, bool speed)
2673 rtx seq, rslt;
2674 tree type = TREE_TYPE (expr);
2675 unsigned cost;
2676 /* Avoid using hard regs in ways which may be unsupported. */
2677 int regno = LAST_VIRTUAL_REGISTER + 1;
2678 enum function_frequency real_frequency = cfun->function_frequency;
2680 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2681 crtl->maybe_hot_insn_p = speed;
2682 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2683 start_sequence ();
2684 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2685 seq = get_insns ();
2686 end_sequence ();
2687 default_rtl_profile ();
2688 cfun->function_frequency = real_frequency;
2690 cost = seq_cost (seq, speed);
2691 if (MEM_P (rslt))
2692 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
2694 return cost;
2697 /* Returns variable containing the value of candidate CAND at statement AT. */
2699 static tree
2700 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2702 if (stmt_after_increment (loop, cand, stmt))
2703 return cand->var_after;
2704 else
2705 return cand->var_before;
2708 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2709 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2711 bool
2712 tree_int_cst_sign_bit (const_tree t)
2714 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2715 unsigned HOST_WIDE_INT w;
2717 if (bitno < HOST_BITS_PER_WIDE_INT)
2718 w = TREE_INT_CST_LOW (t);
2719 else
2721 w = TREE_INT_CST_HIGH (t);
2722 bitno -= HOST_BITS_PER_WIDE_INT;
2725 return (w >> bitno) & 1;
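/* For example (editorial): for a value of type signed char
   (TYPE_PRECISION 8), bitno is 7; the constant -1 has all low bits of
   TREE_INT_CST_LOW set, so (w >> 7) & 1 yields 1, while the constant 1
   yields 0.  */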
2728 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2729 same precision that is at least as wide as the precision of TYPE, stores
2730 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2731 type of A and B. */
2733 static tree
2734 determine_common_wider_type (tree *a, tree *b)
2736 tree wider_type = NULL;
2737 tree suba, subb;
2738 tree atype = TREE_TYPE (*a);
2740 if (CONVERT_EXPR_P (*a))
2742 suba = TREE_OPERAND (*a, 0);
2743 wider_type = TREE_TYPE (suba);
2744 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2745 return atype;
2747 else
2748 return atype;
2750 if (CONVERT_EXPR_P (*b))
2752 subb = TREE_OPERAND (*b, 0);
2753 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2754 return atype;
2756 else
2757 return atype;
2759 *a = suba;
2760 *b = subb;
2761 return wider_type;
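/* An illustrative instance (hypothetical operands): if *a is
   (unsigned int) x and *b is (unsigned int) y, with x and y of the wider
   type unsigned long, both conversions are stripped, x and y are stored
   back through A and B, and unsigned long is returned.  If only one
   operand is such a conversion, the narrow type is returned and the
   operands are left untouched.  */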
2764 /* Determines the expression by which USE is expressed from induction variable
2765 CAND at statement AT in LOOP. The expression is stored in a decomposed
2766 form into AFF. Returns false if USE cannot be expressed using CAND. */
2768 static bool
2769 get_computation_aff (struct loop *loop,
2770 struct iv_use *use, struct iv_cand *cand, gimple at,
2771 struct affine_tree_combination *aff)
2773 tree ubase = use->iv->base;
2774 tree ustep = use->iv->step;
2775 tree cbase = cand->iv->base;
2776 tree cstep = cand->iv->step, cstep_common;
2777 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2778 tree common_type, var;
2779 tree uutype;
2780 aff_tree cbase_aff, var_aff;
2781 double_int rat;
2783 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2785 /* We do not have a precision to express the values of use. */
2786 return false;
2789 var = var_at_stmt (loop, cand, at);
2790 uutype = unsigned_type_for (utype);
2792 /* If the conversion is not noop, perform it. */
2793 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2795 cstep = fold_convert (uutype, cstep);
2796 cbase = fold_convert (uutype, cbase);
2797 var = fold_convert (uutype, var);
2800 if (!constant_multiple_of (ustep, cstep, &rat))
2801 return false;
2803 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2804 type, we achieve better folding by computing their difference in this
2805 wider type, and casting the result to UUTYPE. We do not need to worry about
2806 overflows, as all the arithmetic will in the end be performed in UUTYPE
2807 anyway. */
2808 common_type = determine_common_wider_type (&ubase, &cbase);
2810 /* use = ubase - ratio * cbase + ratio * var. */
2811 tree_to_aff_combination (ubase, common_type, aff);
2812 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2813 tree_to_aff_combination (var, uutype, &var_aff);
2815 /* We need to shift the value if we are after the increment. */
2816 if (stmt_after_increment (loop, cand, at))
2818 aff_tree cstep_aff;
2820 if (common_type != uutype)
2821 cstep_common = fold_convert (common_type, cstep);
2822 else
2823 cstep_common = cstep;
2825 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2826 aff_combination_add (&cbase_aff, &cstep_aff);
2829 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2830 aff_combination_add (aff, &cbase_aff);
2831 if (common_type != uutype)
2832 aff_combination_convert (aff, uutype);
2834 aff_combination_scale (&var_aff, rat);
2835 aff_combination_add (aff, &var_aff);
2837 return true;
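/* A small worked instance (editorial, invented values): for a use with
   iv {base 16, step 4} and a candidate with iv {base 0, step 1},
   constant_multiple_of yields rat = 4, and the combination built above
   is use = 16 - 4 * 0 + 4 * var, i.e. 16 + 4 * var.  */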
2840 /* Determines the expression by which USE is expressed from induction variable
2841 CAND at statement AT in LOOP. The computation is unshared. */
2843 static tree
2844 get_computation_at (struct loop *loop,
2845 struct iv_use *use, struct iv_cand *cand, gimple at)
2847 aff_tree aff;
2848 tree type = TREE_TYPE (use->iv->base);
2850 if (!get_computation_aff (loop, use, cand, at, &aff))
2851 return NULL_TREE;
2852 unshare_aff_combination (&aff);
2853 return fold_convert (type, aff_combination_to_tree (&aff));
2856 /* Determines the expression by which USE is expressed from induction variable
2857 CAND in LOOP. The computation is unshared. */
2859 static tree
2860 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2862 return get_computation_at (loop, use, cand, use->stmt);
2865 /* Returns cost of addition in MODE. */
2867 static unsigned
2868 add_cost (enum machine_mode mode, bool speed)
2870 static unsigned costs[NUM_MACHINE_MODES];
2871 rtx seq;
2872 unsigned cost;
2874 if (costs[mode])
2875 return costs[mode];
2877 start_sequence ();
2878 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2879 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2880 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2881 NULL_RTX);
2882 seq = get_insns ();
2883 end_sequence ();
2885 cost = seq_cost (seq, speed);
2886 if (!cost)
2887 cost = 1;
2889 costs[mode] = cost;
2891 if (dump_file && (dump_flags & TDF_DETAILS))
2892 fprintf (dump_file, "Addition in %s costs %d\n",
2893 GET_MODE_NAME (mode), cost);
2894 return cost;
2897 /* Entry in a hashtable of already known costs for multiplication. */
2898 struct mbc_entry
2900 HOST_WIDE_INT cst; /* The constant to multiply by. */
2901 enum machine_mode mode; /* In mode. */
2902 unsigned cost; /* The cost. */
2905 /* Computes the hash value for ENTRY. */
2907 static hashval_t
2908 mbc_entry_hash (const void *entry)
2910 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2912 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2915 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2917 static int
2918 mbc_entry_eq (const void *entry1, const void *entry2)
2920 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2921 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2923 return (e1->mode == e2->mode
2924 && e1->cst == e2->cst);
2927 /* Returns cost of multiplication by constant CST in MODE. */
2929 unsigned
2930 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2932 static htab_t costs;
2933 struct mbc_entry **cached, act;
2934 rtx seq;
2935 unsigned cost;
2937 if (!costs)
2938 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2940 act.mode = mode;
2941 act.cst = cst;
2942 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2943 if (*cached)
2944 return (*cached)->cost;
2946 *cached = XNEW (struct mbc_entry);
2947 (*cached)->mode = mode;
2948 (*cached)->cst = cst;
2950 start_sequence ();
2951 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2952 gen_int_mode (cst, mode), NULL_RTX, 0);
2953 seq = get_insns ();
2954 end_sequence ();
2956 cost = seq_cost (seq, speed);
2958 if (dump_file && (dump_flags & TDF_DETAILS))
2959 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2960 (int) cst, GET_MODE_NAME (mode), cost);
2962 (*cached)->cost = cost;
2964 return cost;
2967 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2968 validity for a memory reference accessing memory of mode MODE. */
2970 bool
2971 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2973 #define MAX_RATIO 128
2974 static sbitmap valid_mult[MAX_MACHINE_MODE];
2976 if (!valid_mult[mode])
2978 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2979 rtx addr;
2980 HOST_WIDE_INT i;
2982 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2983 sbitmap_zero (valid_mult[mode]);
2984 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2985 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2987 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2988 if (memory_address_p (mode, addr))
2989 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2992 if (dump_file && (dump_flags & TDF_DETAILS))
2994 fprintf (dump_file, " allowed multipliers:");
2995 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2996 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2997 fprintf (dump_file, " %d", (int) i);
2998 fprintf (dump_file, "\n");
2999 fprintf (dump_file, "\n");
3003 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3004 return false;
3006 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
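/* E.g. on a target whose addressing modes scale the index by 1, 2, 4 or
   8 (as most x86 variants do), the cached bitmap above answers true for
   ratio 4 and false for ratio 3; ratios outside [-MAX_RATIO, MAX_RATIO]
   are rejected before the bitmap is even consulted.  */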
3009 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3010 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3011 variable is omitted. Compute the cost for a memory reference that accesses
3012 a memory location of mode MEM_MODE.
3014 TODO -- there must be some better way. This all is quite crude. */
3016 static comp_cost
3017 get_address_cost (bool symbol_present, bool var_present,
3018 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3019 enum machine_mode mem_mode,
3020 bool speed)
3022 static bool initialized[MAX_MACHINE_MODE];
3023 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3024 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3025 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3026 unsigned cost, acost, complexity;
3027 bool offset_p, ratio_p;
3028 HOST_WIDE_INT s_offset;
3029 unsigned HOST_WIDE_INT mask;
3030 unsigned bits;
3032 if (!initialized[mem_mode])
3034 HOST_WIDE_INT i;
3035 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3036 int old_cse_not_expected;
3037 unsigned sym_p, var_p, off_p, rat_p, add_c;
3038 rtx seq, addr, base;
3039 rtx reg0, reg1;
3041 initialized[mem_mode] = true;
3043 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3045 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3046 for (i = start; i <= 1 << 20; i <<= 1)
3048 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3049 if (!memory_address_p (mem_mode, addr))
3050 break;
3052 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3053 off[mem_mode] = max_offset[mem_mode];
3055 for (i = start; i <= 1 << 20; i <<= 1)
3057 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3058 if (!memory_address_p (mem_mode, addr))
3059 break;
3061 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3063 if (dump_file && (dump_flags & TDF_DETAILS))
3065 fprintf (dump_file, "get_address_cost:\n");
3066 fprintf (dump_file, " min offset %s %d\n",
3067 GET_MODE_NAME (mem_mode),
3068 (int) min_offset[mem_mode]);
3069 fprintf (dump_file, " max offset %s %d\n",
3070 GET_MODE_NAME (mem_mode),
3071 (int) max_offset[mem_mode]);
3074 rat[mem_mode] = 1;
3075 for (i = 2; i <= MAX_RATIO; i++)
3076 if (multiplier_allowed_in_address_p (i, mem_mode))
3078 rat[mem_mode] = i;
3079 break;
3082 /* Compute the cost of various addressing modes. */
3083 acost = 0;
3084 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3085 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3087 for (i = 0; i < 16; i++)
3089 sym_p = i & 1;
3090 var_p = (i >> 1) & 1;
3091 off_p = (i >> 2) & 1;
3092 rat_p = (i >> 3) & 1;
3094 addr = reg0;
3095 if (rat_p)
3096 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3097 gen_int_mode (rat[mem_mode], Pmode));
3099 if (var_p)
3100 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3102 if (sym_p)
3104 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3105 /* ??? We can run into trouble with some backends by presenting
3106 them with symbols which haven't been properly passed through
3107 targetm.encode_section_info. By setting the local bit, we
3108 enhance the probability of things working. */
3109 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3111 if (off_p)
3112 base = gen_rtx_fmt_e (CONST, Pmode,
3113 gen_rtx_fmt_ee (PLUS, Pmode,
3114 base,
3115 gen_int_mode (off[mem_mode],
3116 Pmode)));
3118 else if (off_p)
3119 base = gen_int_mode (off[mem_mode], Pmode);
3120 else
3121 base = NULL_RTX;
3123 if (base)
3124 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3126 start_sequence ();
3127 /* To avoid splitting addressing modes, pretend that no cse will
3128 follow. */
3129 old_cse_not_expected = cse_not_expected;
3130 cse_not_expected = true;
3131 addr = memory_address (mem_mode, addr);
3132 cse_not_expected = old_cse_not_expected;
3133 seq = get_insns ();
3134 end_sequence ();
3136 acost = seq_cost (seq, speed);
3137 acost += address_cost (addr, mem_mode, speed);
3139 if (!acost)
3140 acost = 1;
3141 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3144 /* On some targets, it is quite expensive to load a symbol into a register,
3145 which makes addresses that contain symbols look much more expensive.
3146 However, the symbol will have to be loaded in any case before the
3147 loop (and quite likely we have it in a register already), so it does not
3148 make much sense to penalize them too heavily. So make some final
3149 tweaks for the SYMBOL_PRESENT modes:
3151 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3152 var is cheaper, use this mode with a small penalty.
3153 If VAR_PRESENT is true, try whether the mode with
3154 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
3155 and if this is the case, use it. */
3156 add_c = add_cost (Pmode, speed);
3157 for (i = 0; i < 8; i++)
3159 var_p = i & 1;
3160 off_p = (i >> 1) & 1;
3161 rat_p = (i >> 2) & 1;
3163 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3164 if (var_p)
3165 acost += add_c;
3167 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3168 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3171 if (dump_file && (dump_flags & TDF_DETAILS))
3173 fprintf (dump_file, "Address costs:\n");
3175 for (i = 0; i < 16; i++)
3177 sym_p = i & 1;
3178 var_p = (i >> 1) & 1;
3179 off_p = (i >> 2) & 1;
3180 rat_p = (i >> 3) & 1;
3182 fprintf (dump_file, " ");
3183 if (sym_p)
3184 fprintf (dump_file, "sym + ");
3185 if (var_p)
3186 fprintf (dump_file, "var + ");
3187 if (off_p)
3188 fprintf (dump_file, "cst + ");
3189 if (rat_p)
3190 fprintf (dump_file, "rat * ");
3192 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3193 fprintf (dump_file, "index costs %d\n", acost);
3195 fprintf (dump_file, "\n");
3199 bits = GET_MODE_BITSIZE (Pmode);
3200 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3201 offset &= mask;
3202 if ((offset >> (bits - 1) & 1))
3203 offset |= ~mask;
3204 s_offset = offset;
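/* The statements above sign-extend OFFSET from the width of Pmode to
   HOST_WIDE_INT.  E.g. (illustrative widths) with 32-bit Pmode and
   64-bit HOST_WIDE_INT, an offset of 0xfffffff0 has bit 31 set, so the
   upper bits are filled in and s_offset becomes -16.  */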
3206 cost = 0;
3207 offset_p = (s_offset != 0
3208 && min_offset[mem_mode] <= s_offset
3209 && s_offset <= max_offset[mem_mode]);
3210 ratio_p = (ratio != 1
3211 && multiplier_allowed_in_address_p (ratio, mem_mode));
3213 if (ratio != 1 && !ratio_p)
3214 cost += multiply_by_cost (ratio, Pmode, speed);
3216 if (s_offset && !offset_p && !symbol_present)
3217 cost += add_cost (Pmode, speed);
3219 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3220 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3221 return new_cost (cost + acost, complexity);
3224 /* Estimates cost of forcing expression EXPR into a variable. */
3226 static comp_cost
3227 force_expr_to_var_cost (tree expr, bool speed)
3229 static bool costs_initialized = false;
3230 static unsigned integer_cost [2];
3231 static unsigned symbol_cost [2];
3232 static unsigned address_cost [2];
3233 tree op0, op1;
3234 comp_cost cost0, cost1, cost;
3235 enum machine_mode mode;
3237 if (!costs_initialized)
3239 tree type = build_pointer_type (integer_type_node);
3240 tree var, addr;
3241 rtx x;
3242 int i;
3244 var = create_tmp_var_raw (integer_type_node, "test_var");
3245 TREE_STATIC (var) = 1;
3246 x = produce_memory_decl_rtl (var, NULL);
3247 SET_DECL_RTL (var, x);
3249 addr = build1 (ADDR_EXPR, type, var);
3252 for (i = 0; i < 2; i++)
3254 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3255 2000), i);
3257 symbol_cost[i] = computation_cost (addr, i) + 1;
3259 address_cost[i]
3260 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3261 addr,
3262 build_int_cst (sizetype, 2000)), i) + 1;
3263 if (dump_file && (dump_flags & TDF_DETAILS))
3265 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3266 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3267 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3268 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3269 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3270 fprintf (dump_file, "\n");
3274 costs_initialized = true;
3277 STRIP_NOPS (expr);
3279 if (SSA_VAR_P (expr))
3280 return zero_cost;
3282 if (is_gimple_min_invariant (expr))
3284 if (TREE_CODE (expr) == INTEGER_CST)
3285 return new_cost (integer_cost [speed], 0);
3287 if (TREE_CODE (expr) == ADDR_EXPR)
3289 tree obj = TREE_OPERAND (expr, 0);
3291 if (TREE_CODE (obj) == VAR_DECL
3292 || TREE_CODE (obj) == PARM_DECL
3293 || TREE_CODE (obj) == RESULT_DECL)
3294 return new_cost (symbol_cost [speed], 0);
3297 return new_cost (address_cost [speed], 0);
3300 switch (TREE_CODE (expr))
3302 case POINTER_PLUS_EXPR:
3303 case PLUS_EXPR:
3304 case MINUS_EXPR:
3305 case MULT_EXPR:
3306 op0 = TREE_OPERAND (expr, 0);
3307 op1 = TREE_OPERAND (expr, 1);
3308 STRIP_NOPS (op0);
3309 STRIP_NOPS (op1);
3311 if (is_gimple_val (op0))
3312 cost0 = zero_cost;
3313 else
3314 cost0 = force_expr_to_var_cost (op0, speed);
3316 if (is_gimple_val (op1))
3317 cost1 = zero_cost;
3318 else
3319 cost1 = force_expr_to_var_cost (op1, speed);
3321 break;
3323 case NEGATE_EXPR:
3324 op0 = TREE_OPERAND (expr, 0);
3325 STRIP_NOPS (op0);
3326 op1 = NULL_TREE;
3328 if (is_gimple_val (op0))
3329 cost0 = zero_cost;
3330 else
3331 cost0 = force_expr_to_var_cost (op0, speed);
3333 cost1 = zero_cost;
3334 break;
3336 default:
3337 /* Just an arbitrary value, FIXME. */
3338 return new_cost (target_spill_cost[speed], 0);
3341 mode = TYPE_MODE (TREE_TYPE (expr));
3342 switch (TREE_CODE (expr))
3344 case POINTER_PLUS_EXPR:
3345 case PLUS_EXPR:
3346 case MINUS_EXPR:
3347 case NEGATE_EXPR:
3348 cost = new_cost (add_cost (mode, speed), 0);
3349 break;
3351 case MULT_EXPR:
3352 if (cst_and_fits_in_hwi (op0))
3353 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3354 else if (cst_and_fits_in_hwi (op1))
3355 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3356 else
3357 return new_cost (target_spill_cost [speed], 0);
3358 break;
3360 default:
3361 gcc_unreachable ();
3364 cost = add_costs (cost, cost0);
3365 cost = add_costs (cost, cost1);
3367 /* Bound the cost by target_spill_cost. The parts of complicated
3368 computations often are either loop invariant or at least can
3369 be shared between several iv uses, so letting this grow without
3370 limits would not give reasonable results. */
3371 if (cost.cost > target_spill_cost [speed])
3372 cost.cost = target_spill_cost [speed];
3374 return cost;
3377 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3378 invariants the computation depends on. */
3380 static comp_cost
3381 force_var_cost (struct ivopts_data *data,
3382 tree expr, bitmap *depends_on)
3384 if (depends_on)
3386 fd_ivopts_data = data;
3387 walk_tree (&expr, find_depends, depends_on, NULL);
3390 return force_expr_to_var_cost (expr, data->speed);
3393 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3394 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3395 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3396 invariants the computation depends on. */
3398 static comp_cost
3399 split_address_cost (struct ivopts_data *data,
3400 tree addr, bool *symbol_present, bool *var_present,
3401 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3403 tree core;
3404 HOST_WIDE_INT bitsize;
3405 HOST_WIDE_INT bitpos;
3406 tree toffset;
3407 enum machine_mode mode;
3408 int unsignedp, volatilep;
3410 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3411 &unsignedp, &volatilep, false);
3413 if (toffset != 0
3414 || bitpos % BITS_PER_UNIT != 0
3415 || TREE_CODE (core) != VAR_DECL)
3417 *symbol_present = false;
3418 *var_present = true;
3419 fd_ivopts_data = data;
3420 walk_tree (&addr, find_depends, depends_on, NULL);
3421 return new_cost (target_spill_cost[data->speed], 0);
3424 *offset += bitpos / BITS_PER_UNIT;
3425 if (TREE_STATIC (core)
3426 || DECL_EXTERNAL (core))
3428 *symbol_present = true;
3429 *var_present = false;
3430 return zero_cost;
3433 *symbol_present = false;
3434 *var_present = true;
3435 return zero_cost;
3438 /* Estimates cost of expressing difference of addresses E1 - E2 as
3439 var + symbol + offset. The value of offset is added to OFFSET,
3440 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3441 part is missing. DEPENDS_ON is a set of the invariants the computation
3442 depends on. */
3444 static comp_cost
3445 ptr_difference_cost (struct ivopts_data *data,
3446 tree e1, tree e2, bool *symbol_present, bool *var_present,
3447 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3449 HOST_WIDE_INT diff = 0;
3450 aff_tree aff_e1, aff_e2;
3451 tree type;
3453 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3455 if (ptr_difference_const (e1, e2, &diff))
3457 *offset += diff;
3458 *symbol_present = false;
3459 *var_present = false;
3460 return zero_cost;
3463 if (integer_zerop (e2))
3464 return split_address_cost (data, TREE_OPERAND (e1, 0),
3465 symbol_present, var_present, offset, depends_on);
3467 *symbol_present = false;
3468 *var_present = true;
3470 type = signed_type_for (TREE_TYPE (e1));
3471 tree_to_aff_combination (e1, type, &aff_e1);
3472 tree_to_aff_combination (e2, type, &aff_e2);
3473 aff_combination_scale (&aff_e2, double_int_minus_one);
3474 aff_combination_add (&aff_e1, &aff_e2);
3476 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3479 /* Estimates cost of expressing difference E1 - E2 as
3480 var + symbol + offset. The value of offset is added to OFFSET,
3481 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3482 part is missing. DEPENDS_ON is a set of the invariants the computation
3483 depends on. */
3485 static comp_cost
3486 difference_cost (struct ivopts_data *data,
3487 tree e1, tree e2, bool *symbol_present, bool *var_present,
3488 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3490 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3491 unsigned HOST_WIDE_INT off1, off2;
3492 aff_tree aff_e1, aff_e2;
3493 tree type;
3495 e1 = strip_offset (e1, &off1);
3496 e2 = strip_offset (e2, &off2);
3497 *offset += off1 - off2;
3499 STRIP_NOPS (e1);
3500 STRIP_NOPS (e2);
3502 if (TREE_CODE (e1) == ADDR_EXPR)
3503 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3504 offset, depends_on);
3505 *symbol_present = false;
3507 if (operand_equal_p (e1, e2, 0))
3509 *var_present = false;
3510 return zero_cost;
3513 *var_present = true;
3515 if (integer_zerop (e2))
3516 return force_var_cost (data, e1, depends_on);
3518 if (integer_zerop (e1))
3520 comp_cost cost = force_var_cost (data, e2, depends_on);
3521 cost.cost += multiply_by_cost (-1, mode, data->speed);
3522 return cost;
3525 type = signed_type_for (TREE_TYPE (e1));
3526 tree_to_aff_combination (e1, type, &aff_e1);
3527 tree_to_aff_combination (e2, type, &aff_e2);
3528 aff_combination_scale (&aff_e2, double_int_minus_one);
3529 aff_combination_add (&aff_e1, &aff_e2);
3531 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3534 /* Determines the cost of the computation by which USE is expressed
3535 from induction variable CAND. If ADDRESS_P is true, we just need
3536 to create an address from it, otherwise we want to get it into a
3537 register. A set of invariants we depend on is stored in
3538 DEPENDS_ON. AT is the statement at which the value is computed. */
3540 static comp_cost
3541 get_computation_cost_at (struct ivopts_data *data,
3542 struct iv_use *use, struct iv_cand *cand,
3543 bool address_p, bitmap *depends_on, gimple at)
3545 tree ubase = use->iv->base, ustep = use->iv->step;
3546 tree cbase, cstep;
3547 tree utype = TREE_TYPE (ubase), ctype;
3548 unsigned HOST_WIDE_INT cstepi, offset = 0;
3549 HOST_WIDE_INT ratio, aratio;
3550 bool var_present, symbol_present;
3551 comp_cost cost;
3552 double_int rat;
3553 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3555 *depends_on = NULL;
3557 /* Only consider real candidates. */
3558 if (!cand->iv)
3559 return infinite_cost;
3561 cbase = cand->iv->base;
3562 cstep = cand->iv->step;
3563 ctype = TREE_TYPE (cbase);
3565 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3567 /* We do not have a precision to express the values of use. */
3568 return infinite_cost;
3571 if (address_p)
3573 /* Do not try to express the address of an object with a computation
3574 based on the address of a different object. This may cause problems
3575 in RTL-level alias analysis (which does not expect this to happen,
3576 as it is invalid in C), and would be unlikely to be useful
3577 anyway. */
3578 if (use->iv->base_object
3579 && cand->iv->base_object
3580 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3581 return infinite_cost;
3584 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3586 /* TODO -- add direct handling of this case. */
3587 goto fallback;
3590 /* CSTEPI is removed from the offset in case the statement is after the
3591 increment. If the step is not constant, we use zero instead.
3592 This is a bit imprecise (there is the extra addition), but
3593 redundancy elimination is likely to transform the code so that
3594 it uses the value of the variable before the increment anyway,
3595 so this is not all that unrealistic. */
3596 if (cst_and_fits_in_hwi (cstep))
3597 cstepi = int_cst_value (cstep);
3598 else
3599 cstepi = 0;
3601 if (!constant_multiple_of (ustep, cstep, &rat))
3602 return infinite_cost;
3604 if (double_int_fits_in_shwi_p (rat))
3605 ratio = double_int_to_shwi (rat);
3606 else
3607 return infinite_cost;
3609 STRIP_NOPS (cbase);
3610 ctype = TREE_TYPE (cbase);
3612 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3613 or ratio == 1, it is better to handle this like
3615 ubase - ratio * cbase + ratio * var
3617 (this also holds in the case ratio == -1, TODO). */
3619 if (cst_and_fits_in_hwi (cbase))
3621 offset = - ratio * int_cst_value (cbase);
3622 cost = difference_cost (data,
3623 ubase, build_int_cst (utype, 0),
3624 &symbol_present, &var_present, &offset,
3625 depends_on);
3627 else if (ratio == 1)
3629 cost = difference_cost (data,
3630 ubase, cbase,
3631 &symbol_present, &var_present, &offset,
3632 depends_on);
3634 else if (address_p
3635 && !POINTER_TYPE_P (ctype)
3636 && multiplier_allowed_in_address_p (ratio,
3637 TYPE_MODE (TREE_TYPE (utype))))
3639 cbase
3640 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3641 cost = difference_cost (data,
3642 ubase, cbase,
3643 &symbol_present, &var_present, &offset,
3644 depends_on);
3646 else
3648 cost = force_var_cost (data, cbase, depends_on);
3649 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3650 cost = add_costs (cost,
3651 difference_cost (data,
3652 ubase, build_int_cst (utype, 0),
3653 &symbol_present, &var_present,
3654 &offset, depends_on));
3657 /* If we are after the increment, the value of the candidate is ahead by
3658 one iteration's step. */
3659 if (stmt_after_increment (data->current_loop, cand, at))
3660 offset -= ratio * cstepi;
3662 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3663 (symbol/var1/const parts may be omitted). If we are looking for an
3664 address, find the cost of addressing this. */
3665 if (address_p)
3666 return add_costs (cost,
3667 get_address_cost (symbol_present, var_present,
3668 offset, ratio,
3669 TYPE_MODE (TREE_TYPE (utype)), speed));
3671 /* Otherwise estimate the costs for computing the expression. */
3672 if (!symbol_present && !var_present && !offset)
3674 if (ratio != 1)
3675 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3676 return cost;
3679 /* Symbol + offset should be compile-time computable so consider that they
3680 are added once to the variable, if present. */
3681 if (var_present && (symbol_present || offset))
3682 cost.cost += add_cost (TYPE_MODE (ctype), speed)
3683 / AVG_LOOP_NITER (data->current_loop);
3685 /* Having offset does not affect runtime cost in case it is added to
3686 symbol, but it increases complexity. */
3687 if (offset)
3688 cost.complexity++;
3690 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3692 aratio = ratio > 0 ? ratio : -ratio;
3693 if (aratio != 1)
3694 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3696 fallback:
3698 /* Just get the expression, expand it and measure the cost. */
3699 tree comp = get_computation_at (data->current_loop, use, cand, at);
3701 if (!comp)
3702 return infinite_cost;
3704 if (address_p)
3705 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3707 return new_cost (computation_cost (comp, speed), 0);
3711 /* Determines the cost of the computation by which USE is expressed
3712 from induction variable CAND. If ADDRESS_P is true, we just need
3713 to create an address from it, otherwise we want to get it into a
3714 register. A set of invariants we depend on is stored in
3715 DEPENDS_ON. */
3717 static comp_cost
3718 get_computation_cost (struct ivopts_data *data,
3719 struct iv_use *use, struct iv_cand *cand,
3720 bool address_p, bitmap *depends_on)
3722 return get_computation_cost_at (data,
3723 use, cand, address_p, depends_on, use->stmt);
3726 /* Determines cost of basing replacement of USE on CAND in a generic
3727 expression. */
3729 static bool
3730 determine_use_iv_cost_generic (struct ivopts_data *data,
3731 struct iv_use *use, struct iv_cand *cand)
3733 bitmap depends_on;
3734 comp_cost cost;
3736 /* The simple case first -- if we need to express value of the preserved
3737 original biv, the cost is 0. This also prevents us from counting the
3738 cost of increment twice -- once at this use and once in the cost of
3739 the candidate. */
3740 if (cand->pos == IP_ORIGINAL
3741 && cand->incremented_at == use->stmt)
3743 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3744 return true;
3747 cost = get_computation_cost (data, use, cand, false, &depends_on);
3748 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3750 return !infinite_cost_p (cost);
3753 /* Determines cost of basing replacement of USE on CAND in an address. */
3755 static bool
3756 determine_use_iv_cost_address (struct ivopts_data *data,
3757 struct iv_use *use, struct iv_cand *cand)
3759 bitmap depends_on;
3760 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3762 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3764 return !infinite_cost_p (cost);
3767 /* Computes value of candidate CAND at position AT in iteration NITER, and
3768 stores it to VAL. */
3770 static void
3771 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3772 aff_tree *val)
3774 aff_tree step, delta, nit;
3775 struct iv *iv = cand->iv;
3776 tree type = TREE_TYPE (iv->base);
3777 tree steptype = type;
3778 if (POINTER_TYPE_P (type))
3779 steptype = sizetype;
3781 tree_to_aff_combination (iv->step, steptype, &step);
3782 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3783 aff_combination_convert (&nit, steptype);
3784 aff_combination_mult (&nit, &step, &delta);
3785 if (stmt_after_increment (loop, cand, at))
3786 aff_combination_add (&delta, &step);
3788 tree_to_aff_combination (iv->base, type, val);
3789 aff_combination_add (val, &delta);
3792 /* Returns period of induction variable iv. */
3794 static tree
3795 iv_period (struct iv *iv)
3797 tree step = iv->step, period, type;
3798 tree pow2div;
3800 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3802 /* The period of the iv is lcm (step, type range) / step - 1, i.e. the number
3803 of increments before the iv can wrap. Since the type range is a power of
3804 two, it suffices to determine the maximum power of two that divides step. */
3805 pow2div = num_ending_zeros (step);
3806 type = unsigned_type_for (TREE_TYPE (step));
3808 period = build_low_bits_mask (type,
3809 (TYPE_PRECISION (type)
3810 - tree_low_cst (pow2div, 1)));
3812 return period;
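/* For example (editorial): a 32-bit iv with step 4 has num_ending_zeros
   equal to 2, so the period is a mask of 30 low bits, 2^30 - 1: the iv
   can be incremented that many times before revisiting a value.  */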
3815 /* Returns the comparison operator used when eliminating the iv USE. */
3817 static enum tree_code
3818 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3820 struct loop *loop = data->current_loop;
3821 basic_block ex_bb;
3822 edge exit;
3824 ex_bb = gimple_bb (use->stmt);
3825 exit = EDGE_SUCC (ex_bb, 0);
3826 if (flow_bb_inside_loop_p (loop, exit->dest))
3827 exit = EDGE_SUCC (ex_bb, 1);
3829 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3832 /* Check whether it is possible to express the condition in USE by comparison
3833 of candidate CAND. If so, store the value to compare against in *BOUND. */
3835 static bool
3836 may_eliminate_iv (struct ivopts_data *data,
3837 struct iv_use *use, struct iv_cand *cand, tree *bound)
3839 basic_block ex_bb;
3840 edge exit;
3841 tree nit, period;
3842 struct loop *loop = data->current_loop;
3843 aff_tree bnd;
3845 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3846 return false;
3848 /* For now this works only for exits that dominate the loop latch.
3849 TODO: extend to other conditions inside the loop body. */
3850 ex_bb = gimple_bb (use->stmt);
3851 if (use->stmt != last_stmt (ex_bb)
3852 || gimple_code (use->stmt) != GIMPLE_COND
3853 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3854 return false;
3856 exit = EDGE_SUCC (ex_bb, 0);
3857 if (flow_bb_inside_loop_p (loop, exit->dest))
3858 exit = EDGE_SUCC (ex_bb, 1);
3859 if (flow_bb_inside_loop_p (loop, exit->dest))
3860 return false;
3862 nit = niter_for_exit (data, exit);
3863 if (!nit)
3864 return false;
3866 /* Determine whether we can use the variable to test the exit condition.
3867 This is the case iff the period of the induction variable is greater
3868 than the number of iterations for which the exit condition is true. */
3869 period = iv_period (cand->iv);
3871 /* If the number of iterations is constant, compare against it directly. */
3872 if (TREE_CODE (nit) == INTEGER_CST)
3874 if (!tree_int_cst_lt (nit, period))
3875 return false;
3878 /* If not, and if this is the only possible exit of the loop, see whether
3879 we can get a conservative estimate on the number of iterations of the
3880 entire loop and compare against that instead. */
3881 else if (loop_only_exit_p (loop, exit))
3883 double_int period_value, max_niter;
3884 if (!estimated_loop_iterations (loop, true, &max_niter))
3885 return false;
3886 period_value = tree_to_double_int (period);
3887 if (double_int_ucmp (max_niter, period_value) >= 0)
3888 return false;
3891 /* Otherwise, punt. */
3892 else
3893 return false;
3895 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3897 *bound = aff_combination_to_tree (&bnd);
3898 /* It is unlikely that computing the number of iterations using division
3899 would be more profitable than keeping the original induction variable. */
3900 if (expression_expensive_p (*bound))
3901 return false;
3902 return true;
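/* As an illustration (editorial): a 32-bit unsigned iv with step 1 has
   period 2^32 - 1, so almost any exit with a known iteration count can be
   rewritten to test against cand_value_at; with step 2 the period drops
   to 2^31 - 1, and a loop that may iterate more often than that must
   keep its original exit test.  */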
3905 /* Determines cost of basing replacement of USE on CAND in a condition. */
3907 static bool
3908 determine_use_iv_cost_condition (struct ivopts_data *data,
3909 struct iv_use *use, struct iv_cand *cand)
3911 tree bound = NULL_TREE;
3912 struct iv *cmp_iv;
3913 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3914 comp_cost elim_cost, express_cost, cost;
3915 bool ok;
3917 /* Only consider real candidates. */
3918 if (!cand->iv)
3920 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3921 return false;
3924 /* Try iv elimination. */
3925 if (may_eliminate_iv (data, use, cand, &bound))
3927 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3928 /* The bound is a loop invariant, so it will be only computed
3929 once. */
3930 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3932 else
3933 elim_cost = infinite_cost;
3935 /* Try expressing the original giv. If it is compared with an invariant,
3936 note that we cannot get rid of it. */
3937 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
3938 gcc_assert (ok);
3940 express_cost = get_computation_cost (data, use, cand, false,
3941 &depends_on_express);
3942 fd_ivopts_data = data;
3943 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3945 /* Choose the better approach, preferring the eliminated IV. */
3946 if (compare_costs (elim_cost, express_cost) <= 0)
3948 cost = elim_cost;
3949 depends_on = depends_on_elim;
3950 depends_on_elim = NULL;
3952 else
3954 cost = express_cost;
3955 depends_on = depends_on_express;
3956 depends_on_express = NULL;
3957 bound = NULL_TREE;
3960 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3962 if (depends_on_elim)
3963 BITMAP_FREE (depends_on_elim);
3964 if (depends_on_express)
3965 BITMAP_FREE (depends_on_express);
3967 return !infinite_cost_p (cost);
3970 /* Determines cost of basing replacement of USE on CAND. Returns false
3971 if USE cannot be based on CAND. */
3973 static bool
3974 determine_use_iv_cost (struct ivopts_data *data,
3975 struct iv_use *use, struct iv_cand *cand)
3977 switch (use->type)
3979 case USE_NONLINEAR_EXPR:
3980 return determine_use_iv_cost_generic (data, use, cand);
3982 case USE_ADDRESS:
3983 return determine_use_iv_cost_address (data, use, cand);
3985 case USE_COMPARE:
3986 return determine_use_iv_cost_condition (data, use, cand);
3988 default:
3989 gcc_unreachable ();
3993 /* Determines costs of basing the use of the iv on an iv candidate. */
3995 static void
3996 determine_use_iv_costs (struct ivopts_data *data)
3998 unsigned i, j;
3999 struct iv_use *use;
4000 struct iv_cand *cand;
4001 bitmap to_clear = BITMAP_ALLOC (NULL);
4003 alloc_use_cost_map (data);
4005 for (i = 0; i < n_iv_uses (data); i++)
4007 use = iv_use (data, i);
4009 if (data->consider_all_candidates)
4011 for (j = 0; j < n_iv_cands (data); j++)
4013 cand = iv_cand (data, j);
4014 determine_use_iv_cost (data, use, cand);
4017 else
4019 bitmap_iterator bi;
4021 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4023 cand = iv_cand (data, j);
4024 if (!determine_use_iv_cost (data, use, cand))
4025 bitmap_set_bit (to_clear, j);
4028 /* Remove the candidates for which the cost is infinite from
4029 the list of related candidates. */
4030 bitmap_and_compl_into (use->related_cands, to_clear);
4031 bitmap_clear (to_clear);
4035 BITMAP_FREE (to_clear);
4037 if (dump_file && (dump_flags & TDF_DETAILS))
4039 fprintf (dump_file, "Use-candidate costs:\n");
4041 for (i = 0; i < n_iv_uses (data); i++)
4043 use = iv_use (data, i);
4045 fprintf (dump_file, "Use %d:\n", i);
4046 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4047 for (j = 0; j < use->n_map_members; j++)
4049 if (!use->cost_map[j].cand
4050 || infinite_cost_p (use->cost_map[j].cost))
4051 continue;
4053 fprintf (dump_file, " %d\t%d\t%d\t",
4054 use->cost_map[j].cand->id,
4055 use->cost_map[j].cost.cost,
4056 use->cost_map[j].cost.complexity);
4057 if (use->cost_map[j].depends_on)
4058 bitmap_print (dump_file,
4059 use->cost_map[j].depends_on, "","");
4060 fprintf (dump_file, "\n");
4063 fprintf (dump_file, "\n");
4065 fprintf (dump_file, "\n");
4069 /* Determines cost of the candidate CAND. */
4071 static void
4072 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4074 comp_cost cost_base;
4075 unsigned cost, cost_step;
4076 tree base;
4078 if (!cand->iv)
4080 cand->cost = 0;
4081 return;
4084 /* There are two costs associated with the candidate -- its increment
4085 and its initialization. The second is almost negligible for any loop
4086 that rolls enough, so we only take it into account a little. */
4088 base = cand->iv->base;
4089 cost_base = force_var_cost (data, base, NULL);
4090 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4092 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4094 /* Prefer the original ivs unless we may gain something by replacing them;
4095 the reason is to make debugging simpler, so this is not relevant for
4096 artificial ivs created by other optimization passes. */
4097 if (cand->pos != IP_ORIGINAL
4098 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4099 cost++;
4101 /* Prefer not to insert statements into latch unless there are some
4102 already (so that we do not create unnecessary jumps). */
4103 if (cand->pos == IP_END
4104 && empty_block_p (ip_end_pos (data->current_loop)))
4105 cost++;
4107 cand->cost = cost;
4110 /* Determines costs of computation of the candidates. */
4112 static void
4113 determine_iv_costs (struct ivopts_data *data)
4115 unsigned i;
4117 if (dump_file && (dump_flags & TDF_DETAILS))
4119 fprintf (dump_file, "Candidate costs:\n");
4120 fprintf (dump_file, " cand\tcost\n");
4123 for (i = 0; i < n_iv_cands (data); i++)
4125 struct iv_cand *cand = iv_cand (data, i);
4127 determine_iv_cost (data, cand);
4129 if (dump_file && (dump_flags & TDF_DETAILS))
4130 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4133 if (dump_file && (dump_flags & TDF_DETAILS))
4134 fprintf (dump_file, "\n");
4137 /* Calculates cost for having SIZE induction variables. */
4139 static unsigned
4140 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4142 /* We add size to the cost, so that we prefer eliminating ivs
4143 if possible. */
4144 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4147 /* For each size of the induction variable set determine the penalty. */
4149 static void
4150 determine_set_costs (struct ivopts_data *data)
4152 unsigned j, n;
4153 gimple phi;
4154 gimple_stmt_iterator psi;
4155 tree op;
4156 struct loop *loop = data->current_loop;
4157 bitmap_iterator bi;
4159 /* We use the following model (definitely improvable, especially the
4160 cost function -- TODO):
4162 We estimate the number of registers available (using MD data), name it A.
4164 We estimate the number of registers used by the loop, name it U. This
4165 number is obtained as the number of loop phi nodes (not counting virtual
4166 registers and bivs) + the number of variables from outside of the loop.
4168 We set a reserve R (free regs that are used for temporary computations,
4169 etc.). For now the reserve is a constant 3.
4171 Let I be the number of induction variables.
4173 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do not
4174 encourage making a lot of ivs without a reason).
4175 -- if A - R < U + I <= A, the cost is I * PRES_COST
4176 -- if U + I > A, the cost is I * PRES_COST and
4177 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
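/* A concrete reading of the model with invented numbers: for A = 8,
   R = 3 and U = 4, a single iv still satisfies U + I + R <= A and is
   almost free; two to four ivs pay PRES_COST each; from the fifth iv
   on, U + I exceeds A and the spill term grows with every extra iv.  */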
4179 if (dump_file && (dump_flags & TDF_DETAILS))
4181 fprintf (dump_file, "Global costs:\n");
4182 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4183 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4184 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4187 n = 0;
4188 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4190 phi = gsi_stmt (psi);
4191 op = PHI_RESULT (phi);
4193 if (!is_gimple_reg (op))
4194 continue;
4196 if (get_iv (data, op))
4197 continue;
4199 n++;
4202 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4204 struct version_info *info = ver_info (data, j);
4206 if (info->inv_id && info->has_nonlin_use)
4207 n++;
4210 data->regs_used = n;
4211 if (dump_file && (dump_flags & TDF_DETAILS))
4212 fprintf (dump_file, " regs_used %d\n", n);
4214 if (dump_file && (dump_flags & TDF_DETAILS))
4216 fprintf (dump_file, " cost for size:\n");
4217 fprintf (dump_file, " ivs\tcost\n");
4218 for (j = 0; j <= 2 * target_avail_regs; j++)
4219 fprintf (dump_file, " %d\t%d\n", j,
4220 ivopts_global_cost_for_size (data, j));
4221 fprintf (dump_file, "\n");
4225 /* Returns true if A is a cheaper cost pair than B. */
4227 static bool
4228 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4230 int cmp;
4232 if (!a)
4233 return false;
4235 if (!b)
4236 return true;
4238 cmp = compare_costs (a->cost, b->cost);
4239 if (cmp < 0)
4240 return true;
4242 if (cmp > 0)
4243 return false;
4245 /* In case the costs are the same, prefer the cheaper candidate. */
4246 if (a->cand->cost < b->cand->cost)
4247 return true;
4249 return false;
4252 /* Computes the cost field of IVS structure. */
4254 static void
4255 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4257 comp_cost cost = ivs->cand_use_cost;
4258 cost.cost += ivs->cand_cost;
4259 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4261 ivs->cost = cost;
4264 /* Removes the invariants in set INVS from set IVS. */
4266 static void
4267 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4269 bitmap_iterator bi;
4270 unsigned iid;
4272 if (!invs)
4273 return;
4275 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4277 ivs->n_invariant_uses[iid]--;
4278 if (ivs->n_invariant_uses[iid] == 0)
4279 ivs->n_regs--;
4283 /* Set USE not to be expressed by any candidate in IVS. */
4285 static void
4286 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4287 struct iv_use *use)
4289 unsigned uid = use->id, cid;
4290 struct cost_pair *cp;
4292 cp = ivs->cand_for_use[uid];
4293 if (!cp)
4294 return;
4295 cid = cp->cand->id;
4297 ivs->bad_uses++;
4298 ivs->cand_for_use[uid] = NULL;
4299 ivs->n_cand_uses[cid]--;
4301 if (ivs->n_cand_uses[cid] == 0)
4303 bitmap_clear_bit (ivs->cands, cid);
4304 /* Do not count the pseudocandidates. */
4305 if (cp->cand->iv)
4306 ivs->n_regs--;
4307 ivs->n_cands--;
4308 ivs->cand_cost -= cp->cand->cost;
4310 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4313 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4315 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4316 iv_ca_recount_cost (data, ivs);
4319 /* Add invariants in set INVS to set IVS. */
4321 static void
4322 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4324 bitmap_iterator bi;
4325 unsigned iid;
4327 if (!invs)
4328 return;
4330 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4332 ivs->n_invariant_uses[iid]++;
4333 if (ivs->n_invariant_uses[iid] == 1)
4334 ivs->n_regs++;
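/* A note on the bookkeeping above: n_invariant_uses[IID] counts how
   many selected cost pairs depend on invariant IID, while the
   invariant occupies only one register however many pairs use it;
   hence n_regs is adjusted only on the 0 <-> 1 transitions in
   iv_ca_set_add_invariants and iv_ca_set_remove_invariants.  */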
4338 /* Set cost pair for USE in set IVS to CP. */
4340 static void
4341 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4342 struct iv_use *use, struct cost_pair *cp)
4344 unsigned uid = use->id, cid;
4346 if (ivs->cand_for_use[uid] == cp)
4347 return;
4349 if (ivs->cand_for_use[uid])
4350 iv_ca_set_no_cp (data, ivs, use);
4352 if (cp)
4354 cid = cp->cand->id;
4356 ivs->bad_uses--;
4357 ivs->cand_for_use[uid] = cp;
4358 ivs->n_cand_uses[cid]++;
4359 if (ivs->n_cand_uses[cid] == 1)
4361 bitmap_set_bit (ivs->cands, cid);
4362 /* Do not count the pseudocandidates. */
4363 if (cp->cand->iv)
4364 ivs->n_regs++;
4365 ivs->n_cands++;
4366 ivs->cand_cost += cp->cand->cost;
4368 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4371 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4372 iv_ca_set_add_invariants (ivs, cp->depends_on);
4373 iv_ca_recount_cost (data, ivs);
4377 /* Extend set IVS by expressing USE by some of the candidates in it
4378 if possible. */
4380 static void
4381 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4382 struct iv_use *use)
4384 struct cost_pair *best_cp = NULL, *cp;
4385 bitmap_iterator bi;
4386 unsigned i;
4388 gcc_assert (ivs->upto >= use->id);
4390 if (ivs->upto == use->id)
4392 ivs->upto++;
4393 ivs->bad_uses++;
4396 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4398 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4400 if (cheaper_cost_pair (cp, best_cp))
4401 best_cp = cp;
4404 iv_ca_set_cp (data, ivs, use, best_cp);
4407 /* Get the cost of the candidate assignment IVS. */
4409 static comp_cost
4410 iv_ca_cost (struct iv_ca *ivs)
4412 /* This was a conditional expression but it triggered a bug in
4413 Sun C 5.5. */
4414 if (ivs->bad_uses)
4415 return infinite_cost;
4416 else
4417 return ivs->cost;
4420 /* Returns true if all dependences of CP are among invariants in IVS. */
4422 static bool
4423 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4425 unsigned i;
4426 bitmap_iterator bi;
4428 if (!cp->depends_on)
4429 return true;
4431 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4433 if (ivs->n_invariant_uses[i] == 0)
4434 return false;
4437 return true;
4440 /* Creates a record of the change that expresses USE by NEW_CP instead of
4441 OLD_CP, and chains it before NEXT_CHANGE. */
4443 static struct iv_ca_delta *
4444 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4445 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4447 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4449 change->use = use;
4450 change->old_cp = old_cp;
4451 change->new_cp = new_cp;
4452 change->next_change = next_change;
4454 return change;
4457 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4458 are rewritten. */
4460 static struct iv_ca_delta *
4461 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4463 struct iv_ca_delta *last;
4465 if (!l2)
4466 return l1;
4468 if (!l1)
4469 return l2;
4471 for (last = l1; last->next_change; last = last->next_change)
4472 continue;
4473 last->next_change = l2;
4475 return l1;
4478 /* Returns the cost pair through which USE is expressed in IVS. */
4480 static struct cost_pair *
4481 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4483 return ivs->cand_for_use[use->id];
4486 /* Reverses the list of changes DELTA, forming its inverse: the old and new
cost pairs of each change are also swapped. */
4488 static struct iv_ca_delta *
4489 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4491 struct iv_ca_delta *act, *next, *prev = NULL;
4492 struct cost_pair *tmp;
4494 for (act = delta; act; act = next)
4496 next = act->next_change;
4497 act->next_change = prev;
4498 prev = act;
4500 tmp = act->old_cp;
4501 act->old_cp = act->new_cp;
4502 act->new_cp = tmp;
4505 return prev;
4508 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4509 reverted instead. */
4511 static void
4512 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4513 struct iv_ca_delta *delta, bool forward)
4515 struct cost_pair *from, *to;
4516 struct iv_ca_delta *act;
4518 if (!forward)
4519 delta = iv_ca_delta_reverse (delta);
4521 for (act = delta; act; act = act->next_change)
4523 from = act->old_cp;
4524 to = act->new_cp;
4525 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4526 iv_ca_set_cp (data, ivs, act->use, to);
4529 if (!forward)
4530 iv_ca_delta_reverse (delta);
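/* Note that when FORWARD is false, the list is reversed in place twice,
   so DELTA is left in its original order and may be reused or freed by
   the caller.  */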
4533 /* Returns true if CAND is used in IVS. */
4535 static bool
4536 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4538 return ivs->n_cand_uses[cand->id] > 0;
4541 /* Returns number of induction variable candidates in the set IVS. */
4543 static unsigned
4544 iv_ca_n_cands (struct iv_ca *ivs)
4546 return ivs->n_cands;
4549 /* Free the list of changes DELTA. */
4551 static void
4552 iv_ca_delta_free (struct iv_ca_delta **delta)
4554 struct iv_ca_delta *act, *next;
4556 for (act = *delta; act; act = next)
4558 next = act->next_change;
4559 free (act);
4562 *delta = NULL;
4565 /* Allocates a new assignment of iv candidates to uses. */
4567 static struct iv_ca *
4568 iv_ca_new (struct ivopts_data *data)
4570 struct iv_ca *nw = XNEW (struct iv_ca);
4572 nw->upto = 0;
4573 nw->bad_uses = 0;
4574 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4575 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4576 nw->cands = BITMAP_ALLOC (NULL);
4577 nw->n_cands = 0;
4578 nw->n_regs = 0;
4579 nw->cand_use_cost = zero_cost;
4580 nw->cand_cost = 0;
4581 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4582 nw->cost = zero_cost;
4584 return nw;
4587 /* Free memory occupied by the set IVS. */
4589 static void
4590 iv_ca_free (struct iv_ca **ivs)
4592 free ((*ivs)->cand_for_use);
4593 free ((*ivs)->n_cand_uses);
4594 BITMAP_FREE ((*ivs)->cands);
4595 free ((*ivs)->n_invariant_uses);
4596 free (*ivs);
4597 *ivs = NULL;
4600 /* Dumps IVS to FILE. */
4602 static void
4603 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4605 const char *pref = " invariants ";
4606 unsigned i;
4607 comp_cost cost = iv_ca_cost (ivs);
4609 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4610 bitmap_print (file, ivs->cands, " candidates ","\n");
4612 for (i = 1; i <= data->max_inv_id; i++)
4613 if (ivs->n_invariant_uses[i])
4615 fprintf (file, "%s%d", pref, i);
4616 pref = ", ";
4618 fprintf (file, "\n");
4621 /* Try changing the candidate in IVS to CAND for each use. Return the cost
4622 of the new set, and store the differences in DELTA. The number of
4623 induction variables in the new set is stored in N_IVS. */
4625 static comp_cost
4626 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4627 struct iv_cand *cand, struct iv_ca_delta **delta,
4628 unsigned *n_ivs)
4630 unsigned i;
4631 comp_cost cost;
4632 struct iv_use *use;
4633 struct cost_pair *old_cp, *new_cp;
4635 *delta = NULL;
4636 for (i = 0; i < ivs->upto; i++)
4638 use = iv_use (data, i);
4639 old_cp = iv_ca_cand_for_use (ivs, use);
4641 if (old_cp
4642 && old_cp->cand == cand)
4643 continue;
4645 new_cp = get_use_iv_cost (data, use, cand);
4646 if (!new_cp)
4647 continue;
4649 if (!iv_ca_has_deps (ivs, new_cp))
4650 continue;
4652 if (!cheaper_cost_pair (new_cp, old_cp))
4653 continue;
4655 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4658 iv_ca_delta_commit (data, ivs, *delta, true);
4659 cost = iv_ca_cost (ivs);
4660 if (n_ivs)
4661 *n_ivs = iv_ca_n_cands (ivs);
4662 iv_ca_delta_commit (data, ivs, *delta, false);
4664 return cost;
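/* A minimal sketch of how callers use iv_ca_extend (hypothetical caller
   code; the real callers are try_add_cand_for and try_improve_iv_set):

     acost = iv_ca_extend (data, ivs, cand, &delta, NULL);
     if (compare_costs (acost, iv_ca_cost (ivs)) < 0)
       iv_ca_delta_commit (data, ivs, delta, true);
     iv_ca_delta_free (&delta);

   Since the delta is committed only temporarily above, IVS is left
   unchanged unless the caller decides the new cost is an improvement.  */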
4667 /* Try narrowing set IVS by removing CAND. Return the cost of
4668 the new set and store the differences in DELTA. */
4670 static comp_cost
4671 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4672 struct iv_cand *cand, struct iv_ca_delta **delta)
4674 unsigned i, ci;
4675 struct iv_use *use;
4676 struct cost_pair *old_cp, *new_cp, *cp;
4677 bitmap_iterator bi;
4678 struct iv_cand *cnd;
4679 comp_cost cost;
4681 *delta = NULL;
4682 for (i = 0; i < n_iv_uses (data); i++)
4684 use = iv_use (data, i);
4686 old_cp = iv_ca_cand_for_use (ivs, use);
4687 if (old_cp->cand != cand)
4688 continue;
4690 new_cp = NULL;
4692 if (data->consider_all_candidates)
4694 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4696 if (ci == cand->id)
4697 continue;
4699 cnd = iv_cand (data, ci);
4701 cp = get_use_iv_cost (data, use, cnd);
4702 if (!cp)
4703 continue;
4704 if (!iv_ca_has_deps (ivs, cp))
4705 continue;
4707 if (!cheaper_cost_pair (cp, new_cp))
4708 continue;
4710 new_cp = cp;
4713 else
4715 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4717 if (ci == cand->id)
4718 continue;
4720 cnd = iv_cand (data, ci);
4722 cp = get_use_iv_cost (data, use, cnd);
4723 if (!cp)
4724 continue;
4725 if (!iv_ca_has_deps (ivs, cp))
4726 continue;
4728 if (!cheaper_cost_pair (cp, new_cp))
4729 continue;
4731 new_cp = cp;
4735 if (!new_cp)
4737 iv_ca_delta_free (delta);
4738 return infinite_cost;
4741 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4744 iv_ca_delta_commit (data, ivs, *delta, true);
4745 cost = iv_ca_cost (ivs);
4746 iv_ca_delta_commit (data, ivs, *delta, false);
4748 return cost;
4751 /* Try optimizing the set of candidates IVS by removing candidates other
4752 than EXCEPT_CAND from it. Return the cost of the new set, and store the
4753 differences in DELTA. */
4755 static comp_cost
4756 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4757 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4759 bitmap_iterator bi;
4760 struct iv_ca_delta *act_delta, *best_delta;
4761 unsigned i;
4762 comp_cost best_cost, acost;
4763 struct iv_cand *cand;
4765 best_delta = NULL;
4766 best_cost = iv_ca_cost (ivs);
4768 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4770 cand = iv_cand (data, i);
4772 if (cand == except_cand)
4773 continue;
4775 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4777 if (compare_costs (acost, best_cost) < 0)
4779 best_cost = acost;
4780 iv_ca_delta_free (&best_delta);
4781 best_delta = act_delta;
4783 else
4784 iv_ca_delta_free (&act_delta);
4787 if (!best_delta)
4789 *delta = NULL;
4790 return best_cost;
4793 /* Recurse to possibly remove other unnecessary ivs. */
4794 iv_ca_delta_commit (data, ivs, best_delta, true);
4795 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4796 iv_ca_delta_commit (data, ivs, best_delta, false);
4797 *delta = iv_ca_delta_join (best_delta, *delta);
4798 return best_cost;
4801 /* Tries to extend the set IVS in the best possible way in order
4802 to express USE. */
4804 static bool
4805 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4806 struct iv_use *use)
4808 comp_cost best_cost, act_cost;
4809 unsigned i;
4810 bitmap_iterator bi;
4811 struct iv_cand *cand;
4812 struct iv_ca_delta *best_delta = NULL, *act_delta;
4813 struct cost_pair *cp;
4815 iv_ca_add_use (data, ivs, use);
4816 best_cost = iv_ca_cost (ivs);
4818 cp = iv_ca_cand_for_use (ivs, use);
4819 if (cp)
4821 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4822 iv_ca_set_no_cp (data, ivs, use);
4825 /* First try important candidates not based on any memory object. Only if
4826 this fails do we try the specific ones. Rationale -- in loops with many
4827 variables the best choice often is to use just one generic biv. If we
4828 added many ivs specific to the uses here, the optimization algorithm
4829 would later be likely to get stuck in a local minimum, thus causing us to
4830 create too many ivs. Going from few ivs to more seems more likely to
4831 succeed -- starting from few ivs, replacing an expensive use by a
4832 specific iv should always be a win. */
4833 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4835 cand = iv_cand (data, i);
4837 if (cand->iv->base_object != NULL_TREE)
4838 continue;
4840 if (iv_ca_cand_used_p (ivs, cand))
4841 continue;
4843 cp = get_use_iv_cost (data, use, cand);
4844 if (!cp)
4845 continue;
4847 iv_ca_set_cp (data, ivs, use, cp);
4848 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4849 iv_ca_set_no_cp (data, ivs, use);
4850 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4852 if (compare_costs (act_cost, best_cost) < 0)
4854 best_cost = act_cost;
4856 iv_ca_delta_free (&best_delta);
4857 best_delta = act_delta;
4859 else
4860 iv_ca_delta_free (&act_delta);
4863 if (infinite_cost_p (best_cost))
4865 for (i = 0; i < use->n_map_members; i++)
4867 cp = use->cost_map + i;
4868 cand = cp->cand;
4869 if (!cand)
4870 continue;
4872 /* Already tried this. */
4873 if (cand->important && cand->iv->base_object == NULL_TREE)
4874 continue;
4876 if (iv_ca_cand_used_p (ivs, cand))
4877 continue;
4879 act_delta = NULL;
4880 iv_ca_set_cp (data, ivs, use, cp);
4881 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4882 iv_ca_set_no_cp (data, ivs, use);
4883 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4884 cp, act_delta);
4886 if (compare_costs (act_cost, best_cost) < 0)
4888 best_cost = act_cost;
4890 if (best_delta)
4891 iv_ca_delta_free (&best_delta);
4892 best_delta = act_delta;
4894 else
4895 iv_ca_delta_free (&act_delta);
4899 iv_ca_delta_commit (data, ivs, best_delta, true);
4900 iv_ca_delta_free (&best_delta);
4902 return !infinite_cost_p (best_cost);
4905 /* Finds an initial assignment of candidates to uses. */
4907 static struct iv_ca *
4908 get_initial_solution (struct ivopts_data *data)
4910 struct iv_ca *ivs = iv_ca_new (data);
4911 unsigned i;
4913 for (i = 0; i < n_iv_uses (data); i++)
4914 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4916 iv_ca_free (&ivs);
4917 return NULL;
4920 return ivs;
4923 /* Tries to improve set of induction variables IVS. */
4925 static bool
4926 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4928 unsigned i, n_ivs;
4929 comp_cost acost, best_cost = iv_ca_cost (ivs);
4930 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4931 struct iv_cand *cand;
4933 /* Try extending the set of induction variables by one. */
4934 for (i = 0; i < n_iv_cands (data); i++)
4936 cand = iv_cand (data, i);
4938 if (iv_ca_cand_used_p (ivs, cand))
4939 continue;
4941 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4942 if (!act_delta)
4943 continue;
4945 /* If we successfully added the candidate and the set is small enough,
4946 try optimizing it by removing other candidates. */
4947 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4949 iv_ca_delta_commit (data, ivs, act_delta, true);
4950 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4951 iv_ca_delta_commit (data, ivs, act_delta, false);
4952 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4955 if (compare_costs (acost, best_cost) < 0)
4957 best_cost = acost;
4958 iv_ca_delta_free (&best_delta);
4959 best_delta = act_delta;
4961 else
4962 iv_ca_delta_free (&act_delta);
4965 if (!best_delta)
4967 /* Try removing the candidates from the set instead. */
4968 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4970 /* Nothing more we can do. */
4971 if (!best_delta)
4972 return false;
4975 iv_ca_delta_commit (data, ivs, best_delta, true);
4976 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4977 iv_ca_delta_free (&best_delta);
4978 return true;
4981 /* Attempts to find the optimal set of induction variables. We use a simple
4982 greedy heuristic: we try to replace at most one candidate in the selected
4983 solution and remove the unused ivs while this improves the cost. */
4985 static struct iv_ca *
4986 find_optimal_iv_set (struct ivopts_data *data)
4988 unsigned i;
4989 struct iv_ca *set;
4990 struct iv_use *use;
4992 /* Get the initial solution. */
4993 set = get_initial_solution (data);
4994 if (!set)
4996 if (dump_file && (dump_flags & TDF_DETAILS))
4997 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4998 return NULL;
5001 if (dump_file && (dump_flags & TDF_DETAILS))
5003 fprintf (dump_file, "Initial set of candidates:\n");
5004 iv_ca_dump (data, dump_file, set);
5007 while (try_improve_iv_set (data, set))
5009 if (dump_file && (dump_flags & TDF_DETAILS))
5011 fprintf (dump_file, "Improved to:\n");
5012 iv_ca_dump (data, dump_file, set);
5016 if (dump_file && (dump_flags & TDF_DETAILS))
5018 comp_cost cost = iv_ca_cost (set);
5019 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5022 for (i = 0; i < n_iv_uses (data); i++)
5024 use = iv_use (data, i);
5025 use->selected = iv_ca_cand_for_use (set, use)->cand;
5028 return set;
5031 /* Creates a new induction variable corresponding to CAND. */
5033 static void
5034 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5036 gimple_stmt_iterator incr_pos;
5037 tree base;
5038 bool after = false;
5040 if (!cand->iv)
5041 return;
5043 switch (cand->pos)
5045 case IP_NORMAL:
5046 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5047 break;
5049 case IP_END:
5050 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5051 after = true;
5052 break;
5054 case IP_ORIGINAL:
5055 /* Mark that the iv is preserved. */
5056 name_info (data, cand->var_before)->preserve_biv = true;
5057 name_info (data, cand->var_after)->preserve_biv = true;
5059 /* Rewrite the increment so that it uses var_before directly. */
5060 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5062 return;
5065 gimple_add_tmp_var (cand->var_before);
5066 add_referenced_var (cand->var_before);
5068 base = unshare_expr (cand->iv->base);
5070 create_iv (base, unshare_expr (cand->iv->step),
5071 cand->var_before, data->current_loop,
5072 &incr_pos, after, &cand->var_before, &cand->var_after);
5075 /* Creates new induction variables described in SET. */
5077 static void
5078 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5080 unsigned i;
5081 struct iv_cand *cand;
5082 bitmap_iterator bi;
5084 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5086 cand = iv_cand (data, i);
5087 create_new_iv (data, cand);
5091 /* Returns the phi-node in BB with result RESULT. */
5093 static gimple
5094 get_phi_with_result (basic_block bb, tree result)
5096 gimple_stmt_iterator i = gsi_start_phis (bb);
5098 for (; !gsi_end_p (i); gsi_next (&i))
5099 if (gimple_phi_result (gsi_stmt (i)) == result)
5100 return gsi_stmt (i);
5102 gcc_unreachable ();
5103 return NULL;
5107 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5108 is true, also remove the ssa name defined by the statement. */
5110 static void
5111 remove_statement (gimple stmt, bool including_defined_name)
5113 if (gimple_code (stmt) == GIMPLE_PHI)
5115 gimple bb_phi = get_phi_with_result (gimple_bb (stmt),
5116 gimple_phi_result (stmt));
5117 gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi);
5118 remove_phi_node (&bsi, including_defined_name);
5120 else
5122 gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
5123 gsi_remove (&bsi, true);
5124 release_defs (stmt);
5128 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
5129 using candidate CAND. */
5131 static void
5132 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5133 struct iv_use *use, struct iv_cand *cand)
5135 tree comp;
5136 tree op, tgt;
5137 gimple ass;
5138 gimple_stmt_iterator bsi;
5140 /* An important special case -- if we are asked to express the value of
5141 the original iv by itself, just exit; there is no need to
5142 introduce a new computation (that might also need casting the
5143 variable to unsigned and back). */
5144 if (cand->pos == IP_ORIGINAL
5145 && cand->incremented_at == use->stmt)
5147 tree step, ctype, utype;
5148 enum tree_code incr_code = PLUS_EXPR, old_code;
5150 gcc_assert (is_gimple_assign (use->stmt));
5151 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5153 step = cand->iv->step;
5154 ctype = TREE_TYPE (step);
5155 utype = TREE_TYPE (cand->var_after);
5156 if (TREE_CODE (step) == NEGATE_EXPR)
5158 incr_code = MINUS_EXPR;
5159 step = TREE_OPERAND (step, 0);
5162 /* Check whether we may leave the computation unchanged.
5163 This is the case only if it does not rely on other
5164 computations in the loop -- otherwise, the computation
5165 we rely upon may be removed in remove_unused_ivs,
5166 thus leading to an ICE. */
5167 old_code = gimple_assign_rhs_code (use->stmt);
5168 if (old_code == PLUS_EXPR
5169 || old_code == MINUS_EXPR
5170 || old_code == POINTER_PLUS_EXPR)
5172 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5173 op = gimple_assign_rhs2 (use->stmt);
5174 else if (old_code != MINUS_EXPR
5175 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5176 op = gimple_assign_rhs1 (use->stmt);
5177 else
5178 op = NULL_TREE;
5180 else
5181 op = NULL_TREE;
5183 if (op
5184 && (TREE_CODE (op) == INTEGER_CST
5185 || operand_equal_p (op, step, 0)))
5186 return;
5188 /* Otherwise, add the necessary computations to express
5189 the iv. */
5190 op = fold_convert (ctype, cand->var_before);
5191 comp = fold_convert (utype,
5192 build2 (incr_code, ctype, op,
5193 unshare_expr (step)));
5195 else
5197 comp = get_computation (data->current_loop, use, cand);
5198 gcc_assert (comp != NULL_TREE);
5201 switch (gimple_code (use->stmt))
5203 case GIMPLE_PHI:
5204 tgt = PHI_RESULT (use->stmt);
5206 /* If we should keep the biv, do not replace it. */
5207 if (name_info (data, tgt)->preserve_biv)
5208 return;
5210 bsi = gsi_after_labels (gimple_bb (use->stmt));
5211 break;
5213 case GIMPLE_ASSIGN:
5214 tgt = gimple_assign_lhs (use->stmt);
5215 bsi = gsi_for_stmt (use->stmt);
5216 break;
5218 default:
5219 gcc_unreachable ();
5222 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5223 true, GSI_SAME_STMT);
5225 if (gimple_code (use->stmt) == GIMPLE_PHI)
5227 ass = gimple_build_assign (tgt, op);
5228 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5229 remove_statement (use->stmt, false);
5231 else
5233 gimple_assign_set_rhs_from_tree (&bsi, op);
5234 use->stmt = gsi_stmt (bsi);
5238 /* Replaces the ssa name in index IDX by its underlying variable. Callback
5239 for for_each_index. */
5241 static bool
5242 idx_remove_ssa_names (tree base, tree *idx,
5243 void *data ATTRIBUTE_UNUSED)
5245 tree *op;
5247 if (TREE_CODE (*idx) == SSA_NAME)
5248 *idx = SSA_NAME_VAR (*idx);
5250 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5252 op = &TREE_OPERAND (base, 2);
5253 if (*op
5254 && TREE_CODE (*op) == SSA_NAME)
5255 *op = SSA_NAME_VAR (*op);
5256 op = &TREE_OPERAND (base, 3);
5257 if (*op
5258 && TREE_CODE (*op) == SSA_NAME)
5259 *op = SSA_NAME_VAR (*op);
5262 return true;
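/* For example, an index such as the ssa name i_7 (whose underlying
   variable is i) is replaced by i itself, so the copy produced by
   unshare_and_remove_ssa_names below refers only to the underlying
   variables.  */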
5265 /* Unshares REF and replaces the ssa names inside it by their underlying
variables. */
5267 static tree
5268 unshare_and_remove_ssa_names (tree ref)
5270 ref = unshare_expr (ref);
5271 for_each_index (&ref, idx_remove_ssa_names, NULL);
5273 return ref;
5276 /* Copies the reference information from OLD_REF to NEW_REF. */
5278 static void
5279 copy_ref_info (tree new_ref, tree old_ref)
5281 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5282 copy_mem_ref_info (new_ref, old_ref);
5283 else
5284 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5287 /* Rewrites USE (an address that is an iv) using candidate CAND. */
5289 static void
5290 rewrite_use_address (struct ivopts_data *data,
5291 struct iv_use *use, struct iv_cand *cand)
5293 aff_tree aff;
5294 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5295 tree ref;
5296 bool ok;
5298 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5299 gcc_assert (ok);
5300 unshare_aff_combination (&aff);
5302 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
5303 copy_ref_info (ref, *use->op_p);
5304 *use->op_p = ref;
5307 /* Rewrites USE (a condition in which one of the arguments is an iv) using
5308 candidate CAND. */
5310 static void
5311 rewrite_use_compare (struct ivopts_data *data,
5312 struct iv_use *use, struct iv_cand *cand)
5314 tree comp, *var_p, op, bound;
5315 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5316 enum tree_code compare;
5317 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5318 bool ok;
5320 bound = cp->value;
5321 if (bound)
5323 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5324 tree var_type = TREE_TYPE (var);
5325 gimple_seq stmts;
5327 compare = iv_elimination_compare (data, use);
5328 bound = unshare_expr (fold_convert (var_type, bound));
5329 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5330 if (stmts)
5331 gsi_insert_seq_on_edge_immediate (
5332 loop_preheader_edge (data->current_loop),
5333 stmts);
5335 gimple_cond_set_lhs (use->stmt, var);
5336 gimple_cond_set_code (use->stmt, compare);
5337 gimple_cond_set_rhs (use->stmt, op);
5338 return;
5341 /* The induction variable elimination failed; just express the original
5342 giv. */
5343 comp = get_computation (data->current_loop, use, cand);
5344 gcc_assert (comp != NULL_TREE);
5346 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5347 gcc_assert (ok);
5349 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5350 true, GSI_SAME_STMT);
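/* As an illustration (a made-up loop): for an exit test i_1 < 100 in
   which the iv can be eliminated in favor of a pointer candidate p,
   the code above may turn the condition into a comparison of p against
   a precomputed bound, e.g. p_2 != &a[100], with the computation of
   the bound inserted on the loop preheader edge.  */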
5353 /* Rewrites USE using candidate CAND. */
5355 static void
5356 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5358 switch (use->type)
5360 case USE_NONLINEAR_EXPR:
5361 rewrite_use_nonlinear_expr (data, use, cand);
5362 break;
5364 case USE_ADDRESS:
5365 rewrite_use_address (data, use, cand);
5366 break;
5368 case USE_COMPARE:
5369 rewrite_use_compare (data, use, cand);
5370 break;
5372 default:
5373 gcc_unreachable ();
5376 update_stmt (use->stmt);
5379 /* Rewrite the uses using the selected induction variables. */
5381 static void
5382 rewrite_uses (struct ivopts_data *data)
5384 unsigned i;
5385 struct iv_cand *cand;
5386 struct iv_use *use;
5388 for (i = 0; i < n_iv_uses (data); i++)
5390 use = iv_use (data, i);
5391 cand = use->selected;
5392 gcc_assert (cand);
5394 rewrite_use (data, use, cand);
5398 /* Removes the ivs that are not used after rewriting. */
5400 static void
5401 remove_unused_ivs (struct ivopts_data *data)
5403 unsigned j;
5404 bitmap_iterator bi;
5406 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5408 struct version_info *info;
5410 info = ver_info (data, j);
5411 if (info->iv
5412 && !integer_zerop (info->iv->step)
5413 && !info->inv_id
5414 && !info->iv->have_use_for
5415 && !info->preserve_biv)
5416 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5420 /* Frees data allocated by the optimization of a single loop. */
5422 static void
5423 free_loop_data (struct ivopts_data *data)
5425 unsigned i, j;
5426 bitmap_iterator bi;
5427 tree obj;
5429 if (data->niters)
5431 pointer_map_destroy (data->niters);
5432 data->niters = NULL;
5435 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5437 struct version_info *info;
5439 info = ver_info (data, i);
5440 if (info->iv)
5441 free (info->iv);
5442 info->iv = NULL;
5443 info->has_nonlin_use = false;
5444 info->preserve_biv = false;
5445 info->inv_id = 0;
5447 bitmap_clear (data->relevant);
5448 bitmap_clear (data->important_candidates);
5450 for (i = 0; i < n_iv_uses (data); i++)
5452 struct iv_use *use = iv_use (data, i);
5454 free (use->iv);
5455 BITMAP_FREE (use->related_cands);
5456 for (j = 0; j < use->n_map_members; j++)
5457 if (use->cost_map[j].depends_on)
5458 BITMAP_FREE (use->cost_map[j].depends_on);
5459 free (use->cost_map);
5460 free (use);
5462 VEC_truncate (iv_use_p, data->iv_uses, 0);
5464 for (i = 0; i < n_iv_cands (data); i++)
5466 struct iv_cand *cand = iv_cand (data, i);
5468 if (cand->iv)
5469 free (cand->iv);
5470 if (cand->depends_on)
5471 BITMAP_FREE (cand->depends_on);
5472 free (cand);
5474 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5476 if (data->version_info_size < num_ssa_names)
5478 data->version_info_size = 2 * num_ssa_names;
5479 free (data->version_info);
5480 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5483 data->max_inv_id = 0;
5485 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5486 SET_DECL_RTL (obj, NULL_RTX);
5488 VEC_truncate (tree, decl_rtl_to_reset, 0);
5491 /* Finalizes the data structures used by the iv optimization pass. */
5494 static void
5495 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5497 free_loop_data (data);
5498 free (data->version_info);
5499 BITMAP_FREE (data->relevant);
5500 BITMAP_FREE (data->important_candidates);
5502 VEC_free (tree, heap, decl_rtl_to_reset);
5503 VEC_free (iv_use_p, heap, data->iv_uses);
5504 VEC_free (iv_cand_p, heap, data->iv_candidates);
5507 /* Optimizes the LOOP. Returns true if anything changed. */
5509 static bool
5510 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5512 bool changed = false;
5513 struct iv_ca *iv_ca;
5514 edge exit;
5516 gcc_assert (!data->niters);
5517 data->current_loop = loop;
5518 data->speed = optimize_loop_for_speed_p (loop);
5520 if (dump_file && (dump_flags & TDF_DETAILS))
5522 fprintf (dump_file, "Processing loop %d\n", loop->num);
5524 exit = single_dom_exit (loop);
5525 if (exit)
5527 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5528 exit->src->index, exit->dest->index);
5529 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5530 fprintf (dump_file, "\n");
5533 fprintf (dump_file, "\n");
5536 /* For each ssa name, determine whether it behaves as an induction variable
5537 in some loop. */
5538 if (!find_induction_variables (data))
5539 goto finish;
5541 /* Finds interesting uses (item 1). */
5542 find_interesting_uses (data);
5543 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5544 goto finish;
5546 /* Finds candidates for the induction variables (item 2). */
5547 find_iv_candidates (data);
5549 /* Calculates the costs (item 3, part 1). */
5550 determine_use_iv_costs (data);
5551 determine_iv_costs (data);
5552 determine_set_costs (data);
5554 /* Find the optimal set of induction variables (item 3, part 2). */
5555 iv_ca = find_optimal_iv_set (data);
5556 if (!iv_ca)
5557 goto finish;
5558 changed = true;
5560 /* Create the new induction variables (item 4, part 1). */
5561 create_new_ivs (data, iv_ca);
5562 iv_ca_free (&iv_ca);
5564 /* Rewrite the uses (item 4, part 2). */
5565 rewrite_uses (data);
5567 /* Remove the ivs that are unused after rewriting. */
5568 remove_unused_ivs (data);
5570 /* We have changed the structure of induction variables; it might happen
5571 that definitions in the scev database refer to some of them that were
5572 eliminated. */
5573 scev_reset ();
5575 finish:
5576 free_loop_data (data);
5578 return changed;
5581 /* Main entry point. Optimizes induction variables in loops. */
5583 void
5584 tree_ssa_iv_optimize (void)
5586 struct loop *loop;
5587 struct ivopts_data data;
5588 loop_iterator li;
5590 tree_ssa_iv_optimize_init (&data);
5592 /* Optimize the loops starting with the innermost ones. */
5593 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5595 if (dump_file && (dump_flags & TDF_DETAILS))
5596 flow_loop_dump (loop, dump_file, NULL, 1);
5598 tree_ssa_iv_optimize_loop (&data, loop);
5601 tree_ssa_iv_optimize_finalize (&data);