/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
   Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
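
/* As a concrete illustration (assuming a 4-byte element type and that the
   cost model favors the transformation), a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   contains the address use a + 4 * (size_t) i, and the pass may
   strength-reduce it to a pointer induction variable, producing code
   equivalent to

     for (p = a; p < a + n; p++)
       *p = 0;

   eliminating the original counter i when it has no other uses.  */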
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  unsigned cost;        /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
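
/* The three bounds above correspond to the --param knobs
   iv-consider-all-candidates-bound, iv-max-considered-uses and
   iv-always-prune-cand-set-bound; their default values are defined in
   params.def.  */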

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);
  gimple_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
    {
      if (gsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (gsi_stmt (bsi) == stmt)
        return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns a tree describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && integer_zerop (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        niter = desc.niter;
      else
        niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns a tree describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */
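
/* E.g. for EXPR = &a[i] the result is (void *) &a (the ADDR_EXPR case
   below), while an integer constant, which points to no particular object,
   yields NULL_TREE.  */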

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */
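
/* E.g. in

     for (i = 0; i < n; i++)

   the loop header contains the phi node i_1 = PHI <0 (preheader), i_2 (latch)>,
   which defines a biv with base 0 and step 1.  */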

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = fold_convert (sizetype, step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not already reset by the caller, so that
     it is not shown in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
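
/* For example, TOP = i_1 * 12 and BOT = i_1 give MUL = 12, and TOP = 12,
   BOT = 4 give MUL = 3; TOP = i_1 + 1 with BOT = i_1 fails, since the
   quotient is not constant.  */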

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      double_int mul;
      tree al = build_int_cst (TREE_TYPE (step),
                               GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);

      if (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0)
        return true;

      if (!constant_multiple_of (step, al, &mul))
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == INDIRECT_REF)
            *ref = fold_indirect_ref (*ref);
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */
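
/* E.g. for EXPR = p_1 + 8, OFFSET is set to 8 and p_1 is returned.  */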

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      /* Don't convert the base to the generic type for pointers as the generic
         type is an integer type with the same size as the pointer type.  */
      if (type != orig_type && !POINTER_TYPE_P (orig_type))
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0))
        break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);
}

/* Adds a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}
2206 /* Adds standard iv candidates. */
2208 static void
2209 add_standard_iv_candidates (struct ivopts_data *data)
2211 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2213 /* The same for a double-integer type if it is still fast enough. */
2214 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2215 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
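/* For illustration only (compiled out; the target parameters are assumptions,
   not taken from any machine description): a sketch of which standard
   candidates the code above would add.  */
#if 0
static void
add_standard_iv_candidates_example (struct ivopts_data *data)
{
  /* With INT_TYPE_SIZE == 32, a 32-bit "0 + 1 * iteration" candidate is
     always added...  */
  add_standard_iv_candidates_for_size (data, 32);

  /* ...and with BITS_PER_WORD == 64 the 64-bit variant is added as well,
     since double-width increments are still single-word operations.  */
  add_standard_iv_candidates_for_size (data, 64);
}
#endif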
2219 /* Adds candidates based on the old induction variable IV. */
2221 static void
2222 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2224 gimple phi;
2225 tree def;
2226 struct iv_cand *cand;
2228 add_candidate (data, iv->base, iv->step, true, NULL);
2230 /* The same, but with initial value zero. */
2231 add_candidate (data,
2232 build_int_cst (TREE_TYPE (iv->base), 0),
2233 iv->step, true, NULL);
2235 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2236 if (gimple_code (phi) == GIMPLE_PHI)
2238 /* Additionally record the possibility of leaving the original iv
2239 untouched. */
2240 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2241 cand = add_candidate_1 (data,
2242 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2243 SSA_NAME_DEF_STMT (def));
2244 cand->var_before = iv->ssa_name;
2245 cand->var_after = def;
2249 /* Adds candidates based on the old induction variables. */
2251 static void
2252 add_old_ivs_candidates (struct ivopts_data *data)
2254 unsigned i;
2255 struct iv *iv;
2256 bitmap_iterator bi;
2258 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2260 iv = ver_info (data, i)->iv;
2261 if (iv && iv->biv_p && !integer_zerop (iv->step))
2262 add_old_iv_candidates (data, iv);
2266 /* Adds candidates based on the value of the induction variable IV and USE. */
2268 static void
2269 add_iv_value_candidates (struct ivopts_data *data,
2270 struct iv *iv, struct iv_use *use)
2272 unsigned HOST_WIDE_INT offset;
2273 tree base;
2274 tree basetype;
2276 add_candidate (data, iv->base, iv->step, false, use);
2278 /* The same, but with initial value zero. Make such a variable important,
2279 since it is generic enough that many uses may be based
2280 on it. */
2281 basetype = TREE_TYPE (iv->base);
2282 if (POINTER_TYPE_P (basetype))
2283 basetype = sizetype;
2284 add_candidate (data, build_int_cst (basetype, 0),
2285 iv->step, true, use);
2287 /* Third, try removing the constant offset. Make sure to add a candidate
2288 even for &a[0] vs. (T *)&a. */
2289 base = strip_offset (iv->base, &offset);
2290 if (offset
2291 || base != iv->base)
2292 add_candidate (data, base, iv->step, false, use);
2295 /* Adds candidates based on the uses. */
2297 static void
2298 add_derived_ivs_candidates (struct ivopts_data *data)
2300 unsigned i;
2302 for (i = 0; i < n_iv_uses (data); i++)
2304 struct iv_use *use = iv_use (data, i);
2306 if (!use)
2307 continue;
2309 switch (use->type)
2311 case USE_NONLINEAR_EXPR:
2312 case USE_COMPARE:
2313 case USE_ADDRESS:
2314 /* Just add the ivs based on the value of the iv used here. */
2315 add_iv_value_candidates (data, use->iv, use);
2316 break;
2318 default:
2319 gcc_unreachable ();
2324 /* Record important candidates and add them to related_cands bitmaps
2325 if needed. */
2327 static void
2328 record_important_candidates (struct ivopts_data *data)
2330 unsigned i;
2331 struct iv_use *use;
2333 for (i = 0; i < n_iv_cands (data); i++)
2335 struct iv_cand *cand = iv_cand (data, i);
2337 if (cand->important)
2338 bitmap_set_bit (data->important_candidates, i);
2341 data->consider_all_candidates = (n_iv_cands (data)
2342 <= CONSIDER_ALL_CANDIDATES_BOUND);
2344 if (data->consider_all_candidates)
2346 /* We will not need "related_cands" bitmaps in this case,
2347 so release them to decrease peak memory consumption. */
2348 for (i = 0; i < n_iv_uses (data); i++)
2350 use = iv_use (data, i);
2351 BITMAP_FREE (use->related_cands);
2354 else
2356 /* Add important candidates to the related_cands bitmaps. */
2357 for (i = 0; i < n_iv_uses (data); i++)
2358 bitmap_ior_into (iv_use (data, i)->related_cands,
2359 data->important_candidates);
2363 /* Finds the candidates for the induction variables. */
2365 static void
2366 find_iv_candidates (struct ivopts_data *data)
2368 /* Add commonly used ivs. */
2369 add_standard_iv_candidates (data);
2371 /* Add old induction variables. */
2372 add_old_ivs_candidates (data);
2374 /* Add induction variables derived from uses. */
2375 add_derived_ivs_candidates (data);
2377 /* Record the important candidates. */
2378 record_important_candidates (data);
2381 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2382 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2383 we allocate a small power-of-two-sized map for every use. */
2385 static void
2386 alloc_use_cost_map (struct ivopts_data *data)
2388 unsigned i, size, s, j;
2390 for (i = 0; i < n_iv_uses (data); i++)
2392 struct iv_use *use = iv_use (data, i);
2393 bitmap_iterator bi;
2395 if (data->consider_all_candidates)
2396 size = n_iv_cands (data);
2397 else
2399 s = 0;
2400 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2402 s++;
2405 /* Round up to the nearest power of two, so that taking the modulo by it is fast. */
2406 for (size = 1; size < s; size <<= 1)
2407 continue;
2410 use->n_map_members = size;
2411 use->cost_map = XCNEWVEC (struct cost_pair, size);
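/* For illustration only (compiled out): the rounding loop above computes the
   smallest power of two greater than or equal to s, so that the cost map
   code below can reduce candidate ids with a mask instead of a division.  */
#if 0
static unsigned
round_up_pow2_example (unsigned s)
{
  unsigned size;

  /* E.g. s == 5 yields size == 8, and then id % size == (id & 7),
     so 13 & 7 == 5 == 13 % 8.  For s == 0 the result is 1.  */
  for (size = 1; size < s; size <<= 1)
    continue;

  return size;
}
#endif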
2415 /* Returns a description of the computation cost of an expression whose
2416 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2418 static comp_cost
2419 new_cost (unsigned runtime, unsigned complexity)
2421 comp_cost cost;
2423 cost.cost = runtime;
2424 cost.complexity = complexity;
2426 return cost;
2429 /* Adds costs COST1 and COST2. */
2431 static comp_cost
2432 add_costs (comp_cost cost1, comp_cost cost2)
2434 cost1.cost += cost2.cost;
2435 cost1.complexity += cost2.complexity;
2437 return cost1;
2439 /* Subtracts cost COST2 from COST1. */
2441 static comp_cost
2442 sub_costs (comp_cost cost1, comp_cost cost2)
2444 cost1.cost -= cost2.cost;
2445 cost1.complexity -= cost2.complexity;
2447 return cost1;
2450 /* Returns a negative number if COST1 < COST2, a positive number if
2451 COST1 > COST2, and 0 if COST1 = COST2. */
2453 static int
2454 compare_costs (comp_cost cost1, comp_cost cost2)
2456 if (cost1.cost == cost2.cost)
2457 return cost1.complexity - cost2.complexity;
2459 return cost1.cost - cost2.cost;
2462 /* Returns true if COST is infinite. */
2464 static bool
2465 infinite_cost_p (comp_cost cost)
2467 return cost.cost == INFTY;
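/* For illustration only (compiled out; the numbers are made up): the
   comp_cost helpers above order costs by runtime first and use the
   complexity field only as a tie-breaker.  */
#if 0
static void
comp_cost_example (void)
{
  comp_cost a = new_cost (4, 1);
  comp_cost b = new_cost (4, 2);
  comp_cost c = new_cost (5, 0);

  gcc_assert (compare_costs (a, b) < 0); /* Equal cost, complexity decides.  */
  gcc_assert (compare_costs (b, c) < 0); /* Lower cost wins outright.  */
  gcc_assert (!infinite_cost_p (add_costs (a, b)));
}
#endif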
2470 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2471 on invariants DEPENDS_ON and that the value used in expressing it
2472 is VALUE. */
2474 static void
2475 set_use_iv_cost (struct ivopts_data *data,
2476 struct iv_use *use, struct iv_cand *cand,
2477 comp_cost cost, bitmap depends_on, tree value)
2479 unsigned i, s;
2481 if (infinite_cost_p (cost))
2483 BITMAP_FREE (depends_on);
2484 return;
2487 if (data->consider_all_candidates)
2489 use->cost_map[cand->id].cand = cand;
2490 use->cost_map[cand->id].cost = cost;
2491 use->cost_map[cand->id].depends_on = depends_on;
2492 use->cost_map[cand->id].value = value;
2493 return;
2496 /* n_map_members is a power of two, so this computes modulo. */
2497 s = cand->id & (use->n_map_members - 1);
2498 for (i = s; i < use->n_map_members; i++)
2499 if (!use->cost_map[i].cand)
2500 goto found;
2501 for (i = 0; i < s; i++)
2502 if (!use->cost_map[i].cand)
2503 goto found;
2505 gcc_unreachable ();
2507 found:
2508 use->cost_map[i].cand = cand;
2509 use->cost_map[i].cost = cost;
2510 use->cost_map[i].depends_on = depends_on;
2511 use->cost_map[i].value = value;
2514 /* Gets cost of (USE, CANDIDATE) pair. */
2516 static struct cost_pair *
2517 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2518 struct iv_cand *cand)
2520 unsigned i, s;
2521 struct cost_pair *ret;
2523 if (!cand)
2524 return NULL;
2526 if (data->consider_all_candidates)
2528 ret = use->cost_map + cand->id;
2529 if (!ret->cand)
2530 return NULL;
2532 return ret;
2535 /* n_map_members is a power of two, so this computes modulo. */
2536 s = cand->id & (use->n_map_members - 1);
2537 for (i = s; i < use->n_map_members; i++)
2538 if (use->cost_map[i].cand == cand)
2539 return use->cost_map + i;
2541 for (i = 0; i < s; i++)
2542 if (use->cost_map[i].cand == cand)
2543 return use->cost_map + i;
2545 return NULL;
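/* For illustration only (compiled out; slot contents are hypothetical):
   set_use_iv_cost and get_use_iv_cost above form a tiny open-addressed
   hash table.  With n_map_members == 8, candidate id 13 hashes to slot
   13 & 7 == 5; occupied slots are probed up to the end of the table and
   then from slot 0 back towards the home slot.  */
#if 0
static int
cost_map_probe_example (const struct cost_pair *map, unsigned n_map_members,
			const struct iv_cand *cand)
{
  unsigned i, s = cand->id & (n_map_members - 1);

  for (i = s; i < n_map_members; i++)
    if (map[i].cand == cand)
      return (int) i;
  for (i = 0; i < s; i++)
    if (map[i].cand == cand)
      return (int) i;

  return -1;
}
#endif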
2548 /* Returns an estimate of the cost of computing SEQ. */
2550 static unsigned
2551 seq_cost (rtx seq, bool speed)
2553 unsigned cost = 0;
2554 rtx set;
2556 for (; seq; seq = NEXT_INSN (seq))
2558 set = single_set (seq);
2559 if (set)
2560 cost += rtx_cost (set, SET, speed);
2561 else
2562 cost++;
2565 return cost;
2568 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2569 static rtx
2570 produce_memory_decl_rtl (tree obj, int *regno)
2572 rtx x;
2574 gcc_assert (obj);
2575 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2577 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2578 x = gen_rtx_SYMBOL_REF (Pmode, name);
2579 SET_SYMBOL_REF_DECL (x, obj);
2580 x = gen_rtx_MEM (DECL_MODE (obj), x);
2581 targetm.encode_section_info (obj, x, true);
2583 else
2585 x = gen_raw_REG (Pmode, (*regno)++);
2586 x = gen_rtx_MEM (DECL_MODE (obj), x);
2589 return x;
2592 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2593 walk_tree. DATA contains the actual fake register number. */
2595 static tree
2596 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2598 tree obj = NULL_TREE;
2599 rtx x = NULL_RTX;
2600 int *regno = (int *) data;
2602 switch (TREE_CODE (*expr_p))
2604 case ADDR_EXPR:
2605 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2606 handled_component_p (*expr_p);
2607 expr_p = &TREE_OPERAND (*expr_p, 0))
2608 continue;
2609 obj = *expr_p;
2610 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2611 x = produce_memory_decl_rtl (obj, regno);
2612 break;
2614 case SSA_NAME:
2615 *ws = 0;
2616 obj = SSA_NAME_VAR (*expr_p);
2617 if (!DECL_RTL_SET_P (obj))
2618 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2619 break;
2621 case VAR_DECL:
2622 case PARM_DECL:
2623 case RESULT_DECL:
2624 *ws = 0;
2625 obj = *expr_p;
2627 if (DECL_RTL_SET_P (obj))
2628 break;
2630 if (DECL_MODE (obj) == BLKmode)
2631 x = produce_memory_decl_rtl (obj, regno);
2632 else
2633 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2635 break;
2637 default:
2638 break;
2641 if (x)
2643 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2644 SET_DECL_RTL (obj, x);
2647 return NULL_TREE;
2650 /* Determines cost of the computation of EXPR. */
2652 static unsigned
2653 computation_cost (tree expr, bool speed)
2655 rtx seq, rslt;
2656 tree type = TREE_TYPE (expr);
2657 unsigned cost;
2658 /* Avoid using hard regs in ways which may be unsupported. */
2659 int regno = LAST_VIRTUAL_REGISTER + 1;
2660 enum function_frequency real_frequency = cfun->function_frequency;
2662 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2663 crtl->maybe_hot_insn_p = speed;
2664 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2665 start_sequence ();
2666 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2667 seq = get_insns ();
2668 end_sequence ();
2669 default_rtl_profile ();
2670 cfun->function_frequency = real_frequency;
2672 cost = seq_cost (seq, speed);
2673 if (MEM_P (rslt))
2674 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
2676 return cost;
2679 /* Returns variable containing the value of candidate CAND at statement STMT. */
2681 static tree
2682 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2684 if (stmt_after_increment (loop, cand, stmt))
2685 return cand->var_after;
2686 else
2687 return cand->var_before;
2690 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2691 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2693 int
2694 tree_int_cst_sign_bit (const_tree t)
2696 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2697 unsigned HOST_WIDE_INT w;
2699 if (bitno < HOST_BITS_PER_WIDE_INT)
2700 w = TREE_INT_CST_LOW (t);
2701 else
2703 w = TREE_INT_CST_HIGH (t);
2704 bitno -= HOST_BITS_PER_WIDE_INT;
2707 return (w >> bitno) & 1;
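/* A worked example of the extraction above, assuming
   HOST_BITS_PER_WIDE_INT == 64: for an 8-bit constant the sign bit is
   bit 7 of TREE_INT_CST_LOW; for a 128-bit constant bitno starts at 127,
   exceeds 63, and bit 127 - 64 == 63 of TREE_INT_CST_HIGH is returned
   instead.  */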
2710 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2711 same precision, which is at least as wide as the precision of TYPE, stores
2712 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2713 type of A and B. */
2715 static tree
2716 determine_common_wider_type (tree *a, tree *b)
2718 tree wider_type = NULL;
2719 tree suba, subb;
2720 tree atype = TREE_TYPE (*a);
2722 if (CONVERT_EXPR_P (*a))
2724 suba = TREE_OPERAND (*a, 0);
2725 wider_type = TREE_TYPE (suba);
2726 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2727 return atype;
2729 else
2730 return atype;
2732 if (CONVERT_EXPR_P (*b))
2734 subb = TREE_OPERAND (*b, 0);
2735 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2736 return atype;
2738 else
2739 return atype;
2741 *a = suba;
2742 *b = subb;
2743 return wider_type;
2746 /* Determines the expression by which USE is expressed from induction variable
2747 CAND at statement AT in LOOP. The expression is stored in a decomposed
2748 form into AFF. Returns false if USE cannot be expressed using CAND. */
2750 static bool
2751 get_computation_aff (struct loop *loop,
2752 struct iv_use *use, struct iv_cand *cand, gimple at,
2753 struct affine_tree_combination *aff)
2755 tree ubase = use->iv->base;
2756 tree ustep = use->iv->step;
2757 tree cbase = cand->iv->base;
2758 tree cstep = cand->iv->step, cstep_common;
2759 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2760 tree common_type, var;
2761 tree uutype;
2762 aff_tree cbase_aff, var_aff;
2763 double_int rat;
2765 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2767 /* We do not have a precision to express the values of use. */
2768 return false;
2771 var = var_at_stmt (loop, cand, at);
2772 uutype = unsigned_type_for (utype);
2774 /* If the conversion is not noop, perform it. */
2775 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2777 cstep = fold_convert (uutype, cstep);
2778 cbase = fold_convert (uutype, cbase);
2779 var = fold_convert (uutype, var);
2782 if (!constant_multiple_of (ustep, cstep, &rat))
2783 return false;
2785 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2786 type, we achieve better folding by computing their difference in this
2787 wider type, and cast the result to UUTYPE. We do not need to worry about
2788 overflows, as all the arithmetic will in the end be performed in UUTYPE
2789 anyway. */
2790 common_type = determine_common_wider_type (&ubase, &cbase);
2792 /* use = ubase - ratio * cbase + ratio * var. */
2793 tree_to_aff_combination (ubase, common_type, aff);
2794 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2795 tree_to_aff_combination (var, uutype, &var_aff);
2797 /* We need to shift the value if we are after the increment. */
2798 if (stmt_after_increment (loop, cand, at))
2800 aff_tree cstep_aff;
2802 if (common_type != uutype)
2803 cstep_common = fold_convert (common_type, cstep);
2804 else
2805 cstep_common = cstep;
2807 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2808 aff_combination_add (&cbase_aff, &cstep_aff);
2811 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2812 aff_combination_add (aff, &cbase_aff);
2813 if (common_type != uutype)
2814 aff_combination_convert (aff, uutype);
2816 aff_combination_scale (&var_aff, rat);
2817 aff_combination_add (aff, &var_aff);
2819 return true;
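/* A worked example of the rewrite above (iv values are made up): for a use
   {base 16, step 8} and a candidate {base 0, step 4}, constant_multiple_of
   yields rat == 2, and the use becomes 16 - 2 * 0 + 2 * var == 16 + 2 * var,
   with the affine arithmetic carried out in the unsigned variant of the
   use's type so that any wrap-around is well defined.  */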
2822 /* Determines the expression by which USE is expressed from induction variable
2823 CAND at statement AT in LOOP. The computation is unshared. */
2825 static tree
2826 get_computation_at (struct loop *loop,
2827 struct iv_use *use, struct iv_cand *cand, gimple at)
2829 aff_tree aff;
2830 tree type = TREE_TYPE (use->iv->base);
2832 if (!get_computation_aff (loop, use, cand, at, &aff))
2833 return NULL_TREE;
2834 unshare_aff_combination (&aff);
2835 return fold_convert (type, aff_combination_to_tree (&aff));
2838 /* Determines the expression by which USE is expressed from induction variable
2839 CAND in LOOP. The computation is unshared. */
2841 static tree
2842 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2844 return get_computation_at (loop, use, cand, use->stmt);
2847 /* Returns cost of addition in MODE. */
2849 static unsigned
2850 add_cost (enum machine_mode mode, bool speed)
2852 static unsigned costs[NUM_MACHINE_MODES];
2853 rtx seq;
2854 unsigned cost;
2856 if (costs[mode])
2857 return costs[mode];
2859 start_sequence ();
2860 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2861 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2862 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2863 NULL_RTX);
2864 seq = get_insns ();
2865 end_sequence ();
2867 cost = seq_cost (seq, speed);
2868 if (!cost)
2869 cost = 1;
2871 costs[mode] = cost;
2873 if (dump_file && (dump_flags & TDF_DETAILS))
2874 fprintf (dump_file, "Addition in %s costs %d\n",
2875 GET_MODE_NAME (mode), cost);
2876 return cost;
2879 /* Entry in a hashtable of already known costs for multiplication. */
2880 struct mbc_entry
2882 HOST_WIDE_INT cst; /* The constant to multiply by. */
2883 enum machine_mode mode; /* In mode. */
2884 unsigned cost; /* The cost. */
2887 /* Computes the hash value for ENTRY. */
2889 static hashval_t
2890 mbc_entry_hash (const void *entry)
2892 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2894 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2897 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2899 static int
2900 mbc_entry_eq (const void *entry1, const void *entry2)
2902 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2903 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2905 return (e1->mode == e2->mode
2906 && e1->cst == e2->cst);
2909 /* Returns cost of multiplication by constant CST in MODE. */
2911 unsigned
2912 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2914 static htab_t costs;
2915 struct mbc_entry **cached, act;
2916 rtx seq;
2917 unsigned cost;
2919 if (!costs)
2920 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2922 act.mode = mode;
2923 act.cst = cst;
2924 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2925 if (*cached)
2926 return (*cached)->cost;
2928 *cached = XNEW (struct mbc_entry);
2929 (*cached)->mode = mode;
2930 (*cached)->cst = cst;
2932 start_sequence ();
2933 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2934 gen_int_mode (cst, mode), NULL_RTX, 0);
2935 seq = get_insns ();
2936 end_sequence ();
2938 cost = seq_cost (seq, speed);
2940 if (dump_file && (dump_flags & TDF_DETAILS))
2941 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2942 (int) cst, GET_MODE_NAME (mode), cost);
2944 (*cached)->cost = cost;
2946 return cost;
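/* For illustration only (compiled out; the measured costs are target
   dependent): repeated queries are answered from the hash table above, so
   only the first call for a given (constant, mode) pair pays for expanding
   and measuring a multiplication sequence.  */
#if 0
static void
multiply_by_cost_example (void)
{
  unsigned first = multiply_by_cost (5, SImode, true); /* Expands x * 5.  */
  unsigned again = multiply_by_cost (5, SImode, true); /* Cache hit.  */

  gcc_assert (first == again);
}
#endif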
2949 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2950 validity for a memory reference accessing memory of mode MODE. */
2952 bool
2953 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2955 #define MAX_RATIO 128
2956 static sbitmap valid_mult[MAX_MACHINE_MODE];
2958 if (!valid_mult[mode])
2960 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2961 rtx addr;
2962 HOST_WIDE_INT i;
2964 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2965 sbitmap_zero (valid_mult[mode]);
2966 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2967 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2969 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2970 if (memory_address_p (mode, addr))
2971 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2974 if (dump_file && (dump_flags & TDF_DETAILS))
2976 fprintf (dump_file, " allowed multipliers:");
2977 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2978 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2979 fprintf (dump_file, " %d", (int) i);
2980 fprintf (dump_file, "\n");
2981 fprintf (dump_file, "\n");
2985 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2986 return false;
2988 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
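/* For illustration only (compiled out; the expected answers are an
   assumption about a base + index * scale target in the style of i386,
   not a claim about any particular port): the sbitmap above caches, per
   mode, which scale factors the target's addressing modes accept.  */
#if 0
static void
multiplier_allowed_example (void)
{
  gcc_assert (multiplier_allowed_in_address_p (4, SImode));
  gcc_assert (!multiplier_allowed_in_address_p (3, SImode));
}
#endif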
2991 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2992 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2993 variable is omitted. Compute the cost for a memory reference that accesses
2994 a memory location of mode MEM_MODE.
2996 TODO -- there must be some better way. This all is quite crude. */
2998 static comp_cost
2999 get_address_cost (bool symbol_present, bool var_present,
3000 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3001 enum machine_mode mem_mode,
3002 bool speed)
3004 static bool initialized[MAX_MACHINE_MODE];
3005 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3006 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3007 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3008 unsigned cost, acost, complexity;
3009 bool offset_p, ratio_p;
3010 HOST_WIDE_INT s_offset;
3011 unsigned HOST_WIDE_INT mask;
3012 unsigned bits;
3014 if (!initialized[mem_mode])
3016 HOST_WIDE_INT i;
3017 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3018 int old_cse_not_expected;
3019 unsigned sym_p, var_p, off_p, rat_p, add_c;
3020 rtx seq, addr, base;
3021 rtx reg0, reg1;
3023 initialized[mem_mode] = true;
3025 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3027 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3028 for (i = start; i <= 1 << 20; i <<= 1)
3030 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3031 if (!memory_address_p (mem_mode, addr))
3032 break;
3034 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3035 off[mem_mode] = max_offset[mem_mode];
3037 for (i = start; i <= 1 << 20; i <<= 1)
3039 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3040 if (!memory_address_p (mem_mode, addr))
3041 break;
3043 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3045 if (dump_file && (dump_flags & TDF_DETAILS))
3047 fprintf (dump_file, "get_address_cost:\n");
3048 fprintf (dump_file, " min offset %s %d\n",
3049 GET_MODE_NAME (mem_mode),
3050 (int) min_offset[mem_mode]);
3051 fprintf (dump_file, " max offset %s %d\n",
3052 GET_MODE_NAME (mem_mode),
3053 (int) max_offset[mem_mode]);
3056 rat[mem_mode] = 1;
3057 for (i = 2; i <= MAX_RATIO; i++)
3058 if (multiplier_allowed_in_address_p (i, mem_mode))
3060 rat[mem_mode] = i;
3061 break;
3064 /* Compute the cost of various addressing modes. */
3065 acost = 0;
3066 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3067 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3069 for (i = 0; i < 16; i++)
3071 sym_p = i & 1;
3072 var_p = (i >> 1) & 1;
3073 off_p = (i >> 2) & 1;
3074 rat_p = (i >> 3) & 1;
3076 addr = reg0;
3077 if (rat_p)
3078 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3079 gen_int_mode (rat[mem_mode], Pmode));
3081 if (var_p)
3082 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3084 if (sym_p)
3086 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3087 /* ??? We can run into trouble with some backends by presenting
3088 it with symbols which haven't been properly passed through
3089 targetm.encode_section_info. By setting the local bit, we
3090 enhance the probability of things working. */
3091 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3093 if (off_p)
3094 base = gen_rtx_fmt_e (CONST, Pmode,
3095 gen_rtx_fmt_ee (PLUS, Pmode,
3096 base,
3097 gen_int_mode (off[mem_mode],
3098 Pmode)));
3100 else if (off_p)
3101 base = gen_int_mode (off[mem_mode], Pmode);
3102 else
3103 base = NULL_RTX;
3105 if (base)
3106 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3108 start_sequence ();
3109 /* To avoid splitting addressing modes, pretend that no cse will
3110 follow. */
3111 old_cse_not_expected = cse_not_expected;
3112 cse_not_expected = true;
3113 addr = memory_address (mem_mode, addr);
3114 cse_not_expected = old_cse_not_expected;
3115 seq = get_insns ();
3116 end_sequence ();
3118 acost = seq_cost (seq, speed);
3119 acost += address_cost (addr, mem_mode, speed);
3121 if (!acost)
3122 acost = 1;
3123 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3126 /* On some targets, it is quite expensive to load a symbol into a register,
3127 which makes addresses that contain symbols look much more expensive.
3128 However, the symbol will have to be loaded in any case before the
3129 loop (and quite likely we have it in a register already), so it does not
3130 make much sense to penalize them too heavily. So make some final
3131 tweaks for the SYMBOL_PRESENT modes:
3133 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3134 var is cheaper, use this mode with small penalty.
3135 If VAR_PRESENT is true, try whether the mode with
3136 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3137 if this is the case, use it. */
3138 add_c = add_cost (Pmode, speed);
3139 for (i = 0; i < 8; i++)
3141 var_p = i & 1;
3142 off_p = (i >> 1) & 1;
3143 rat_p = (i >> 2) & 1;
3145 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3146 if (var_p)
3147 acost += add_c;
3149 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3150 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3153 if (dump_file && (dump_flags & TDF_DETAILS))
3155 fprintf (dump_file, "Address costs:\n");
3157 for (i = 0; i < 16; i++)
3159 sym_p = i & 1;
3160 var_p = (i >> 1) & 1;
3161 off_p = (i >> 2) & 1;
3162 rat_p = (i >> 3) & 1;
3164 fprintf (dump_file, " ");
3165 if (sym_p)
3166 fprintf (dump_file, "sym + ");
3167 if (var_p)
3168 fprintf (dump_file, "var + ");
3169 if (off_p)
3170 fprintf (dump_file, "cst + ");
3171 if (rat_p)
3172 fprintf (dump_file, "rat * ");
3174 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3175 fprintf (dump_file, "index costs %d\n", acost);
3177 fprintf (dump_file, "\n");
3181 bits = GET_MODE_BITSIZE (Pmode);
3182 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3183 offset &= mask;
3184 if ((offset >> (bits - 1) & 1))
3185 offset |= ~mask;
3186 s_offset = offset;
3188 cost = 0;
3189 offset_p = (s_offset != 0
3190 && min_offset[mem_mode] <= s_offset
3191 && s_offset <= max_offset[mem_mode]);
3192 ratio_p = (ratio != 1
3193 && multiplier_allowed_in_address_p (ratio, mem_mode));
3195 if (ratio != 1 && !ratio_p)
3196 cost += multiply_by_cost (ratio, Pmode, speed);
3198 if (s_offset && !offset_p && !symbol_present)
3199 cost += add_cost (Pmode, speed);
3201 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3202 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3203 return new_cost (cost + acost, complexity);
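/* For illustration only (compiled out): the initialization above enumerates
   all 16 address shapes by decoding four flag bits from the loop index.  */
#if 0
static void
decode_address_shape_example (unsigned i, unsigned *sym_p, unsigned *var_p,
			      unsigned *off_p, unsigned *rat_p)
{
  /* Same encoding as above; e.g. i == 13 (binary 1101) stands for the
     shape "sym + cst + rat * index".  */
  *sym_p = i & 1;
  *var_p = (i >> 1) & 1;
  *off_p = (i >> 2) & 1;
  *rat_p = (i >> 3) & 1;
}
#endif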
3206 /* Estimates cost of forcing expression EXPR into a variable. */
3208 static comp_cost
3209 force_expr_to_var_cost (tree expr, bool speed)
3211 static bool costs_initialized = false;
3212 static unsigned integer_cost [2];
3213 static unsigned symbol_cost [2];
3214 static unsigned address_cost [2];
3215 tree op0, op1;
3216 comp_cost cost0, cost1, cost;
3217 enum machine_mode mode;
3219 if (!costs_initialized)
3221 tree type = build_pointer_type (integer_type_node);
3222 tree var, addr;
3223 rtx x;
3224 int i;
3226 var = create_tmp_var_raw (integer_type_node, "test_var");
3227 TREE_STATIC (var) = 1;
3228 x = produce_memory_decl_rtl (var, NULL);
3229 SET_DECL_RTL (var, x);
3231 addr = build1 (ADDR_EXPR, type, var);
3234 for (i = 0; i < 2; i++)
3236 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3237 2000), i);
3239 symbol_cost[i] = computation_cost (addr, i) + 1;
3241 address_cost[i]
3242 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3243 addr,
3244 build_int_cst (sizetype, 2000)), i) + 1;
3245 if (dump_file && (dump_flags & TDF_DETAILS))
3247 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3248 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3249 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3250 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3251 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3252 fprintf (dump_file, "\n");
3256 costs_initialized = true;
3259 STRIP_NOPS (expr);
3261 if (SSA_VAR_P (expr))
3262 return zero_cost;
3264 if (is_gimple_min_invariant (expr))
3266 if (TREE_CODE (expr) == INTEGER_CST)
3267 return new_cost (integer_cost [speed], 0);
3269 if (TREE_CODE (expr) == ADDR_EXPR)
3271 tree obj = TREE_OPERAND (expr, 0);
3273 if (TREE_CODE (obj) == VAR_DECL
3274 || TREE_CODE (obj) == PARM_DECL
3275 || TREE_CODE (obj) == RESULT_DECL)
3276 return new_cost (symbol_cost [speed], 0);
3279 return new_cost (address_cost [speed], 0);
3282 switch (TREE_CODE (expr))
3284 case POINTER_PLUS_EXPR:
3285 case PLUS_EXPR:
3286 case MINUS_EXPR:
3287 case MULT_EXPR:
3288 op0 = TREE_OPERAND (expr, 0);
3289 op1 = TREE_OPERAND (expr, 1);
3290 STRIP_NOPS (op0);
3291 STRIP_NOPS (op1);
3293 if (is_gimple_val (op0))
3294 cost0 = zero_cost;
3295 else
3296 cost0 = force_expr_to_var_cost (op0, speed);
3298 if (is_gimple_val (op1))
3299 cost1 = zero_cost;
3300 else
3301 cost1 = force_expr_to_var_cost (op1, speed);
3303 break;
3305 default:
3306 /* Just an arbitrary value, FIXME. */
3307 return new_cost (target_spill_cost[speed], 0);
3310 mode = TYPE_MODE (TREE_TYPE (expr));
3311 switch (TREE_CODE (expr))
3313 case POINTER_PLUS_EXPR:
3314 case PLUS_EXPR:
3315 case MINUS_EXPR:
3316 cost = new_cost (add_cost (mode, speed), 0);
3317 break;
3319 case MULT_EXPR:
3320 if (cst_and_fits_in_hwi (op0))
3321 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3322 else if (cst_and_fits_in_hwi (op1))
3323 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3324 else
3325 return new_cost (target_spill_cost [speed], 0);
3326 break;
3328 default:
3329 gcc_unreachable ();
3332 cost = add_costs (cost, cost0);
3333 cost = add_costs (cost, cost1);
3335 /* Bound the cost by target_spill_cost. The parts of complicated
3336 computations often are either loop invariant or at least can
3337 be shared between several iv uses, so letting this grow without
3338 limits would not give reasonable results. */
3339 if (cost.cost > target_spill_cost [speed])
3340 cost.cost = target_spill_cost [speed];
3342 return cost;
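/* A worked example of the recursion above (costs are illustrative): for
   a + 4 * b, operand a is already a gimple value and costs nothing, the
   multiplication contributes multiply_by_cost (4), the addition adds
   add_cost, and the sum is finally clipped at target_spill_cost so that
   arbitrarily deep invariant expressions cannot dominate the decision.  */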
3345 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3346 invariants the computation depends on. */
3348 static comp_cost
3349 force_var_cost (struct ivopts_data *data,
3350 tree expr, bitmap *depends_on)
3352 if (depends_on)
3354 fd_ivopts_data = data;
3355 walk_tree (&expr, find_depends, depends_on, NULL);
3358 return force_expr_to_var_cost (expr, data->speed);
3361 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3362 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3363 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3364 invariants the computation depends on. */
3366 static comp_cost
3367 split_address_cost (struct ivopts_data *data,
3368 tree addr, bool *symbol_present, bool *var_present,
3369 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3371 tree core;
3372 HOST_WIDE_INT bitsize;
3373 HOST_WIDE_INT bitpos;
3374 tree toffset;
3375 enum machine_mode mode;
3376 int unsignedp, volatilep;
3378 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3379 &unsignedp, &volatilep, false);
3381 if (toffset != 0
3382 || bitpos % BITS_PER_UNIT != 0
3383 || TREE_CODE (core) != VAR_DECL)
3385 *symbol_present = false;
3386 *var_present = true;
3387 fd_ivopts_data = data;
3388 walk_tree (&addr, find_depends, depends_on, NULL);
3389 return new_cost (target_spill_cost[data->speed], 0);
3392 *offset += bitpos / BITS_PER_UNIT;
3393 if (TREE_STATIC (core)
3394 || DECL_EXTERNAL (core))
3396 *symbol_present = true;
3397 *var_present = false;
3398 return zero_cost;
3401 *symbol_present = false;
3402 *var_present = true;
3403 return zero_cost;
3406 /* Estimates cost of expressing difference of addresses E1 - E2 as
3407 var + symbol + offset. The value of offset is added to OFFSET,
3408 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3409 part is missing. DEPENDS_ON is a set of the invariants the computation
3410 depends on. */
3412 static comp_cost
3413 ptr_difference_cost (struct ivopts_data *data,
3414 tree e1, tree e2, bool *symbol_present, bool *var_present,
3415 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3417 HOST_WIDE_INT diff = 0;
3418 comp_cost cost;
3419 bool speed = optimize_loop_for_speed_p (data->current_loop);
3421 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3423 if (ptr_difference_const (e1, e2, &diff))
3425 *offset += diff;
3426 *symbol_present = false;
3427 *var_present = false;
3428 return zero_cost;
3431 if (integer_zerop (e2))
3432 return split_address_cost (data, TREE_OPERAND (e1, 0),
3433 symbol_present, var_present, offset, depends_on);
3435 *symbol_present = false;
3436 *var_present = true;
3438 cost = force_var_cost (data, e1, depends_on);
3439 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3440 cost.cost += add_cost (Pmode, speed);
3442 return cost;
3445 /* Estimates cost of expressing difference E1 - E2 as
3446 var + symbol + offset. The value of offset is added to OFFSET,
3447 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3448 part is missing. DEPENDS_ON is a set of the invariants the computation
3449 depends on. */
3451 static comp_cost
3452 difference_cost (struct ivopts_data *data,
3453 tree e1, tree e2, bool *symbol_present, bool *var_present,
3454 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3456 comp_cost cost;
3457 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3458 unsigned HOST_WIDE_INT off1, off2;
3460 e1 = strip_offset (e1, &off1);
3461 e2 = strip_offset (e2, &off2);
3462 *offset += off1 - off2;
3464 STRIP_NOPS (e1);
3465 STRIP_NOPS (e2);
3467 if (TREE_CODE (e1) == ADDR_EXPR)
3468 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3469 depends_on);
3470 *symbol_present = false;
3472 if (operand_equal_p (e1, e2, 0))
3474 *var_present = false;
3475 return zero_cost;
3477 *var_present = true;
3478 if (integer_zerop (e2))
3479 return force_var_cost (data, e1, depends_on);
3481 if (integer_zerop (e1))
3483 cost = force_var_cost (data, e2, depends_on);
3484 cost.cost += multiply_by_cost (-1, mode, data->speed);
3486 return cost;
3489 cost = force_var_cost (data, e1, depends_on);
3490 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3491 cost.cost += add_cost (mode, data->speed);
3493 return cost;
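/* A worked example: for e1 == &a[4] and e2 == &a[1] with 4-byte elements,
   strip_offset peels off1 == 16 and off2 == 4, both operands become &a,
   ptr_difference_const then reports a zero remaining difference, and the
   whole e1 - e2 folds into *offset += 12 at zero cost.  */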
3496 /* Determines the cost of the computation by which USE is expressed
3497 from induction variable CAND. If ADDRESS_P is true, we just need
3498 to create an address from it, otherwise we want to get it into
3499 register. A set of invariants we depend on is stored in
3500 DEPENDS_ON. AT is the statement at that the value is computed. */
3502 static comp_cost
3503 get_computation_cost_at (struct ivopts_data *data,
3504 struct iv_use *use, struct iv_cand *cand,
3505 bool address_p, bitmap *depends_on, gimple at)
3507 tree ubase = use->iv->base, ustep = use->iv->step;
3508 tree cbase, cstep;
3509 tree utype = TREE_TYPE (ubase), ctype;
3510 unsigned HOST_WIDE_INT cstepi, offset = 0;
3511 HOST_WIDE_INT ratio, aratio;
3512 bool var_present, symbol_present;
3513 comp_cost cost;
3514 unsigned n_sums;
3515 double_int rat;
3516 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3518 *depends_on = NULL;
3520 /* Only consider real candidates. */
3521 if (!cand->iv)
3522 return infinite_cost;
3524 cbase = cand->iv->base;
3525 cstep = cand->iv->step;
3526 ctype = TREE_TYPE (cbase);
3528 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3530 /* We do not have a precision to express the values of use. */
3531 return infinite_cost;
3534 if (address_p)
3536 /* Do not try to express address of an object with computation based
3537 on address of a different object. This may cause problems in rtl
3538 level alias analysis (that does not expect this to be happening,
3539 as this is illegal in C), and would be unlikely to be useful
3540 anyway. */
3541 if (use->iv->base_object
3542 && cand->iv->base_object
3543 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3544 return infinite_cost;
3547 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3549 /* TODO -- add direct handling of this case. */
3550 goto fallback;
3553 /* CSTEPI is removed from the offset in case the statement is after the
3554 increment. If the step is not constant, we use zero instead.
3555 This is a bit imprecise (there is the extra addition), but
3556 redundancy elimination is likely to transform the code so that
3557 it uses the value of the variable before the increment anyway,
3558 so this is not all that unrealistic. */
3559 if (cst_and_fits_in_hwi (cstep))
3560 cstepi = int_cst_value (cstep);
3561 else
3562 cstepi = 0;
3564 if (!constant_multiple_of (ustep, cstep, &rat))
3565 return infinite_cost;
3567 if (double_int_fits_in_shwi_p (rat))
3568 ratio = double_int_to_shwi (rat);
3569 else
3570 return infinite_cost;
3572 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3573 or ratio == 1, it is better to handle this like
3575 ubase - ratio * cbase + ratio * var
3577 (this also holds in the case ratio == -1, TODO). */
3579 if (cst_and_fits_in_hwi (cbase))
3581 offset = - ratio * int_cst_value (cbase);
3582 cost = difference_cost (data,
3583 ubase, build_int_cst (utype, 0),
3584 &symbol_present, &var_present, &offset,
3585 depends_on);
3587 else if (ratio == 1)
3589 cost = difference_cost (data,
3590 ubase, cbase,
3591 &symbol_present, &var_present, &offset,
3592 depends_on);
3594 else
3596 cost = force_var_cost (data, cbase, depends_on);
3597 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3598 cost = add_costs (cost,
3599 difference_cost (data,
3600 ubase, build_int_cst (utype, 0),
3601 &symbol_present, &var_present,
3602 &offset, depends_on));
3605 /* If we are after the increment, the value of the candidate is higher by
3606 one iteration. */
3607 if (stmt_after_increment (data->current_loop, cand, at))
3608 offset -= ratio * cstepi;
3610 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3611 (symbol/var/const parts may be omitted). If we are looking for an address,
3612 find the cost of addressing this. */
3613 if (address_p)
3614 return add_costs (cost, get_address_cost (symbol_present, var_present,
3615 offset, ratio,
3616 TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
3618 /* Otherwise estimate the costs for computing the expression. */
3619 aratio = ratio > 0 ? ratio : -ratio;
3620 if (!symbol_present && !var_present && !offset)
3622 if (ratio != 1)
3623 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3625 return cost;
3628 if (aratio != 1)
3629 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3631 n_sums = 1;
3632 if (var_present
3633 /* Symbol + offset should be compile-time computable. */
3634 && (symbol_present || offset))
3635 n_sums++;
3637 /* Having offset does not affect runtime cost in case it is added to
3638 symbol, but it increases complexity. */
3639 if (offset)
3640 cost.complexity++;
3642 cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
3643 return cost;
3645 fallback:
3647 /* Just get the expression, expand it and measure the cost. */
3648 tree comp = get_computation_at (data->current_loop, use, cand, at);
3650 if (!comp)
3651 return infinite_cost;
3653 if (address_p)
3654 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3656 return new_cost (computation_cost (comp, speed), 0);
3660 /* Determines the cost of the computation by which USE is expressed
3661 from induction variable CAND. If ADDRESS_P is true, we just need
3662 to create an address from it, otherwise we want to get it into
3663 register. A set of invariants we depend on is stored in
3664 DEPENDS_ON. */
3666 static comp_cost
3667 get_computation_cost (struct ivopts_data *data,
3668 struct iv_use *use, struct iv_cand *cand,
3669 bool address_p, bitmap *depends_on)
3671 return get_computation_cost_at (data,
3672 use, cand, address_p, depends_on, use->stmt);
3675 /* Determines cost of basing replacement of USE on CAND in a generic
3676 expression. */
3678 static bool
3679 determine_use_iv_cost_generic (struct ivopts_data *data,
3680 struct iv_use *use, struct iv_cand *cand)
3682 bitmap depends_on;
3683 comp_cost cost;
3685 /* The simple case first -- if we need to express value of the preserved
3686 original biv, the cost is 0. This also prevents us from counting the
3687 cost of increment twice -- once at this use and once in the cost of
3688 the candidate. */
3689 if (cand->pos == IP_ORIGINAL
3690 && cand->incremented_at == use->stmt)
3692 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3693 return true;
3696 cost = get_computation_cost (data, use, cand, false, &depends_on);
3697 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3699 return !infinite_cost_p (cost);
3702 /* Determines cost of basing replacement of USE on CAND in an address. */
3704 static bool
3705 determine_use_iv_cost_address (struct ivopts_data *data,
3706 struct iv_use *use, struct iv_cand *cand)
3708 bitmap depends_on;
3709 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3711 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3713 return !infinite_cost_p (cost);
3716 /* Computes value of candidate CAND at position AT in iteration NITER, and
3717 stores it to VAL. */
3719 static void
3720 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3721 aff_tree *val)
3723 aff_tree step, delta, nit;
3724 struct iv *iv = cand->iv;
3725 tree type = TREE_TYPE (iv->base);
3726 tree steptype = type;
3727 if (POINTER_TYPE_P (type))
3728 steptype = sizetype;
3730 tree_to_aff_combination (iv->step, steptype, &step);
3731 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3732 aff_combination_convert (&nit, steptype);
3733 aff_combination_mult (&nit, &step, &delta);
3734 if (stmt_after_increment (loop, cand, at))
3735 aff_combination_add (&delta, &step);
3737 tree_to_aff_combination (iv->base, type, val);
3738 aff_combination_add (val, &delta);
3741 /* Returns the period of induction variable IV. */
3743 static tree
3744 iv_period (struct iv *iv)
3746 tree step = iv->step, period, type;
3747 tree pow2div;
3749 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3751 /* The period of the iv is the type range divided by gcd (step, type range).
3752 Since the type range is a power of two, it suffices to determine the
3753 maximum power of two that divides the step. */
3754 pow2div = num_ending_zeros (step);
3755 type = unsigned_type_for (TREE_TYPE (step));
3757 period = build_low_bits_mask (type,
3758 (TYPE_PRECISION (type)
3759 - tree_low_cst (pow2div, 1)));
3761 return period;
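/* A worked example: for an 8-bit unsigned iv with step 6,
   num_ending_zeros (6) == 1, so the mask built above is
   2^(8 - 1) - 1 == 127 -- the iv performs at least that many increments
   before it can revisit a value, which is the bound that
   may_eliminate_iv checks below.  */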
3764 /* Returns the comparison operator used when eliminating the iv USE. */
3766 static enum tree_code
3767 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3769 struct loop *loop = data->current_loop;
3770 basic_block ex_bb;
3771 edge exit;
3773 ex_bb = gimple_bb (use->stmt);
3774 exit = EDGE_SUCC (ex_bb, 0);
3775 if (flow_bb_inside_loop_p (loop, exit->dest))
3776 exit = EDGE_SUCC (ex_bb, 1);
3778 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3781 /* Check whether it is possible to express the condition in USE by comparison
3782 of candidate CAND. If so, store the value to compare it with in BOUND. */
3784 static bool
3785 may_eliminate_iv (struct ivopts_data *data,
3786 struct iv_use *use, struct iv_cand *cand, tree *bound)
3788 basic_block ex_bb;
3789 edge exit;
3790 tree nit, period;
3791 struct loop *loop = data->current_loop;
3792 aff_tree bnd;
3794 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3795 return false;
3797 /* For now this works only for exits that dominate the loop latch.
3798 TODO: extend to other conditions inside loop body. */
3799 ex_bb = gimple_bb (use->stmt);
3800 if (use->stmt != last_stmt (ex_bb)
3801 || gimple_code (use->stmt) != GIMPLE_COND
3802 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3803 return false;
3805 exit = EDGE_SUCC (ex_bb, 0);
3806 if (flow_bb_inside_loop_p (loop, exit->dest))
3807 exit = EDGE_SUCC (ex_bb, 1);
3808 if (flow_bb_inside_loop_p (loop, exit->dest))
3809 return false;
3811 nit = niter_for_exit (data, exit);
3812 if (!nit)
3813 return false;
3815 /* Determine whether we can use the variable to test the exit condition.
3816 This is the case iff the period of the induction variable is greater
3817 than the number of iterations for which the exit condition is true. */
3818 period = iv_period (cand->iv);
3820 /* If the number of iterations is constant, compare against it directly. */
3821 if (TREE_CODE (nit) == INTEGER_CST)
3823 if (!tree_int_cst_lt (nit, period))
3824 return false;
3827 /* If not, and if this is the only possible exit of the loop, see whether
3828 we can get a conservative estimate on the number of iterations of the
3829 entire loop and compare against that instead. */
3830 else if (loop_only_exit_p (loop, exit))
3832 double_int period_value, max_niter;
3833 if (!estimated_loop_iterations (loop, true, &max_niter))
3834 return false;
3835 period_value = tree_to_double_int (period);
3836 if (double_int_ucmp (max_niter, period_value) >= 0)
3837 return false;
3840 /* Otherwise, punt. */
3841 else
3842 return false;
3844 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3845 *bound = aff_combination_to_tree (&bnd);
3846 return true;
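/* As an illustration: for an exit controlled by "i < n" and a candidate
   "p = base; p += step" each iteration, cand_value_at evaluates the
   candidate after NIT iterations, so the exit test can be rewritten as a
   comparison of p against base + nit * step (one extra step if the test
   runs after the increment).  This is only valid because the period check
   above guarantees the candidate cannot wrap around onto the bound
   prematurely.  */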
3849 /* Determines cost of basing replacement of USE on CAND in a condition. */
3851 static bool
3852 determine_use_iv_cost_condition (struct ivopts_data *data,
3853 struct iv_use *use, struct iv_cand *cand)
3855 tree bound = NULL_TREE;
3856 struct iv *cmp_iv;
3857 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3858 comp_cost elim_cost, express_cost, cost;
3859 bool ok;
3861 /* Only consider real candidates. */
3862 if (!cand->iv)
3864 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3865 return false;
3868 /* Try iv elimination. */
3869 if (may_eliminate_iv (data, use, cand, &bound))
3871 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3872 /* The bound is a loop invariant, so it will only be computed
3873 once. */
3874 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3876 else
3877 elim_cost = infinite_cost;
3879 /* Try expressing the original giv. If it is compared with an invariant,
3880 note that we cannot get rid of it. */
3881 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
3882 gcc_assert (ok);
3884 express_cost = get_computation_cost (data, use, cand, false,
3885 &depends_on_express);
3886 fd_ivopts_data = data;
3887 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3889 /* Choose the better approach, preferring the eliminated IV. */
3890 if (compare_costs (elim_cost, express_cost) <= 0)
3892 cost = elim_cost;
3893 depends_on = depends_on_elim;
3894 depends_on_elim = NULL;
3896 else
3898 cost = express_cost;
3899 depends_on = depends_on_express;
3900 depends_on_express = NULL;
3901 bound = NULL_TREE;
3904 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3906 if (depends_on_elim)
3907 BITMAP_FREE (depends_on_elim);
3908 if (depends_on_express)
3909 BITMAP_FREE (depends_on_express);
3911 return !infinite_cost_p (cost);
3914 /* Determines cost of basing replacement of USE on CAND. Returns false
3915 if USE cannot be based on CAND. */
3917 static bool
3918 determine_use_iv_cost (struct ivopts_data *data,
3919 struct iv_use *use, struct iv_cand *cand)
3921 switch (use->type)
3923 case USE_NONLINEAR_EXPR:
3924 return determine_use_iv_cost_generic (data, use, cand);
3926 case USE_ADDRESS:
3927 return determine_use_iv_cost_address (data, use, cand);
3929 case USE_COMPARE:
3930 return determine_use_iv_cost_condition (data, use, cand);
3932 default:
3933 gcc_unreachable ();
3937 /* Determines costs of basing the use of the iv on an iv candidate. */
3939 static void
3940 determine_use_iv_costs (struct ivopts_data *data)
3942 unsigned i, j;
3943 struct iv_use *use;
3944 struct iv_cand *cand;
3945 bitmap to_clear = BITMAP_ALLOC (NULL);
3947 alloc_use_cost_map (data);
3949 for (i = 0; i < n_iv_uses (data); i++)
3951 use = iv_use (data, i);
3953 if (data->consider_all_candidates)
3955 for (j = 0; j < n_iv_cands (data); j++)
3957 cand = iv_cand (data, j);
3958 determine_use_iv_cost (data, use, cand);
3961 else
3963 bitmap_iterator bi;
3965 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3967 cand = iv_cand (data, j);
3968 if (!determine_use_iv_cost (data, use, cand))
3969 bitmap_set_bit (to_clear, j);
3972 /* Remove the candidates for which the cost is infinite from
3973 the list of related candidates. */
3974 bitmap_and_compl_into (use->related_cands, to_clear);
3975 bitmap_clear (to_clear);
3979 BITMAP_FREE (to_clear);
3981 if (dump_file && (dump_flags & TDF_DETAILS))
3983 fprintf (dump_file, "Use-candidate costs:\n");
3985 for (i = 0; i < n_iv_uses (data); i++)
3987 use = iv_use (data, i);
3989 fprintf (dump_file, "Use %d:\n", i);
3990 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
3991 for (j = 0; j < use->n_map_members; j++)
3993 if (!use->cost_map[j].cand
3994 || infinite_cost_p (use->cost_map[j].cost))
3995 continue;
3997 fprintf (dump_file, " %d\t%d\t%d\t",
3998 use->cost_map[j].cand->id,
3999 use->cost_map[j].cost.cost,
4000 use->cost_map[j].cost.complexity);
4001 if (use->cost_map[j].depends_on)
4002 bitmap_print (dump_file,
4003 use->cost_map[j].depends_on, "","");
4004 fprintf (dump_file, "\n");
4007 fprintf (dump_file, "\n");
4009 fprintf (dump_file, "\n");
4013 /* Determines cost of the candidate CAND. */
4015 static void
4016 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4018 comp_cost cost_base;
4019 unsigned cost, cost_step;
4020 tree base;
4022 if (!cand->iv)
4024 cand->cost = 0;
4025 return;
4028 /* There are two costs associated with the candidate -- its increment
4029 and its initialization. The second is almost negligible for any loop
4030 that rolls enough, so we give it only a very small weight. */
4032 base = cand->iv->base;
4033 cost_base = force_var_cost (data, base, NULL);
4034 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4036 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4038 /* Prefer the original ivs unless we may gain something by replacing them.
4039 The reason is to make debugging simpler; this is not relevant for
4040 artificial ivs created by other optimization passes. */
4041 if (cand->pos != IP_ORIGINAL
4042 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4043 cost++;
4045 /* Prefer not to insert statements into latch unless there are some
4046 already (so that we do not create unnecessary jumps). */
4047 if (cand->pos == IP_END
4048 && empty_block_p (ip_end_pos (data->current_loop)))
4049 cost++;
4051 cand->cost = cost;
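/* A worked example (illustrative numbers, with an assumed average trip
   count of 5): for cost_base.cost == 10 and cost_step == 1 the candidate
   costs 1 + 10 / 5 == 3; a candidate that does not preserve an original
   user-visible iv, or one that would have to add code to an empty latch
   block, then pays one extra unit each.  */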
4054 /* Determines costs of computation of the candidates. */
4056 static void
4057 determine_iv_costs (struct ivopts_data *data)
4059 unsigned i;
4061 if (dump_file && (dump_flags & TDF_DETAILS))
4063 fprintf (dump_file, "Candidate costs:\n");
4064 fprintf (dump_file, " cand\tcost\n");
4067 for (i = 0; i < n_iv_cands (data); i++)
4069 struct iv_cand *cand = iv_cand (data, i);
4071 determine_iv_cost (data, cand);
4073 if (dump_file && (dump_flags & TDF_DETAILS))
4074 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4077 if (dump_file && (dump_flags & TDF_DETAILS))
4078 fprintf (dump_file, "\n");
4081 /* Calculates cost for having SIZE induction variables. */
4083 static unsigned
4084 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4086 /* We add size to the cost, so that we prefer eliminating ivs
4087 if possible. */
4088 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4091 /* For each size of the induction variable set determine the penalty. */
4093 static void
4094 determine_set_costs (struct ivopts_data *data)
4096 unsigned j, n;
4097 gimple phi;
4098 gimple_stmt_iterator psi;
4099 tree op;
4100 struct loop *loop = data->current_loop;
4101 bitmap_iterator bi;
4103 /* We use the following model (definitely improvable, especially the
4104 cost function -- TODO):
4106 We estimate the number of registers available (using MD data), name it A.
4108 We estimate the number of registers used by the loop, name it U. This
4109 number is obtained as the number of loop phi nodes (not counting virtual
4110 registers and bivs) + the number of variables from outside of the loop.
4112 We set a reserve R (free regs that are used for temporary computations,
4113 etc.). For now the reserve is a constant 3.
4115 Let I be the number of induction variables.
4117 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
4118 encourage creating a lot of ivs without reason).
4119 -- if A - R < U + I <= A, the cost is I * PRES_COST
4120 -- if U + I > A, the cost is I * PRES_COST and
4121 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
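/* A worked instance of the model above (all numbers hypothetical): with
   A == 8, R == 3 and U == 3, up to I == 2 ivs stay in the cheap band
   (3 + 2 + 3 <= 8), I in the range 3 .. 5 pays I * PRES_COST, and from
   I == 6 onwards (3 + 6 > 8) the SPILL_COST term starts to apply.  */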
4123 if (dump_file && (dump_flags & TDF_DETAILS))
4125 fprintf (dump_file, "Global costs:\n");
4126 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4127 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4128 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4131 n = 0;
4132 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4134 phi = gsi_stmt (psi);
4135 op = PHI_RESULT (phi);
4137 if (!is_gimple_reg (op))
4138 continue;
4140 if (get_iv (data, op))
4141 continue;
4143 n++;
4146 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4148 struct version_info *info = ver_info (data, j);
4150 if (info->inv_id && info->has_nonlin_use)
4151 n++;
4154 data->regs_used = n;
4155 if (dump_file && (dump_flags & TDF_DETAILS))
4156 fprintf (dump_file, " regs_used %d\n", n);
4158 if (dump_file && (dump_flags & TDF_DETAILS))
4160 fprintf (dump_file, " cost for size:\n");
4161 fprintf (dump_file, " ivs\tcost\n");
4162 for (j = 0; j <= 2 * target_avail_regs; j++)
4163 fprintf (dump_file, " %d\t%d\n", j,
4164 ivopts_global_cost_for_size (data, j));
4165 fprintf (dump_file, "\n");
4169 /* Returns true if A is a cheaper cost pair than B. */
4171 static bool
4172 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4174 int cmp;
4176 if (!a)
4177 return false;
4179 if (!b)
4180 return true;
4182 cmp = compare_costs (a->cost, b->cost);
4183 if (cmp < 0)
4184 return true;
4186 if (cmp > 0)
4187 return false;
4189 /* In case the costs are the same, prefer the cheaper candidate. */
4190 if (a->cand->cost < b->cand->cost)
4191 return true;
4193 return false;
4196 /* Computes the cost field of IVS structure. */
4198 static void
4199 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4201 comp_cost cost = ivs->cand_use_cost;
4202 cost.cost += ivs->cand_cost;
4203 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4205 ivs->cost = cost;
4208 /* Remove invariants in set INVS from set IVS. */
4210 static void
4211 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4213 bitmap_iterator bi;
4214 unsigned iid;
4216 if (!invs)
4217 return;
4219 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4221 ivs->n_invariant_uses[iid]--;
4222 if (ivs->n_invariant_uses[iid] == 0)
4223 ivs->n_regs--;
4227 /* Set USE not to be expressed by any candidate in IVS. */
4229 static void
4230 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4231 struct iv_use *use)
4233 unsigned uid = use->id, cid;
4234 struct cost_pair *cp;
4236 cp = ivs->cand_for_use[uid];
4237 if (!cp)
4238 return;
4239 cid = cp->cand->id;
4241 ivs->bad_uses++;
4242 ivs->cand_for_use[uid] = NULL;
4243 ivs->n_cand_uses[cid]--;
4245 if (ivs->n_cand_uses[cid] == 0)
4247 bitmap_clear_bit (ivs->cands, cid);
4248 /* Do not count the pseudocandidates. */
4249 if (cp->cand->iv)
4250 ivs->n_regs--;
4251 ivs->n_cands--;
4252 ivs->cand_cost -= cp->cand->cost;
4254 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4257 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4259 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4260 iv_ca_recount_cost (data, ivs);
4263 /* Add invariants in set INVS to set IVS. */
4265 static void
4266 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4268 bitmap_iterator bi;
4269 unsigned iid;
4271 if (!invs)
4272 return;
4274 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4276 ivs->n_invariant_uses[iid]++;
4277 if (ivs->n_invariant_uses[iid] == 1)
4278 ivs->n_regs++;
4282 /* Set cost pair for USE in set IVS to CP. */
4284 static void
4285 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4286 struct iv_use *use, struct cost_pair *cp)
4288 unsigned uid = use->id, cid;
4290 if (ivs->cand_for_use[uid] == cp)
4291 return;
4293 if (ivs->cand_for_use[uid])
4294 iv_ca_set_no_cp (data, ivs, use);
4296 if (cp)
4298 cid = cp->cand->id;
4300 ivs->bad_uses--;
4301 ivs->cand_for_use[uid] = cp;
4302 ivs->n_cand_uses[cid]++;
4303 if (ivs->n_cand_uses[cid] == 1)
4305 bitmap_set_bit (ivs->cands, cid);
4306 /* Do not count the pseudocandidates. */
4307 if (cp->cand->iv)
4308 ivs->n_regs++;
4309 ivs->n_cands++;
4310 ivs->cand_cost += cp->cand->cost;
4312 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4315 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4316 iv_ca_set_add_invariants (ivs, cp->depends_on);
4317 iv_ca_recount_cost (data, ivs);
4321 /* Extend set IVS by expressing USE by some of the candidates in it
4322 if possible. */
4324 static void
4325 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4326 struct iv_use *use)
4328 struct cost_pair *best_cp = NULL, *cp;
4329 bitmap_iterator bi;
4330 unsigned i;
4332 gcc_assert (ivs->upto >= use->id);
4334 if (ivs->upto == use->id)
4336 ivs->upto++;
4337 ivs->bad_uses++;
4340 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4342 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4344 if (cheaper_cost_pair (cp, best_cp))
4345 best_cp = cp;
4348 iv_ca_set_cp (data, ivs, use, best_cp);
4351 /* Get cost for assignment IVS. */
4353 static comp_cost
4354 iv_ca_cost (struct iv_ca *ivs)
4356 return (ivs->bad_uses ? infinite_cost : ivs->cost);
4359 /* Returns true if all dependences of CP are among invariants in IVS. */
4361 static bool
4362 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4364 unsigned i;
4365 bitmap_iterator bi;
4367 if (!cp->depends_on)
4368 return true;
4370 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4372 if (ivs->n_invariant_uses[i] == 0)
4373 return false;
4376 return true;
4379 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and chains
4380 it before NEXT_CHANGE. */
4382 static struct iv_ca_delta *
4383 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4384 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4386 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4388 change->use = use;
4389 change->old_cp = old_cp;
4390 change->new_cp = new_cp;
4391 change->next_change = next_change;
4393 return change;
4396 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4397 are rewritten. */
4399 static struct iv_ca_delta *
4400 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4402 struct iv_ca_delta *last;
4404 if (!l2)
4405 return l1;
4407 if (!l1)
4408 return l2;
4410 for (last = l1; last->next_change; last = last->next_change)
4411 continue;
4412 last->next_change = l2;
4414 return l1;
4417 /* Returns the candidate by which USE is expressed in IVS. */
4419 static struct cost_pair *
4420 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4422 return ivs->cand_for_use[use->id];
4425 /* Reverses the list of changes DELTA, forming its inverse. */
4427 static struct iv_ca_delta *
4428 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4430 struct iv_ca_delta *act, *next, *prev = NULL;
4431 struct cost_pair *tmp;
4433 for (act = delta; act; act = next)
4435 next = act->next_change;
4436 act->next_change = prev;
4437 prev = act;
4439 tmp = act->old_cp;
4440 act->old_cp = act->new_cp;
4441 act->new_cp = tmp;
4444 return prev;
4447 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4448 reverted instead. */
4450 static void
4451 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4452 struct iv_ca_delta *delta, bool forward)
4454 struct cost_pair *from, *to;
4455 struct iv_ca_delta *act;
4457 if (!forward)
4458 delta = iv_ca_delta_reverse (delta);
4460 for (act = delta; act; act = act->next_change)
4462 from = act->old_cp;
4463 to = act->new_cp;
4464 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4465 iv_ca_set_cp (data, ivs, act->use, to);
4468 if (!forward)
4469 iv_ca_delta_reverse (delta);
4472 /* Returns true if CAND is used in IVS. */
4474 static bool
4475 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4477 return ivs->n_cand_uses[cand->id] > 0;
4480 /* Returns number of induction variable candidates in the set IVS. */
4482 static unsigned
4483 iv_ca_n_cands (struct iv_ca *ivs)
4485 return ivs->n_cands;
4488 /* Free the list of changes DELTA. */
4490 static void
4491 iv_ca_delta_free (struct iv_ca_delta **delta)
4493 struct iv_ca_delta *act, *next;
4495 for (act = *delta; act; act = next)
4497 next = act->next_change;
4498 free (act);
4501 *delta = NULL;
4504 /* Allocates a new iv candidate assignment. */
4506 static struct iv_ca *
4507 iv_ca_new (struct ivopts_data *data)
4509 struct iv_ca *nw = XNEW (struct iv_ca);
4511 nw->upto = 0;
4512 nw->bad_uses = 0;
4513 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4514 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4515 nw->cands = BITMAP_ALLOC (NULL);
4516 nw->n_cands = 0;
4517 nw->n_regs = 0;
4518 nw->cand_use_cost = zero_cost;
4519 nw->cand_cost = 0;
4520 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4521 nw->cost = zero_cost;
4523 return nw;
4526 /* Free memory occupied by the set IVS. */
4528 static void
4529 iv_ca_free (struct iv_ca **ivs)
4531 free ((*ivs)->cand_for_use);
4532 free ((*ivs)->n_cand_uses);
4533 BITMAP_FREE ((*ivs)->cands);
4534 free ((*ivs)->n_invariant_uses);
4535 free (*ivs);
4536 *ivs = NULL;
4539 /* Dumps IVS to FILE. */
4541 static void
4542 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4544 const char *pref = " invariants ";
4545 unsigned i;
4546 comp_cost cost = iv_ca_cost (ivs);
4548 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4549 bitmap_print (file, ivs->cands, " candidates ","\n");
4551 for (i = 1; i <= data->max_inv_id; i++)
4552 if (ivs->n_invariant_uses[i])
4554 fprintf (file, "%s%d", pref, i);
4555 pref = ", ";
4557 fprintf (file, "\n");
4560 /* Try changing the candidate for each use in IVS to CAND. Return the cost of
4561 the new set, and store the differences in DELTA. The number of induction
4562 variables in the new set is stored in N_IVS. */
4564 static comp_cost
4565 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4566 struct iv_cand *cand, struct iv_ca_delta **delta,
4567 unsigned *n_ivs)
4569 unsigned i;
4570 comp_cost cost;
4571 struct iv_use *use;
4572 struct cost_pair *old_cp, *new_cp;
4574 *delta = NULL;
4575 for (i = 0; i < ivs->upto; i++)
4577 use = iv_use (data, i);
4578 old_cp = iv_ca_cand_for_use (ivs, use);
4580 if (old_cp
4581 && old_cp->cand == cand)
4582 continue;
4584 new_cp = get_use_iv_cost (data, use, cand);
4585 if (!new_cp)
4586 continue;
4588 if (!iv_ca_has_deps (ivs, new_cp))
4589 continue;
4591 if (!cheaper_cost_pair (new_cp, old_cp))
4592 continue;
4594 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4597 iv_ca_delta_commit (data, ivs, *delta, true);
4598 cost = iv_ca_cost (ivs);
4599 if (n_ivs)
4600 *n_ivs = iv_ca_n_cands (ivs);
4601 iv_ca_delta_commit (data, ivs, *delta, false);
4603 return cost;
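/* (Illustration, not part of the pass.)  iv_ca_extend above -- like
   iv_ca_narrow and iv_ca_prune below -- follows a speculative pattern:
   apply a delta, read off the cost, then roll the delta back so the set
   is unchanged until the caller decides to keep the change.  A hedged,
   self-contained sketch with an int standing in for the whole set state;
   all names are hypothetical.  */

struct toy_change
{
  int old_val, new_val;
  struct toy_change *next;
};

/* Apply the chain CH to *STATE if FORWARD is nonzero, revert it otherwise.  */
static void
toy_commit (int *state, struct toy_change *ch, int forward)
{
  for (; ch; ch = ch->next)
    *state += forward ? ch->new_val - ch->old_val
		      : ch->old_val - ch->new_val;
}

/* Measure the cost of applying CH without keeping it.  */
static int
toy_probe (int *state, struct toy_change *ch)
{
  int cost;

  toy_commit (state, ch, 1);	/* apply the changes */
  cost = *state;		/* stand-in for iv_ca_cost */
  toy_commit (state, ch, 0);	/* revert -- *STATE is unchanged again */

  return cost;
}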
4606 /* Try narrowing set IVS by removing CAND. Return the cost of
4607 the new set and store the differences in DELTA. */
4609 static comp_cost
4610 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4611 struct iv_cand *cand, struct iv_ca_delta **delta)
4613 unsigned i, ci;
4614 struct iv_use *use;
4615 struct cost_pair *old_cp, *new_cp, *cp;
4616 bitmap_iterator bi;
4617 struct iv_cand *cnd;
4618 comp_cost cost;
4620 *delta = NULL;
4621 for (i = 0; i < n_iv_uses (data); i++)
4623 use = iv_use (data, i);
4625 old_cp = iv_ca_cand_for_use (ivs, use);
4626 if (old_cp->cand != cand)
4627 continue;
4629 new_cp = NULL;
4631 if (data->consider_all_candidates)
4633 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4635 if (ci == cand->id)
4636 continue;
4638 cnd = iv_cand (data, ci);
4640 cp = get_use_iv_cost (data, use, cnd);
4641 if (!cp)
4642 continue;
4643 if (!iv_ca_has_deps (ivs, cp))
4644 continue;
4646 if (!cheaper_cost_pair (cp, new_cp))
4647 continue;
4649 new_cp = cp;
4652 else
4654 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4656 if (ci == cand->id)
4657 continue;
4659 cnd = iv_cand (data, ci);
4661 cp = get_use_iv_cost (data, use, cnd);
4662 if (!cp)
4663 continue;
4664 if (!iv_ca_has_deps (ivs, cp))
4665 continue;
4667 if (!cheaper_cost_pair (cp, new_cp))
4668 continue;
4670 new_cp = cp;
4674 if (!new_cp)
4676 iv_ca_delta_free (delta);
4677 return infinite_cost;
4680 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4683 iv_ca_delta_commit (data, ivs, *delta, true);
4684 cost = iv_ca_cost (ivs);
4685 iv_ca_delta_commit (data, ivs, *delta, false);
4687 return cost;
4690 /* Try optimizing the set of candidates IVS by removing candidates other than
4691 EXCEPT_CAND from it. Return the cost of the new set, and store the
4692 differences in DELTA. */
4694 static comp_cost
4695 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4696 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4698 bitmap_iterator bi;
4699 struct iv_ca_delta *act_delta, *best_delta;
4700 unsigned i;
4701 comp_cost best_cost, acost;
4702 struct iv_cand *cand;
4704 best_delta = NULL;
4705 best_cost = iv_ca_cost (ivs);
4707 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4709 cand = iv_cand (data, i);
4711 if (cand == except_cand)
4712 continue;
4714 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4716 if (compare_costs (acost, best_cost) < 0)
4718 best_cost = acost;
4719 iv_ca_delta_free (&best_delta);
4720 best_delta = act_delta;
4722 else
4723 iv_ca_delta_free (&act_delta);
4726 if (!best_delta)
4728 *delta = NULL;
4729 return best_cost;
4732 /* Recurse to possibly remove other unnecessary ivs. */
4733 iv_ca_delta_commit (data, ivs, best_delta, true);
4734 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4735 iv_ca_delta_commit (data, ivs, best_delta, false);
4736 *delta = iv_ca_delta_join (best_delta, *delta);
4737 return best_cost;
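/* (Illustration, not part of the pass.)  The recursion in iv_ca_prune
   above, reduced to an integer cost model: pick the single removal that
   helps most, commit it, and recurse to try removing more; stop when no
   removal improves the cost.  COST_WITHOUT is a hypothetical callback
   returning the cost with element I removed; for simplicity the element
   count is left unchanged in this sketch.  */

static int
toy_prune (int cost, int n, int (*cost_without) (int cost, int i))
{
  int i, c, best = cost;

  for (i = 0; i < n; i++)
    {
      c = cost_without (cost, i);
      if (c < best)
	best = c;
    }

  if (best == cost)
    return cost;	/* no single removal helps any more */

  /* Commit the best removal, then try to remove further elements.  */
  return toy_prune (best, n, cost_without);
}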
4740 /* Tries to extend the set IVS in the best possible way in order
4741 to express USE. */
4743 static bool
4744 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4745 struct iv_use *use)
4747 comp_cost best_cost, act_cost;
4748 unsigned i;
4749 bitmap_iterator bi;
4750 struct iv_cand *cand;
4751 struct iv_ca_delta *best_delta = NULL, *act_delta;
4752 struct cost_pair *cp;
4754 iv_ca_add_use (data, ivs, use);
4755 best_cost = iv_ca_cost (ivs);
4757 cp = iv_ca_cand_for_use (ivs, use);
4758 if (cp)
4760 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4761 iv_ca_set_no_cp (data, ivs, use);
4764 /* First try important candidates not based on any memory object. Only if
4765 this fails do we try the specific ones. Rationale -- in loops with many
4766 variables the best choice often is to use just one generic biv. If we
4767 added many ivs specific to the uses here, the optimization algorithm would
4768 later be likely to get stuck in a local minimum, causing us to create too
4769 many ivs. Going from few ivs to more seems more likely to succeed --
4770 starting from few ivs, replacing an expensive use by a specific iv should
4771 always be a win. (See the sketch after this function.) */
4772 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4774 cand = iv_cand (data, i);
4776 if (cand->iv->base_object != NULL_TREE)
4777 continue;
4779 if (iv_ca_cand_used_p (ivs, cand))
4780 continue;
4782 cp = get_use_iv_cost (data, use, cand);
4783 if (!cp)
4784 continue;
4786 iv_ca_set_cp (data, ivs, use, cp);
4787 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4788 iv_ca_set_no_cp (data, ivs, use);
4789 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4791 if (compare_costs (act_cost, best_cost) < 0)
4793 best_cost = act_cost;
4795 iv_ca_delta_free (&best_delta);
4796 best_delta = act_delta;
4798 else
4799 iv_ca_delta_free (&act_delta);
4802 if (infinite_cost_p (best_cost))
4804 for (i = 0; i < use->n_map_members; i++)
4806 cp = use->cost_map + i;
4807 cand = cp->cand;
4808 if (!cand)
4809 continue;
4811 /* Already tried this. */
4812 if (cand->important && cand->iv->base_object == NULL_TREE)
4813 continue;
4815 if (iv_ca_cand_used_p (ivs, cand))
4816 continue;
4818 act_delta = NULL;
4819 iv_ca_set_cp (data, ivs, use, cp);
4820 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4821 iv_ca_set_no_cp (data, ivs, use);
4822 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4823 cp, act_delta);
4825 if (compare_costs (act_cost, best_cost) < 0)
4827 best_cost = act_cost;
4829 if (best_delta)
4830 iv_ca_delta_free (&best_delta);
4831 best_delta = act_delta;
4833 else
4834 iv_ca_delta_free (&act_delta);
4838 iv_ca_delta_commit (data, ivs, best_delta, true);
4839 iv_ca_delta_free (&best_delta);
4841 return !infinite_cost_p (best_cost);
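/* (Illustration, not part of the pass.)  The two-phase strategy described
   in the rationale comment inside try_add_cand_for, reduced to scanning
   two arrays of costs: consult the small generic pool first, and widen
   the search to the use-specific pool only when the generic one offers
   nothing usable.  All names are hypothetical.  */

#include <limits.h>	/* for INT_MAX, standing in for infinite_cost */

static int
toy_two_phase_pick (const int *generic, unsigned n_generic,
		    const int *specific, unsigned n_specific)
{
  unsigned i;
  int best = INT_MAX;

  for (i = 0; i < n_generic; i++)
    if (generic[i] < best)
      best = generic[i];

  /* Falling back only on failure keeps the solution built from few
     generic ivs whenever possible, which is what avoids the local
     minima mentioned above.  */
  if (best == INT_MAX)
    for (i = 0; i < n_specific; i++)
      if (specific[i] < best)
	best = specific[i];

  return best;
}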
4844 /* Finds an initial assignment of candidates to uses. */
4846 static struct iv_ca *
4847 get_initial_solution (struct ivopts_data *data)
4849 struct iv_ca *ivs = iv_ca_new (data);
4850 unsigned i;
4852 for (i = 0; i < n_iv_uses (data); i++)
4853 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4855 iv_ca_free (&ivs);
4856 return NULL;
4859 return ivs;
4862 /* Tries to improve set of induction variables IVS. */
4864 static bool
4865 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4867 unsigned i, n_ivs;
4868 comp_cost acost, best_cost = iv_ca_cost (ivs);
4869 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4870 struct iv_cand *cand;
4872 /* Try extending the set of induction variables by one. */
4873 for (i = 0; i < n_iv_cands (data); i++)
4875 cand = iv_cand (data, i);
4877 if (iv_ca_cand_used_p (ivs, cand))
4878 continue;
4880 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4881 if (!act_delta)
4882 continue;
4884 /* If we successfully added the candidate and the set is small enough,
4885 try optimizing it by removing other candidates. */
4886 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4888 iv_ca_delta_commit (data, ivs, act_delta, true);
4889 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4890 iv_ca_delta_commit (data, ivs, act_delta, false);
4891 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4894 if (compare_costs (acost, best_cost) < 0)
4896 best_cost = acost;
4897 iv_ca_delta_free (&best_delta);
4898 best_delta = act_delta;
4900 else
4901 iv_ca_delta_free (&act_delta);
4904 if (!best_delta)
4906 /* Try removing the candidates from the set instead. */
4907 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4909 /* Nothing more we can do. */
4910 if (!best_delta)
4911 return false;
4914 iv_ca_delta_commit (data, ivs, best_delta, true);
4915 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4916 iv_ca_delta_free (&best_delta);
4917 return true;
4920 /* Attempts to find the optimal set of induction variables. We use a simple
4921 greedy heuristic -- we try to replace at most one candidate in the selected
4922 solution and remove the unused ivs while this improves the cost. */
4924 static struct iv_ca *
4925 find_optimal_iv_set (struct ivopts_data *data)
4927 unsigned i;
4928 struct iv_ca *set;
4929 struct iv_use *use;
4931 /* Get the initial solution. */
4932 set = get_initial_solution (data);
4933 if (!set)
4935 if (dump_file && (dump_flags & TDF_DETAILS))
4936 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4937 return NULL;
4940 if (dump_file && (dump_flags & TDF_DETAILS))
4942 fprintf (dump_file, "Initial set of candidates:\n");
4943 iv_ca_dump (data, dump_file, set);
4946 while (try_improve_iv_set (data, set))
4948 if (dump_file && (dump_flags & TDF_DETAILS))
4950 fprintf (dump_file, "Improved to:\n");
4951 iv_ca_dump (data, dump_file, set);
4955 if (dump_file && (dump_flags & TDF_DETAILS))
4957 comp_cost cost = iv_ca_cost (set);
4958 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4961 for (i = 0; i < n_iv_uses (data); i++)
4963 use = iv_use (data, i);
4964 use->selected = iv_ca_cand_for_use (set, use)->cand;
4967 return set;
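/* (Illustration, not part of the pass.)  The shape of the driver above as
   a hedged sketch: start from a feasible solution and keep taking single
   improving steps until none exists.  Each successful step strictly
   lowers the cost, so the loop terminates, but only a local optimum is
   guaranteed.  TRY_IMPROVE is a hypothetical stand-in for
   try_improve_iv_set.  */

static int
toy_greedy_descent (int cost, int (*try_improve) (int *cost))
{
  /* Each call either lowers COST and returns nonzero, or leaves it
     alone and returns zero.  */
  while (try_improve (&cost))
    ;

  return cost;
}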
4970 /* Creates a new induction variable corresponding to CAND. */
4972 static void
4973 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4975 gimple_stmt_iterator incr_pos;
4976 tree base;
4977 bool after = false;
4979 if (!cand->iv)
4980 return;
4982 switch (cand->pos)
4984 case IP_NORMAL:
4985 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
4986 break;
4988 case IP_END:
4989 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
4990 after = true;
4991 break;
4993 case IP_ORIGINAL:
4994 /* Mark that the iv is preserved. */
4995 name_info (data, cand->var_before)->preserve_biv = true;
4996 name_info (data, cand->var_after)->preserve_biv = true;
4998 /* Rewrite the increment so that it uses var_before directly. */
4999 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5001 return;
5004 gimple_add_tmp_var (cand->var_before);
5005 add_referenced_var (cand->var_before);
5007 base = unshare_expr (cand->iv->base);
5009 create_iv (base, unshare_expr (cand->iv->step),
5010 cand->var_before, data->current_loop,
5011 &incr_pos, after, &cand->var_before, &cand->var_after);
5014 /* Creates new induction variables described in SET. */
5016 static void
5017 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5019 unsigned i;
5020 struct iv_cand *cand;
5021 bitmap_iterator bi;
5023 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5025 cand = iv_cand (data, i);
5026 create_new_iv (data, cand);
5030 /* Returns the phi-node in BB with result RESULT. */
5032 static gimple
5033 get_phi_with_result (basic_block bb, tree result)
5035 gimple_stmt_iterator i = gsi_start_phis (bb);
5037 for (; !gsi_end_p (i); gsi_next (&i))
5038 if (gimple_phi_result (gsi_stmt (i)) == result)
5039 return gsi_stmt (i);
5041 gcc_unreachable ();
5042 return NULL;
5046 /* Removes statement STMT (a real one or a phi node). If INCLUDING_DEFINED_NAME
5047 is true, also remove the ssa name defined by the statement. */
5049 static void
5050 remove_statement (gimple stmt, bool including_defined_name)
5052 if (gimple_code (stmt) == GIMPLE_PHI)
5054 gimple bb_phi = get_phi_with_result (gimple_bb (stmt),
5055 gimple_phi_result (stmt));
5056 gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi);
5057 remove_phi_node (&bsi, including_defined_name);
5059 else
5061 gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
5062 gsi_remove (&bsi, true);
5063 release_defs (stmt);
5067 /* Rewrites USE (definition of iv used in a nonlinear expression)
5068 using candidate CAND. */
5070 static void
5071 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5072 struct iv_use *use, struct iv_cand *cand)
5074 tree comp;
5075 tree op, tgt;
5076 gimple ass;
5077 gimple_stmt_iterator bsi;
5079 /* An important special case -- if we are asked to express the value of
5080 the original iv by itself, just exit; there is no need to
5081 introduce a new computation (that might also need casting the
5082 variable to unsigned and back). */
5083 if (cand->pos == IP_ORIGINAL
5084 && cand->incremented_at == use->stmt)
5086 tree step, ctype, utype;
5087 enum tree_code incr_code = PLUS_EXPR, old_code;
5089 gcc_assert (is_gimple_assign (use->stmt));
5090 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5092 step = cand->iv->step;
5093 ctype = TREE_TYPE (step);
5094 utype = TREE_TYPE (cand->var_after);
5095 if (TREE_CODE (step) == NEGATE_EXPR)
5097 incr_code = MINUS_EXPR;
5098 step = TREE_OPERAND (step, 0);
5101 /* Check whether we may leave the computation unchanged.
5102 This is the case only if it does not rely on other
5103 computations in the loop -- otherwise, the computation
5104 we rely upon may be removed in remove_unused_ivs,
5105 thus leading to an ICE. */
5106 old_code = gimple_assign_rhs_code (use->stmt);
5107 if (old_code == PLUS_EXPR
5108 || old_code == MINUS_EXPR
5109 || old_code == POINTER_PLUS_EXPR)
5111 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5112 op = gimple_assign_rhs2 (use->stmt);
5113 else if (old_code != MINUS_EXPR
5114 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5115 op = gimple_assign_rhs1 (use->stmt);
5116 else
5117 op = NULL_TREE;
5119 else
5120 op = NULL_TREE;
5122 if (op
5123 && (TREE_CODE (op) == INTEGER_CST
5124 || operand_equal_p (op, step, 0)))
5125 return;
5127 /* Otherwise, add the necessary computations to express
5128 the iv. */
5129 op = fold_convert (ctype, cand->var_before);
5130 comp = fold_convert (utype,
5131 build2 (incr_code, ctype, op,
5132 unshare_expr (step)));
5134 else
5136 comp = get_computation (data->current_loop, use, cand);
5137 gcc_assert (comp != NULL_TREE);
5140 switch (gimple_code (use->stmt))
5142 case GIMPLE_PHI:
5143 tgt = PHI_RESULT (use->stmt);
5145 /* If we should keep the biv, do not replace it. */
5146 if (name_info (data, tgt)->preserve_biv)
5147 return;
5149 bsi = gsi_after_labels (gimple_bb (use->stmt));
5150 break;
5152 case GIMPLE_ASSIGN:
5153 tgt = gimple_assign_lhs (use->stmt);
5154 bsi = gsi_for_stmt (use->stmt);
5155 break;
5157 default:
5158 gcc_unreachable ();
5161 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5162 true, GSI_SAME_STMT);
5164 if (gimple_code (use->stmt) == GIMPLE_PHI)
5166 ass = gimple_build_assign (tgt, op);
5167 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5168 remove_statement (use->stmt, false);
5170 else
5172 gimple_assign_set_rhs_from_tree (&bsi, op);
5173 use->stmt = gsi_stmt (bsi);
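/* (Illustration, not part of the pass.)  For the early-exit special case
   at the top of rewrite_use_nonlinear_expr: given a hypothetical original
   biv increment

     i_7 = i_3 + 1;

   where the candidate is that very biv (cand->pos == IP_ORIGINAL and
   cand->incremented_at is this statement), the statement is left
   untouched -- re-expressing it through var_before would only insert
   conversions without changing the value computed.  */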
5177 /* Replaces the ssa name at index IDX with its base variable. Callback for
5178 for_each_index. */
5180 static bool
5181 idx_remove_ssa_names (tree base, tree *idx,
5182 void *data ATTRIBUTE_UNUSED)
5184 tree *op;
5186 if (TREE_CODE (*idx) == SSA_NAME)
5187 *idx = SSA_NAME_VAR (*idx);
5189 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5191 op = &TREE_OPERAND (base, 2);
5192 if (*op
5193 && TREE_CODE (*op) == SSA_NAME)
5194 *op = SSA_NAME_VAR (*op);
5195 op = &TREE_OPERAND (base, 3);
5196 if (*op
5197 && TREE_CODE (*op) == SSA_NAME)
5198 *op = SSA_NAME_VAR (*op);
5201 return true;
5204 /* Unshares REF and replaces the ssa names inside it with their base variables. */
5206 static tree
5207 unshare_and_remove_ssa_names (tree ref)
5209 ref = unshare_expr (ref);
5210 for_each_index (&ref, idx_remove_ssa_names, NULL);
5212 return ref;
5215 /* Extract the alias analysis info for the memory reference REF. How this
5216 information is stored, and what exactly its semantics are, depends on the
5217 type of the reference, but there is always one _DECL node hidden somewhere
5218 that is used to determine the set of virtual operands for the reference.
5219 The code below deciphers this jungle and extracts that single useful piece
5220 of information. */
5222 static tree
5223 get_ref_tag (tree ref, tree orig)
5225 tree var = get_base_address (ref);
5226 tree aref = NULL_TREE, tag, sv;
5227 HOST_WIDE_INT offset, size, maxsize;
5229 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5231 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5232 if (ref)
5233 break;
5236 if (!var)
5237 return NULL_TREE;
5239 if (TREE_CODE (var) == INDIRECT_REF)
5241 /* If the base is a dereference of a pointer, first check its name memory
5242 tag. If it does not have one, use its symbol memory tag. */
5243 var = TREE_OPERAND (var, 0);
5244 if (TREE_CODE (var) != SSA_NAME)
5245 return NULL_TREE;
5247 if (SSA_NAME_PTR_INFO (var))
5249 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5250 if (tag)
5251 return tag;
5254 var = SSA_NAME_VAR (var);
5255 tag = symbol_mem_tag (var);
5256 gcc_assert (tag != NULL_TREE);
5257 return tag;
5259 else
5261 if (!DECL_P (var))
5262 return NULL_TREE;
5264 tag = symbol_mem_tag (var);
5265 if (tag)
5266 return tag;
5268 return var;
5272 /* Copies the reference information from OLD_REF to NEW_REF. */
5274 static void
5275 copy_ref_info (tree new_ref, tree old_ref)
5277 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5278 copy_mem_ref_info (new_ref, old_ref);
5279 else
5281 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5282 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5286 /* Rewrites USE (an address that is an iv) using candidate CAND. */
5288 static void
5289 rewrite_use_address (struct ivopts_data *data,
5290 struct iv_use *use, struct iv_cand *cand)
5292 aff_tree aff;
5293 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5294 tree ref;
5295 bool ok;
5297 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5298 gcc_assert (ok);
5299 unshare_aff_combination (&aff);
5301 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
5302 copy_ref_info (ref, *use->op_p);
5303 *use->op_p = ref;
5306 /* Rewrites USE (a condition one of whose arguments is an iv) using
5307 candidate CAND. */
5309 static void
5310 rewrite_use_compare (struct ivopts_data *data,
5311 struct iv_use *use, struct iv_cand *cand)
5313 tree comp, *var_p, op, bound;
5314 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5315 enum tree_code compare;
5316 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5317 bool ok;
5319 bound = cp->value;
5320 if (bound)
5322 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5323 tree var_type = TREE_TYPE (var);
5325 compare = iv_elimination_compare (data, use);
5326 bound = unshare_expr (fold_convert (var_type, bound));
5327 op = force_gimple_operand_gsi (&bsi, bound, true, NULL_TREE,
5328 true, GSI_SAME_STMT);
5330 gimple_cond_set_lhs (use->stmt, var);
5331 gimple_cond_set_code (use->stmt, compare);
5332 gimple_cond_set_rhs (use->stmt, op);
5333 return;
5336 /* The induction variable elimination failed; just express the original
5337 giv. */
5338 comp = get_computation (data->current_loop, use, cand);
5339 gcc_assert (comp != NULL_TREE);
5341 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5342 gcc_assert (ok);
5344 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5345 true, GSI_SAME_STMT);
5348 /* Rewrites USE using candidate CAND. */
5350 static void
5351 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5353 push_stmt_changes (&use->stmt);
5355 switch (use->type)
5357 case USE_NONLINEAR_EXPR:
5358 rewrite_use_nonlinear_expr (data, use, cand);
5359 break;
5361 case USE_ADDRESS:
5362 rewrite_use_address (data, use, cand);
5363 break;
5365 case USE_COMPARE:
5366 rewrite_use_compare (data, use, cand);
5367 break;
5369 default:
5370 gcc_unreachable ();
5373 pop_stmt_changes (&use->stmt);
5376 /* Rewrite the uses using the selected induction variables. */
5378 static void
5379 rewrite_uses (struct ivopts_data *data)
5381 unsigned i;
5382 struct iv_cand *cand;
5383 struct iv_use *use;
5385 for (i = 0; i < n_iv_uses (data); i++)
5387 use = iv_use (data, i);
5388 cand = use->selected;
5389 gcc_assert (cand);
5391 rewrite_use (data, use, cand);
5395 /* Removes the ivs that are not used after rewriting. */
5397 static void
5398 remove_unused_ivs (struct ivopts_data *data)
5400 unsigned j;
5401 bitmap_iterator bi;
5403 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5405 struct version_info *info;
5407 info = ver_info (data, j);
5408 if (info->iv
5409 && !integer_zerop (info->iv->step)
5410 && !info->inv_id
5411 && !info->iv->have_use_for
5412 && !info->preserve_biv)
5413 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5417 /* Frees data allocated by the optimization of a single loop. */
5419 static void
5420 free_loop_data (struct ivopts_data *data)
5422 unsigned i, j;
5423 bitmap_iterator bi;
5424 tree obj;
5426 if (data->niters)
5428 pointer_map_destroy (data->niters);
5429 data->niters = NULL;
5432 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5434 struct version_info *info;
5436 info = ver_info (data, i);
5437 if (info->iv)
5438 free (info->iv);
5439 info->iv = NULL;
5440 info->has_nonlin_use = false;
5441 info->preserve_biv = false;
5442 info->inv_id = 0;
5444 bitmap_clear (data->relevant);
5445 bitmap_clear (data->important_candidates);
5447 for (i = 0; i < n_iv_uses (data); i++)
5449 struct iv_use *use = iv_use (data, i);
5451 free (use->iv);
5452 BITMAP_FREE (use->related_cands);
5453 for (j = 0; j < use->n_map_members; j++)
5454 if (use->cost_map[j].depends_on)
5455 BITMAP_FREE (use->cost_map[j].depends_on);
5456 free (use->cost_map);
5457 free (use);
5459 VEC_truncate (iv_use_p, data->iv_uses, 0);
5461 for (i = 0; i < n_iv_cands (data); i++)
5463 struct iv_cand *cand = iv_cand (data, i);
5465 if (cand->iv)
5466 free (cand->iv);
5467 if (cand->depends_on)
5468 BITMAP_FREE (cand->depends_on);
5469 free (cand);
5471 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5473 if (data->version_info_size < num_ssa_names)
5475 data->version_info_size = 2 * num_ssa_names;
5476 free (data->version_info);
5477 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5480 data->max_inv_id = 0;
5482 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5483 SET_DECL_RTL (obj, NULL_RTX);
5485 VEC_truncate (tree, decl_rtl_to_reset, 0);
5488 /* Finalizes the data structures used by the iv optimization pass. */
5491 static void
5492 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5494 free_loop_data (data);
5495 free (data->version_info);
5496 BITMAP_FREE (data->relevant);
5497 BITMAP_FREE (data->important_candidates);
5499 VEC_free (tree, heap, decl_rtl_to_reset);
5500 VEC_free (iv_use_p, heap, data->iv_uses);
5501 VEC_free (iv_cand_p, heap, data->iv_candidates);
5504 /* Optimizes the LOOP. Returns true if anything changed. */
5506 static bool
5507 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5509 bool changed = false;
5510 struct iv_ca *iv_ca;
5511 edge exit;
5513 gcc_assert (!data->niters);
5514 data->current_loop = loop;
5515 data->speed = optimize_loop_for_speed_p (loop);
5517 if (dump_file && (dump_flags & TDF_DETAILS))
5519 fprintf (dump_file, "Processing loop %d\n", loop->num);
5521 exit = single_dom_exit (loop);
5522 if (exit)
5524 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5525 exit->src->index, exit->dest->index);
5526 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5527 fprintf (dump_file, "\n");
5530 fprintf (dump_file, "\n");
5533 /* For each ssa name, determine whether it behaves as an induction variable
5534 in some loop. */
5535 if (!find_induction_variables (data))
5536 goto finish;
5538 /* Finds interesting uses (item 1). */
5539 find_interesting_uses (data);
5540 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5541 goto finish;
5543 /* Finds candidates for the induction variables (item 2). */
5544 find_iv_candidates (data);
5546 /* Calculates the costs (item 3, part 1). */
5547 determine_use_iv_costs (data);
5548 determine_iv_costs (data);
5549 determine_set_costs (data);
5551 /* Find the optimal set of induction variables (item 3, part 2). */
5552 iv_ca = find_optimal_iv_set (data);
5553 if (!iv_ca)
5554 goto finish;
5555 changed = true;
5557 /* Create the new induction variables (item 4, part 1). */
5558 create_new_ivs (data, iv_ca);
5559 iv_ca_free (&iv_ca);
5561 /* Rewrite the uses (item 4, part 2). */
5562 rewrite_uses (data);
5564 /* Remove the ivs that are unused after rewriting. */
5565 remove_unused_ivs (data);
5567 /* We have changed the structure of induction variables; it might happen
5568 that definitions in the scev database refer to some of them that were
5569 eliminated. */
5570 scev_reset ();
5572 finish:
5573 free_loop_data (data);
5575 return changed;
5578 /* Main entry point. Optimizes induction variables in loops. */
5580 void
5581 tree_ssa_iv_optimize (void)
5583 struct loop *loop;
5584 struct ivopts_data data;
5585 loop_iterator li;
5587 tree_ssa_iv_optimize_init (&data);
5589 /* Optimize the loops starting with the innermost ones. */
5590 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5592 if (dump_file && (dump_flags & TDF_DETAILS))
5593 flow_loop_dump (loop, dump_file, NULL, 1);
5595 tree_ssa_iv_optimize_loop (&data, loop);
5598 tree_ssa_iv_optimize_finalize (&data);