gcc/tree-ssa-loop-ivopts.c
/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
   Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
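
/* As an illustrative sketch (not from the original sources), consider
   what the pass does to a simple loop.  Before ivopts, each iteration
   recomputes the address a + 4 * i:

	for (i = 0; i < n; i++)
	  a[i] = 0;

   After strength reduction, a pointer candidate replaces the address
   use and the exit test is rewritten against its final value:

	for (p = a; p < a + n; p++)
	  *p = 0;

   The code below finds the uses (the address a[i], the compare i < n),
   generates the candidates (the original i, the pointer p, ...), and
   selects the cheapest set of candidates covering all uses.  */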
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  unsigned cost;        /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this many ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);
  gimple_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
    {
      if (gsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (gsi_stmt (bsi) == stmt)
	return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && integer_zerop (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	niter = desc.niter;
      else
	niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
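
/* For illustration (not from the original sources): for EXPR of the
   form &a[0] + 4 * i, the POINTER_PLUS_EXPR and ADDR_EXPR cases peel
   the offset and yield the address of a converted to ptr_type_node;
   for a pointer p of unknown origin, the default case yields p itself
   (converted to ptr_type_node); for an integer constant cast to a
   pointer, NULL_TREE is returned.  */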
/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
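
/* For illustration (not from the original sources):
   constant_multiple_of (i * 4, i, &mul) sets mul to 4 through the
   MULT_EXPR case, constant_multiple_of (12, 4, &mul) sets mul to 3
   through the INTEGER_CST case, and constant_multiple_of (13, 4, &mul)
   fails because the division leaves a nonzero remainder.  */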
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      double_int mul;
      tree al = build_int_cst (TREE_TYPE (step),
			       GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);

      if (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0)
	return true;

      if (!constant_multiple_of (step, al, &mul))
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    *ref = fold_indirect_ref (*ref);
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
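
/* For illustration (not from the original sources): for an array a of
   4-byte ints, strip_offset (&a[4], &off) sets off to 16 and returns
   the address of a converted to the original pointer type, and
   strip_offset (i + 7, &off) sets off to 7 and returns i.  */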
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      /* Don't convert the base to the generic type for pointers as the generic
	 type is an integer type with the same size as the pointer type.  */
      if (type != orig_type && !POINTER_TYPE_P (orig_type))
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT.
   If USE is not NULL, the candidate is set as related to it.  The candidate
   computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);
}
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
2213 /* Adds candidates based on the old induction variable IV. */
2215 static void
2216 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2218 gimple phi;
2219 tree def;
2220 struct iv_cand *cand;
2222 add_candidate (data, iv->base, iv->step, true, NULL);
2224 /* The same, but with initial value zero. */
2225 add_candidate (data,
2226 build_int_cst (TREE_TYPE (iv->base), 0),
2227 iv->step, true, NULL);
2229 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2230 if (gimple_code (phi) == GIMPLE_PHI)
2232 /* Additionally record the possibility of leaving the original iv
2233 untouched. */
2234 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2235 cand = add_candidate_1 (data,
2236 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2237 SSA_NAME_DEF_STMT (def));
2238 cand->var_before = iv->ssa_name;
2239 cand->var_after = def;
2243 /* Adds candidates based on the old induction variables. */
2245 static void
2246 add_old_ivs_candidates (struct ivopts_data *data)
2248 unsigned i;
2249 struct iv *iv;
2250 bitmap_iterator bi;
2252 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2254 iv = ver_info (data, i)->iv;
2255 if (iv && iv->biv_p && !integer_zerop (iv->step))
2256 add_old_iv_candidates (data, iv);
2260 /* Adds candidates based on the value of the induction variable IV and USE. */
2262 static void
2263 add_iv_value_candidates (struct ivopts_data *data,
2264 struct iv *iv, struct iv_use *use)
2266 unsigned HOST_WIDE_INT offset;
2267 tree base;
2268 tree basetype;
2270 add_candidate (data, iv->base, iv->step, false, use);
2272 /* The same, but with initial value zero. Make such a variable important,
2273 since it is generic enough that possibly many uses may be based
2274 on it. */
2275 basetype = TREE_TYPE (iv->base);
2276 if (POINTER_TYPE_P (basetype))
2277 basetype = sizetype;
2278 add_candidate (data, build_int_cst (basetype, 0),
2279 iv->step, true, use);
2281 /* Third, try removing the constant offset. Make sure to even
2282 add a candidate for &a[0] vs. (T *)&a. */
2283 base = strip_offset (iv->base, &offset);
2284 if (offset
2285 || base != iv->base)
2286 add_candidate (data, base, iv->step, false, use);
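/* For illustration (hypothetical values, assuming 4-byte elements): for a
   use with iv {base = &a[3], step = 4}, the calls above would add the
   candidate &a[3] + 4 * i, the generic candidate 0 + 4 * i, and -- once
   strip_offset has peeled off the constant 12 -- the candidate
   &a[0] + 4 * i.  */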
2289 /* Adds candidates based on the uses. */
2291 static void
2292 add_derived_ivs_candidates (struct ivopts_data *data)
2294 unsigned i;
2296 for (i = 0; i < n_iv_uses (data); i++)
2298 struct iv_use *use = iv_use (data, i);
2300 if (!use)
2301 continue;
2303 switch (use->type)
2305 case USE_NONLINEAR_EXPR:
2306 case USE_COMPARE:
2307 case USE_ADDRESS:
2308 /* Just add the ivs based on the value of the iv used here. */
2309 add_iv_value_candidates (data, use->iv, use);
2310 break;
2312 default:
2313 gcc_unreachable ();
2318 /* Record important candidates and add them to related_cands bitmaps
2319 if needed. */
2321 static void
2322 record_important_candidates (struct ivopts_data *data)
2324 unsigned i;
2325 struct iv_use *use;
2327 for (i = 0; i < n_iv_cands (data); i++)
2329 struct iv_cand *cand = iv_cand (data, i);
2331 if (cand->important)
2332 bitmap_set_bit (data->important_candidates, i);
2335 data->consider_all_candidates = (n_iv_cands (data)
2336 <= CONSIDER_ALL_CANDIDATES_BOUND);
2338 if (data->consider_all_candidates)
2340 /* We will not need "related_cands" bitmaps in this case,
2341 so release them to decrease peak memory consumption. */
2342 for (i = 0; i < n_iv_uses (data); i++)
2344 use = iv_use (data, i);
2345 BITMAP_FREE (use->related_cands);
2348 else
2350 /* Add important candidates to the related_cands bitmaps. */
2351 for (i = 0; i < n_iv_uses (data); i++)
2352 bitmap_ior_into (iv_use (data, i)->related_cands,
2353 data->important_candidates);
2357 /* Finds the candidates for the induction variables. */
2359 static void
2360 find_iv_candidates (struct ivopts_data *data)
2362 /* Add commonly used ivs. */
2363 add_standard_iv_candidates (data);
2365 /* Add old induction variables. */
2366 add_old_ivs_candidates (data);
2368 /* Add induction variables derived from uses. */
2369 add_derived_ivs_candidates (data);
2371 /* Record the important candidates. */
2372 record_important_candidates (data);
2375 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2376 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2377 we allocate a simple list for every use. */
2379 static void
2380 alloc_use_cost_map (struct ivopts_data *data)
2382 unsigned i, size, s, j;
2384 for (i = 0; i < n_iv_uses (data); i++)
2386 struct iv_use *use = iv_use (data, i);
2387 bitmap_iterator bi;
2389 if (data->consider_all_candidates)
2390 size = n_iv_cands (data);
2391 else
2393 s = 0;
2394 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2396 s++;
2399 /* Round up to the nearest power of two, so that taking the modulo is fast. */
2400 for (size = 1; size < s; size <<= 1)
2401 continue;
2404 use->n_map_members = size;
2405 use->cost_map = XCNEWVEC (struct cost_pair, size);
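/* Illustrative sketch (values are made up): with s = 5 related candidates,
   the loop above yields size = 8, so that the open-addressing lookups in
   set_use_iv_cost and get_use_iv_cost below can compute

     slot = cand->id & (use->n_map_members - 1);

   i.e. the candidate id modulo the table size, with a single AND.  */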
2409 /* Returns a description of the computation cost of an expression whose
2410 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2412 static comp_cost
2413 new_cost (unsigned runtime, unsigned complexity)
2415 comp_cost cost;
2417 cost.cost = runtime;
2418 cost.complexity = complexity;
2420 return cost;
2423 /* Adds costs COST1 and COST2. */
2425 static comp_cost
2426 add_costs (comp_cost cost1, comp_cost cost2)
2428 cost1.cost += cost2.cost;
2429 cost1.complexity += cost2.complexity;
2431 return cost1;
2433 /* Subtracts cost COST2 from COST1. */
2435 static comp_cost
2436 sub_costs (comp_cost cost1, comp_cost cost2)
2438 cost1.cost -= cost2.cost;
2439 cost1.complexity -= cost2.complexity;
2441 return cost1;
2444 /* Returns a negative number if COST1 < COST2, a positive number if
2445 COST1 > COST2, and 0 if COST1 = COST2. */
2447 static int
2448 compare_costs (comp_cost cost1, comp_cost cost2)
2450 if (cost1.cost == cost2.cost)
2451 return cost1.complexity - cost2.complexity;
2453 return cost1.cost - cost2.cost;
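/* For example (illustrative values): {cost = 4, complexity = 1} compares
   as cheaper than {cost = 4, complexity = 2}, and both are cheaper than
   {cost = 5, complexity = 0}; the runtime cost dominates and complexity
   only breaks ties.  */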
2456 /* Returns true if COST is infinite. */
2458 static bool
2459 infinite_cost_p (comp_cost cost)
2461 return cost.cost == INFTY;
2464 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2465 on invariants DEPENDS_ON and that the value used in expressing it
2466 is VALUE. */
2468 static void
2469 set_use_iv_cost (struct ivopts_data *data,
2470 struct iv_use *use, struct iv_cand *cand,
2471 comp_cost cost, bitmap depends_on, tree value)
2473 unsigned i, s;
2475 if (infinite_cost_p (cost))
2477 BITMAP_FREE (depends_on);
2478 return;
2481 if (data->consider_all_candidates)
2483 use->cost_map[cand->id].cand = cand;
2484 use->cost_map[cand->id].cost = cost;
2485 use->cost_map[cand->id].depends_on = depends_on;
2486 use->cost_map[cand->id].value = value;
2487 return;
2490 /* n_map_members is a power of two, so this computes modulo. */
2491 s = cand->id & (use->n_map_members - 1);
2492 for (i = s; i < use->n_map_members; i++)
2493 if (!use->cost_map[i].cand)
2494 goto found;
2495 for (i = 0; i < s; i++)
2496 if (!use->cost_map[i].cand)
2497 goto found;
2499 gcc_unreachable ();
2501 found:
2502 use->cost_map[i].cand = cand;
2503 use->cost_map[i].cost = cost;
2504 use->cost_map[i].depends_on = depends_on;
2505 use->cost_map[i].value = value;
2508 /* Gets cost of (USE, CANDIDATE) pair. */
2510 static struct cost_pair *
2511 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2512 struct iv_cand *cand)
2514 unsigned i, s;
2515 struct cost_pair *ret;
2517 if (!cand)
2518 return NULL;
2520 if (data->consider_all_candidates)
2522 ret = use->cost_map + cand->id;
2523 if (!ret->cand)
2524 return NULL;
2526 return ret;
2529 /* n_map_members is a power of two, so this computes modulo. */
2530 s = cand->id & (use->n_map_members - 1);
2531 for (i = s; i < use->n_map_members; i++)
2532 if (use->cost_map[i].cand == cand)
2533 return use->cost_map + i;
2535 for (i = 0; i < s; i++)
2536 if (use->cost_map[i].cand == cand)
2537 return use->cost_map + i;
2539 return NULL;
2542 /* Returns an estimate of the cost of computing SEQ. */
2544 static unsigned
2545 seq_cost (rtx seq, bool speed)
2547 unsigned cost = 0;
2548 rtx set;
2550 for (; seq; seq = NEXT_INSN (seq))
2552 set = single_set (seq);
2553 if (set)
2554 cost += rtx_cost (set, SET, speed);
2555 else
2556 cost++;
2559 return cost;
2562 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2563 static rtx
2564 produce_memory_decl_rtl (tree obj, int *regno)
2566 rtx x;
2568 gcc_assert (obj);
2569 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2571 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2572 x = gen_rtx_SYMBOL_REF (Pmode, name);
2573 SET_SYMBOL_REF_DECL (x, obj);
2574 x = gen_rtx_MEM (DECL_MODE (obj), x);
2575 targetm.encode_section_info (obj, x, true);
2577 else
2579 x = gen_raw_REG (Pmode, (*regno)++);
2580 x = gen_rtx_MEM (DECL_MODE (obj), x);
2583 return x;
2586 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2587 walk_tree. DATA contains the actual fake register number. */
2589 static tree
2590 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2592 tree obj = NULL_TREE;
2593 rtx x = NULL_RTX;
2594 int *regno = (int *) data;
2596 switch (TREE_CODE (*expr_p))
2598 case ADDR_EXPR:
2599 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2600 handled_component_p (*expr_p);
2601 expr_p = &TREE_OPERAND (*expr_p, 0))
2602 continue;
2603 obj = *expr_p;
2604 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2605 x = produce_memory_decl_rtl (obj, regno);
2606 break;
2608 case SSA_NAME:
2609 *ws = 0;
2610 obj = SSA_NAME_VAR (*expr_p);
2611 if (!DECL_RTL_SET_P (obj))
2612 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2613 break;
2615 case VAR_DECL:
2616 case PARM_DECL:
2617 case RESULT_DECL:
2618 *ws = 0;
2619 obj = *expr_p;
2621 if (DECL_RTL_SET_P (obj))
2622 break;
2624 if (DECL_MODE (obj) == BLKmode)
2625 x = produce_memory_decl_rtl (obj, regno);
2626 else
2627 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2629 break;
2631 default:
2632 break;
2635 if (x)
2637 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2638 SET_DECL_RTL (obj, x);
2641 return NULL_TREE;
2644 /* Determines cost of the computation of EXPR. */
2646 static unsigned
2647 computation_cost (tree expr, bool speed)
2649 rtx seq, rslt;
2650 tree type = TREE_TYPE (expr);
2651 unsigned cost;
2652 /* Avoid using hard regs in ways which may be unsupported. */
2653 int regno = LAST_VIRTUAL_REGISTER + 1;
2654 enum function_frequency real_frequency = cfun->function_frequency;
2656 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2657 crtl->maybe_hot_insn_p = speed;
2658 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2659 start_sequence ();
2660 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2661 seq = get_insns ();
2662 end_sequence ();
2663 default_rtl_profile ();
2664 cfun->function_frequency = real_frequency;
2666 cost = seq_cost (seq, speed);
2667 if (MEM_P (rslt))
2668 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
2670 return cost;
2673 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2675 static tree
2676 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2678 if (stmt_after_increment (loop, cand, stmt))
2679 return cand->var_after;
2680 else
2681 return cand->var_before;
2684 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2685 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2687 int
2688 tree_int_cst_sign_bit (const_tree t)
2690 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2691 unsigned HOST_WIDE_INT w;
2693 if (bitno < HOST_BITS_PER_WIDE_INT)
2694 w = TREE_INT_CST_LOW (t);
2695 else
2697 w = TREE_INT_CST_HIGH (t);
2698 bitno -= HOST_BITS_PER_WIDE_INT;
2701 return (w >> bitno) & 1;
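/* A worked example (hypothetical): for a constant of an 8-bit type,
   bitno = 7, so the value 0x80 yields (w >> 7) & 1 == 1 while 0x7f
   yields 0; for precisions above HOST_BITS_PER_WIDE_INT the bit is
   taken from TREE_INT_CST_HIGH instead.  */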
2704 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2705 same precision, which is at least as wide as the precision of TYPE, stores
2706 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2707 type of A and B. */
2709 static tree
2710 determine_common_wider_type (tree *a, tree *b)
2712 tree wider_type = NULL;
2713 tree suba, subb;
2714 tree atype = TREE_TYPE (*a);
2716 if (CONVERT_EXPR_P (*a))
2718 suba = TREE_OPERAND (*a, 0);
2719 wider_type = TREE_TYPE (suba);
2720 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2721 return atype;
2723 else
2724 return atype;
2726 if (CONVERT_EXPR_P (*b))
2728 subb = TREE_OPERAND (*b, 0);
2729 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2730 return atype;
2732 else
2733 return atype;
2735 *a = suba;
2736 *b = subb;
2737 return wider_type;
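/* For instance (a sketch): if *A is (unsigned int) ba and *B is
   (unsigned int) bb, with ba and bb of the same 64-bit type, both
   operands are replaced by their 64-bit inner expressions and that
   64-bit type is returned; if only one of them is such a conversion,
   nothing is changed and the type of *A is returned.  */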
2740 /* Determines the expression by which USE is expressed from induction variable
2741 CAND at statement AT in LOOP. The expression is stored in a decomposed
2742 form into AFF. Returns false if USE cannot be expressed using CAND. */
2744 static bool
2745 get_computation_aff (struct loop *loop,
2746 struct iv_use *use, struct iv_cand *cand, gimple at,
2747 struct affine_tree_combination *aff)
2749 tree ubase = use->iv->base;
2750 tree ustep = use->iv->step;
2751 tree cbase = cand->iv->base;
2752 tree cstep = cand->iv->step, cstep_common;
2753 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2754 tree common_type, var;
2755 tree uutype;
2756 aff_tree cbase_aff, var_aff;
2757 double_int rat;
2759 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2761 /* We do not have enough precision to express the values of the use. */
2762 return false;
2765 var = var_at_stmt (loop, cand, at);
2766 uutype = unsigned_type_for (utype);
2768 /* If the conversion is not noop, perform it. */
2769 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2771 cstep = fold_convert (uutype, cstep);
2772 cbase = fold_convert (uutype, cbase);
2773 var = fold_convert (uutype, var);
2776 if (!constant_multiple_of (ustep, cstep, &rat))
2777 return false;
2779 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2780 type, we achieve better folding by computing their difference in this
2781 wider type, and cast the result to UUTYPE. We do not need to worry about
2782 overflows, as all the arithmetic will in the end be performed in UUTYPE
2783 anyway. */
2784 common_type = determine_common_wider_type (&ubase, &cbase);
2786 /* use = ubase - ratio * cbase + ratio * var. */
2787 tree_to_aff_combination (ubase, common_type, aff);
2788 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2789 tree_to_aff_combination (var, uutype, &var_aff);
2791 /* We need to shift the value if we are after the increment. */
2792 if (stmt_after_increment (loop, cand, at))
2794 aff_tree cstep_aff;
2796 if (common_type != uutype)
2797 cstep_common = fold_convert (common_type, cstep);
2798 else
2799 cstep_common = cstep;
2801 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2802 aff_combination_add (&cbase_aff, &cstep_aff);
2805 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2806 aff_combination_add (aff, &cbase_aff);
2807 if (common_type != uutype)
2808 aff_combination_convert (aff, uutype);
2810 aff_combination_scale (&var_aff, rat);
2811 aff_combination_add (aff, &var_aff);
2813 return true;
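/* A worked example (illustrative numbers): for a use {base = 10, step = 8}
   and a candidate {base = 2, step = 4}, constant_multiple_of yields
   rat = 2, and the combination built above is use = 10 - 2 * 2 + 2 * var
   = 6 + 2 * var, where var carries the candidate's current value.  */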
2816 /* Determines the expression by which USE is expressed from induction variable
2817 CAND at statement AT in LOOP. The computation is unshared. */
2819 static tree
2820 get_computation_at (struct loop *loop,
2821 struct iv_use *use, struct iv_cand *cand, gimple at)
2823 aff_tree aff;
2824 tree type = TREE_TYPE (use->iv->base);
2826 if (!get_computation_aff (loop, use, cand, at, &aff))
2827 return NULL_TREE;
2828 unshare_aff_combination (&aff);
2829 return fold_convert (type, aff_combination_to_tree (&aff));
2832 /* Determines the expression by which USE is expressed from induction variable
2833 CAND in LOOP. The computation is unshared. */
2835 static tree
2836 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2838 return get_computation_at (loop, use, cand, use->stmt);
2841 /* Returns cost of addition in MODE. */
2843 static unsigned
2844 add_cost (enum machine_mode mode, bool speed)
2846 static unsigned costs[NUM_MACHINE_MODES];
2847 rtx seq;
2848 unsigned cost;
2850 if (costs[mode])
2851 return costs[mode];
2853 start_sequence ();
2854 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2855 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2856 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2857 NULL_RTX);
2858 seq = get_insns ();
2859 end_sequence ();
2861 cost = seq_cost (seq, speed);
2862 if (!cost)
2863 cost = 1;
2865 costs[mode] = cost;
2867 if (dump_file && (dump_flags & TDF_DETAILS))
2868 fprintf (dump_file, "Addition in %s costs %d\n",
2869 GET_MODE_NAME (mode), cost);
2870 return cost;
2873 /* Entry in a hashtable of already known costs for multiplication. */
2874 struct mbc_entry
2876 HOST_WIDE_INT cst; /* The constant to multiply by. */
2877 enum machine_mode mode; /* In mode. */
2878 unsigned cost; /* The cost. */
2881 /* Computes the hash value for ENTRY. */
2883 static hashval_t
2884 mbc_entry_hash (const void *entry)
2886 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2888 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2891 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2893 static int
2894 mbc_entry_eq (const void *entry1, const void *entry2)
2896 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2897 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2899 return (e1->mode == e2->mode
2900 && e1->cst == e2->cst);
2903 /* Returns cost of multiplication by constant CST in MODE. */
2905 unsigned
2906 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2908 static htab_t costs;
2909 struct mbc_entry **cached, act;
2910 rtx seq;
2911 unsigned cost;
2913 if (!costs)
2914 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2916 act.mode = mode;
2917 act.cst = cst;
2918 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2919 if (*cached)
2920 return (*cached)->cost;
2922 *cached = XNEW (struct mbc_entry);
2923 (*cached)->mode = mode;
2924 (*cached)->cst = cst;
2926 start_sequence ();
2927 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2928 gen_int_mode (cst, mode), NULL_RTX, 0);
2929 seq = get_insns ();
2930 end_sequence ();
2932 cost = seq_cost (seq, speed);
2934 if (dump_file && (dump_flags & TDF_DETAILS))
2935 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2936 (int) cst, GET_MODE_NAME (mode), cost);
2938 (*cached)->cost = cost;
2940 return cost;
2943 /* Returns true if multiplying by RATIO is allowed in an address. Tests the
2944 validity for a memory reference accessing memory of mode MODE. */
2946 bool
2947 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2949 #define MAX_RATIO 128
2950 static sbitmap valid_mult[MAX_MACHINE_MODE];
2952 if (!valid_mult[mode])
2954 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2955 rtx addr;
2956 HOST_WIDE_INT i;
2958 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2959 sbitmap_zero (valid_mult[mode]);
2960 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2961 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2963 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2964 if (memory_address_p (mode, addr))
2965 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2968 if (dump_file && (dump_flags & TDF_DETAILS))
2970 fprintf (dump_file, " allowed multipliers:");
2971 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2972 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2973 fprintf (dump_file, " %d", (int) i);
2974 fprintf (dump_file, "\n");
2975 fprintf (dump_file, "\n");
2979 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2980 return false;
2982 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
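/* For example (illustrative): on a target with x86-like addressing, where
   an index register may be scaled by 1, 2, 4 or 8, this returns true only
   for those ratios; on a target without scaled indexing only ratio 1 is
   accepted. The exact set is probed from the backend above via
   memory_address_p, not hard-coded.  */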
2985 /* Returns the cost of an address in the shape symbol + var + OFFSET
2986 + RATIO * index. If SYMBOL_PRESENT is false, the symbol is omitted. If
2987 VAR_PRESENT is false, the variable is omitted. Compute the cost for a
2988 memory reference that accesses a memory location of mode MEM_MODE.
2990 TODO -- there must be some better way. This all is quite crude. */
2992 static comp_cost
2993 get_address_cost (bool symbol_present, bool var_present,
2994 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2995 enum machine_mode mem_mode,
2996 bool speed)
2998 static bool initialized[MAX_MACHINE_MODE];
2999 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3000 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3001 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3002 unsigned cost, acost, complexity;
3003 bool offset_p, ratio_p;
3004 HOST_WIDE_INT s_offset;
3005 unsigned HOST_WIDE_INT mask;
3006 unsigned bits;
3008 if (!initialized[mem_mode])
3010 HOST_WIDE_INT i;
3011 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3012 int old_cse_not_expected;
3013 unsigned sym_p, var_p, off_p, rat_p, add_c;
3014 rtx seq, addr, base;
3015 rtx reg0, reg1;
3017 initialized[mem_mode] = true;
3019 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3021 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3022 for (i = start; i <= 1 << 20; i <<= 1)
3024 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3025 if (!memory_address_p (mem_mode, addr))
3026 break;
3028 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3029 off[mem_mode] = max_offset[mem_mode];
3031 for (i = start; i <= 1 << 20; i <<= 1)
3033 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3034 if (!memory_address_p (mem_mode, addr))
3035 break;
3037 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3039 if (dump_file && (dump_flags & TDF_DETAILS))
3041 fprintf (dump_file, "get_address_cost:\n");
3042 fprintf (dump_file, " min offset %s %d\n",
3043 GET_MODE_NAME (mem_mode),
3044 (int) min_offset[mem_mode]);
3045 fprintf (dump_file, " max offset %s %d\n",
3046 GET_MODE_NAME (mem_mode),
3047 (int) max_offset[mem_mode]);
3050 rat[mem_mode] = 1;
3051 for (i = 2; i <= MAX_RATIO; i++)
3052 if (multiplier_allowed_in_address_p (i, mem_mode))
3054 rat[mem_mode] = i;
3055 break;
3058 /* Compute the cost of various addressing modes. */
3059 acost = 0;
3060 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3061 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3063 for (i = 0; i < 16; i++)
3065 sym_p = i & 1;
3066 var_p = (i >> 1) & 1;
3067 off_p = (i >> 2) & 1;
3068 rat_p = (i >> 3) & 1;
3070 addr = reg0;
3071 if (rat_p)
3072 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3073 gen_int_mode (rat[mem_mode], Pmode));
3075 if (var_p)
3076 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3078 if (sym_p)
3080 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3081 /* ??? We can run into trouble with some backends by presenting
3082 them with symbols which haven't been properly passed through
3083 targetm.encode_section_info. By setting the local bit, we
3084 enhance the probability of things working. */
3085 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3087 if (off_p)
3088 base = gen_rtx_fmt_e (CONST, Pmode,
3089 gen_rtx_fmt_ee (PLUS, Pmode,
3090 base,
3091 gen_int_mode (off[mem_mode],
3092 Pmode)));
3094 else if (off_p)
3095 base = gen_int_mode (off[mem_mode], Pmode);
3096 else
3097 base = NULL_RTX;
3099 if (base)
3100 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3102 start_sequence ();
3103 /* To avoid splitting addressing modes, pretend that no cse will
3104 follow. */
3105 old_cse_not_expected = cse_not_expected;
3106 cse_not_expected = true;
3107 addr = memory_address (mem_mode, addr);
3108 cse_not_expected = old_cse_not_expected;
3109 seq = get_insns ();
3110 end_sequence ();
3112 acost = seq_cost (seq, speed);
3113 acost += address_cost (addr, mem_mode, speed);
3115 if (!acost)
3116 acost = 1;
3117 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3120 /* On some targets, it is quite expensive to load a symbol into a register,
3121 which makes addresses that contain symbols look much more expensive.
3122 However, the symbol will have to be loaded in any case before the
3123 loop (and quite likely we have it in a register already), so it does not
3124 make much sense to penalize them too heavily. So make some final
3125 tweaks for the SYMBOL_PRESENT modes:
3127 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3128 var is cheaper, use this mode with a small penalty.
3129 If VAR_PRESENT is true, try whether the mode with
3130 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3131 if this is the case, use it. */
3132 add_c = add_cost (Pmode, speed);
3133 for (i = 0; i < 8; i++)
3135 var_p = i & 1;
3136 off_p = (i >> 1) & 1;
3137 rat_p = (i >> 2) & 1;
3139 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3140 if (var_p)
3141 acost += add_c;
3143 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3144 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3147 if (dump_file && (dump_flags & TDF_DETAILS))
3149 fprintf (dump_file, "Address costs:\n");
3151 for (i = 0; i < 16; i++)
3153 sym_p = i & 1;
3154 var_p = (i >> 1) & 1;
3155 off_p = (i >> 2) & 1;
3156 rat_p = (i >> 3) & 1;
3158 fprintf (dump_file, " ");
3159 if (sym_p)
3160 fprintf (dump_file, "sym + ");
3161 if (var_p)
3162 fprintf (dump_file, "var + ");
3163 if (off_p)
3164 fprintf (dump_file, "cst + ");
3165 if (rat_p)
3166 fprintf (dump_file, "rat * ");
3168 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3169 fprintf (dump_file, "index costs %d\n", acost);
3171 fprintf (dump_file, "\n");
3175 bits = GET_MODE_BITSIZE (Pmode);
3176 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3177 offset &= mask;
3178 if ((offset >> (bits - 1) & 1))
3179 offset |= ~mask;
3180 s_offset = offset;
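/* Worked example of the sign extension above (assuming 32-bit Pmode):
   bits = 32 and mask = 0xffffffff; an incoming offset of 0xfffffffc has
   bit 31 set, so it is extended with ones and s_offset becomes -4, which
   can then be compared against min_offset/max_offset below.  */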
3182 cost = 0;
3183 offset_p = (s_offset != 0
3184 && min_offset[mem_mode] <= s_offset
3185 && s_offset <= max_offset[mem_mode]);
3186 ratio_p = (ratio != 1
3187 && multiplier_allowed_in_address_p (ratio, mem_mode));
3189 if (ratio != 1 && !ratio_p)
3190 cost += multiply_by_cost (ratio, Pmode, speed);
3192 if (s_offset && !offset_p && !symbol_present)
3193 cost += add_cost (Pmode, speed);
3195 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3196 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3197 return new_cost (cost + acost, complexity);
3200 /* Estimates cost of forcing expression EXPR into a variable. */
3202 static comp_cost
3203 force_expr_to_var_cost (tree expr, bool speed)
3205 static bool costs_initialized = false;
3206 static unsigned integer_cost [2];
3207 static unsigned symbol_cost [2];
3208 static unsigned address_cost [2];
3209 tree op0, op1;
3210 comp_cost cost0, cost1, cost;
3211 enum machine_mode mode;
3213 if (!costs_initialized)
3215 tree type = build_pointer_type (integer_type_node);
3216 tree var, addr;
3217 rtx x;
3218 int i;
3220 var = create_tmp_var_raw (integer_type_node, "test_var");
3221 TREE_STATIC (var) = 1;
3222 x = produce_memory_decl_rtl (var, NULL);
3223 SET_DECL_RTL (var, x);
3225 addr = build1 (ADDR_EXPR, type, var);
3228 for (i = 0; i < 2; i++)
3230 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3231 2000), i);
3233 symbol_cost[i] = computation_cost (addr, i) + 1;
3235 address_cost[i]
3236 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3237 addr,
3238 build_int_cst (sizetype, 2000)), i) + 1;
3239 if (dump_file && (dump_flags & TDF_DETAILS))
3241 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3242 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3243 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3244 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3245 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3246 fprintf (dump_file, "\n");
3250 costs_initialized = true;
3253 STRIP_NOPS (expr);
3255 if (SSA_VAR_P (expr))
3256 return zero_cost;
3258 if (is_gimple_min_invariant (expr))
3260 if (TREE_CODE (expr) == INTEGER_CST)
3261 return new_cost (integer_cost [speed], 0);
3263 if (TREE_CODE (expr) == ADDR_EXPR)
3265 tree obj = TREE_OPERAND (expr, 0);
3267 if (TREE_CODE (obj) == VAR_DECL
3268 || TREE_CODE (obj) == PARM_DECL
3269 || TREE_CODE (obj) == RESULT_DECL)
3270 return new_cost (symbol_cost [speed], 0);
3273 return new_cost (address_cost [speed], 0);
3276 switch (TREE_CODE (expr))
3278 case POINTER_PLUS_EXPR:
3279 case PLUS_EXPR:
3280 case MINUS_EXPR:
3281 case MULT_EXPR:
3282 op0 = TREE_OPERAND (expr, 0);
3283 op1 = TREE_OPERAND (expr, 1);
3284 STRIP_NOPS (op0);
3285 STRIP_NOPS (op1);
3287 if (is_gimple_val (op0))
3288 cost0 = zero_cost;
3289 else
3290 cost0 = force_expr_to_var_cost (op0, speed);
3292 if (is_gimple_val (op1))
3293 cost1 = zero_cost;
3294 else
3295 cost1 = force_expr_to_var_cost (op1, speed);
3297 break;
3299 default:
3300 /* Just an arbitrary value, FIXME. */
3301 return new_cost (target_spill_cost[speed], 0);
3304 mode = TYPE_MODE (TREE_TYPE (expr));
3305 switch (TREE_CODE (expr))
3307 case POINTER_PLUS_EXPR:
3308 case PLUS_EXPR:
3309 case MINUS_EXPR:
3310 cost = new_cost (add_cost (mode, speed), 0);
3311 break;
3313 case MULT_EXPR:
3314 if (cst_and_fits_in_hwi (op0))
3315 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3316 else if (cst_and_fits_in_hwi (op1))
3317 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3318 else
3319 return new_cost (target_spill_cost [speed], 0);
3320 break;
3322 default:
3323 gcc_unreachable ();
3326 cost = add_costs (cost, cost0);
3327 cost = add_costs (cost, cost1);
3329 /* Bound the cost by target_spill_cost. The parts of complicated
3330 computations often are either loop invariant or at least can
3331 be shared between several iv uses, so letting this grow without
3332 limits would not give reasonable results. */
3333 if (cost.cost > target_spill_cost [speed])
3334 cost.cost = target_spill_cost [speed];
3336 return cost;
3339 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3340 invariants the computation depends on. */
3342 static comp_cost
3343 force_var_cost (struct ivopts_data *data,
3344 tree expr, bitmap *depends_on)
3346 if (depends_on)
3348 fd_ivopts_data = data;
3349 walk_tree (&expr, find_depends, depends_on, NULL);
3352 return force_expr_to_var_cost (expr, data->speed);
3355 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3356 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3357 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3358 invariants the computation depends on. */
3360 static comp_cost
3361 split_address_cost (struct ivopts_data *data,
3362 tree addr, bool *symbol_present, bool *var_present,
3363 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3365 tree core;
3366 HOST_WIDE_INT bitsize;
3367 HOST_WIDE_INT bitpos;
3368 tree toffset;
3369 enum machine_mode mode;
3370 int unsignedp, volatilep;
3372 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3373 &unsignedp, &volatilep, false);
3375 if (toffset != 0
3376 || bitpos % BITS_PER_UNIT != 0
3377 || TREE_CODE (core) != VAR_DECL)
3379 *symbol_present = false;
3380 *var_present = true;
3381 fd_ivopts_data = data;
3382 walk_tree (&addr, find_depends, depends_on, NULL);
3383 return new_cost (target_spill_cost[data->speed], 0);
3386 *offset += bitpos / BITS_PER_UNIT;
3387 if (TREE_STATIC (core)
3388 || DECL_EXTERNAL (core))
3390 *symbol_present = true;
3391 *var_present = false;
3392 return zero_cost;
3395 *symbol_present = false;
3396 *var_present = true;
3397 return zero_cost;
3400 /* Estimates cost of expressing difference of addresses E1 - E2 as
3401 var + symbol + offset. The value of offset is added to OFFSET,
3402 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3403 part is missing. DEPENDS_ON is a set of the invariants the computation
3404 depends on. */
3406 static comp_cost
3407 ptr_difference_cost (struct ivopts_data *data,
3408 tree e1, tree e2, bool *symbol_present, bool *var_present,
3409 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3411 HOST_WIDE_INT diff = 0;
3412 comp_cost cost;
3413 bool speed = optimize_loop_for_speed_p (data->current_loop);
3415 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3417 if (ptr_difference_const (e1, e2, &diff))
3419 *offset += diff;
3420 *symbol_present = false;
3421 *var_present = false;
3422 return zero_cost;
3425 if (integer_zerop (e2))
3426 return split_address_cost (data, TREE_OPERAND (e1, 0),
3427 symbol_present, var_present, offset, depends_on);
3429 *symbol_present = false;
3430 *var_present = true;
3432 cost = force_var_cost (data, e1, depends_on);
3433 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3434 cost.cost += add_cost (Pmode, speed);
3436 return cost;
3439 /* Estimates cost of expressing difference E1 - E2 as
3440 var + symbol + offset. The value of offset is added to OFFSET,
3441 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3442 part is missing. DEPENDS_ON is a set of the invariants the computation
3443 depends on. */
3445 static comp_cost
3446 difference_cost (struct ivopts_data *data,
3447 tree e1, tree e2, bool *symbol_present, bool *var_present,
3448 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3450 comp_cost cost;
3451 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3452 unsigned HOST_WIDE_INT off1, off2;
3454 e1 = strip_offset (e1, &off1);
3455 e2 = strip_offset (e2, &off2);
3456 *offset += off1 - off2;
3458 STRIP_NOPS (e1);
3459 STRIP_NOPS (e2);
3461 if (TREE_CODE (e1) == ADDR_EXPR)
3462 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3463 depends_on);
3464 *symbol_present = false;
3466 if (operand_equal_p (e1, e2, 0))
3468 *var_present = false;
3469 return zero_cost;
3471 *var_present = true;
3472 if (integer_zerop (e2))
3473 return force_var_cost (data, e1, depends_on);
3475 if (integer_zerop (e1))
3477 cost = force_var_cost (data, e2, depends_on);
3478 cost.cost += multiply_by_cost (-1, mode, data->speed);
3480 return cost;
3483 cost = force_var_cost (data, e1, depends_on);
3484 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3485 cost.cost += add_cost (mode, data->speed);
3487 return cost;
3490 /* Determines the cost of the computation by which USE is expressed
3491 from induction variable CAND. If ADDRESS_P is true, we just need
3492 to create an address from it, otherwise we want to get it into a
3493 register. A set of invariants we depend on is stored in
3494 DEPENDS_ON. AT is the statement at which the value is computed. */
3496 static comp_cost
3497 get_computation_cost_at (struct ivopts_data *data,
3498 struct iv_use *use, struct iv_cand *cand,
3499 bool address_p, bitmap *depends_on, gimple at)
3501 tree ubase = use->iv->base, ustep = use->iv->step;
3502 tree cbase, cstep;
3503 tree utype = TREE_TYPE (ubase), ctype;
3504 unsigned HOST_WIDE_INT cstepi, offset = 0;
3505 HOST_WIDE_INT ratio, aratio;
3506 bool var_present, symbol_present;
3507 comp_cost cost;
3508 unsigned n_sums;
3509 double_int rat;
3510 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3512 *depends_on = NULL;
3514 /* Only consider real candidates. */
3515 if (!cand->iv)
3516 return infinite_cost;
3518 cbase = cand->iv->base;
3519 cstep = cand->iv->step;
3520 ctype = TREE_TYPE (cbase);
3522 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3524 /* We do not have enough precision to express the values of the use. */
3525 return infinite_cost;
3528 if (address_p)
3530 /* Do not try to express the address of an object with a computation based
3531 on the address of a different object. This may cause problems in rtl
3532 level alias analysis (which does not expect this to happen,
3533 as this is illegal in C), and would be unlikely to be useful
3534 anyway. */
3535 if (use->iv->base_object
3536 && cand->iv->base_object
3537 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3538 return infinite_cost;
3541 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3543 /* TODO -- add direct handling of this case. */
3544 goto fallback;
3547 /* CSTEPI is removed from the offset in case the statement is after the
3548 increment. If the step is not constant, we use zero instead.
3549 This is a bit imprecise (there is the extra addition), but
3550 redundancy elimination is likely to transform the code so that
3551 it uses the value of the variable before the increment anyway,
3552 so this is not all that unrealistic. */
3553 if (cst_and_fits_in_hwi (cstep))
3554 cstepi = int_cst_value (cstep);
3555 else
3556 cstepi = 0;
3558 if (!constant_multiple_of (ustep, cstep, &rat))
3559 return infinite_cost;
3561 if (double_int_fits_in_shwi_p (rat))
3562 ratio = double_int_to_shwi (rat);
3563 else
3564 return infinite_cost;
3566 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3567 or ratio == 1, it is better to handle this like
3569 ubase - ratio * cbase + ratio * var
3571 (this also holds in the case ratio == -1, TODO). */
3573 if (cst_and_fits_in_hwi (cbase))
3575 offset = - ratio * int_cst_value (cbase);
3576 cost = difference_cost (data,
3577 ubase, build_int_cst (utype, 0),
3578 &symbol_present, &var_present, &offset,
3579 depends_on);
3581 else if (ratio == 1)
3583 cost = difference_cost (data,
3584 ubase, cbase,
3585 &symbol_present, &var_present, &offset,
3586 depends_on);
3588 else
3590 cost = force_var_cost (data, cbase, depends_on);
3591 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3592 cost = add_costs (cost,
3593 difference_cost (data,
3594 ubase, build_int_cst (utype, 0),
3595 &symbol_present, &var_present,
3596 &offset, depends_on));
3599 /* If we are after the increment, the value of the candidate is higher by
3600 one step, so compensate in the offset. */
3601 if (stmt_after_increment (data->current_loop, cand, at))
3602 offset -= ratio * cstepi;
3604 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
3605 (symbol/var/const parts may be omitted). If we are looking for an address,
3606 find the cost of addressing this. */
3607 if (address_p)
3608 return add_costs (cost, get_address_cost (symbol_present, var_present,
3609 offset, ratio,
3610 TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
3612 /* Otherwise estimate the costs for computing the expression. */
3613 aratio = ratio > 0 ? ratio : -ratio;
3614 if (!symbol_present && !var_present && !offset)
3616 if (ratio != 1)
3617 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3619 return cost;
3622 if (aratio != 1)
3623 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3625 n_sums = 1;
3626 if (var_present
3627 /* Symbol + offset should be compile-time computable. */
3628 && (symbol_present || offset))
3629 n_sums++;
3631 /* Having an offset does not affect the runtime cost if it is added to
3632 a symbol, but it increases the complexity. */
3633 if (offset)
3634 cost.complexity++;
3636 cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
3637 return cost;
3639 fallback:
3641 /* Just get the expression, expand it and measure the cost. */
3642 tree comp = get_computation_at (data->current_loop, use, cand, at);
3644 if (!comp)
3645 return infinite_cost;
3647 if (address_p)
3648 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3650 return new_cost (computation_cost (comp, speed), 0);
3654 /* Determines the cost of the computation by which USE is expressed
3655 from induction variable CAND. If ADDRESS_P is true, we just need
3656 to create an address from it, otherwise we want to get it into a
3657 register. A set of invariants we depend on is stored in
3658 DEPENDS_ON. */
3660 static comp_cost
3661 get_computation_cost (struct ivopts_data *data,
3662 struct iv_use *use, struct iv_cand *cand,
3663 bool address_p, bitmap *depends_on)
3665 return get_computation_cost_at (data,
3666 use, cand, address_p, depends_on, use->stmt);
3669 /* Determines cost of basing replacement of USE on CAND in a generic
3670 expression. */
3672 static bool
3673 determine_use_iv_cost_generic (struct ivopts_data *data,
3674 struct iv_use *use, struct iv_cand *cand)
3676 bitmap depends_on;
3677 comp_cost cost;
3679 /* The simple case first -- if we need to express the value of the preserved
3680 original biv, the cost is 0. This also prevents us from counting the
3681 cost of the increment twice -- once at this use and once in the cost of
3682 the candidate. */
3683 if (cand->pos == IP_ORIGINAL
3684 && cand->incremented_at == use->stmt)
3686 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3687 return true;
3690 cost = get_computation_cost (data, use, cand, false, &depends_on);
3691 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3693 return !infinite_cost_p (cost);
3696 /* Determines cost of basing replacement of USE on CAND in an address. */
3698 static bool
3699 determine_use_iv_cost_address (struct ivopts_data *data,
3700 struct iv_use *use, struct iv_cand *cand)
3702 bitmap depends_on;
3703 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3705 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3707 return !infinite_cost_p (cost);
3710 /* Computes value of candidate CAND at position AT in iteration NITER, and
3711 stores it to VAL. */
3713 static void
3714 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3715 aff_tree *val)
3717 aff_tree step, delta, nit;
3718 struct iv *iv = cand->iv;
3719 tree type = TREE_TYPE (iv->base);
3720 tree steptype = type;
3721 if (POINTER_TYPE_P (type))
3722 steptype = sizetype;
3724 tree_to_aff_combination (iv->step, steptype, &step);
3725 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3726 aff_combination_convert (&nit, steptype);
3727 aff_combination_mult (&nit, &step, &delta);
3728 if (stmt_after_increment (loop, cand, at))
3729 aff_combination_add (&delta, &step);
3731 tree_to_aff_combination (iv->base, type, val);
3732 aff_combination_add (val, &delta);
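/* For example (hypothetical values): for a candidate with base 0 and
   step 4, and NITER = 10, the combination built here represents
   0 + 4 * 10 = 40, or 44 when AT lies after the increment, since one
   more step has been executed by then.  */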
3735 /* Returns the period of induction variable IV. */
3737 static tree
3738 iv_period (struct iv *iv)
3740 tree step = iv->step, period, type;
3741 tree pow2div;
3743 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3745 /* The period of the iv is the type range divided by gcd (step, type range),
3746 minus one. As the type range is a power of two, it suffices to find the
3747 maximum power of two dividing step; the period is a mask of the rest. */
3748 pow2div = num_ending_zeros (step);
3749 type = unsigned_type_for (TREE_TYPE (step));
3751 period = build_low_bits_mask (type,
3752 (TYPE_PRECISION (type)
3753 - tree_low_cst (pow2div, 1)));
3755 return period;
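/* A worked example (illustrative): for a 32-bit iv with step 12,
   num_ending_zeros gives 2 and period = 2^(32 - 2) - 1 = 0x3fffffff;
   the iv runs through 2^30 distinct values before repeating, and the
   returned mask is one less than that count.  */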
3758 /* Returns the comparison operator used when eliminating the iv USE. */
3760 static enum tree_code
3761 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3763 struct loop *loop = data->current_loop;
3764 basic_block ex_bb;
3765 edge exit;
3767 ex_bb = gimple_bb (use->stmt);
3768 exit = EDGE_SUCC (ex_bb, 0);
3769 if (flow_bb_inside_loop_p (loop, exit->dest))
3770 exit = EDGE_SUCC (ex_bb, 1);
3772 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3775 /* Check whether it is possible to express the condition in USE by comparison
3776 of candidate CAND. If so, store the value it is compared with to BOUND. */
3778 static bool
3779 may_eliminate_iv (struct ivopts_data *data,
3780 struct iv_use *use, struct iv_cand *cand, tree *bound)
3782 basic_block ex_bb;
3783 edge exit;
3784 tree nit, period;
3785 struct loop *loop = data->current_loop;
3786 aff_tree bnd;
3788 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3789 return false;
3791 /* For now this works only for exits that dominate the loop latch.
3792 TODO: extend to other conditions inside loop body. */
3793 ex_bb = gimple_bb (use->stmt);
3794 if (use->stmt != last_stmt (ex_bb)
3795 || gimple_code (use->stmt) != GIMPLE_COND
3796 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3797 return false;
3799 exit = EDGE_SUCC (ex_bb, 0);
3800 if (flow_bb_inside_loop_p (loop, exit->dest))
3801 exit = EDGE_SUCC (ex_bb, 1);
3802 if (flow_bb_inside_loop_p (loop, exit->dest))
3803 return false;
3805 nit = niter_for_exit (data, exit);
3806 if (!nit)
3807 return false;
3809 /* Determine whether we can use the variable to test the exit condition.
3810 This is the case iff the period of the induction variable is greater
3811 than the number of iterations for which the exit condition is true. */
3812 period = iv_period (cand->iv);
3814 /* If the number of iterations is constant, compare against it directly. */
3815 if (TREE_CODE (nit) == INTEGER_CST)
3817 if (!tree_int_cst_lt (nit, period))
3818 return false;
3821 /* If not, and if this is the only possible exit of the loop, see whether
3822 we can get a conservative estimate on the number of iterations of the
3823 entire loop and compare against that instead. */
3824 else if (loop_only_exit_p (loop, exit))
3826 double_int period_value, max_niter;
3827 if (!estimated_loop_iterations (loop, true, &max_niter))
3828 return false;
3829 period_value = tree_to_double_int (period);
3830 if (double_int_ucmp (max_niter, period_value) >= 0)
3831 return false;
3834 /* Otherwise, punt. */
3835 else
3836 return false;
3838 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3839 *bound = aff_combination_to_tree (&bnd);
3840 return true;
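/* Illustrative example (hypothetical loop): in
   "for (i = 0; i < 100; i++) *p++ = 0;" the exit test on i may be
   replaced by a test on the pointer candidate: nit = 100 is below the
   candidate's period, and cand_value_at yields bound = p0 + 400
   (assuming 4-byte elements), so the exit compares the pointer against
   that bound.  */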
3843 /* Determines cost of basing replacement of USE on CAND in a condition. */
3845 static bool
3846 determine_use_iv_cost_condition (struct ivopts_data *data,
3847 struct iv_use *use, struct iv_cand *cand)
3849 tree bound = NULL_TREE;
3850 struct iv *cmp_iv;
3851 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3852 comp_cost elim_cost, express_cost, cost;
3853 bool ok;
3855 /* Only consider real candidates. */
3856 if (!cand->iv)
3858 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3859 return false;
3862 /* Try iv elimination. */
3863 if (may_eliminate_iv (data, use, cand, &bound))
3865 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3866 /* The bound is a loop invariant, so it will only be computed
3867 once. */
3868 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3870 else
3871 elim_cost = infinite_cost;
3873 /* Try expressing the original giv. If it is compared with an invariant,
3874 note that we cannot get rid of it. */
3875 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
3876 gcc_assert (ok);
3878 express_cost = get_computation_cost (data, use, cand, false,
3879 &depends_on_express);
3880 fd_ivopts_data = data;
3881 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3883 /* Choose the better approach, preferring the eliminated IV. */
3884 if (compare_costs (elim_cost, express_cost) <= 0)
3886 cost = elim_cost;
3887 depends_on = depends_on_elim;
3888 depends_on_elim = NULL;
3890 else
3892 cost = express_cost;
3893 depends_on = depends_on_express;
3894 depends_on_express = NULL;
3895 bound = NULL_TREE;
3898 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3900 if (depends_on_elim)
3901 BITMAP_FREE (depends_on_elim);
3902 if (depends_on_express)
3903 BITMAP_FREE (depends_on_express);
3905 return !infinite_cost_p (cost);
3908 /* Determines cost of basing replacement of USE on CAND. Returns false
3909 if USE cannot be based on CAND. */
3911 static bool
3912 determine_use_iv_cost (struct ivopts_data *data,
3913 struct iv_use *use, struct iv_cand *cand)
3915 switch (use->type)
3917 case USE_NONLINEAR_EXPR:
3918 return determine_use_iv_cost_generic (data, use, cand);
3920 case USE_ADDRESS:
3921 return determine_use_iv_cost_address (data, use, cand);
3923 case USE_COMPARE:
3924 return determine_use_iv_cost_condition (data, use, cand);
3926 default:
3927 gcc_unreachable ();
3931 /* Determines costs of basing the use of the iv on an iv candidate. */
3933 static void
3934 determine_use_iv_costs (struct ivopts_data *data)
3936 unsigned i, j;
3937 struct iv_use *use;
3938 struct iv_cand *cand;
3939 bitmap to_clear = BITMAP_ALLOC (NULL);
3941 alloc_use_cost_map (data);
3943 for (i = 0; i < n_iv_uses (data); i++)
3945 use = iv_use (data, i);
3947 if (data->consider_all_candidates)
3949 for (j = 0; j < n_iv_cands (data); j++)
3951 cand = iv_cand (data, j);
3952 determine_use_iv_cost (data, use, cand);
3955 else
3957 bitmap_iterator bi;
3959 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3961 cand = iv_cand (data, j);
3962 if (!determine_use_iv_cost (data, use, cand))
3963 bitmap_set_bit (to_clear, j);
3966 /* Remove the candidates for which the cost is infinite from
3967 the list of related candidates. */
3968 bitmap_and_compl_into (use->related_cands, to_clear);
3969 bitmap_clear (to_clear);
3973 BITMAP_FREE (to_clear);
3975 if (dump_file && (dump_flags & TDF_DETAILS))
3977 fprintf (dump_file, "Use-candidate costs:\n");
3979 for (i = 0; i < n_iv_uses (data); i++)
3981 use = iv_use (data, i);
3983 fprintf (dump_file, "Use %d:\n", i);
3984 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
3985 for (j = 0; j < use->n_map_members; j++)
3987 if (!use->cost_map[j].cand
3988 || infinite_cost_p (use->cost_map[j].cost))
3989 continue;
3991 fprintf (dump_file, " %d\t%d\t%d\t",
3992 use->cost_map[j].cand->id,
3993 use->cost_map[j].cost.cost,
3994 use->cost_map[j].cost.complexity);
3995 if (use->cost_map[j].depends_on)
3996 bitmap_print (dump_file,
3997 use->cost_map[j].depends_on, "", "");
3998 fprintf (dump_file, "\n");
4001 fprintf (dump_file, "\n");
4003 fprintf (dump_file, "\n");
4007 /* Determines cost of the candidate CAND. */
4009 static void
4010 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4012 comp_cost cost_base;
4013 unsigned cost, cost_step;
4014 tree base;
4016 if (!cand->iv)
4018 cand->cost = 0;
4019 return;
4022 /* There are two costs associated with the candidate -- its increment
4023 and its initialization. The latter is almost negligible for any loop
4024 that rolls enough, so we take it into account just a little. */
4026 base = cand->iv->base;
4027 cost_base = force_var_cost (data, base, NULL);
4028 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4030 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4032 /* Prefer the original ivs unless we may gain something by replacing them;
4033 the reason is to make debugging simpler. This is not relevant for
4034 artificial ivs created by other optimization passes. */
4035 if (cand->pos != IP_ORIGINAL
4036 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4037 cost++;
4039 /* Prefer not to insert statements into the latch unless there are some
4040 already (so that we do not create unnecessary jumps). */
4041 if (cand->pos == IP_END
4042 && empty_block_p (ip_end_pos (data->current_loop)))
4043 cost++;
4045 cand->cost = cost;
4048 /* Determines costs of computation of the candidates. */
4050 static void
4051 determine_iv_costs (struct ivopts_data *data)
4053 unsigned i;
4055 if (dump_file && (dump_flags & TDF_DETAILS))
4057 fprintf (dump_file, "Candidate costs:\n");
4058 fprintf (dump_file, " cand\tcost\n");
4061 for (i = 0; i < n_iv_cands (data); i++)
4063 struct iv_cand *cand = iv_cand (data, i);
4065 determine_iv_cost (data, cand);
4067 if (dump_file && (dump_flags & TDF_DETAILS))
4068 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4071 if (dump_file && (dump_flags & TDF_DETAILS))
4072 fprintf (dump_file, "\n");
4075 /* Calculates cost for having SIZE induction variables. */
4077 static unsigned
4078 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4080 /* We add size to the cost, so that we prefer eliminating ivs
4081 if possible. */
4082 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4085 /* For each size of the induction variable set determine the penalty. */
4087 static void
4088 determine_set_costs (struct ivopts_data *data)
4090 unsigned j, n;
4091 gimple phi;
4092 gimple_stmt_iterator psi;
4093 tree op;
4094 struct loop *loop = data->current_loop;
4095 bitmap_iterator bi;
4097 /* We use the following model (definitely improvable, especially the
4098 cost function -- TODO):
4100 We estimate the number of registers available (using MD data), name it A.
4102 We estimate the number of registers used by the loop, name it U. This
4103 number is obtained as the number of loop phi nodes (not counting virtual
4104 registers and bivs) + the number of variables from outside of the loop.
4106 We set a reserve R (free regs that are used for temporary computations,
4107 etc.). For now the reserve is a constant 3.
4109 Let I be the number of induction variables.
4111 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
4112 encourage making a lot of ivs without a reason).
4113 -- if A - R < U + I <= A, the cost is I * PRES_COST
4114 -- if U + I > A, the cost is I * PRES_COST and
4115 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
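/* A worked example of the model above (numbers are made up): with
   A = 8 available registers, U = 4 and the reserve R = 3, a set of
   I = 1 ivs satisfies U + I + R <= A and costs just 1 * SMALL_COST;
   with I = 3 we have A - R < U + I <= A, so the cost grows to
   3 * PRES_COST; with I = 5, U + I = 9 > A, and a spill term
   proportional to (U + I - A) / (U + I) is added on top.  */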
4117 if (dump_file && (dump_flags & TDF_DETAILS))
4119 fprintf (dump_file, "Global costs:\n");
4120 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4121 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4122 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4125 n = 0;
4126 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4128 phi = gsi_stmt (psi);
4129 op = PHI_RESULT (phi);
4131 if (!is_gimple_reg (op))
4132 continue;
4134 if (get_iv (data, op))
4135 continue;
4137 n++;
4140 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4142 struct version_info *info = ver_info (data, j);
4144 if (info->inv_id && info->has_nonlin_use)
4145 n++;
4148 data->regs_used = n;
4149 if (dump_file && (dump_flags & TDF_DETAILS))
4150 fprintf (dump_file, " regs_used %d\n", n);
4152 if (dump_file && (dump_flags & TDF_DETAILS))
4154 fprintf (dump_file, " cost for size:\n");
4155 fprintf (dump_file, " ivs\tcost\n");
4156 for (j = 0; j <= 2 * target_avail_regs; j++)
4157 fprintf (dump_file, " %d\t%d\n", j,
4158 ivopts_global_cost_for_size (data, j));
4159 fprintf (dump_file, "\n");
4163 /* Returns true if A is a cheaper cost pair than B. */
4165 static bool
4166 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4168 int cmp;
4170 if (!a)
4171 return false;
4173 if (!b)
4174 return true;
4176 cmp = compare_costs (a->cost, b->cost);
4177 if (cmp < 0)
4178 return true;
4180 if (cmp > 0)
4181 return false;
4183 /* In case the costs are the same, prefer the cheaper candidate. */
4184 if (a->cand->cost < b->cand->cost)
4185 return true;
4187 return false;
4190 /* Computes the cost field of IVS structure. */
4192 static void
4193 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4195 comp_cost cost = ivs->cand_use_cost;
4196 cost.cost += ivs->cand_cost;
4197 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4199 ivs->cost = cost;
4202 /* Removes the invariants in set INVS from set IVS. */
4204 static void
4205 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4207 bitmap_iterator bi;
4208 unsigned iid;
4210 if (!invs)
4211 return;
4213 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4215 ivs->n_invariant_uses[iid]--;
4216 if (ivs->n_invariant_uses[iid] == 0)
4217 ivs->n_regs--;
4221 /* Set USE not to be expressed by any candidate in IVS. */
4223 static void
4224 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4225 struct iv_use *use)
4227 unsigned uid = use->id, cid;
4228 struct cost_pair *cp;
4230 cp = ivs->cand_for_use[uid];
4231 if (!cp)
4232 return;
4233 cid = cp->cand->id;
4235 ivs->bad_uses++;
4236 ivs->cand_for_use[uid] = NULL;
4237 ivs->n_cand_uses[cid]--;
4239 if (ivs->n_cand_uses[cid] == 0)
4241 bitmap_clear_bit (ivs->cands, cid);
4242 /* Do not count the pseudocandidates. */
4243 if (cp->cand->iv)
4244 ivs->n_regs--;
4245 ivs->n_cands--;
4246 ivs->cand_cost -= cp->cand->cost;
4248 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4251 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4253 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4254 iv_ca_recount_cost (data, ivs);
4257 /* Adds the invariants in set INVS to set IVS. */
4259 static void
4260 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4262 bitmap_iterator bi;
4263 unsigned iid;
4265 if (!invs)
4266 return;
4268 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4270 ivs->n_invariant_uses[iid]++;
4271 if (ivs->n_invariant_uses[iid] == 1)
4272 ivs->n_regs++;
/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}

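/* Note that iv_ca_set_cp and iv_ca_set_no_cp are typically used as a
   pair to try a cost pair without committing to it; this is the pattern
   used by try_add_cand_for below:

     iv_ca_set_cp (data, ivs, use, cp);
     act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
     iv_ca_set_no_cp (data, ivs, use);

   The bookkeeping (n_regs, cand_cost, cand_use_cost) is updated
   incrementally on each call, so such trials are cheap.  */
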
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  A set with uses that no candidate
   expresses has infinite cost.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  return (ivs->bad_uses ? infinite_cost : ivs->cost);
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}

/* Creates a record of the change of expressing USE by NEW_CP instead of
   OLD_CP, and chains it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

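/* A delta is thus a singly linked list of (use, old_cp, new_cp) records;
   e.g. a delta expressing "switch use 1 from candidate A to B, then
   use 3 from C to D" is the chain

     [use 1: A -> B] -> [use 3: C -> D] -> NULL.  */
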
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

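/* For instance, joining [u1: A -> B] with [u2: C -> D] redirects the
   next_change pointer of the last node of the first list, yielding
   [u1: A -> B] -> [u2: C -> D]; no nodes are copied, so neither input
   list may be used on its own afterwards.  */
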
/* Returns the cost pair by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

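/* The usual pattern is to apply a delta tentatively, inspect the
   resulting cost, and roll the delta back, as iv_ca_extend below does:

     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);

   Committing with FORWARD false reverses the list (and swaps the old
   and new cost pairs) twice, so DELTA is intact afterwards.  */
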
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates a new iv candidate assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = zero_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = zero_cost;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free (*ivs);
  *ivs = NULL;
}

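/* Typical lifetime of a set, as in get_initial_solution below:

     struct iv_ca *ivs = iv_ca_new (data);
     ... populate and tune the set ...
     iv_ca_free (&ivs);

   iv_ca_free also clears the caller's pointer.  */
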
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates ", "\n");

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }

  fprintf (file, "\n");
}

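/* With -fdump-tree-ivopts-details the output of iv_ca_dump might look
   like this (numbers are illustrative):

     cost 17 (complexity 1)
     candidates 0, 3
     invariants 2

   listing the ids of the candidates in the set and of the loop
   invariants their uses depend on.  */
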
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

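/* The recursion matters because removals can cascade: once the uses of
   one candidate have been moved to others, a second candidate may lose
   its last profitable use and become worth removing as well.  Each
   level removes at most one candidate, so the recursion depth is
   bounded by the size of the set.  */
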
/* Tries to extend the set IVS in the best possible way in order
   to express USE.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm
     later would be likely to get stuck in a local minimum, thus causing us
     to create too many ivs.  The approach from few ivs to more seems more
     likely to be successful -- starting from few ivs, replacing an expensive
     use by a specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important && cand->iv->base_object == NULL_TREE)
            continue;

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

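/* As a hypothetical example of the rationale above: for a loop walking
   three arrays,

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   preferring the single generic biv I (and computing the addresses from
   it) is usually better than introducing three separate pointer ivs,
   one per array.  */
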
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic: we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      comp_cost cost = iv_ca_cost (set);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

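/* In the detailed dump this search appears as a sequence of set dumps
   (contents illustrative):

     Initial set of candidates:
       cost 24 (complexity 2)
       candidates 1, 4, 6
     Improved to:
       cost 19 (complexity 1)
       candidates 1, 6
     Final cost 19 (complexity 1)

   Each "Improved to:" step corresponds to one successful call of
   try_improve_iv_set.  */
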
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

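/* For a candidate with, say, base B and step 4 (numbers illustrative),
   create_iv emits roughly

     ivtmp = PHI <B (preheader), ivtmp.next (latch)>
     ...
     ivtmp.next = ivtmp + 4;

   with the increment placed before the exit test (IP_NORMAL) or at the
   end of the latch (IP_END), per the switch above.  */
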
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}

/* Returns the phi-node in BB with result RESULT.  */

static gimple
get_phi_with_result (basic_block bb, tree result)
{
  gimple_stmt_iterator i = gsi_start_phis (bb);

  for (; !gsi_end_p (i); gsi_next (&i))
    if (gimple_phi_result (gsi_stmt (i)) == result)
      return gsi_stmt (i);

  gcc_unreachable ();
  return NULL;
}

/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, remove also the ssa name defined by the statement.  */

static void
remove_statement (gimple stmt, bool including_defined_name)
{
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      gimple bb_phi = get_phi_with_result (gimple_bb (stmt),
                                           gimple_phi_result (stmt));
      gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi);
      remove_phi_node (&bsi, including_defined_name);
    }
  else
    {
      gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);
    }
}

/* Rewrites USE (the definition of an iv used in a nonlinear expression)
   using candidate CAND.  */
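
/* For instance (illustrative), when the original biv I is selected for
   its own increment "i = i + 1", the statement is left untouched (the
   IP_ORIGINAL special case below).  Otherwise the value of the use is
   recomputed from the selected candidate: given a candidate J with
   J = 2 * I, a use of the value "i * 2" can be rewritten to use J
   directly.  */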

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
          || old_code == MINUS_EXPR
          || old_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (old_code != MINUS_EXPR
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
                                 true, GSI_SAME_STMT);

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, op);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
      remove_statement (use->stmt, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, op);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Replaces ssa name in index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
                      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
    }

  return true;
}

/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}

/* Extract the alias analysis info for the memory reference REF.  How this
   information is stored, and what precisely its semantics are, depends on
   the type of the reference, but there is always one _DECL node hidden
   somewhere that determines the set of virtual operands for the reference.
   The code below deciphers this jungle and extracts that single useful
   piece of information.  */

static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree aref = NULL_TREE, tag, sv;
  HOST_WIDE_INT offset, size, maxsize;

  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    {
      aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
      if (ref)
        break;
    }

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* If the base is a dereference of a pointer, first check its name
         memory tag.  If it does not have one, use its symbol memory tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
        return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
        {
          tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
          if (tag)
            return tag;
        }

      var = SSA_NAME_VAR (var);
      tag = symbol_mem_tag (var);
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      if (!DECL_P (var))
        return NULL_TREE;

      tag = symbol_mem_tag (var);
      if (tag)
        return tag;

      return var;
    }
}

/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
    }
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree ref;
  bool ok;

  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

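/* As an illustrative sketch: for an access whose address follows a
   chosen pointer candidate, create_mem_ref above builds a
   TARGET_MEM_REF that folds the candidate's base, step multiple and
   constant offset into the target's addressing modes where possible,
   instead of emitting the address arithmetic as separate statements.  */
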
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand_gsi (&bsi, bound, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

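/* Illustrative example of the elimination branch above: if a pointer
   candidate P replaces the counter I, an exit test "i < n" may be
   rewritten as "p != bound", where BOUND (cp->value) was precomputed
   when the elimination was costed and iv_elimination_compare picked
   the comparison code.  */
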
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  push_stmt_changes (&use->stmt);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  pop_stmt_changes (&use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
    }
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}