Fix deal.II type problems.
[official-gcc/Ramakrishna.git] / gcc / tree-ssa-loop-ivopts.c
blob759edb11770611b53c0a76330762eebee2291dd2
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "pointer-set.h"
87 #include "hashtab.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
90 #include "cfgloop.h"
91 #include "params.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
94 #include "target.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
104 /* Representation of the induction variable. */
105 struct iv
107 tree base; /* Initial value of the iv. */
108 tree base_object; /* A memory object to that the induction variable points. */
109 tree step; /* Step of the iv (constant only). */
110 tree ssa_name; /* The ssa name with the value. */
111 bool biv_p; /* Is it a biv? */
112 bool have_use_for; /* Do we already have a use for it? */
113 unsigned use_id; /* The identifier in the use if it is the case. */
116 /* Per-ssa version information (induction variable descriptions, etc.). */
117 struct version_info
119 tree name; /* The ssa name. */
120 struct iv *iv; /* Induction variable description. */
121 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id; /* Id of an invariant. */
124 bool preserve_biv; /* For the original biv, whether to preserve it. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};
135 /* Cost of a computation. */
136 typedef struct
138 int cost; /* The runtime cost. */
139 unsigned complexity; /* The estimate of the complexity of the code for
140 the computation (in no concrete units --
141 complexity field should be larger for more
142 complex expressions and addressing modes). */
143 } comp_cost;
145 static const comp_cost zero_cost = {0, 0};
146 static const comp_cost infinite_cost = {INFTY, INFTY};
148 /* The candidate - cost pair. */
149 struct cost_pair
151 struct iv_cand *cand; /* The candidate. */
152 comp_cost cost; /* The cost. */
153 bitmap depends_on; /* The list of invariants that have to be
154 preserved. */
155 tree value; /* For final value elimination, the expression for
156 the final value of the iv. For iv elimination,
157 the new bound to compare with. */
160 /* Use. */
161 struct iv_use
163 unsigned id; /* The id of the use. */
164 enum use_type type; /* Type of the use. */
165 struct iv *iv; /* The induction variable it is based on. */
166 gimple stmt; /* Statement in that it occurs. */
167 tree *op_p; /* The place where it occurs. */
168 bitmap related_cands; /* The set of "related" iv candidates, plus the common
169 important ones. */
171 unsigned n_map_members; /* Number of candidates in the cost_map list. */
172 struct cost_pair *cost_map;
173 /* The costs wrto the iv candidates. */
175 struct iv_cand *selected;
176 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
189 /* The induction variable candidate. */
190 struct iv_cand
192 unsigned id; /* The number of the candidate. */
193 bool important; /* Whether this is an "important" candidate, i.e. such
194 that it should be considered by all uses. */
195 enum iv_position pos; /* Where it is computed. */
196 gimple incremented_at;/* For original biv, the statement where it is
197 incremented. */
198 tree var_before; /* The variable used for it before increment. */
199 tree var_after; /* The variable used for it after increment. */
200 struct iv *iv; /* The value of the candidate. NULL for
201 "pseudocandidate" used to indicate the possibility
202 to replace the final value of an iv by direct
203 computation of the value. */
204 unsigned cost; /* Cost of the candidate. */
205 unsigned cost_step; /* Cost of the candidate's increment operation. */
206 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
207 where it is incremented. */
208 bitmap depends_on; /* The list of invariants that are used in step of the
209 biv. */
212 /* The data used by the induction variable optimizations. */
214 typedef struct iv_use *iv_use_p;
215 DEF_VEC_P(iv_use_p);
216 DEF_VEC_ALLOC_P(iv_use_p,heap);
218 typedef struct iv_cand *iv_cand_p;
219 DEF_VEC_P(iv_cand_p);
220 DEF_VEC_ALLOC_P(iv_cand_p,heap);
222 struct ivopts_data
224 /* The currently optimized loop. */
225 struct loop *current_loop;
227 /* Numbers of iterations for all exits of the current loop. */
228 struct pointer_map_t *niters;
230 /* Number of registers used in it. */
231 unsigned regs_used;
233 /* The size of version_info array allocated. */
234 unsigned version_info_size;
236 /* The array of information for the ssa names. */
237 struct version_info *version_info;
239 /* The bitmap of indices in version_info whose value was changed. */
240 bitmap relevant;
242 /* The uses of induction variables. */
243 VEC(iv_use_p,heap) *iv_uses;
245 /* The candidates. */
246 VEC(iv_cand_p,heap) *iv_candidates;
248 /* A bitmap of important candidates. */
249 bitmap important_candidates;
251 /* The maximum invariant id. */
252 unsigned max_inv_id;
254 /* Whether to consider just related and important candidates when replacing a
255 use. */
256 bool consider_all_candidates;
258 /* Are we optimizing for speed? */
259 bool speed;
262 /* An assignment of iv candidates to uses. */
264 struct iv_ca
266 /* The number of uses covered by the assignment. */
267 unsigned upto;
269 /* Number of uses that cannot be expressed by the candidates in the set. */
270 unsigned bad_uses;
272 /* Candidate assigned to a use, together with the related costs. */
273 struct cost_pair **cand_for_use;
275 /* Number of times each candidate is used. */
276 unsigned *n_cand_uses;
278 /* The candidates used. */
279 bitmap cands;
281 /* The number of candidates in the set. */
282 unsigned n_cands;
284 /* Total number of registers needed. */
285 unsigned n_regs;
287 /* Total cost of expressing uses. */
288 comp_cost cand_use_cost;
290 /* Total cost of candidates. */
291 unsigned cand_cost;
293 /* Number of times each invariant is used. */
294 unsigned *n_invariant_uses;
296 /* Total cost of the assignment. */
297 comp_cost cost;
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
317 /* Bound on number of candidates below that all candidates are considered. */
319 #define CONSIDER_ALL_CANDIDATES_BOUND \
320 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
322 /* If there are more iv occurrences, we just give up (it is quite unlikely that
323 optimizing such a loop would help, and it would take ages). */
325 #define MAX_CONSIDERED_USES \
326 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
328 /* If there are at most this number of ivs in the set, try removing unnecessary
329 ivs from the set always. */
331 #define ALWAYS_PRUNE_CAND_SET_BOUND \
332 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
334 /* The list of trees for that the decl_rtl field must be reset is stored
335 here. */
337 static VEC(tree,heap) *decl_rtl_to_reset;
339 /* Number of uses recorded in DATA. */
341 static inline unsigned
342 n_iv_uses (struct ivopts_data *data)
344 return VEC_length (iv_use_p, data->iv_uses);
347 /* Ith use recorded in DATA. */
349 static inline struct iv_use *
350 iv_use (struct ivopts_data *data, unsigned i)
352 return VEC_index (iv_use_p, data->iv_uses, i);
355 /* Number of candidates recorded in DATA. */
357 static inline unsigned
358 n_iv_cands (struct ivopts_data *data)
360 return VEC_length (iv_cand_p, data->iv_candidates);
363 /* Ith candidate recorded in DATA. */
365 static inline struct iv_cand *
366 iv_cand (struct ivopts_data *data, unsigned i)
368 return VEC_index (iv_cand_p, data->iv_candidates, i);
371 /* The single loop exit if it dominates the latch, NULL otherwise. */
373 edge
374 single_dom_exit (struct loop *loop)
376 edge exit = single_exit (loop);
378 if (!exit)
379 return NULL;
381 if (!just_once_each_iteration_p (loop, exit->src))
382 return NULL;
384 return exit;
387 /* Dumps information about the induction variable IV to FILE. */
389 extern void dump_iv (FILE *, struct iv *);
390 void
391 dump_iv (FILE *file, struct iv *iv)
393 if (iv->ssa_name)
395 fprintf (file, "ssa name ");
396 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
397 fprintf (file, "\n");
400 fprintf (file, " type ");
401 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
402 fprintf (file, "\n");
404 if (iv->step)
406 fprintf (file, " base ");
407 print_generic_expr (file, iv->base, TDF_SLIM);
408 fprintf (file, "\n");
410 fprintf (file, " step ");
411 print_generic_expr (file, iv->step, TDF_SLIM);
412 fprintf (file, "\n");
414 else
416 fprintf (file, " invariant ");
417 print_generic_expr (file, iv->base, TDF_SLIM);
418 fprintf (file, "\n");
421 if (iv->base_object)
423 fprintf (file, " base object ");
424 print_generic_expr (file, iv->base_object, TDF_SLIM);
425 fprintf (file, "\n");
428 if (iv->biv_p)
429 fprintf (file, " is a biv\n");
432 /* Dumps information about the USE to FILE. */
434 extern void dump_use (FILE *, struct iv_use *);
435 void
436 dump_use (FILE *file, struct iv_use *use)
438 fprintf (file, "use %d\n", use->id);
440 switch (use->type)
442 case USE_NONLINEAR_EXPR:
443 fprintf (file, " generic\n");
444 break;
446 case USE_ADDRESS:
447 fprintf (file, " address\n");
448 break;
450 case USE_COMPARE:
451 fprintf (file, " compare\n");
452 break;
454 default:
455 gcc_unreachable ();
458 fprintf (file, " in statement ");
459 print_gimple_stmt (file, use->stmt, 0, 0);
460 fprintf (file, "\n");
462 fprintf (file, " at position ");
463 if (use->op_p)
464 print_generic_expr (file, *use->op_p, TDF_SLIM);
465 fprintf (file, "\n");
467 dump_iv (file, use->iv);
469 if (use->related_cands)
471 fprintf (file, " related candidates ");
472 dump_bitmap (file, use->related_cands);
476 /* Dumps information about the uses to FILE. */
478 extern void dump_uses (FILE *, struct ivopts_data *);
479 void
480 dump_uses (FILE *file, struct ivopts_data *data)
482 unsigned i;
483 struct iv_use *use;
485 for (i = 0; i < n_iv_uses (data); i++)
487 use = iv_use (data, i);
489 dump_use (file, use);
490 fprintf (file, "\n");
494 /* Dumps information about induction variable candidate CAND to FILE. */
496 extern void dump_cand (FILE *, struct iv_cand *);
497 void
498 dump_cand (FILE *file, struct iv_cand *cand)
500 struct iv *iv = cand->iv;
502 fprintf (file, "candidate %d%s\n",
503 cand->id, cand->important ? " (important)" : "");
505 if (cand->depends_on)
507 fprintf (file, " depends on ");
508 dump_bitmap (file, cand->depends_on);
511 if (!iv)
513 fprintf (file, " final value replacement\n");
514 return;
517 switch (cand->pos)
519 case IP_NORMAL:
520 fprintf (file, " incremented before exit test\n");
521 break;
523 case IP_BEFORE_USE:
524 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
525 break;
527 case IP_AFTER_USE:
528 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
529 break;
531 case IP_END:
532 fprintf (file, " incremented at end\n");
533 break;
535 case IP_ORIGINAL:
536 fprintf (file, " original biv\n");
537 break;
540 dump_iv (file, iv);
543 /* Returns the info for ssa version VER. */
545 static inline struct version_info *
546 ver_info (struct ivopts_data *data, unsigned ver)
548 return data->version_info + ver;
551 /* Returns the info for ssa name NAME. */
553 static inline struct version_info *
554 name_info (struct ivopts_data *data, tree name)
556 return ver_info (data, SSA_NAME_VERSION (name));
559 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
560 emitted in LOOP. */
562 static bool
563 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
565 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
567 gcc_assert (bb);
569 if (sbb == loop->latch)
570 return true;
572 if (sbb != bb)
573 return false;
575 return stmt == last_stmt (bb);
578 /* Returns true if STMT if after the place where the original induction
579 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
580 if the positions are identical. */
582 static bool
583 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
585 basic_block cand_bb = gimple_bb (cand->incremented_at);
586 basic_block stmt_bb = gimple_bb (stmt);
588 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
589 return false;
591 if (stmt_bb != cand_bb)
592 return true;
594 if (true_if_equal
595 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
596 return true;
597 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
600 /* Returns true if STMT if after the place where the induction variable
601 CAND is incremented in LOOP. */
603 static bool
604 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
606 switch (cand->pos)
608 case IP_END:
609 return false;
611 case IP_NORMAL:
612 return stmt_after_ip_normal_pos (loop, stmt);
614 case IP_ORIGINAL:
615 case IP_AFTER_USE:
616 return stmt_after_inc_pos (cand, stmt, false);
618 case IP_BEFORE_USE:
619 return stmt_after_inc_pos (cand, stmt, true);
621 default:
622 gcc_unreachable ();
626 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
628 static bool
629 abnormal_ssa_name_p (tree exp)
631 if (!exp)
632 return false;
634 if (TREE_CODE (exp) != SSA_NAME)
635 return false;
637 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
640 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
641 abnormal phi node. Callback for for_each_index. */
643 static bool
644 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
645 void *data ATTRIBUTE_UNUSED)
647 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
649 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
650 return false;
651 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
652 return false;
655 return !abnormal_ssa_name_p (*index);
658 /* Returns true if EXPR contains a ssa name that occurs in an
659 abnormal phi node. */
661 bool
662 contains_abnormal_ssa_name_p (tree expr)
664 enum tree_code code;
665 enum tree_code_class codeclass;
667 if (!expr)
668 return false;
670 code = TREE_CODE (expr);
671 codeclass = TREE_CODE_CLASS (code);
673 if (code == SSA_NAME)
674 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
676 if (code == INTEGER_CST
677 || is_gimple_min_invariant (expr))
678 return false;
680 if (code == ADDR_EXPR)
681 return !for_each_index (&TREE_OPERAND (expr, 0),
682 idx_contains_abnormal_ssa_name_p,
683 NULL);
685 switch (codeclass)
687 case tcc_binary:
688 case tcc_comparison:
689 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
690 return true;
692 /* Fallthru. */
693 case tcc_unary:
694 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
695 return true;
697 break;
699 default:
700 gcc_unreachable ();
703 return false;
706 /* Returns tree describing number of iterations determined from
707 EXIT of DATA->current_loop, or NULL if something goes wrong. */
709 static tree
710 niter_for_exit (struct ivopts_data *data, edge exit)
712 struct tree_niter_desc desc;
713 tree niter;
714 void **slot;
716 if (!data->niters)
718 data->niters = pointer_map_create ();
719 slot = NULL;
721 else
722 slot = pointer_map_contains (data->niters, exit);
724 if (!slot)
726 /* Try to determine number of iterations. We must know it
727 unconditionally (i.e., without possibility of # of iterations
728 being zero). Also, we cannot safely work with ssa names that
729 appear in phi nodes on abnormal edges, so that we do not create
730 overlapping life ranges for them (PR 27283). */
731 if (number_of_iterations_exit (data->current_loop,
732 exit, &desc, true)
733 && integer_zerop (desc.may_be_zero)
734 && !contains_abnormal_ssa_name_p (desc.niter))
735 niter = desc.niter;
736 else
737 niter = NULL_TREE;
739 *pointer_map_insert (data->niters, exit) = niter;
741 else
742 niter = (tree) *slot;
744 return niter;
747 /* Returns tree describing number of iterations determined from
748 single dominating exit of DATA->current_loop, or NULL if something
749 goes wrong. */
751 static tree
752 niter_for_single_dom_exit (struct ivopts_data *data)
754 edge exit = single_dom_exit (data->current_loop);
756 if (!exit)
757 return NULL;
759 return niter_for_exit (data, exit);
762 /* Initializes data structures used by the iv optimization pass, stored
763 in DATA. */
765 static void
766 tree_ssa_iv_optimize_init (struct ivopts_data *data)
768 data->version_info_size = 2 * num_ssa_names;
769 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
770 data->relevant = BITMAP_ALLOC (NULL);
771 data->important_candidates = BITMAP_ALLOC (NULL);
772 data->max_inv_id = 0;
773 data->niters = NULL;
774 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
775 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
776 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
779 /* Returns a memory object to that EXPR points. In case we are able to
780 determine that it does not point to any such object, NULL is returned. */
782 static tree
783 determine_base_object (tree expr)
785 enum tree_code code = TREE_CODE (expr);
786 tree base, obj;
788 /* If this is a pointer casted to any type, we need to determine
789 the base object for the pointer; so handle conversions before
790 throwing away non-pointer expressions. */
791 if (CONVERT_EXPR_P (expr))
792 return determine_base_object (TREE_OPERAND (expr, 0));
794 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
795 return NULL_TREE;
797 switch (code)
799 case INTEGER_CST:
800 return NULL_TREE;
802 case ADDR_EXPR:
803 obj = TREE_OPERAND (expr, 0);
804 base = get_base_address (obj);
806 if (!base)
807 return expr;
809 if (TREE_CODE (base) == INDIRECT_REF)
810 return determine_base_object (TREE_OPERAND (base, 0));
812 return fold_convert (ptr_type_node,
813 build_fold_addr_expr (base));
815 case POINTER_PLUS_EXPR:
816 return determine_base_object (TREE_OPERAND (expr, 0));
818 case PLUS_EXPR:
819 case MINUS_EXPR:
820 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
821 gcc_unreachable ();
823 default:
824 return fold_convert (ptr_type_node, expr);
828 /* Allocates an induction variable with given initial value BASE and step STEP
829 for loop LOOP. */
831 static struct iv *
832 alloc_iv (tree base, tree step)
834 struct iv *iv = XCNEW (struct iv);
835 gcc_assert (step != NULL_TREE);
837 iv->base = base;
838 iv->base_object = determine_base_object (base);
839 iv->step = step;
840 iv->biv_p = false;
841 iv->have_use_for = false;
842 iv->use_id = 0;
843 iv->ssa_name = NULL_TREE;
845 return iv;
848 /* Sets STEP and BASE for induction variable IV. */
850 static void
851 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
853 struct version_info *info = name_info (data, iv);
855 gcc_assert (!info->iv);
857 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
858 info->iv = alloc_iv (base, step);
859 info->iv->ssa_name = iv;
862 /* Finds induction variable declaration for VAR. */
864 static struct iv *
865 get_iv (struct ivopts_data *data, tree var)
867 basic_block bb;
868 tree type = TREE_TYPE (var);
870 if (!POINTER_TYPE_P (type)
871 && !INTEGRAL_TYPE_P (type))
872 return NULL;
874 if (!name_info (data, var)->iv)
876 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
878 if (!bb
879 || !flow_bb_inside_loop_p (data->current_loop, bb))
880 set_iv (data, var, var, build_int_cst (type, 0));
883 return name_info (data, var)->iv;
886 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
887 not define a simple affine biv with nonzero step. */
889 static tree
890 determine_biv_step (gimple phi)
892 struct loop *loop = gimple_bb (phi)->loop_father;
893 tree name = PHI_RESULT (phi);
894 affine_iv iv;
896 if (!is_gimple_reg (name))
897 return NULL_TREE;
899 if (!simple_iv (loop, loop, name, &iv, true))
900 return NULL_TREE;
902 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
905 /* Finds basic ivs. */
907 static bool
908 find_bivs (struct ivopts_data *data)
910 gimple phi;
911 tree step, type, base;
912 bool found = false;
913 struct loop *loop = data->current_loop;
914 gimple_stmt_iterator psi;
916 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
918 phi = gsi_stmt (psi);
920 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
921 continue;
923 step = determine_biv_step (phi);
924 if (!step)
925 continue;
927 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
928 base = expand_simple_operations (base);
929 if (contains_abnormal_ssa_name_p (base)
930 || contains_abnormal_ssa_name_p (step))
931 continue;
933 type = TREE_TYPE (PHI_RESULT (phi));
934 base = fold_convert (type, base);
935 if (step)
937 if (POINTER_TYPE_P (type))
938 step = fold_convert (sizetype, step);
939 else
940 step = fold_convert (type, step);
943 set_iv (data, PHI_RESULT (phi), base, step);
944 found = true;
947 return found;
950 /* Marks basic ivs. */
952 static void
953 mark_bivs (struct ivopts_data *data)
955 gimple phi;
956 tree var;
957 struct iv *iv, *incr_iv;
958 struct loop *loop = data->current_loop;
959 basic_block incr_bb;
960 gimple_stmt_iterator psi;
962 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
964 phi = gsi_stmt (psi);
966 iv = get_iv (data, PHI_RESULT (phi));
967 if (!iv)
968 continue;
970 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
971 incr_iv = get_iv (data, var);
972 if (!incr_iv)
973 continue;
975 /* If the increment is in the subloop, ignore it. */
976 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
977 if (incr_bb->loop_father != data->current_loop
978 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
979 continue;
981 iv->biv_p = true;
982 incr_iv->biv_p = true;
986 /* Checks whether STMT defines a linear induction variable and stores its
987 parameters to IV. */
989 static bool
990 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
992 tree lhs;
993 struct loop *loop = data->current_loop;
995 iv->base = NULL_TREE;
996 iv->step = NULL_TREE;
998 if (gimple_code (stmt) != GIMPLE_ASSIGN)
999 return false;
1001 lhs = gimple_assign_lhs (stmt);
1002 if (TREE_CODE (lhs) != SSA_NAME)
1003 return false;
1005 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1006 return false;
1007 iv->base = expand_simple_operations (iv->base);
1009 if (contains_abnormal_ssa_name_p (iv->base)
1010 || contains_abnormal_ssa_name_p (iv->step))
1011 return false;
1013 return true;
1016 /* Finds general ivs in statement STMT. */
1018 static void
1019 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1021 affine_iv iv;
1023 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1024 return;
1026 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1029 /* Finds general ivs in basic block BB. */
1031 static void
1032 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1034 gimple_stmt_iterator bsi;
1036 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1037 find_givs_in_stmt (data, gsi_stmt (bsi));
1040 /* Finds general ivs. */
1042 static void
1043 find_givs (struct ivopts_data *data)
1045 struct loop *loop = data->current_loop;
1046 basic_block *body = get_loop_body_in_dom_order (loop);
1047 unsigned i;
1049 for (i = 0; i < loop->num_nodes; i++)
1050 find_givs_in_bb (data, body[i]);
1051 free (body);
1054 /* For each ssa name defined in LOOP determines whether it is an induction
1055 variable and if so, its initial value and step. */
1057 static bool
1058 find_induction_variables (struct ivopts_data *data)
1060 unsigned i;
1061 bitmap_iterator bi;
1063 if (!find_bivs (data))
1064 return false;
1066 find_givs (data);
1067 mark_bivs (data);
1069 if (dump_file && (dump_flags & TDF_DETAILS))
1071 tree niter = niter_for_single_dom_exit (data);
1073 if (niter)
1075 fprintf (dump_file, " number of iterations ");
1076 print_generic_expr (dump_file, niter, TDF_SLIM);
1077 fprintf (dump_file, "\n\n");
1080 fprintf (dump_file, "Induction variables:\n\n");
1082 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1084 if (ver_info (data, i)->iv)
1085 dump_iv (dump_file, ver_info (data, i)->iv);
1089 return true;
1092 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1094 static struct iv_use *
1095 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1096 gimple stmt, enum use_type use_type)
1098 struct iv_use *use = XCNEW (struct iv_use);
1100 use->id = n_iv_uses (data);
1101 use->type = use_type;
1102 use->iv = iv;
1103 use->stmt = stmt;
1104 use->op_p = use_p;
1105 use->related_cands = BITMAP_ALLOC (NULL);
1107 /* To avoid showing ssa name in the dumps, if it was not reset by the
1108 caller. */
1109 iv->ssa_name = NULL_TREE;
1111 if (dump_file && (dump_flags & TDF_DETAILS))
1112 dump_use (dump_file, use);
1114 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1116 return use;
1119 /* Checks whether OP is a loop-level invariant and if so, records it.
1120 NONLINEAR_USE is true if the invariant is used in a way we do not
1121 handle specially. */
1123 static void
1124 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1126 basic_block bb;
1127 struct version_info *info;
1129 if (TREE_CODE (op) != SSA_NAME
1130 || !is_gimple_reg (op))
1131 return;
1133 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1134 if (bb
1135 && flow_bb_inside_loop_p (data->current_loop, bb))
1136 return;
1138 info = name_info (data, op);
1139 info->name = op;
1140 info->has_nonlin_use |= nonlinear_use;
1141 if (!info->inv_id)
1142 info->inv_id = ++data->max_inv_id;
1143 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1146 /* Checks whether the use OP is interesting and if so, records it. */
1148 static struct iv_use *
1149 find_interesting_uses_op (struct ivopts_data *data, tree op)
1151 struct iv *iv;
1152 struct iv *civ;
1153 gimple stmt;
1154 struct iv_use *use;
1156 if (TREE_CODE (op) != SSA_NAME)
1157 return NULL;
1159 iv = get_iv (data, op);
1160 if (!iv)
1161 return NULL;
1163 if (iv->have_use_for)
1165 use = iv_use (data, iv->use_id);
1167 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1168 return use;
1171 if (integer_zerop (iv->step))
1173 record_invariant (data, op, true);
1174 return NULL;
1176 iv->have_use_for = true;
1178 civ = XNEW (struct iv);
1179 *civ = *iv;
1181 stmt = SSA_NAME_DEF_STMT (op);
1182 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1183 || is_gimple_assign (stmt));
1185 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1186 iv->use_id = use->id;
1188 return use;
1191 /* Given a condition in statement STMT, checks whether it is a compare
1192 of an induction variable and an invariant. If this is the case,
1193 CONTROL_VAR is set to location of the iv, BOUND to the location of
1194 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1195 induction variable descriptions, and true is returned. If this is not
1196 the case, CONTROL_VAR and BOUND are set to the arguments of the
1197 condition and false is returned. */
1199 static bool
1200 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1201 tree **control_var, tree **bound,
1202 struct iv **iv_var, struct iv **iv_bound)
1204 /* The objects returned when COND has constant operands. */
1205 static struct iv const_iv;
1206 static tree zero;
1207 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1208 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1209 bool ret = false;
1211 if (gimple_code (stmt) == GIMPLE_COND)
1213 op0 = gimple_cond_lhs_ptr (stmt);
1214 op1 = gimple_cond_rhs_ptr (stmt);
1216 else
1218 op0 = gimple_assign_rhs1_ptr (stmt);
1219 op1 = gimple_assign_rhs2_ptr (stmt);
1222 zero = integer_zero_node;
1223 const_iv.step = integer_zero_node;
1225 if (TREE_CODE (*op0) == SSA_NAME)
1226 iv0 = get_iv (data, *op0);
1227 if (TREE_CODE (*op1) == SSA_NAME)
1228 iv1 = get_iv (data, *op1);
1230 /* Exactly one of the compared values must be an iv, and the other one must
1231 be an invariant. */
1232 if (!iv0 || !iv1)
1233 goto end;
1235 if (integer_zerop (iv0->step))
1237 /* Control variable may be on the other side. */
1238 tmp_op = op0; op0 = op1; op1 = tmp_op;
1239 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1241 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1243 end:
1244 if (control_var)
1245 *control_var = op0;;
1246 if (iv_var)
1247 *iv_var = iv0;;
1248 if (bound)
1249 *bound = op1;
1250 if (iv_bound)
1251 *iv_bound = iv1;
1253 return ret;
1256 /* Checks whether the condition in STMT is interesting and if so,
1257 records it. */
1259 static void
1260 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1262 tree *var_p, *bound_p;
1263 struct iv *var_iv, *civ;
1265 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1267 find_interesting_uses_op (data, *var_p);
1268 find_interesting_uses_op (data, *bound_p);
1269 return;
1272 civ = XNEW (struct iv);
1273 *civ = *var_iv;
1274 record_use (data, NULL, civ, stmt, USE_COMPARE);
1277 /* Returns true if expression EXPR is obviously invariant in LOOP,
1278 i.e. if all its operands are defined outside of the LOOP. LOOP
1279 should not be the function body. */
1281 bool
1282 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1284 basic_block def_bb;
1285 unsigned i, len;
1287 gcc_assert (loop_depth (loop) > 0);
1289 if (is_gimple_min_invariant (expr))
1290 return true;
1292 if (TREE_CODE (expr) == SSA_NAME)
1294 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1295 if (def_bb
1296 && flow_bb_inside_loop_p (loop, def_bb))
1297 return false;
1299 return true;
1302 if (!EXPR_P (expr))
1303 return false;
1305 len = TREE_OPERAND_LENGTH (expr);
1306 for (i = 0; i < len; i++)
1307 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1308 return false;
1310 return true;
1313 /* Returns true if statement STMT is obviously invariant in LOOP,
1314 i.e. if all its operands on the RHS are defined outside of the LOOP.
1315 LOOP should not be the function body. */
1317 bool
1318 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1320 unsigned i;
1321 tree lhs;
1323 gcc_assert (loop_depth (loop) > 0);
1325 lhs = gimple_get_lhs (stmt);
1326 for (i = 0; i < gimple_num_ops (stmt); i++)
1328 tree op = gimple_op (stmt, i);
1329 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1330 return false;
1333 return true;
1336 /* Cumulates the steps of indices into DATA and replaces their values with the
1337 initial ones. Returns false when the value of the index cannot be determined.
1338 Callback for for_each_index. */
1340 struct ifs_ivopts_data
1342 struct ivopts_data *ivopts_data;
1343 gimple stmt;
1344 tree step;
1347 static bool
1348 idx_find_step (tree base, tree *idx, void *data)
1350 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1351 struct iv *iv;
1352 tree step, iv_base, iv_step, lbound, off;
1353 struct loop *loop = dta->ivopts_data->current_loop;
1355 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1356 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1357 return false;
1359 /* If base is a component ref, require that the offset of the reference
1360 be invariant. */
1361 if (TREE_CODE (base) == COMPONENT_REF)
1363 off = component_ref_field_offset (base);
1364 return expr_invariant_in_loop_p (loop, off);
1367 /* If base is array, first check whether we will be able to move the
1368 reference out of the loop (in order to take its address in strength
1369 reduction). In order for this to work we need both lower bound
1370 and step to be loop invariants. */
1371 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1373 /* Moreover, for a range, the size needs to be invariant as well. */
1374 if (TREE_CODE (base) == ARRAY_RANGE_REF
1375 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1376 return false;
1378 step = array_ref_element_size (base);
1379 lbound = array_ref_low_bound (base);
1381 if (!expr_invariant_in_loop_p (loop, step)
1382 || !expr_invariant_in_loop_p (loop, lbound))
1383 return false;
1386 if (TREE_CODE (*idx) != SSA_NAME)
1387 return true;
1389 iv = get_iv (dta->ivopts_data, *idx);
1390 if (!iv)
1391 return false;
1393 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1394 *&x[0], which is not folded and does not trigger the
1395 ARRAY_REF path below. */
1396 *idx = iv->base;
1398 if (integer_zerop (iv->step))
1399 return true;
1401 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1403 step = array_ref_element_size (base);
1405 /* We only handle addresses whose step is an integer constant. */
1406 if (TREE_CODE (step) != INTEGER_CST)
1407 return false;
1409 else
1410 /* The step for pointer arithmetics already is 1 byte. */
1411 step = build_int_cst (sizetype, 1);
1413 iv_base = iv->base;
1414 iv_step = iv->step;
1415 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1416 sizetype, &iv_base, &iv_step, dta->stmt,
1417 false))
1419 /* The index might wrap. */
1420 return false;
1423 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1424 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1426 return true;
1429 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1430 object is passed to it in DATA. */
1432 static bool
1433 idx_record_use (tree base, tree *idx,
1434 void *vdata)
1436 struct ivopts_data *data = (struct ivopts_data *) vdata;
1437 find_interesting_uses_op (data, *idx);
1438 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1440 find_interesting_uses_op (data, array_ref_element_size (base));
1441 find_interesting_uses_op (data, array_ref_low_bound (base));
1443 return true;
1446 /* If we can prove that TOP = cst * BOT for some constant cst,
1447 store cst to MUL and return true. Otherwise return false.
1448 The returned value is always sign-extended, regardless of the
1449 signedness of TOP and BOT. */
1451 static bool
1452 constant_multiple_of (tree top, tree bot, double_int *mul)
1454 tree mby;
1455 enum tree_code code;
1456 double_int res, p0, p1;
1457 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1459 STRIP_NOPS (top);
1460 STRIP_NOPS (bot);
1462 if (operand_equal_p (top, bot, 0))
1464 *mul = double_int_one;
1465 return true;
1468 code = TREE_CODE (top);
1469 switch (code)
1471 case MULT_EXPR:
1472 mby = TREE_OPERAND (top, 1);
1473 if (TREE_CODE (mby) != INTEGER_CST)
1474 return false;
1476 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1477 return false;
1479 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1480 precision);
1481 return true;
1483 case PLUS_EXPR:
1484 case MINUS_EXPR:
1485 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1486 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1487 return false;
1489 if (code == MINUS_EXPR)
1490 p1 = double_int_neg (p1);
1491 *mul = double_int_sext (double_int_add (p0, p1), precision);
1492 return true;
1494 case INTEGER_CST:
1495 if (TREE_CODE (bot) != INTEGER_CST)
1496 return false;
1498 p0 = double_int_sext (tree_to_double_int (top), precision);
1499 p1 = double_int_sext (tree_to_double_int (bot), precision);
1500 if (double_int_zero_p (p1))
1501 return false;
1502 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1503 precision);
1504 return double_int_zero_p (res);
1506 default:
1507 return false;
1511 /* Returns true if memory reference REF with step STEP may be unaligned. */
1513 static bool
1514 may_be_unaligned_p (tree ref, tree step)
1516 tree base;
1517 tree base_type;
1518 HOST_WIDE_INT bitsize;
1519 HOST_WIDE_INT bitpos;
1520 tree toffset;
1521 enum machine_mode mode;
1522 int unsignedp, volatilep;
1523 unsigned base_align;
1525 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1526 thus they are not misaligned. */
1527 if (TREE_CODE (ref) == TARGET_MEM_REF)
1528 return false;
1530 /* The test below is basically copy of what expr.c:normal_inner_ref
1531 does to check whether the object must be loaded by parts when
1532 STRICT_ALIGNMENT is true. */
1533 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1534 &unsignedp, &volatilep, true);
1535 base_type = TREE_TYPE (base);
1536 base_align = TYPE_ALIGN (base_type);
1538 if (mode != BLKmode)
1540 double_int mul;
1541 tree al = build_int_cst (TREE_TYPE (step),
1542 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1544 if (base_align < GET_MODE_ALIGNMENT (mode)
1545 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1546 || bitpos % BITS_PER_UNIT != 0)
1547 return true;
1549 if (!constant_multiple_of (step, al, &mul))
1550 return true;
1553 return false;
1556 /* Return true if EXPR may be non-addressable. */
1558 static bool
1559 may_be_nonaddressable_p (tree expr)
1561 switch (TREE_CODE (expr))
1563 case TARGET_MEM_REF:
1564 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1565 target, thus they are always addressable. */
1566 return false;
1568 case COMPONENT_REF:
1569 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1570 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1572 case VIEW_CONVERT_EXPR:
1573 /* This kind of view-conversions may wrap non-addressable objects
1574 and make them look addressable. After some processing the
1575 non-addressability may be uncovered again, causing ADDR_EXPRs
1576 of inappropriate objects to be built. */
1577 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1578 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1579 return true;
1581 /* ... fall through ... */
1583 case ARRAY_REF:
1584 case ARRAY_RANGE_REF:
1585 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1587 CASE_CONVERT:
1588 return true;
1590 default:
1591 break;
1594 return false;
1597 /* Finds addresses in *OP_P inside STMT. */
1599 static void
1600 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1602 tree base = *op_p, step = build_int_cst (sizetype, 0);
1603 struct iv *civ;
1604 struct ifs_ivopts_data ifs_ivopts_data;
1606 /* Do not play with volatile memory references. A bit too conservative,
1607 perhaps, but safe. */
1608 if (gimple_has_volatile_ops (stmt))
1609 goto fail;
1611 /* Ignore bitfields for now. Not really something terribly complicated
1612 to handle. TODO. */
1613 if (TREE_CODE (base) == BIT_FIELD_REF)
1614 goto fail;
1616 base = unshare_expr (base);
1618 if (TREE_CODE (base) == TARGET_MEM_REF)
1620 tree type = build_pointer_type (TREE_TYPE (base));
1621 tree astep;
1623 if (TMR_BASE (base)
1624 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1626 civ = get_iv (data, TMR_BASE (base));
1627 if (!civ)
1628 goto fail;
1630 TMR_BASE (base) = civ->base;
1631 step = civ->step;
1633 if (TMR_INDEX (base)
1634 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1636 civ = get_iv (data, TMR_INDEX (base));
1637 if (!civ)
1638 goto fail;
1640 TMR_INDEX (base) = civ->base;
1641 astep = civ->step;
1643 if (astep)
1645 if (TMR_STEP (base))
1646 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1648 step = fold_build2 (PLUS_EXPR, type, step, astep);
1652 if (integer_zerop (step))
1653 goto fail;
1654 base = tree_mem_ref_addr (type, base);
1656 else
1658 ifs_ivopts_data.ivopts_data = data;
1659 ifs_ivopts_data.stmt = stmt;
1660 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1661 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1662 || integer_zerop (ifs_ivopts_data.step))
1663 goto fail;
1664 step = ifs_ivopts_data.step;
1666 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1667 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1669 /* Check that the base expression is addressable. This needs
1670 to be done after substituting bases of IVs into it. */
1671 if (may_be_nonaddressable_p (base))
1672 goto fail;
1674 /* Moreover, on strict alignment platforms, check that it is
1675 sufficiently aligned. */
1676 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1677 goto fail;
1679 base = build_fold_addr_expr (base);
1681 /* Substituting bases of IVs into the base expression might
1682 have caused folding opportunities. */
1683 if (TREE_CODE (base) == ADDR_EXPR)
1685 tree *ref = &TREE_OPERAND (base, 0);
1686 while (handled_component_p (*ref))
1687 ref = &TREE_OPERAND (*ref, 0);
1688 if (TREE_CODE (*ref) == INDIRECT_REF)
1689 *ref = fold_indirect_ref (*ref);
1693 civ = alloc_iv (base, step);
1694 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1695 return;
1697 fail:
1698 for_each_index (op_p, idx_record_use, data);
1701 /* Finds and records invariants used in STMT. */
1703 static void
1704 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1706 ssa_op_iter iter;
1707 use_operand_p use_p;
1708 tree op;
1710 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1712 op = USE_FROM_PTR (use_p);
1713 record_invariant (data, op, false);
1717 /* Finds interesting uses of induction variables in the statement STMT. */
1719 static void
1720 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1722 struct iv *iv;
1723 tree op, *lhs, *rhs;
1724 ssa_op_iter iter;
1725 use_operand_p use_p;
1726 enum tree_code code;
1728 find_invariants_stmt (data, stmt);
1730 if (gimple_code (stmt) == GIMPLE_COND)
1732 find_interesting_uses_cond (data, stmt);
1733 return;
1736 if (is_gimple_assign (stmt))
1738 lhs = gimple_assign_lhs_ptr (stmt);
1739 rhs = gimple_assign_rhs1_ptr (stmt);
1741 if (TREE_CODE (*lhs) == SSA_NAME)
1743 /* If the statement defines an induction variable, the uses are not
1744 interesting by themselves. */
1746 iv = get_iv (data, *lhs);
1748 if (iv && !integer_zerop (iv->step))
1749 return;
1752 code = gimple_assign_rhs_code (stmt);
1753 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1754 && (REFERENCE_CLASS_P (*rhs)
1755 || is_gimple_val (*rhs)))
1757 if (REFERENCE_CLASS_P (*rhs))
1758 find_interesting_uses_address (data, stmt, rhs);
1759 else
1760 find_interesting_uses_op (data, *rhs);
1762 if (REFERENCE_CLASS_P (*lhs))
1763 find_interesting_uses_address (data, stmt, lhs);
1764 return;
1766 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1768 find_interesting_uses_cond (data, stmt);
1769 return;
1772 /* TODO -- we should also handle address uses of type
1774 memory = call (whatever);
1778 call (memory). */
1781 if (gimple_code (stmt) == GIMPLE_PHI
1782 && gimple_bb (stmt) == data->current_loop->header)
1784 iv = get_iv (data, PHI_RESULT (stmt));
1786 if (iv && !integer_zerop (iv->step))
1787 return;
1790 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1792 op = USE_FROM_PTR (use_p);
1794 if (TREE_CODE (op) != SSA_NAME)
1795 continue;
1797 iv = get_iv (data, op);
1798 if (!iv)
1799 continue;
1801 find_interesting_uses_op (data, op);
1805 /* Finds interesting uses of induction variables outside of loops
1806 on loop exit edge EXIT. */
1808 static void
1809 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1811 gimple phi;
1812 gimple_stmt_iterator psi;
1813 tree def;
1815 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1817 phi = gsi_stmt (psi);
1818 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1819 if (is_gimple_reg (def))
1820 find_interesting_uses_op (data, def);
1824 /* Finds uses of the induction variables that are interesting. */
1826 static void
1827 find_interesting_uses (struct ivopts_data *data)
1829 basic_block bb;
1830 gimple_stmt_iterator bsi;
1831 basic_block *body = get_loop_body (data->current_loop);
1832 unsigned i;
1833 struct version_info *info;
1834 edge e;
1836 if (dump_file && (dump_flags & TDF_DETAILS))
1837 fprintf (dump_file, "Uses:\n\n");
1839 for (i = 0; i < data->current_loop->num_nodes; i++)
1841 edge_iterator ei;
1842 bb = body[i];
1844 FOR_EACH_EDGE (e, ei, bb->succs)
1845 if (e->dest != EXIT_BLOCK_PTR
1846 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1847 find_interesting_uses_outside (data, e);
1849 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1850 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1851 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1852 if (!is_gimple_debug (gsi_stmt (bsi)))
1853 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1856 if (dump_file && (dump_flags & TDF_DETAILS))
1858 bitmap_iterator bi;
1860 fprintf (dump_file, "\n");
1862 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1864 info = ver_info (data, i);
1865 if (info->inv_id)
1867 fprintf (dump_file, " ");
1868 print_generic_expr (dump_file, info->name, TDF_SLIM);
1869 fprintf (dump_file, " is invariant (%d)%s\n",
1870 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1874 fprintf (dump_file, "\n");
1877 free (body);
1880 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1881 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1882 we are at the top-level of the processed address. */
1884 static tree
1885 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1886 unsigned HOST_WIDE_INT *offset)
1888 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1889 enum tree_code code;
1890 tree type, orig_type = TREE_TYPE (expr);
1891 unsigned HOST_WIDE_INT off0, off1, st;
1892 tree orig_expr = expr;
1894 STRIP_NOPS (expr);
1896 type = TREE_TYPE (expr);
1897 code = TREE_CODE (expr);
1898 *offset = 0;
1900 switch (code)
1902 case INTEGER_CST:
1903 if (!cst_and_fits_in_hwi (expr)
1904 || integer_zerop (expr))
1905 return orig_expr;
1907 *offset = int_cst_value (expr);
1908 return build_int_cst (orig_type, 0);
1910 case POINTER_PLUS_EXPR:
1911 case PLUS_EXPR:
1912 case MINUS_EXPR:
1913 op0 = TREE_OPERAND (expr, 0);
1914 op1 = TREE_OPERAND (expr, 1);
1916 op0 = strip_offset_1 (op0, false, false, &off0);
1917 op1 = strip_offset_1 (op1, false, false, &off1);
1919 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1920 if (op0 == TREE_OPERAND (expr, 0)
1921 && op1 == TREE_OPERAND (expr, 1))
1922 return orig_expr;
1924 if (integer_zerop (op1))
1925 expr = op0;
1926 else if (integer_zerop (op0))
1928 if (code == MINUS_EXPR)
1929 expr = fold_build1 (NEGATE_EXPR, type, op1);
1930 else
1931 expr = op1;
1933 else
1934 expr = fold_build2 (code, type, op0, op1);
1936 return fold_convert (orig_type, expr);
1938 case MULT_EXPR:
1939 op1 = TREE_OPERAND (expr, 1);
1940 if (!cst_and_fits_in_hwi (op1))
1941 return orig_expr;
1943 op0 = TREE_OPERAND (expr, 0);
1944 op0 = strip_offset_1 (op0, false, false, &off0);
1945 if (op0 == TREE_OPERAND (expr, 0))
1946 return orig_expr;
1948 *offset = off0 * int_cst_value (op1);
1949 if (integer_zerop (op0))
1950 expr = op0;
1951 else
1952 expr = fold_build2 (MULT_EXPR, type, op0, op1);
1954 return fold_convert (orig_type, expr);
1956 case ARRAY_REF:
1957 case ARRAY_RANGE_REF:
1958 if (!inside_addr)
1959 return orig_expr;
1961 step = array_ref_element_size (expr);
1962 if (!cst_and_fits_in_hwi (step))
1963 break;
1965 st = int_cst_value (step);
1966 op1 = TREE_OPERAND (expr, 1);
1967 op1 = strip_offset_1 (op1, false, false, &off1);
1968 *offset = off1 * st;
1970 if (top_compref
1971 && integer_zerop (op1))
1973 /* Strip the component reference completely. */
1974 op0 = TREE_OPERAND (expr, 0);
1975 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1976 *offset += off0;
1977 return op0;
1979 break;
1981 case COMPONENT_REF:
1982 if (!inside_addr)
1983 return orig_expr;
1985 tmp = component_ref_field_offset (expr);
1986 if (top_compref
1987 && cst_and_fits_in_hwi (tmp))
1989 /* Strip the component reference completely. */
1990 op0 = TREE_OPERAND (expr, 0);
1991 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1992 *offset = off0 + int_cst_value (tmp);
1993 return op0;
1995 break;
1997 case ADDR_EXPR:
1998 op0 = TREE_OPERAND (expr, 0);
1999 op0 = strip_offset_1 (op0, true, true, &off0);
2000 *offset += off0;
2002 if (op0 == TREE_OPERAND (expr, 0))
2003 return orig_expr;
2005 expr = build_fold_addr_expr (op0);
2006 return fold_convert (orig_type, expr);
2008 case INDIRECT_REF:
2009 inside_addr = false;
2010 break;
2012 default:
2013 return orig_expr;
2016 /* Default handling of expressions for that we want to recurse into
2017 the first operand. */
2018 op0 = TREE_OPERAND (expr, 0);
2019 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2020 *offset += off0;
2022 if (op0 == TREE_OPERAND (expr, 0)
2023 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2024 return orig_expr;
2026 expr = copy_node (expr);
2027 TREE_OPERAND (expr, 0) = op0;
2028 if (op1)
2029 TREE_OPERAND (expr, 1) = op1;
2031 /* Inside address, we might strip the top level component references,
2032 thus changing type of the expression. Handling of ADDR_EXPR
2033 will fix that. */
2034 expr = fold_convert (orig_type, expr);
2036 return expr;
2039 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2041 static tree
2042 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2044 return strip_offset_1 (expr, false, false, offset);
2047 /* Returns variant of TYPE that can be used as base for different uses.
2048 We return unsigned type with the same precision, which avoids problems
2049 with overflows. */
2051 static tree
2052 generic_type_for (tree type)
2054 if (POINTER_TYPE_P (type))
2055 return unsigned_type_for (type);
2057 if (TYPE_UNSIGNED (type))
2058 return type;
2060 return unsigned_type_for (type);
2063 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2064 the bitmap to that we should store it. */
2066 static struct ivopts_data *fd_ivopts_data;
2067 static tree
2068 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2070 bitmap *depends_on = (bitmap *) data;
2071 struct version_info *info;
2073 if (TREE_CODE (*expr_p) != SSA_NAME)
2074 return NULL_TREE;
2075 info = name_info (fd_ivopts_data, *expr_p);
2077 if (!info->inv_id || info->has_nonlin_use)
2078 return NULL_TREE;
2080 if (!*depends_on)
2081 *depends_on = BITMAP_ALLOC (NULL);
2082 bitmap_set_bit (*depends_on, info->inv_id);
2084 return NULL_TREE;
2087 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2088 position to POS. If USE is not NULL, the candidate is set as related to
2089 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2090 replacement of the final value of the iv by a direct computation. */
2092 static struct iv_cand *
2093 add_candidate_1 (struct ivopts_data *data,
2094 tree base, tree step, bool important, enum iv_position pos,
2095 struct iv_use *use, gimple incremented_at)
2097 unsigned i;
2098 struct iv_cand *cand = NULL;
2099 tree type, orig_type;
2101 if (base)
2103 orig_type = TREE_TYPE (base);
2104 type = generic_type_for (orig_type);
2105 if (type != orig_type)
2107 base = fold_convert (type, base);
2108 step = fold_convert (type, step);
2112 for (i = 0; i < n_iv_cands (data); i++)
2114 cand = iv_cand (data, i);
2116 if (cand->pos != pos)
2117 continue;
2119 if (cand->incremented_at != incremented_at
2120 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2121 && cand->ainc_use != use))
2122 continue;
2124 if (!cand->iv)
2126 if (!base && !step)
2127 break;
2129 continue;
2132 if (!base && !step)
2133 continue;
2135 if (operand_equal_p (base, cand->iv->base, 0)
2136 && operand_equal_p (step, cand->iv->step, 0))
2137 break;
2140 if (i == n_iv_cands (data))
2142 cand = XCNEW (struct iv_cand);
2143 cand->id = i;
2145 if (!base && !step)
2146 cand->iv = NULL;
2147 else
2148 cand->iv = alloc_iv (base, step);
2150 cand->pos = pos;
2151 if (pos != IP_ORIGINAL && cand->iv)
2153 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2154 cand->var_after = cand->var_before;
2156 cand->important = important;
2157 cand->incremented_at = incremented_at;
2158 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2160 if (step
2161 && TREE_CODE (step) != INTEGER_CST)
2163 fd_ivopts_data = data;
2164 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2167 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2168 cand->ainc_use = use;
2169 else
2170 cand->ainc_use = NULL;
2172 if (dump_file && (dump_flags & TDF_DETAILS))
2173 dump_cand (dump_file, cand);
2176 if (important && !cand->important)
2178 cand->important = true;
2179 if (dump_file && (dump_flags & TDF_DETAILS))
2180 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2183 if (use)
2185 bitmap_set_bit (use->related_cands, i);
2186 if (dump_file && (dump_flags & TDF_DETAILS))
2187 fprintf (dump_file, "Candidate %d is related to use %d\n",
2188 cand->id, use->id);
2191 return cand;
2194 /* Returns true if incrementing the induction variable at the end of the LOOP
2195 is allowed.
2197 The purpose is to avoid splitting latch edge with a biv increment, thus
2198 creating a jump, possibly confusing other optimization passes and leaving
2199 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2200 is not available (so we do not have a better alternative), or if the latch
2201 edge is already nonempty. */
2203 static bool
2204 allow_ip_end_pos_p (struct loop *loop)
2206 if (!ip_normal_pos (loop))
2207 return true;
2209 if (!empty_block_p (ip_end_pos (loop)))
2210 return true;
2212 return false;
2215 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2216 Important field is set to IMPORTANT. */
2218 static void
2219 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2220 bool important, struct iv_use *use)
2222 basic_block use_bb = gimple_bb (use->stmt);
2223 enum machine_mode mem_mode;
2224 unsigned HOST_WIDE_INT cstepi;
2226 /* If we insert the increment in any position other than the standard
2227 ones, we must ensure that it is incremented once per iteration.
2228 It must not be in an inner nested loop, or one side of an if
2229 statement. */
2230 if (use_bb->loop_father != data->current_loop
2231 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2232 || stmt_could_throw_p (use->stmt)
2233 || !cst_and_fits_in_hwi (step))
2234 return;
2236 cstepi = int_cst_value (step);
2238 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2239 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2240 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2242 enum tree_code code = MINUS_EXPR;
2243 tree new_base;
2244 tree new_step = step;
2246 if (POINTER_TYPE_P (TREE_TYPE (base)))
2248 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2249 code = POINTER_PLUS_EXPR;
2251 else
2252 new_step = fold_convert (TREE_TYPE (base), new_step);
2253 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2254 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2255 use->stmt);
2257 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2258 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2260 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2261 use->stmt);
2265 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2266 position to POS. If USE is not NULL, the candidate is set as related to
2267 it. The candidate computation is scheduled on all available positions. */
2269 static void
2270 add_candidate (struct ivopts_data *data,
2271 tree base, tree step, bool important, struct iv_use *use)
2273 if (ip_normal_pos (data->current_loop))
2274 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2275 if (ip_end_pos (data->current_loop)
2276 && allow_ip_end_pos_p (data->current_loop))
2277 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2279 if (use != NULL && use->type == USE_ADDRESS)
2280 add_autoinc_candidates (data, base, step, important, use);
2283 /* Add a standard "0 + 1 * iteration" iv candidate for a
2284 type with SIZE bits. */
2286 static void
2287 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2288 unsigned int size)
2290 tree type = lang_hooks.types.type_for_size (size, true);
2291 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2292 true, NULL);
2295 /* Adds standard iv candidates. */
2297 static void
2298 add_standard_iv_candidates (struct ivopts_data *data)
2300 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2302 /* The same for a double-integer type if it is still fast enough. */
2303 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2304 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2308 /* Adds candidates bases on the old induction variable IV. */
2310 static void
2311 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2313 gimple phi;
2314 tree def;
2315 struct iv_cand *cand;
2317 add_candidate (data, iv->base, iv->step, true, NULL);
2319 /* The same, but with initial value zero. */
2320 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2321 add_candidate (data, size_int (0), iv->step, true, NULL);
2322 else
2323 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2324 iv->step, true, NULL);
2326 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2327 if (gimple_code (phi) == GIMPLE_PHI)
2329 /* Additionally record the possibility of leaving the original iv
2330 untouched. */
2331 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2332 cand = add_candidate_1 (data,
2333 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2334 SSA_NAME_DEF_STMT (def));
2335 cand->var_before = iv->ssa_name;
2336 cand->var_after = def;
2340 /* Adds candidates based on the old induction variables. */
2342 static void
2343 add_old_ivs_candidates (struct ivopts_data *data)
2345 unsigned i;
2346 struct iv *iv;
2347 bitmap_iterator bi;
2349 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2351 iv = ver_info (data, i)->iv;
2352 if (iv && iv->biv_p && !integer_zerop (iv->step))
2353 add_old_iv_candidates (data, iv);
2357 /* Adds candidates based on the value of the induction variable IV and USE. */
2359 static void
2360 add_iv_value_candidates (struct ivopts_data *data,
2361 struct iv *iv, struct iv_use *use)
2363 unsigned HOST_WIDE_INT offset;
2364 tree base;
2365 tree basetype;
2367 add_candidate (data, iv->base, iv->step, false, use);
2369 /* The same, but with initial value zero. Make such variable important,
2370 since it is generic enough so that possibly many uses may be based
2371 on it. */
2372 basetype = TREE_TYPE (iv->base);
2373 if (POINTER_TYPE_P (basetype))
2374 basetype = sizetype;
2375 add_candidate (data, build_int_cst (basetype, 0),
2376 iv->step, true, use);
2378 /* Third, try removing the constant offset. Make sure to even
2379 add a candidate for &a[0] vs. (T *)&a. */
2380 base = strip_offset (iv->base, &offset);
2381 if (offset
2382 || base != iv->base)
2383 add_candidate (data, base, iv->step, false, use);
2386 /* Adds candidates based on the uses. */
2388 static void
2389 add_derived_ivs_candidates (struct ivopts_data *data)
2391 unsigned i;
2393 for (i = 0; i < n_iv_uses (data); i++)
2395 struct iv_use *use = iv_use (data, i);
2397 if (!use)
2398 continue;
2400 switch (use->type)
2402 case USE_NONLINEAR_EXPR:
2403 case USE_COMPARE:
2404 case USE_ADDRESS:
2405 /* Just add the ivs based on the value of the iv used here. */
2406 add_iv_value_candidates (data, use->iv, use);
2407 break;
2409 default:
2410 gcc_unreachable ();
2415 /* Record important candidates and add them to related_cands bitmaps
2416 if needed. */
2418 static void
2419 record_important_candidates (struct ivopts_data *data)
2421 unsigned i;
2422 struct iv_use *use;
2424 for (i = 0; i < n_iv_cands (data); i++)
2426 struct iv_cand *cand = iv_cand (data, i);
2428 if (cand->important)
2429 bitmap_set_bit (data->important_candidates, i);
2432 data->consider_all_candidates = (n_iv_cands (data)
2433 <= CONSIDER_ALL_CANDIDATES_BOUND);
2435 if (data->consider_all_candidates)
2437 /* We will not need "related_cands" bitmaps in this case,
2438 so release them to decrease peak memory consumption. */
2439 for (i = 0; i < n_iv_uses (data); i++)
2441 use = iv_use (data, i);
2442 BITMAP_FREE (use->related_cands);
2445 else
2447 /* Add important candidates to the related_cands bitmaps. */
2448 for (i = 0; i < n_iv_uses (data); i++)
2449 bitmap_ior_into (iv_use (data, i)->related_cands,
2450 data->important_candidates);
2454 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2455 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2456 we allocate a simple list to every use. */
2458 static void
2459 alloc_use_cost_map (struct ivopts_data *data)
2461 unsigned i, size, s, j;
2463 for (i = 0; i < n_iv_uses (data); i++)
2465 struct iv_use *use = iv_use (data, i);
2466 bitmap_iterator bi;
2468 if (data->consider_all_candidates)
2469 size = n_iv_cands (data);
2470 else
2472 s = 0;
2473 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2475 s++;
2478 /* Round up to the power of two, so that moduling by it is fast. */
2479 for (size = 1; size < s; size <<= 1)
2480 continue;
2483 use->n_map_members = size;
2484 use->cost_map = XCNEWVEC (struct cost_pair, size);
2488 /* Returns description of computation cost of expression whose runtime
2489 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2491 static comp_cost
2492 new_cost (unsigned runtime, unsigned complexity)
2494 comp_cost cost;
2496 cost.cost = runtime;
2497 cost.complexity = complexity;
2499 return cost;
2502 /* Adds costs COST1 and COST2. */
2504 static comp_cost
2505 add_costs (comp_cost cost1, comp_cost cost2)
2507 cost1.cost += cost2.cost;
2508 cost1.complexity += cost2.complexity;
2510 return cost1;
2512 /* Subtracts costs COST1 and COST2. */
2514 static comp_cost
2515 sub_costs (comp_cost cost1, comp_cost cost2)
2517 cost1.cost -= cost2.cost;
2518 cost1.complexity -= cost2.complexity;
2520 return cost1;
2523 /* Returns a negative number if COST1 < COST2, a positive number if
2524 COST1 > COST2, and 0 if COST1 = COST2. */
2526 static int
2527 compare_costs (comp_cost cost1, comp_cost cost2)
2529 if (cost1.cost == cost2.cost)
2530 return cost1.complexity - cost2.complexity;
2532 return cost1.cost - cost2.cost;
2535 /* Returns true if COST is infinite. */
2537 static bool
2538 infinite_cost_p (comp_cost cost)
2540 return cost.cost == INFTY;
2543 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2544 on invariants DEPENDS_ON and that the value used in expressing it
2545 is VALUE. */
2547 static void
2548 set_use_iv_cost (struct ivopts_data *data,
2549 struct iv_use *use, struct iv_cand *cand,
2550 comp_cost cost, bitmap depends_on, tree value)
2552 unsigned i, s;
/* Infinite costs are never stored; get_use_iv_cost reports such pairs as
   missing (NULL), so only the dependence bitmap needs releasing.  */
2554 if (infinite_cost_p (cost))
2556 BITMAP_FREE (depends_on);
2557 return;
/* Full two-dimensional map: the candidate id indexes directly.  */
2560 if (data->consider_all_candidates)
2562 use->cost_map[cand->id].cand = cand;
2563 use->cost_map[cand->id].cost = cost;
2564 use->cost_map[cand->id].depends_on = depends_on;
2565 use->cost_map[cand->id].value = value;
2566 return;
/* Otherwise the map is a small open-addressed hash table keyed by the
   candidate id; probe linearly with wrap-around for a free slot.  */
2569 /* n_map_members is a power of two, so this computes modulo. */
2570 s = cand->id & (use->n_map_members - 1);
2571 for (i = s; i < use->n_map_members; i++)
2572 if (!use->cost_map[i].cand)
2573 goto found;
2574 for (i = 0; i < s; i++)
2575 if (!use->cost_map[i].cand)
2576 goto found;
/* alloc_use_cost_map sized the table to hold all related candidates,
   so a free slot must exist.  */
2578 gcc_unreachable ();
2580 found:
2581 use->cost_map[i].cand = cand;
2582 use->cost_map[i].cost = cost;
2583 use->cost_map[i].depends_on = depends_on;
2584 use->cost_map[i].value = value;
2587 /* Gets cost of (USE, CANDIDATE) pair. */
2589 static struct cost_pair *
2590 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2591 struct iv_cand *cand)
2593 unsigned i, s;
2594 struct cost_pair *ret;
2596 if (!cand)
2597 return NULL;
/* Direct lookup when the full (use, candidate) matrix was allocated.  */
2599 if (data->consider_all_candidates)
2601 ret = use->cost_map + cand->id;
/* An unset .cand means the pair had infinite cost and was never stored.  */
2602 if (!ret->cand)
2603 return NULL;
2605 return ret;
/* Hash-table case: probe the same way set_use_iv_cost inserts.  */
2608 /* n_map_members is a power of two, so this computes modulo. */
2609 s = cand->id & (use->n_map_members - 1);
2610 for (i = s; i < use->n_map_members; i++)
2611 if (use->cost_map[i].cand == cand)
2612 return use->cost_map + i;
2614 for (i = 0; i < s; i++)
2615 if (use->cost_map[i].cand == cand)
2616 return use->cost_map + i;
2618 return NULL;
2621 /* Returns estimate on cost of computing SEQ. */
2623 static unsigned
2624 seq_cost (rtx seq, bool speed)
2626 unsigned cost = 0;
2627 rtx set;
2629 for (; seq; seq = NEXT_INSN (seq))
2631 set = single_set (seq);
2632 if (set)
2633 cost += rtx_cost (set, SET,speed);
2634 else
2635 cost++;
2638 return cost;
2641 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2642 static rtx
2643 produce_memory_decl_rtl (tree obj, int *regno)
2645 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2646 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2647 rtx x;
2649 gcc_assert (obj);
/* Static and external objects live at a symbolic address: wrap a
   SYMBOL_REF in a MEM and let the target annotate its section info.  */
2650 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2652 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2653 x = gen_rtx_SYMBOL_REF (address_mode, name);
2654 SET_SYMBOL_REF_DECL (x, obj);
2655 x = gen_rtx_MEM (DECL_MODE (obj), x);
2656 set_mem_addr_space (x, as);
2657 targetm.encode_section_info (obj, x, true);
2659 else
/* Otherwise address the object through a fresh fake register taken
   from *REGNO.  NOTE(review): REGNO may be NULL only for static objects
   (see the TREE_STATIC caller in force_expr_to_var_cost).  */
2661 x = gen_raw_REG (address_mode, (*regno)++);
2662 x = gen_rtx_MEM (DECL_MODE (obj), x);
2663 set_mem_addr_space (x, as);
2666 return x;
2669 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2670 walk_tree. DATA contains the actual fake register number. */
2672 static tree
2673 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2675 tree obj = NULL_TREE;
2676 rtx x = NULL_RTX;
2677 int *regno = (int *) data;
2679 switch (TREE_CODE (*expr_p))
2681 case ADDR_EXPR:
/* Strip handled components to reach the base object of the address.  */
2682 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2683 handled_component_p (*expr_p);
2684 expr_p = &TREE_OPERAND (*expr_p, 0))
2685 continue;
2686 obj = *expr_p;
2687 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2688 x = produce_memory_decl_rtl (obj, regno);
2689 break;
2691 case SSA_NAME:
/* Setting *WS to 0 tells walk_tree not to descend into subtrees.  */
2692 *ws = 0;
2693 obj = SSA_NAME_VAR (*expr_p);
2694 if (!DECL_RTL_SET_P (obj))
2695 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2696 break;
2698 case VAR_DECL:
2699 case PARM_DECL:
2700 case RESULT_DECL:
2701 *ws = 0;
2702 obj = *expr_p;
2704 if (DECL_RTL_SET_P (obj))
2705 break;
/* BLKmode objects cannot live in a register; give them a MEM.  */
2707 if (DECL_MODE (obj) == BLKmode)
2708 x = produce_memory_decl_rtl (obj, regno);
2709 else
2710 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2712 break;
2714 default:
2715 break;
/* Remember the decl so the fake DECL_RTL can be reset later.  */
2718 if (x)
2720 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2721 SET_DECL_RTL (obj, x);
2724 return NULL_TREE;
2727 /* Determines cost of the computation of EXPR. */
2729 static unsigned
2730 computation_cost (tree expr, bool speed)
2732 rtx seq, rslt;
2733 tree type = TREE_TYPE (expr);
2734 unsigned cost;
2735 /* Avoid using hard regs in ways which may be unsupported. */
2736 int regno = LAST_VIRTUAL_REGISTER + 1;
2737 enum function_frequency real_frequency = cfun->function_frequency;
/* Temporarily pretend a normal frequency and the requested hotness so
   expansion heuristics do not skew the measured cost; restored below.  */
2739 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2740 crtl->maybe_hot_insn_p = speed;
2741 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2742 start_sequence ();
2743 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2744 seq = get_insns ();
2745 end_sequence ();
2746 default_rtl_profile ();
2747 cfun->function_frequency = real_frequency;
2749 cost = seq_cost (seq, speed);
/* If the expansion ended in memory, also account for the cost of
   computing the address.  */
2750 if (MEM_P (rslt))
2751 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2752 TYPE_ADDR_SPACE (type), speed);
2754 return cost;
2757 /* Returns variable containing the value of candidate CAND at statement AT. */
2759 static tree
2760 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2762 if (stmt_after_increment (loop, cand, stmt))
2763 return cand->var_after;
2764 else
2765 return cand->var_before;
2768 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2769 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2772 tree_int_cst_sign_bit (const_tree t)
2774 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2775 unsigned HOST_WIDE_INT w;
2777 if (bitno < HOST_BITS_PER_WIDE_INT)
2778 w = TREE_INT_CST_LOW (t);
2779 else
2781 w = TREE_INT_CST_HIGH (t);
2782 bitno -= HOST_BITS_PER_WIDE_INT;
2785 return (w >> bitno) & 1;
2788 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2789 same precision that is at least as wide as the precision of TYPE, stores
2790 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2791 type of A and B. */
2793 static tree
2794 determine_common_wider_type (tree *a, tree *b)
2796 tree wider_type = NULL;
2797 tree suba, subb;
2798 tree atype = TREE_TYPE (*a);
/* *A must be a conversion from a type at least as wide as ATYPE ... */
2800 if (CONVERT_EXPR_P (*a))
2802 suba = TREE_OPERAND (*a, 0);
2803 wider_type = TREE_TYPE (suba);
2804 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2805 return atype;
2807 else
2808 return atype;
/* ... and *B must convert from a type of exactly the same precision.  */
2810 if (CONVERT_EXPR_P (*b))
2812 subb = TREE_OPERAND (*b, 0);
2813 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2814 return atype;
2816 else
2817 return atype;
/* Both match; strip the conversions and report the wider type.  */
2819 *a = suba;
2820 *b = subb;
2821 return wider_type;
2824 /* Determines the expression by that USE is expressed from induction variable
2825 CAND at statement AT in LOOP. The expression is stored in a decomposed
2826 form into AFF. Returns false if USE cannot be expressed using CAND. */
2828 static bool
2829 get_computation_aff (struct loop *loop,
2830 struct iv_use *use, struct iv_cand *cand, gimple at,
2831 struct affine_tree_combination *aff)
2833 tree ubase = use->iv->base;
2834 tree ustep = use->iv->step;
2835 tree cbase = cand->iv->base;
2836 tree cstep = cand->iv->step, cstep_common;
2837 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2838 tree common_type, var;
2839 tree uutype;
2840 aff_tree cbase_aff, var_aff;
2841 double_int rat;
2843 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2845 /* We do not have a precision to express the values of use. */
2846 return false;
2849 var = var_at_stmt (loop, cand, at);
/* The arithmetic below is carried out in the unsigned variant of the
   use's type.  */
2850 uutype = unsigned_type_for (utype);
2852 /* If the conversion is not noop, perform it. */
2853 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2855 cstep = fold_convert (uutype, cstep);
2856 cbase = fold_convert (uutype, cbase);
2857 var = fold_convert (uutype, var);
/* RAT is the constant ratio ustep / cstep; without one, the use cannot
   be expressed from this candidate.  */
2860 if (!constant_multiple_of (ustep, cstep, &rat))
2861 return false;
2863 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2864 type, we achieve better folding by computing their difference in this
2865 wider type, and cast the result to UUTYPE. We do not need to worry about
2866 overflows, as all the arithmetics will in the end be performed in UUTYPE
2867 anyway. */
2868 common_type = determine_common_wider_type (&ubase, &cbase);
2870 /* use = ubase - ratio * cbase + ratio * var. */
2871 tree_to_aff_combination (ubase, common_type, aff);
2872 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2873 tree_to_aff_combination (var, uutype, &var_aff);
2875 /* We need to shift the value if we are after the increment. */
2876 if (stmt_after_increment (loop, cand, at))
2878 aff_tree cstep_aff;
2880 if (common_type != uutype)
2881 cstep_common = fold_convert (common_type, cstep);
2882 else
2883 cstep_common = cstep;
2885 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2886 aff_combination_add (&cbase_aff, &cstep_aff);
/* Combine: AFF = ubase - rat * cbase, converted to UUTYPE, plus
   rat * var.  */
2889 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2890 aff_combination_add (aff, &cbase_aff);
2891 if (common_type != uutype)
2892 aff_combination_convert (aff, uutype);
2894 aff_combination_scale (&var_aff, rat);
2895 aff_combination_add (aff, &var_aff);
2897 return true;
2900 /* Determines the expression by that USE is expressed from induction variable
2901 CAND at statement AT in LOOP. The computation is unshared. */
2903 static tree
2904 get_computation_at (struct loop *loop,
2905 struct iv_use *use, struct iv_cand *cand, gimple at)
2907 aff_tree aff;
2908 tree type = TREE_TYPE (use->iv->base);
2910 if (!get_computation_aff (loop, use, cand, at, &aff))
2911 return NULL_TREE;
2912 unshare_aff_combination (&aff);
2913 return fold_convert (type, aff_combination_to_tree (&aff));
2916 /* Determines the expression by that USE is expressed from induction variable
2917 CAND in LOOP. The computation is unshared. */
2919 static tree
2920 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2922 return get_computation_at (loop, use, cand, use->stmt);
2925 /* Returns cost of addition in MODE. */
2927 static unsigned
2928 add_cost (enum machine_mode mode, bool speed)
2930 static unsigned costs[NUM_MACHINE_MODES];
2931 rtx seq;
2932 unsigned cost;
/* Results are cached per mode only; the cache ignores SPEED, so the
   first query's setting decides the cached value for that mode.  */
2934 if (costs[mode])
2935 return costs[mode];
/* Expand an addition of two fake registers and price the sequence.  */
2937 start_sequence ();
2938 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2939 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2940 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2941 NULL_RTX);
2942 seq = get_insns ();
2943 end_sequence ();
2945 cost = seq_cost (seq, speed);
/* Never report a zero cost for an addition (also keeps the zero slot of
   the cache meaning "not computed yet").  */
2946 if (!cost)
2947 cost = 1;
2949 costs[mode] = cost;
2951 if (dump_file && (dump_flags & TDF_DETAILS))
2952 fprintf (dump_file, "Addition in %s costs %d\n",
2953 GET_MODE_NAME (mode), cost);
2954 return cost;
2957 /* Entry in a hashtable of already known costs for multiplication. */
2958 struct mbc_entry
/* Key is the (constant, mode) pair; value is the cached cost.  */
2960 HOST_WIDE_INT cst; /* The constant to multiply by. */
2961 enum machine_mode mode; /* In mode. */
2962 unsigned cost; /* The cost. */
2965 /* Counts hash value for the ENTRY. */
2967 static hashval_t
2968 mbc_entry_hash (const void *entry)
2970 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2972 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2975 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2977 static int
2978 mbc_entry_eq (const void *entry1, const void *entry2)
2980 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2981 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2983 return (e1->mode == e2->mode
2984 && e1->cst == e2->cst);
2987 /* Returns cost of multiplication by constant CST in MODE. */
2989 unsigned
2990 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2992 static htab_t costs;
2993 struct mbc_entry **cached, act;
2994 rtx seq;
2995 unsigned cost;
/* Lazily create the cache of multiplication costs.  */
2997 if (!costs)
2998 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3000 act.mode = mode;
3001 act.cst = cst;
3002 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3003 if (*cached)
3004 return (*cached)->cost;
/* Not cached yet: populate the slot, then measure.  */
3006 *cached = XNEW (struct mbc_entry);
3007 (*cached)->mode = mode;
3008 (*cached)->cst = cst;
/* Expand a multiplication of a fake register by CST and price the
   resulting insn sequence.  */
3010 start_sequence ();
3011 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3012 gen_int_mode (cst, mode), NULL_RTX, 0);
3013 seq = get_insns ();
3014 end_sequence ();
3016 cost = seq_cost (seq, speed);
3018 if (dump_file && (dump_flags & TDF_DETAILS))
3019 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3020 (int) cst, GET_MODE_NAME (mode), cost);
3022 (*cached)->cost = cost;
3024 return cost;
3027 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3028 validity for a memory reference accessing memory of mode MODE in
3029 address space AS. */
3031 DEF_VEC_P (sbitmap);
3032 DEF_VEC_ALLOC_P (sbitmap, heap);
3034 bool
3035 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3036 addr_space_t as)
3038 #define MAX_RATIO 128
/* One cached bitmap of valid multipliers per (address space, mode).  */
3039 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3040 static VEC (sbitmap, heap) *valid_mult_list;
3041 sbitmap valid_mult;
3043 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3044 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3046 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3047 if (!valid_mult)
3049 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3050 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3051 rtx addr;
3052 HOST_WIDE_INT i;
/* Probe each ratio in [-MAX_RATIO, MAX_RATIO] by asking the target
   whether (mult reg i) forms a valid address.  */
3054 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3055 sbitmap_zero (valid_mult);
3056 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3057 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3059 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3060 if (memory_address_addr_space_p (mode, addr, as))
3061 SET_BIT (valid_mult, i + MAX_RATIO);
3064 if (dump_file && (dump_flags & TDF_DETAILS))
3066 fprintf (dump_file, " allowed multipliers:");
3067 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3068 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3069 fprintf (dump_file, " %d", (int) i);
3070 fprintf (dump_file, "\n");
3071 fprintf (dump_file, "\n");
3074 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
/* Ratios outside the probed range are conservatively rejected.  */
3077 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3078 return false;
3080 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3083 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3084 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3085 variable is omitted. Compute the cost for a memory reference that accesses
3086 a memory location of mode MEM_MODE in address space AS.
3088 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3089 size of MEM_MODE / RATIO) is available. To make this determination, we
3090 look at the size of the increment to be made, which is given in CSTEP.
3091 CSTEP may be zero if the step is unknown.
3092 STMT_AFTER_INC is true iff the statement we're looking at is after the
3093 increment of the original biv.
3095 TODO -- there must be some better way. This all is quite crude. */
/* Per (address space, memory mode) cache: the valid offset range and the
   cost of each of the 16 symbol/var/offset/ratio address shapes.  */
3097 typedef struct
3099 HOST_WIDE_INT min_offset, max_offset;
3100 unsigned costs[2][2][2][2];
3101 } *address_cost_data;
3103 DEF_VEC_P (address_cost_data);
3104 DEF_VEC_ALLOC_P (address_cost_data, heap);
3106 static comp_cost
3107 get_address_cost (bool symbol_present, bool var_present,
3108 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3109 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3110 addr_space_t as, bool speed,
3111 bool stmt_after_inc, bool *may_autoinc)
3113 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3114 static VEC(address_cost_data, heap) *address_cost_data_list;
3115 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3116 address_cost_data data;
3117 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3118 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3119 unsigned cost, acost, complexity;
3120 bool offset_p, ratio_p, autoinc;
3121 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3122 unsigned HOST_WIDE_INT mask;
3123 unsigned bits;
/* Look up (or lazily build) the cached data for this (AS, MEM_MODE).  */
3125 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3126 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3127 data_index + 1);
3129 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3130 if (!data)
3132 HOST_WIDE_INT i;
3133 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3134 HOST_WIDE_INT rat, off;
3135 int old_cse_not_expected;
3136 unsigned sym_p, var_p, off_p, rat_p, add_c;
3137 rtx seq, addr, base;
3138 rtx reg0, reg1;
3140 data = (address_cost_data) xcalloc (1, sizeof (*data));
3142 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
/* Probe, by doubling, the largest positive and negative offsets the
   target accepts in a (reg + offset) address.  */
3144 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3145 for (i = start; i <= 1 << 20; i <<= 1)
3147 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3148 if (!memory_address_addr_space_p (mem_mode, addr, as))
3149 break;
3151 data->max_offset = i == start ? 0 : i >> 1;
3152 off = data->max_offset;
3154 for (i = start; i <= 1 << 20; i <<= 1)
3156 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3157 if (!memory_address_addr_space_p (mem_mode, addr, as))
3158 break;
3160 data->min_offset = i == start ? 0 : -(i >> 1);
3162 if (dump_file && (dump_flags & TDF_DETAILS))
3164 fprintf (dump_file, "get_address_cost:\n");
3165 fprintf (dump_file, " min offset %s %d\n",
3166 GET_MODE_NAME (mem_mode),
3167 (int) data->min_offset);
3168 fprintf (dump_file, " max offset %s %d\n",
3169 GET_MODE_NAME (mem_mode),
3170 (int) data->max_offset);
/* Find some ratio the target allows in addresses, for use below.  */
3173 rat = 1;
3174 for (i = 2; i <= MAX_RATIO; i++)
3175 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3177 rat = i;
3178 break;
3181 /* Compute the cost of various addressing modes. */
3182 acost = 0;
3183 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3184 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
/* Record which auto-inc/dec address forms the target supports for this
   memory mode.  */
3186 if (HAVE_PRE_DECREMENT)
3188 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3189 has_predec[mem_mode]
3190 = memory_address_addr_space_p (mem_mode, addr, as);
3192 if (HAVE_POST_DECREMENT)
3194 addr = gen_rtx_POST_DEC (address_mode, reg0);
3195 has_postdec[mem_mode]
3196 = memory_address_addr_space_p (mem_mode, addr, as);
3198 if (HAVE_PRE_INCREMENT)
3200 addr = gen_rtx_PRE_INC (address_mode, reg0);
3201 has_preinc[mem_mode]
3202 = memory_address_addr_space_p (mem_mode, addr, as);
3204 if (HAVE_POST_INCREMENT)
3206 addr = gen_rtx_POST_INC (address_mode, reg0);
3207 has_postinc[mem_mode]
3208 = memory_address_addr_space_p (mem_mode, addr, as);
/* I enumerates the 16 combinations of symbol/var/offset/ratio presence;
   build a representative address for each and price it.  */
3210 for (i = 0; i < 16; i++)
3212 sym_p = i & 1;
3213 var_p = (i >> 1) & 1;
3214 off_p = (i >> 2) & 1;
3215 rat_p = (i >> 3) & 1;
3217 addr = reg0;
3218 if (rat_p)
3219 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3220 gen_int_mode (rat, address_mode));
3222 if (var_p)
3223 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3225 if (sym_p)
3227 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3228 /* ??? We can run into trouble with some backends by presenting
3229 it with symbols which haven't been properly passed through
3230 targetm.encode_section_info. By setting the local bit, we
3231 enhance the probability of things working. */
3232 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3234 if (off_p)
3235 base = gen_rtx_fmt_e (CONST, address_mode,
3236 gen_rtx_fmt_ee
3237 (PLUS, address_mode, base,
3238 gen_int_mode (off, address_mode)));
3240 else if (off_p)
3241 base = gen_int_mode (off, address_mode);
3242 else
3243 base = NULL_RTX;
3245 if (base)
3246 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3248 start_sequence ();
3249 /* To avoid splitting addressing modes, pretend that no cse will
3250 follow. */
3251 old_cse_not_expected = cse_not_expected;
3252 cse_not_expected = true;
3253 addr = memory_address_addr_space (mem_mode, addr, as);
3254 cse_not_expected = old_cse_not_expected;
3255 seq = get_insns ();
3256 end_sequence ();
3258 acost = seq_cost (seq, speed);
3259 acost += address_cost (addr, mem_mode, as, speed);
3261 if (!acost)
3262 acost = 1;
3263 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3266 /* On some targets, it is quite expensive to load symbol to a register,
3267 which makes addresses that contain symbols look much more expensive.
3268 However, the symbol will have to be loaded in any case before the
3269 loop (and quite likely we have it in register already), so it does not
3270 make much sense to penalize them too heavily. So make some final
3271 tweaks for the SYMBOL_PRESENT modes:
3273 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3274 var is cheaper, use this mode with small penalty.
3275 If VAR_PRESENT is true, try whether the mode with
3276 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3277 if this is the case, use it. */
3278 add_c = add_cost (address_mode, speed);
3279 for (i = 0; i < 8; i++)
3281 var_p = i & 1;
3282 off_p = (i >> 1) & 1;
3283 rat_p = (i >> 2) & 1;
3285 acost = data->costs[0][1][off_p][rat_p] + 1;
3286 if (var_p)
3287 acost += add_c;
3289 if (acost < data->costs[1][var_p][off_p][rat_p])
3290 data->costs[1][var_p][off_p][rat_p] = acost;
3293 if (dump_file && (dump_flags & TDF_DETAILS))
3295 fprintf (dump_file, "Address costs:\n");
3297 for (i = 0; i < 16; i++)
3299 sym_p = i & 1;
3300 var_p = (i >> 1) & 1;
3301 off_p = (i >> 2) & 1;
3302 rat_p = (i >> 3) & 1;
3304 fprintf (dump_file, " ");
3305 if (sym_p)
3306 fprintf (dump_file, "sym + ");
3307 if (var_p)
3308 fprintf (dump_file, "var + ");
3309 if (off_p)
3310 fprintf (dump_file, "cst + ");
3311 if (rat_p)
3312 fprintf (dump_file, "rat * ");
3314 acost = data->costs[sym_p][var_p][off_p][rat_p];
3315 fprintf (dump_file, "index costs %d\n", acost);
3317 if (has_predec[mem_mode] || has_postdec[mem_mode]
3318 || has_preinc[mem_mode] || has_postinc[mem_mode])
3319 fprintf (dump_file, " May include autoinc/dec\n");
3320 fprintf (dump_file, "\n");
3323 VEC_replace (address_cost_data, address_cost_data_list,
3324 data_index, data);
/* Sign-extend OFFSET to the width of the address mode.  */
3327 bits = GET_MODE_BITSIZE (address_mode);
3328 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3329 offset &= mask;
3330 if ((offset >> (bits - 1) & 1))
3331 offset |= ~mask;
3332 s_offset = offset;
/* Decide whether an auto-inc/dec addressing mode can express this
   access: no symbol/var/ratio, and the step/offset must match one of
   the supported pre/post inc/dec shapes.  */
3334 autoinc = false;
3335 msize = GET_MODE_SIZE (mem_mode);
3336 autoinc_offset = offset;
3337 if (stmt_after_inc)
3338 autoinc_offset += ratio * cstep;
3339 if (symbol_present || var_present || ratio != 1)
3340 autoinc = false;
3341 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3342 && msize == cstep)
3343 || (has_postdec[mem_mode] && autoinc_offset == 0
3344 && msize == -cstep)
3345 || (has_preinc[mem_mode] && autoinc_offset == msize
3346 && msize == cstep)
3347 || (has_predec[mem_mode] && autoinc_offset == -msize
3348 && msize == -cstep))
3349 autoinc = true;
/* Charge extra for parts the address itself cannot absorb: a ratio that
   is not allowed in addresses, or an out-of-range non-symbolic offset.  */
3351 cost = 0;
3352 offset_p = (s_offset != 0
3353 && data->min_offset <= s_offset
3354 && s_offset <= data->max_offset);
3355 ratio_p = (ratio != 1
3356 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3358 if (ratio != 1 && !ratio_p)
3359 cost += multiply_by_cost (ratio, address_mode, speed);
3361 if (s_offset && !offset_p && !symbol_present)
3362 cost += add_cost (address_mode, speed);
3364 if (may_autoinc)
3365 *may_autoinc = autoinc;
3366 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3367 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3368 return new_cost (cost + acost, complexity);
3371 /* Estimates cost of forcing expression EXPR into a variable. */
3373 static comp_cost
3374 force_expr_to_var_cost (tree expr, bool speed)
3376 static bool costs_initialized = false;
3377 static unsigned integer_cost [2];
3378 static unsigned symbol_cost [2];
3379 static unsigned address_cost [2];
3380 tree op0, op1;
3381 comp_cost cost0, cost1, cost;
3382 enum machine_mode mode;
/* On the first call, measure the cost of the basic building blocks
   (integer constant, symbol, symbol + offset) for both size (0) and
   speed (1).  */
3384 if (!costs_initialized)
3386 tree type = build_pointer_type (integer_type_node);
3387 tree var, addr;
3388 rtx x;
3389 int i;
3391 var = create_tmp_var_raw (integer_type_node, "test_var");
3392 TREE_STATIC (var) = 1;
3393 x = produce_memory_decl_rtl (var, NULL);
3394 SET_DECL_RTL (var, x);
3396 addr = build1 (ADDR_EXPR, type, var);
3399 for (i = 0; i < 2; i++)
3401 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3402 2000), i);
3404 symbol_cost[i] = computation_cost (addr, i) + 1;
3406 address_cost[i]
3407 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3408 addr,
3409 build_int_cst (sizetype, 2000)), i) + 1;
3410 if (dump_file && (dump_flags & TDF_DETAILS))
3412 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3413 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3414 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3415 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3416 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3417 fprintf (dump_file, "\n");
3421 costs_initialized = true;
3424 STRIP_NOPS (expr);
/* Variables and SSA names are already available at zero cost.  */
3426 if (SSA_VAR_P (expr))
3427 return zero_cost;
3429 if (is_gimple_min_invariant (expr))
3431 if (TREE_CODE (expr) == INTEGER_CST)
3432 return new_cost (integer_cost [speed], 0);
3434 if (TREE_CODE (expr) == ADDR_EXPR)
3436 tree obj = TREE_OPERAND (expr, 0);
3438 if (TREE_CODE (obj) == VAR_DECL
3439 || TREE_CODE (obj) == PARM_DECL
3440 || TREE_CODE (obj) == RESULT_DECL)
3441 return new_cost (symbol_cost [speed], 0);
3444 return new_cost (address_cost [speed], 0);
/* Recursively price the operands of the supported operations; anything
   else is treated as a spill.  */
3447 switch (TREE_CODE (expr))
3449 case POINTER_PLUS_EXPR:
3450 case PLUS_EXPR:
3451 case MINUS_EXPR:
3452 case MULT_EXPR:
3453 op0 = TREE_OPERAND (expr, 0);
3454 op1 = TREE_OPERAND (expr, 1);
3455 STRIP_NOPS (op0);
3456 STRIP_NOPS (op1);
3458 if (is_gimple_val (op0))
3459 cost0 = zero_cost;
3460 else
3461 cost0 = force_expr_to_var_cost (op0, speed);
3463 if (is_gimple_val (op1))
3464 cost1 = zero_cost;
3465 else
3466 cost1 = force_expr_to_var_cost (op1, speed);
3468 break;
3470 case NEGATE_EXPR:
3471 op0 = TREE_OPERAND (expr, 0);
3472 STRIP_NOPS (op0);
3473 op1 = NULL_TREE;
3475 if (is_gimple_val (op0))
3476 cost0 = zero_cost;
3477 else
3478 cost0 = force_expr_to_var_cost (op0, speed);
3480 cost1 = zero_cost;
3481 break;
3483 default:
3484 /* Just an arbitrary value, FIXME. */
3485 return new_cost (target_spill_cost[speed], 0);
/* Add the cost of the operation itself.  */
3488 mode = TYPE_MODE (TREE_TYPE (expr));
3489 switch (TREE_CODE (expr))
3491 case POINTER_PLUS_EXPR:
3492 case PLUS_EXPR:
3493 case MINUS_EXPR:
3494 case NEGATE_EXPR:
3495 cost = new_cost (add_cost (mode, speed), 0);
3496 break;
3498 case MULT_EXPR:
/* Multiplications are only priced when one operand is a constant
   fitting in a HOST_WIDE_INT; otherwise treat as a spill.  */
3499 if (cst_and_fits_in_hwi (op0))
3500 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3501 else if (cst_and_fits_in_hwi (op1))
3502 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3503 else
3504 return new_cost (target_spill_cost [speed], 0);
3505 break;
3507 default:
3508 gcc_unreachable ();
3511 cost = add_costs (cost, cost0);
3512 cost = add_costs (cost, cost1);
3514 /* Bound the cost by target_spill_cost. The parts of complicated
3515 computations often are either loop invariant or at least can
3516 be shared between several iv uses, so letting this grow without
3517 limits would not give reasonable results. */
3518 if (cost.cost > (int) target_spill_cost [speed])
3519 cost.cost = target_spill_cost [speed];
3521 return cost;
3524 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3525 invariants the computation depends on. */
3527 static comp_cost
3528 force_var_cost (struct ivopts_data *data,
3529 tree expr, bitmap *depends_on)
3531 if (depends_on)
3533 fd_ivopts_data = data;
3534 walk_tree (&expr, find_depends, depends_on, NULL);
3537 return force_expr_to_var_cost (expr, data->speed);
3540 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3541 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3542 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3543 invariants the computation depends on. */
3545 static comp_cost
3546 split_address_cost (struct ivopts_data *data,
3547 tree addr, bool *symbol_present, bool *var_present,
3548 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3550 tree core;
3551 HOST_WIDE_INT bitsize;
3552 HOST_WIDE_INT bitpos;
3553 tree toffset;
3554 enum machine_mode mode;
3555 int unsignedp, volatilep;
3557 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3558 &unsignedp, &volatilep, false);
/* A variable offset, a sub-byte position, or a base that is not a plain
   VAR_DECL means the address cannot be split into symbol + constant;
   treat the whole address as a variable and record its dependences.  */
3560 if (toffset != 0
3561 || bitpos % BITS_PER_UNIT != 0
3562 || TREE_CODE (core) != VAR_DECL)
3564 *symbol_present = false;
3565 *var_present = true;
3566 fd_ivopts_data = data;
3567 walk_tree (&addr, find_depends, depends_on, NULL);
3568 return new_cost (target_spill_cost[data->speed], 0);
3571 *offset += bitpos / BITS_PER_UNIT;
/* Static and external decls contribute a symbol to the address.  */
3572 if (TREE_STATIC (core)
3573 || DECL_EXTERNAL (core))
3575 *symbol_present = true;
3576 *var_present = false;
3577 return zero_cost;
/* Otherwise the base is an ordinary (automatic) variable.  */
3580 *symbol_present = false;
3581 *var_present = true;
3582 return zero_cost;
3585 /* Estimates cost of expressing difference of addresses E1 - E2 as
3586 var + symbol + offset. The value of offset is added to OFFSET,
3587 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3588 part is missing. DEPENDS_ON is a set of the invariants the computation
3589 depends on. */
3591 static comp_cost
3592 ptr_difference_cost (struct ivopts_data *data,
3593 tree e1, tree e2, bool *symbol_present, bool *var_present,
3594 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3596 HOST_WIDE_INT diff = 0;
3597 aff_tree aff_e1, aff_e2;
3598 tree type;
3600 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
/* If the difference folds to a compile-time constant, it is pure offset.  */
3602 if (ptr_difference_const (e1, e2, &diff))
3604 *offset += diff;
3605 *symbol_present = false;
3606 *var_present = false;
3607 return zero_cost;
/* E1 - 0: just the cost of decomposing the address E1 itself.  */
3610 if (integer_zerop (e2))
3611 return split_address_cost (data, TREE_OPERAND (e1, 0),
3612 symbol_present, var_present, offset, depends_on);
3614 *symbol_present = false;
3615 *var_present = true;
/* General case: build E1 - E2 as an affine combination in a signed type
   and price forcing the result into a variable.  */
3617 type = signed_type_for (TREE_TYPE (e1));
3618 tree_to_aff_combination (e1, type, &aff_e1);
3619 tree_to_aff_combination (e2, type, &aff_e2);
3620 aff_combination_scale (&aff_e2, double_int_minus_one);
3621 aff_combination_add (&aff_e1, &aff_e2);
3623 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3626 /* Estimates cost of expressing difference E1 - E2 as
3627 var + symbol + offset. The value of offset is added to OFFSET,
3628 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3629 part is missing. DEPENDS_ON is a set of the invariants the computation
3630 depends on. */
3632 static comp_cost
3633 difference_cost (struct ivopts_data *data,
3634 tree e1, tree e2, bool *symbol_present, bool *var_present,
3635 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3637 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3638 unsigned HOST_WIDE_INT off1, off2;
3639 aff_tree aff_e1, aff_e2;
3640 tree type;
3642 e1 = strip_offset (e1, &off1);
3643 e2 = strip_offset (e2, &off2);
3644 *offset += off1 - off2;
3646 STRIP_NOPS (e1);
3647 STRIP_NOPS (e2);
3649 if (TREE_CODE (e1) == ADDR_EXPR)
3650 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3651 offset, depends_on);
3652 *symbol_present = false;
3654 if (operand_equal_p (e1, e2, 0))
3656 *var_present = false;
3657 return zero_cost;
3660 *var_present = true;
3662 if (integer_zerop (e2))
3663 return force_var_cost (data, e1, depends_on);
3665 if (integer_zerop (e1))
3667 comp_cost cost = force_var_cost (data, e2, depends_on);
3668 cost.cost += multiply_by_cost (-1, mode, data->speed);
3669 return cost;
3672 type = signed_type_for (TREE_TYPE (e1));
3673 tree_to_aff_combination (e1, type, &aff_e1);
3674 tree_to_aff_combination (e2, type, &aff_e2);
3675 aff_combination_scale (&aff_e2, double_int_minus_one);
3676 aff_combination_add (&aff_e1, &aff_e2);
3678 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3681 /* Determines the cost of the computation by that USE is expressed
3682 from induction variable CAND. If ADDRESS_P is true, we just need
3683 to create an address from it, otherwise we want to get it into
3684 register. A set of invariants we depend on is stored in
3685 DEPENDS_ON. AT is the statement at that the value is computed.
3686 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3687 addressing is likely. */
3689 static comp_cost
3690 get_computation_cost_at (struct ivopts_data *data,
3691 struct iv_use *use, struct iv_cand *cand,
3692 bool address_p, bitmap *depends_on, gimple at,
3693 bool *can_autoinc)
3695 tree ubase = use->iv->base, ustep = use->iv->step;
3696 tree cbase, cstep;
3697 tree utype = TREE_TYPE (ubase), ctype;
3698 unsigned HOST_WIDE_INT cstepi, offset = 0;
3699 HOST_WIDE_INT ratio, aratio;
3700 bool var_present, symbol_present, stmt_is_after_inc;
3701 comp_cost cost;
3702 double_int rat;
3703 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3705 *depends_on = NULL;
3707 /* Only consider real candidates. */
3708 if (!cand->iv)
3709 return infinite_cost;
3711 cbase = cand->iv->base;
3712 cstep = cand->iv->step;
3713 ctype = TREE_TYPE (cbase);
3715 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3717 /* We do not have a precision to express the values of use. */
3718 return infinite_cost;
3721 if (address_p)
3723 /* Do not try to express address of an object with computation based
3724 on address of a different object. This may cause problems in rtl
3725 level alias analysis (that does not expect this to be happening,
3726 as this is illegal in C), and would be unlikely to be useful
3727 anyway. */
3728 if (use->iv->base_object
3729 && cand->iv->base_object
3730 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3731 return infinite_cost;
3734 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3736 /* TODO -- add direct handling of this case. */
3737 goto fallback;
3740 /* CSTEPI is removed from the offset in case statement is after the
3741 increment. If the step is not constant, we use zero instead.
3742 This is a bit imprecise (there is the extra addition), but
3743 redundancy elimination is likely to transform the code so that
3744 it uses value of the variable before increment anyway,
3745 so it is not that much unrealistic. */
3746 if (cst_and_fits_in_hwi (cstep))
3747 cstepi = int_cst_value (cstep);
3748 else
3749 cstepi = 0;
3751 if (!constant_multiple_of (ustep, cstep, &rat))
3752 return infinite_cost;
3754 if (double_int_fits_in_shwi_p (rat))
3755 ratio = double_int_to_shwi (rat);
3756 else
3757 return infinite_cost;
3759 STRIP_NOPS (cbase);
3760 ctype = TREE_TYPE (cbase);
3762 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3763 or ratio == 1, it is better to handle this like
3765 ubase - ratio * cbase + ratio * var
3767 (also holds in the case ratio == -1, TODO. */
3769 if (cst_and_fits_in_hwi (cbase))
3771 offset = - ratio * int_cst_value (cbase);
3772 cost = difference_cost (data,
3773 ubase, build_int_cst (utype, 0),
3774 &symbol_present, &var_present, &offset,
3775 depends_on);
3777 else if (ratio == 1)
3779 cost = difference_cost (data,
3780 ubase, cbase,
3781 &symbol_present, &var_present, &offset,
3782 depends_on);
3784 else if (address_p
3785 && !POINTER_TYPE_P (ctype)
3786 && multiplier_allowed_in_address_p
3787 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3788 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3790 cbase
3791 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3792 cost = difference_cost (data,
3793 ubase, cbase,
3794 &symbol_present, &var_present, &offset,
3795 depends_on);
3797 else
3799 cost = force_var_cost (data, cbase, depends_on);
3800 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3801 cost = add_costs (cost,
3802 difference_cost (data,
3803 ubase, build_int_cst (utype, 0),
3804 &symbol_present, &var_present,
3805 &offset, depends_on));
3808 /* If we are after the increment, the value of the candidate is higher by
3809 one iteration. */
3810 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3811 if (stmt_is_after_inc)
3812 offset -= ratio * cstepi;
3814 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3815 (symbol/var1/const parts may be omitted). If we are looking for an
3816 address, find the cost of addressing this. */
3817 if (address_p)
3818 return add_costs (cost,
3819 get_address_cost (symbol_present, var_present,
3820 offset, ratio, cstepi,
3821 TYPE_MODE (TREE_TYPE (utype)),
3822 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3823 speed, stmt_is_after_inc,
3824 can_autoinc));
3826 /* Otherwise estimate the costs for computing the expression. */
3827 if (!symbol_present && !var_present && !offset)
3829 if (ratio != 1)
3830 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3831 return cost;
3834 /* Symbol + offset should be compile-time computable so consider that they
3835 are added once to the variable, if present. */
3836 if (var_present && (symbol_present || offset))
3837 cost.cost += add_cost (TYPE_MODE (ctype), speed)
3838 / AVG_LOOP_NITER (data->current_loop);
3840 /* Having offset does not affect runtime cost in case it is added to
3841 symbol, but it increases complexity. */
3842 if (offset)
3843 cost.complexity++;
3845 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3847 aratio = ratio > 0 ? ratio : -ratio;
3848 if (aratio != 1)
3849 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3851 fallback:
3852 if (can_autoinc)
3853 *can_autoinc = false;
3856 /* Just get the expression, expand it and measure the cost. */
3857 tree comp = get_computation_at (data->current_loop, use, cand, at);
3859 if (!comp)
3860 return infinite_cost;
3862 if (address_p)
3863 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3865 return new_cost (computation_cost (comp, speed), 0);
3869 /* Determines the cost of the computation by that USE is expressed
3870 from induction variable CAND. If ADDRESS_P is true, we just need
3871 to create an address from it, otherwise we want to get it into
3872 register. A set of invariants we depend on is stored in
3873 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3874 autoinc addressing is likely. */
3876 static comp_cost
3877 get_computation_cost (struct ivopts_data *data,
3878 struct iv_use *use, struct iv_cand *cand,
3879 bool address_p, bitmap *depends_on, bool *can_autoinc)
3881 return get_computation_cost_at (data,
3882 use, cand, address_p, depends_on, use->stmt,
3883 can_autoinc);
3886 /* Determines cost of basing replacement of USE on CAND in a generic
3887 expression. */
3889 static bool
3890 determine_use_iv_cost_generic (struct ivopts_data *data,
3891 struct iv_use *use, struct iv_cand *cand)
3893 bitmap depends_on;
3894 comp_cost cost;
3896 /* The simple case first -- if we need to express value of the preserved
3897 original biv, the cost is 0. This also prevents us from counting the
3898 cost of increment twice -- once at this use and once in the cost of
3899 the candidate. */
3900 if (cand->pos == IP_ORIGINAL
3901 && cand->incremented_at == use->stmt)
3903 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3904 return true;
3907 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3908 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3910 return !infinite_cost_p (cost);
3913 /* Determines cost of basing replacement of USE on CAND in an address. */
3915 static bool
3916 determine_use_iv_cost_address (struct ivopts_data *data,
3917 struct iv_use *use, struct iv_cand *cand)
3919 bitmap depends_on;
3920 bool can_autoinc;
3921 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3922 &can_autoinc);
3924 if (cand->ainc_use == use)
3926 if (can_autoinc)
3927 cost.cost -= cand->cost_step;
3928 /* If we generated the candidate solely for exploiting autoincrement
3929 opportunities, and it turns out it can't be used, set the cost to
3930 infinity to make sure we ignore it. */
3931 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3932 cost = infinite_cost;
3934 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3936 return !infinite_cost_p (cost);
3939 /* Computes value of candidate CAND at position AT in iteration NITER, and
3940 stores it to VAL. */
3942 static void
3943 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3944 aff_tree *val)
3946 aff_tree step, delta, nit;
3947 struct iv *iv = cand->iv;
3948 tree type = TREE_TYPE (iv->base);
3949 tree steptype = type;
3950 if (POINTER_TYPE_P (type))
3951 steptype = sizetype;
3953 tree_to_aff_combination (iv->step, steptype, &step);
3954 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3955 aff_combination_convert (&nit, steptype);
3956 aff_combination_mult (&nit, &step, &delta);
3957 if (stmt_after_increment (loop, cand, at))
3958 aff_combination_add (&delta, &step);
3960 tree_to_aff_combination (iv->base, type, val);
3961 aff_combination_add (val, &delta);
3964 /* Returns period of induction variable iv. */
3966 static tree
3967 iv_period (struct iv *iv)
3969 tree step = iv->step, period, type;
3970 tree pow2div;
3972 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3974 /* Period of the iv is gcd (step, type range). Since type range is power
3975 of two, it suffices to determine the maximum power of two that divides
3976 step. */
3977 pow2div = num_ending_zeros (step);
3978 type = unsigned_type_for (TREE_TYPE (step));
3980 period = build_low_bits_mask (type,
3981 (TYPE_PRECISION (type)
3982 - tree_low_cst (pow2div, 1)));
3984 return period;
3987 /* Returns the comparison operator used when eliminating the iv USE. */
3989 static enum tree_code
3990 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3992 struct loop *loop = data->current_loop;
3993 basic_block ex_bb;
3994 edge exit;
3996 ex_bb = gimple_bb (use->stmt);
3997 exit = EDGE_SUCC (ex_bb, 0);
3998 if (flow_bb_inside_loop_p (loop, exit->dest))
3999 exit = EDGE_SUCC (ex_bb, 1);
4001 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4004 /* Check whether it is possible to express the condition in USE by comparison
4005 of candidate CAND. If so, store the value compared with to BOUND. */
4007 static bool
4008 may_eliminate_iv (struct ivopts_data *data,
4009 struct iv_use *use, struct iv_cand *cand, tree *bound)
4011 basic_block ex_bb;
4012 edge exit;
4013 tree nit, period;
4014 struct loop *loop = data->current_loop;
4015 aff_tree bnd;
4017 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4018 return false;
4020 /* For now works only for exits that dominate the loop latch.
4021 TODO: extend to other conditions inside loop body. */
4022 ex_bb = gimple_bb (use->stmt);
4023 if (use->stmt != last_stmt (ex_bb)
4024 || gimple_code (use->stmt) != GIMPLE_COND
4025 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4026 return false;
4028 exit = EDGE_SUCC (ex_bb, 0);
4029 if (flow_bb_inside_loop_p (loop, exit->dest))
4030 exit = EDGE_SUCC (ex_bb, 1);
4031 if (flow_bb_inside_loop_p (loop, exit->dest))
4032 return false;
4034 nit = niter_for_exit (data, exit);
4035 if (!nit)
4036 return false;
4038 /* Determine whether we can use the variable to test the exit condition.
4039 This is the case iff the period of the induction variable is greater
4040 than the number of iterations for which the exit condition is true. */
4041 period = iv_period (cand->iv);
4043 /* If the number of iterations is constant, compare against it directly. */
4044 if (TREE_CODE (nit) == INTEGER_CST)
4046 if (!tree_int_cst_lt (nit, period))
4047 return false;
4050 /* If not, and if this is the only possible exit of the loop, see whether
4051 we can get a conservative estimate on the number of iterations of the
4052 entire loop and compare against that instead. */
4053 else if (loop_only_exit_p (loop, exit))
4055 double_int period_value, max_niter;
4056 if (!estimated_loop_iterations (loop, true, &max_niter))
4057 return false;
4058 period_value = tree_to_double_int (period);
4059 if (double_int_ucmp (max_niter, period_value) >= 0)
4060 return false;
4063 /* Otherwise, punt. */
4064 else
4065 return false;
4067 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4069 *bound = aff_combination_to_tree (&bnd);
4070 /* It is unlikely that computing the number of iterations using division
4071 would be more profitable than keeping the original induction variable. */
4072 if (expression_expensive_p (*bound))
4073 return false;
4074 return true;
4077 /* Determines cost of basing replacement of USE on CAND in a condition. */
4079 static bool
4080 determine_use_iv_cost_condition (struct ivopts_data *data,
4081 struct iv_use *use, struct iv_cand *cand)
4083 tree bound = NULL_TREE;
4084 struct iv *cmp_iv;
4085 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4086 comp_cost elim_cost, express_cost, cost;
4087 bool ok;
4089 /* Only consider real candidates. */
4090 if (!cand->iv)
4092 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4093 return false;
4096 /* Try iv elimination. */
4097 if (may_eliminate_iv (data, use, cand, &bound))
4099 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4100 /* The bound is a loop invariant, so it will be only computed
4101 once. */
4102 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
4104 else
4105 elim_cost = infinite_cost;
4107 /* Try expressing the original giv. If it is compared with an invariant,
4108 note that we cannot get rid of it. */
4109 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
4110 gcc_assert (ok);
4112 express_cost = get_computation_cost (data, use, cand, false,
4113 &depends_on_express, NULL);
4114 fd_ivopts_data = data;
4115 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4117 /* Choose the better approach, preferring the eliminated IV. */
4118 if (compare_costs (elim_cost, express_cost) <= 0)
4120 cost = elim_cost;
4121 depends_on = depends_on_elim;
4122 depends_on_elim = NULL;
4124 else
4126 cost = express_cost;
4127 depends_on = depends_on_express;
4128 depends_on_express = NULL;
4129 bound = NULL_TREE;
4132 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4134 if (depends_on_elim)
4135 BITMAP_FREE (depends_on_elim);
4136 if (depends_on_express)
4137 BITMAP_FREE (depends_on_express);
4139 return !infinite_cost_p (cost);
4142 /* Determines cost of basing replacement of USE on CAND. Returns false
4143 if USE cannot be based on CAND. */
4145 static bool
4146 determine_use_iv_cost (struct ivopts_data *data,
4147 struct iv_use *use, struct iv_cand *cand)
4149 switch (use->type)
4151 case USE_NONLINEAR_EXPR:
4152 return determine_use_iv_cost_generic (data, use, cand);
4154 case USE_ADDRESS:
4155 return determine_use_iv_cost_address (data, use, cand);
4157 case USE_COMPARE:
4158 return determine_use_iv_cost_condition (data, use, cand);
4160 default:
4161 gcc_unreachable ();
4165 /* Return true if get_computation_cost indicates that autoincrement is
4166 a possibility for the pair of USE and CAND, false otherwise. */
4168 static bool
4169 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4170 struct iv_cand *cand)
4172 bitmap depends_on;
4173 bool can_autoinc;
4174 comp_cost cost;
4176 if (use->type != USE_ADDRESS)
4177 return false;
4179 cost = get_computation_cost (data, use, cand, true, &depends_on,
4180 &can_autoinc);
4182 BITMAP_FREE (depends_on);
4184 return !infinite_cost_p (cost) && can_autoinc;
4187 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4188 use that allows autoincrement, and set their AINC_USE if possible. */
4190 static void
4191 set_autoinc_for_original_candidates (struct ivopts_data *data)
4193 unsigned i, j;
4195 for (i = 0; i < n_iv_cands (data); i++)
4197 struct iv_cand *cand = iv_cand (data, i);
4198 struct iv_use *closest = NULL;
4199 if (cand->pos != IP_ORIGINAL)
4200 continue;
4201 for (j = 0; j < n_iv_uses (data); j++)
4203 struct iv_use *use = iv_use (data, j);
4204 unsigned uid = gimple_uid (use->stmt);
4205 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4206 || uid > gimple_uid (cand->incremented_at))
4207 continue;
4208 if (closest == NULL || uid > gimple_uid (closest->stmt))
4209 closest = use;
4211 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4212 continue;
4213 cand->ainc_use = closest;
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
4237 /* Determines costs of basing the use of the iv on an iv candidate. */
4239 static void
4240 determine_use_iv_costs (struct ivopts_data *data)
4242 unsigned i, j;
4243 struct iv_use *use;
4244 struct iv_cand *cand;
4245 bitmap to_clear = BITMAP_ALLOC (NULL);
4247 alloc_use_cost_map (data);
4249 for (i = 0; i < n_iv_uses (data); i++)
4251 use = iv_use (data, i);
4253 if (data->consider_all_candidates)
4255 for (j = 0; j < n_iv_cands (data); j++)
4257 cand = iv_cand (data, j);
4258 determine_use_iv_cost (data, use, cand);
4261 else
4263 bitmap_iterator bi;
4265 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4267 cand = iv_cand (data, j);
4268 if (!determine_use_iv_cost (data, use, cand))
4269 bitmap_set_bit (to_clear, j);
4272 /* Remove the candidates for that the cost is infinite from
4273 the list of related candidates. */
4274 bitmap_and_compl_into (use->related_cands, to_clear);
4275 bitmap_clear (to_clear);
4279 BITMAP_FREE (to_clear);
4281 if (dump_file && (dump_flags & TDF_DETAILS))
4283 fprintf (dump_file, "Use-candidate costs:\n");
4285 for (i = 0; i < n_iv_uses (data); i++)
4287 use = iv_use (data, i);
4289 fprintf (dump_file, "Use %d:\n", i);
4290 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4291 for (j = 0; j < use->n_map_members; j++)
4293 if (!use->cost_map[j].cand
4294 || infinite_cost_p (use->cost_map[j].cost))
4295 continue;
4297 fprintf (dump_file, " %d\t%d\t%d\t",
4298 use->cost_map[j].cand->id,
4299 use->cost_map[j].cost.cost,
4300 use->cost_map[j].cost.complexity);
4301 if (use->cost_map[j].depends_on)
4302 bitmap_print (dump_file,
4303 use->cost_map[j].depends_on, "","");
4304 fprintf (dump_file, "\n");
4307 fprintf (dump_file, "\n");
4309 fprintf (dump_file, "\n");
4313 /* Determines cost of the candidate CAND. */
4315 static void
4316 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4318 comp_cost cost_base;
4319 unsigned cost, cost_step;
4320 tree base;
4322 if (!cand->iv)
4324 cand->cost = 0;
4325 return;
4328 /* There are two costs associated with the candidate -- its increment
4329 and its initialization. The second is almost negligible for any loop
4330 that rolls enough, so we take it just very little into account. */
4332 base = cand->iv->base;
4333 cost_base = force_var_cost (data, base, NULL);
4334 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4336 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4338 /* Prefer the original ivs unless we may gain something by replacing it.
4339 The reason is to make debugging simpler; so this is not relevant for
4340 artificial ivs created by other optimization passes. */
4341 if (cand->pos != IP_ORIGINAL
4342 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4343 cost++;
4345 /* Prefer not to insert statements into latch unless there are some
4346 already (so that we do not create unnecessary jumps). */
4347 if (cand->pos == IP_END
4348 && empty_block_p (ip_end_pos (data->current_loop)))
4349 cost++;
4351 cand->cost = cost;
4352 cand->cost_step = cost_step;
4355 /* Determines costs of computation of the candidates. */
4357 static void
4358 determine_iv_costs (struct ivopts_data *data)
4360 unsigned i;
4362 if (dump_file && (dump_flags & TDF_DETAILS))
4364 fprintf (dump_file, "Candidate costs:\n");
4365 fprintf (dump_file, " cand\tcost\n");
4368 for (i = 0; i < n_iv_cands (data); i++)
4370 struct iv_cand *cand = iv_cand (data, i);
4372 determine_iv_cost (data, cand);
4374 if (dump_file && (dump_flags & TDF_DETAILS))
4375 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4378 if (dump_file && (dump_flags & TDF_DETAILS))
4379 fprintf (dump_file, "\n");
4382 /* Calculates cost for having SIZE induction variables. */
4384 static unsigned
4385 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4387 /* We add size to the cost, so that we prefer eliminating ivs
4388 if possible. */
4389 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4392 /* For each size of the induction variable set determine the penalty. */
4394 static void
4395 determine_set_costs (struct ivopts_data *data)
4397 unsigned j, n;
4398 gimple phi;
4399 gimple_stmt_iterator psi;
4400 tree op;
4401 struct loop *loop = data->current_loop;
4402 bitmap_iterator bi;
4404 /* We use the following model (definitely improvable, especially the
4405 cost function -- TODO):
4407 We estimate the number of registers available (using MD data), name it A.
4409 We estimate the number of registers used by the loop, name it U. This
4410 number is obtained as the number of loop phi nodes (not counting virtual
4411 registers and bivs) + the number of variables from outside of the loop.
4413 We set a reserve R (free regs that are used for temporary computations,
4414 etc.). For now the reserve is a constant 3.
4416 Let I be the number of induction variables.
4418 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4419 make a lot of ivs without a reason).
4420 -- if A - R < U + I <= A, the cost is I * PRES_COST
4421 -- if U + I > A, the cost is I * PRES_COST and
4422 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4424 if (dump_file && (dump_flags & TDF_DETAILS))
4426 fprintf (dump_file, "Global costs:\n");
4427 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4428 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4429 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4432 n = 0;
4433 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4435 phi = gsi_stmt (psi);
4436 op = PHI_RESULT (phi);
4438 if (!is_gimple_reg (op))
4439 continue;
4441 if (get_iv (data, op))
4442 continue;
4444 n++;
4447 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4449 struct version_info *info = ver_info (data, j);
4451 if (info->inv_id && info->has_nonlin_use)
4452 n++;
4455 data->regs_used = n;
4456 if (dump_file && (dump_flags & TDF_DETAILS))
4457 fprintf (dump_file, " regs_used %d\n", n);
4459 if (dump_file && (dump_flags & TDF_DETAILS))
4461 fprintf (dump_file, " cost for size:\n");
4462 fprintf (dump_file, " ivs\tcost\n");
4463 for (j = 0; j <= 2 * target_avail_regs; j++)
4464 fprintf (dump_file, " %d\t%d\n", j,
4465 ivopts_global_cost_for_size (data, j));
4466 fprintf (dump_file, "\n");
4470 /* Returns true if A is a cheaper cost pair than B. */
4472 static bool
4473 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4475 int cmp;
4477 if (!a)
4478 return false;
4480 if (!b)
4481 return true;
4483 cmp = compare_costs (a->cost, b->cost);
4484 if (cmp < 0)
4485 return true;
4487 if (cmp > 0)
4488 return false;
4490 /* In case the costs are the same, prefer the cheaper candidate. */
4491 if (a->cand->cost < b->cand->cost)
4492 return true;
4494 return false;
4497 /* Computes the cost field of IVS structure. */
4499 static void
4500 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4502 comp_cost cost = ivs->cand_use_cost;
4503 cost.cost += ivs->cand_cost;
4504 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4506 ivs->cost = cost;
4509 /* Remove invariants in set INVS to set IVS. */
4511 static void
4512 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4514 bitmap_iterator bi;
4515 unsigned iid;
4517 if (!invs)
4518 return;
4520 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4522 ivs->n_invariant_uses[iid]--;
4523 if (ivs->n_invariant_uses[iid] == 0)
4524 ivs->n_regs--;
4528 /* Set USE not to be expressed by any candidate in IVS. */
4530 static void
4531 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4532 struct iv_use *use)
4534 unsigned uid = use->id, cid;
4535 struct cost_pair *cp;
4537 cp = ivs->cand_for_use[uid];
4538 if (!cp)
4539 return;
4540 cid = cp->cand->id;
4542 ivs->bad_uses++;
4543 ivs->cand_for_use[uid] = NULL;
4544 ivs->n_cand_uses[cid]--;
4546 if (ivs->n_cand_uses[cid] == 0)
4548 bitmap_clear_bit (ivs->cands, cid);
4549 /* Do not count the pseudocandidates. */
4550 if (cp->cand->iv)
4551 ivs->n_regs--;
4552 ivs->n_cands--;
4553 ivs->cand_cost -= cp->cand->cost;
4555 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4558 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4560 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4561 iv_ca_recount_cost (data, ivs);
4564 /* Add invariants in set INVS to set IVS. */
4566 static void
4567 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4569 bitmap_iterator bi;
4570 unsigned iid;
4572 if (!invs)
4573 return;
4575 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4577 ivs->n_invariant_uses[iid]++;
4578 if (ivs->n_invariant_uses[iid] == 1)
4579 ivs->n_regs++;
4583 /* Set cost pair for USE in set IVS to CP. */
4585 static void
4586 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4587 struct iv_use *use, struct cost_pair *cp)
4589 unsigned uid = use->id, cid;
4591 if (ivs->cand_for_use[uid] == cp)
4592 return;
4594 if (ivs->cand_for_use[uid])
4595 iv_ca_set_no_cp (data, ivs, use);
4597 if (cp)
4599 cid = cp->cand->id;
4601 ivs->bad_uses--;
4602 ivs->cand_for_use[uid] = cp;
4603 ivs->n_cand_uses[cid]++;
4604 if (ivs->n_cand_uses[cid] == 1)
4606 bitmap_set_bit (ivs->cands, cid);
4607 /* Do not count the pseudocandidates. */
4608 if (cp->cand->iv)
4609 ivs->n_regs++;
4610 ivs->n_cands++;
4611 ivs->cand_cost += cp->cand->cost;
4613 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4616 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4617 iv_ca_set_add_invariants (ivs, cp->depends_on);
4618 iv_ca_recount_cost (data, ivs);
4622 /* Extend set IVS by expressing USE by some of the candidates in it
4623 if possible. */
4625 static void
4626 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4627 struct iv_use *use)
4629 struct cost_pair *best_cp = NULL, *cp;
4630 bitmap_iterator bi;
4631 unsigned i;
4633 gcc_assert (ivs->upto >= use->id);
4635 if (ivs->upto == use->id)
4637 ivs->upto++;
4638 ivs->bad_uses++;
4641 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4643 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4645 if (cheaper_cost_pair (cp, best_cp))
4646 best_cp = cp;
4649 iv_ca_set_cp (data, ivs, use, best_cp);
4652 /* Get cost for assignment IVS. */
4654 static comp_cost
4655 iv_ca_cost (struct iv_ca *ivs)
4657 /* This was a conditional expression but it triggered a bug in
4658 Sun C 5.5. */
4659 if (ivs->bad_uses)
4660 return infinite_cost;
4661 else
4662 return ivs->cost;
4665 /* Returns true if all dependences of CP are among invariants in IVS. */
4667 static bool
4668 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4670 unsigned i;
4671 bitmap_iterator bi;
4673 if (!cp->depends_on)
4674 return true;
4676 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4678 if (ivs->n_invariant_uses[i] == 0)
4679 return false;
4682 return true;
4685 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4686 it before NEXT_CHANGE. */
4688 static struct iv_ca_delta *
4689 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4690 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4692 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4694 change->use = use;
4695 change->old_cp = old_cp;
4696 change->new_cp = new_cp;
4697 change->next_change = next_change;
4699 return change;
4702 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4703 are rewritten. */
4705 static struct iv_ca_delta *
4706 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4708 struct iv_ca_delta *last;
4710 if (!l2)
4711 return l1;
4713 if (!l1)
4714 return l2;
4716 for (last = l1; last->next_change; last = last->next_change)
4717 continue;
4718 last->next_change = l2;
4720 return l1;
4723 /* Returns candidate by that USE is expressed in IVS. */
4725 static struct cost_pair *
4726 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4728 return ivs->cand_for_use[use->id];
4731 /* Reverse the list of changes DELTA, forming the inverse to it. */
4733 static struct iv_ca_delta *
4734 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4736 struct iv_ca_delta *act, *next, *prev = NULL;
4737 struct cost_pair *tmp;
4739 for (act = delta; act; act = next)
4741 next = act->next_change;
4742 act->next_change = prev;
4743 prev = act;
4745 tmp = act->old_cp;
4746 act->old_cp = act->new_cp;
4747 act->new_cp = tmp;
4750 return prev;
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  /* To revert, walk the inverse delta: old/new pairs swapped and the list
     order reversed (iv_ca_delta_reverse does both).  */
  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      /* Each change must start from the currently assigned cost pair,
	 otherwise the delta does not apply to this state of IVS.  */
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  /* Restore DELTA to its original order for the caller.  */
  if (!forward)
    iv_ca_delta_reverse (delta);
}
4778 /* Returns true if CAND is used in IVS. */
4780 static bool
4781 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4783 return ivs->n_cand_uses[cand->id] > 0;
4786 /* Returns number of induction variable candidates in the set IVS. */
4788 static unsigned
4789 iv_ca_n_cands (struct iv_ca *ivs)
4791 return ivs->n_cands;
4794 /* Free the list of changes DELTA. */
4796 static void
4797 iv_ca_delta_free (struct iv_ca_delta **delta)
4799 struct iv_ca_delta *act, *next;
4801 for (act = *delta; act; act = next)
4803 next = act->next_change;
4804 free (act);
4807 *delta = NULL;
4810 /* Allocates new iv candidates assignment. */
4812 static struct iv_ca *
4813 iv_ca_new (struct ivopts_data *data)
4815 struct iv_ca *nw = XNEW (struct iv_ca);
4817 nw->upto = 0;
4818 nw->bad_uses = 0;
4819 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4820 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4821 nw->cands = BITMAP_ALLOC (NULL);
4822 nw->n_cands = 0;
4823 nw->n_regs = 0;
4824 nw->cand_use_cost = zero_cost;
4825 nw->cand_cost = 0;
4826 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4827 nw->cost = zero_cost;
4829 return nw;
4832 /* Free memory occupied by the set IVS. */
4834 static void
4835 iv_ca_free (struct iv_ca **ivs)
4837 free ((*ivs)->cand_for_use);
4838 free ((*ivs)->n_cand_uses);
4839 BITMAP_FREE ((*ivs)->cands);
4840 free ((*ivs)->n_invariant_uses);
4841 free (*ivs);
4842 *ivs = NULL;
4845 /* Dumps IVS to FILE. */
4847 static void
4848 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4850 const char *pref = " invariants ";
4851 unsigned i;
4852 comp_cost cost = iv_ca_cost (ivs);
4854 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4855 bitmap_print (file, ivs->cands, " candidates ","\n");
4857 for (i = 1; i <= data->max_inv_id; i++)
4858 if (ivs->n_invariant_uses[i])
4860 fprintf (file, "%s%d", pref, i);
4861 pref = ", ";
4863 fprintf (file, "\n");
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      /* Nothing to change if the use is already expressed by CAND.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      /* Skip uses that CAND cannot express at all.  */
      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      /* Record the change only when it improves the cost for this use.  */
      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Apply DELTA temporarily to measure the resulting cost, then roll it
     back; the caller decides whether to commit it for real.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      /* Only the uses currently expressed by CAND need a replacement.  */
      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  /* Search all other candidates in the set for the cheapest
	     feasible replacement.  */
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  /* Only the candidates related to the use have entries in its
	     cost map; restrict the search to those also in the set.  */
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      /* CAND cannot be removed if some use has no replacement at all.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the cost of the narrowed set, then roll the changes back;
     the caller decides whether to commit DELTA.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Find the single candidate whose removal improves the cost the most.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No removal helped; the set is already locally optimal.  */
  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  /* Chain this round's removal in front of the recursive ones so that a
     single commit of *DELTA reproduces the whole pruning.  */
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  Returns false if USE cannot be expressed by
   any candidate at finite cost.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      /* Keep the assignment chosen by iv_ca_add_use as the baseline delta,
	 then detach it so the alternatives below start from a clean state.  */
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      /* Tentatively assign CAND to USE, measure the cost of the extended
	 set, then restore the previous state.  */
      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      /* No important candidate worked; fall back to every candidate recorded
	 in the cost map of USE.  */
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important && cand->iv->base_object == NULL_TREE)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  /* Commit the cheapest alternative found (possibly the baseline).  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  Returns NULL (and
   releases the partially built set) when some use cannot be expressed.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      if (!try_add_cand_for (data, ivs, iv_use (data, i)))
	{
	  iv_ca_free (&ivs);
	  return NULL;
	}
    }

  return ivs;
}
/* Tries to improve set of induction variables IVS.  Returns true if IVS
   was changed (and its cost improved), false if no single change helps.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  /* Prune on the temporarily extended set, then merge the pruning
	     delta behind the extension delta.  */
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  /* Commit the improvement; the committed cost must match the one
     measured when the delta was constructed.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
5226 /* Attempts to find the optimal set of induction variables. We do simple
5227 greedy heuristic -- we try to replace at most one candidate in the selected
5228 solution and remove the unused ivs while this improves the cost. */
5230 static struct iv_ca *
5231 find_optimal_iv_set (struct ivopts_data *data)
5233 unsigned i;
5234 struct iv_ca *set;
5235 struct iv_use *use;
5237 /* Get the initial solution. */
5238 set = get_initial_solution (data);
5239 if (!set)
5241 if (dump_file && (dump_flags & TDF_DETAILS))
5242 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5243 return NULL;
5246 if (dump_file && (dump_flags & TDF_DETAILS))
5248 fprintf (dump_file, "Initial set of candidates:\n");
5249 iv_ca_dump (data, dump_file, set);
5252 while (try_improve_iv_set (data, set))
5254 if (dump_file && (dump_flags & TDF_DETAILS))
5256 fprintf (dump_file, "Improved to:\n");
5257 iv_ca_dump (data, dump_file, set);
5261 if (dump_file && (dump_flags & TDF_DETAILS))
5263 comp_cost cost = iv_ca_cost (set);
5264 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5267 for (i = 0; i < n_iv_uses (data); i++)
5269 use = iv_use (data, i);
5270 use->selected = iv_ca_cand_for_use (set, use)->cand;
5273 return set;
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  /* Determine where the increment statement should be emitted.  */
  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      /* The original biv already exists; nothing to create.  */
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
5327 /* Creates new induction variables described in SET. */
5329 static void
5330 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5332 unsigned i;
5333 struct iv_cand *cand;
5334 bitmap_iterator bi;
5336 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5338 cand = iv_cand (data, i);
5339 create_new_iv (data, cand);
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      /* A negated step means the increment is really a decrement.  */
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
	  || old_code == MINUS_EXPR
	  || old_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (old_code != MINUS_EXPR
		   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      /* The increment is safe to keep as-is when the other operand is a
	 constant or the step itself.  */
      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  /* Find the target of the assignment and where to insert the new
     computation.  */
  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
				 true, GSI_SAME_STMT);

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      /* Replace the PHI node by an ordinary assignment of the computed
	 value at the start of its block.  */
      ass = gimple_build_assign (tgt, op);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, op);
      use->stmt = gsi_stmt (bsi);
    }
}
5456 /* Replaces ssa name in index IDX by its basic variable. Callback for
5457 for_each_index. */
5459 static bool
5460 idx_remove_ssa_names (tree base, tree *idx,
5461 void *data ATTRIBUTE_UNUSED)
5463 tree *op;
5465 if (TREE_CODE (*idx) == SSA_NAME)
5466 *idx = SSA_NAME_VAR (*idx);
5468 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5470 op = &TREE_OPERAND (base, 2);
5471 if (*op
5472 && TREE_CODE (*op) == SSA_NAME)
5473 *op = SSA_NAME_VAR (*op);
5474 op = &TREE_OPERAND (base, 3);
5475 if (*op
5476 && TREE_CODE (*op) == SSA_NAME)
5477 *op = SSA_NAME_VAR (*op);
5480 return true;
5483 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5485 static tree
5486 unshare_and_remove_ssa_names (tree ref)
5488 ref = unshare_expr (ref);
5489 for_each_index (&ref, idx_remove_ssa_names, NULL);
5491 return ref;
/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    /* NOTE(review): this writes TMR_ORIGINAL, so NEW_REF is presumably
       always a TARGET_MEM_REF here (the callers pass the result of
       create_mem_ref) -- confirm.  A cleaned-up copy of the original
       reference is kept, with ssa names stripped to their variables.  */
    TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref;
  bool ok;

  /* Express the address as an affine combination of CAND.  */
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  /* Build the new memory reference and substitute it for the original.  */
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
			data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      /* A nonnull bound means iv elimination succeeded: the condition can
	 be rewritten as a direct comparison of CAND against BOUND.  */
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      /* The computation of BOUND is loop invariant; emit any statements
	 it needs in the loop preheader.  */
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
5587 /* Rewrites USE using candidate CAND. */
5589 static void
5590 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5592 switch (use->type)
5594 case USE_NONLINEAR_EXPR:
5595 rewrite_use_nonlinear_expr (data, use, cand);
5596 break;
5598 case USE_ADDRESS:
5599 rewrite_use_address (data, use, cand);
5600 break;
5602 case USE_COMPARE:
5603 rewrite_use_compare (data, use, cand);
5604 break;
5606 default:
5607 gcc_unreachable ();
5610 update_stmt (use->stmt);
5613 /* Rewrite the uses using the selected induction variables. */
5615 static void
5616 rewrite_uses (struct ivopts_data *data)
5618 unsigned i;
5619 struct iv_cand *cand;
5620 struct iv_use *use;
5622 for (i = 0; i < n_iv_uses (data); i++)
5624 use = iv_use (data, i);
5625 cand = use->selected;
5626 gcc_assert (cand);
5628 rewrite_use (data, use, cand);
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      /* Remove only real ivs (nonzero step) that are neither invariants,
	 nor still needed by some use, nor explicitly preserved bivs.  */
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  /* Reset the per-ssa-name version information.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
	free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  /* Release the uses together with their cost maps and dependence
     bitmaps.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  /* Release the candidates.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
	free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  /* Grow the version info array if new ssa names appeared meanwhile.  */
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  /* Forget the RTL that was assigned to decls during cost estimation.  */
  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  /* Release the per-loop data first; it empties the use and candidate
     vectors that are freed below.  */
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;
  basic_block *body;

  /* Per-loop data from the previous loop must have been released.  */
  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  /* Per-loop data is released on every path, including early bail-outs.  */
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}