/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The
	 cost reflects the time spent on modifying the induction variable's
	 value to be usable for the given purpose (adding base and offset
	 for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
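
/* To illustrate (a sketch, not part of the pass itself): on a target with
   cheap pointer arithmetic, the address computation in

     for (i = 0; i < n; i++)
       a[i] = 0;

   which needs a + 4 * i on each iteration (assuming 4-byte elements), can
   be strength-reduced to a pointer induction variable that is only
   incremented:

     for (p = a; p < a + n; p++)
       *p = 0;

   Whether such a transformation is profitable is decided by the cost
   model described above.  */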
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
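
/* Since the trip count is guessed rather than measured, costs paid once
   per loop (e.g. computing an iv's initial value in the preheader) can be
   amortized by this factor when traded against costs paid on every
   iteration: a one-time cost of 5 units then weighs roughly the same as a
   per-iteration cost of 1 unit.  */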
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
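
/* A minimal sketch of how such costs are meant to combine (the actual
   helpers appear later in the file): additions saturate at the infinite
   cost, and comparisons use the runtime cost first, with complexity as a
   tie-breaker, roughly

     if (cost1.cost != cost2.cost)
       return cost1.cost - cost2.cost;
     return cost1.complexity - cost2.complexity;
*/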
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
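
/* For instance, in a loop whose exit test sits in the header (the usual
   shape of a `for' loop), the header is executed exactly once per
   iteration, so its single exit qualifies; an exit inside one branch of a
   conditional in the loop body is not executed on every iteration and is
   rejected.  */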
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the tree describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && integer_zerop (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	niter = desc.niter;
      else
	niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns the tree describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
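
/* For example, determine_base_object (&a[i]) returns &a, and for
   (char *) p + 4 the function recurses through the conversion and the
   POINTER_PLUS_EXPR down to the pointer p itself.  An integer constant of
   pointer type has no base object, so NULL_TREE is returned for it.  */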
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
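
/* A biv found here corresponds to a PHI node in the loop header such as

     i_1 = PHI <0 (preheader), i_2 (latch)>
     ...
     i_2 = i_1 + 1;

   for which the iv recorded for i_1 has base 0 and step 1.  */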
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
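
/* For example, for a reference a[i] with 4-byte elements, where i is an iv
   with step 1, the index is replaced by the initial value of i and
   4 * 1 = 4 is accumulated into DTA->step: the address advances by four
   bytes per iteration.  */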
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
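
/* For example, TOP = n_1 * 8 with BOT = n_1 yields cst = 8 through the
   MULT_EXPR case, and TOP = 12 with BOT = 4 yields cst = 3 through the
   INTEGER_CST case; TOP = 13 with BOT = 4 fails because the division
   leaves a remainder.  */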
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
			       tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    {
	      tree tem = gimple_fold_indirect_ref (TREE_OPERAND (*ref, 0));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
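
/* For example, stripping p + 16 yields p with *OFFSET = 16, and stripping
   &a[4] (assuming 4-byte elements) yields &a with *OFFSET = 16;
   expressions without a constant offset are returned unchanged with
   *OFFSET = 0.  */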
2052 /* Returns variant of TYPE that can be used as base for different uses.
2053 We return unsigned type with the same precision, which avoids problems
2054 with overflows. */
2056 static tree
2057 generic_type_for (tree type)
2059 if (POINTER_TYPE_P (type))
2060 return unsigned_type_for (type);
2062 if (TYPE_UNSIGNED (type))
2063 return type;
2065 return unsigned_type_for (type);
2068 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2069 the bitmap to that we should store it. */
2071 static struct ivopts_data *fd_ivopts_data;
2072 static tree
2073 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2075 bitmap *depends_on = (bitmap *) data;
2076 struct version_info *info;
2078 if (TREE_CODE (*expr_p) != SSA_NAME)
2079 return NULL_TREE;
2080 info = name_info (fd_ivopts_data, *expr_p);
2082 if (!info->inv_id || info->has_nonlin_use)
2083 return NULL_TREE;
2085 if (!*depends_on)
2086 *depends_on = BITMAP_ALLOC (NULL);
2087 bitmap_set_bit (*depends_on, info->inv_id);
2089 return NULL_TREE;
2092 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2093 position to POS. If USE is not NULL, the candidate is set as related to
2094 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2095 replacement of the final value of the iv by a direct computation. */
2097 static struct iv_cand *
2098 add_candidate_1 (struct ivopts_data *data,
2099 tree base, tree step, bool important, enum iv_position pos,
2100 struct iv_use *use, gimple incremented_at)
2102 unsigned i;
2103 struct iv_cand *cand = NULL;
2104 tree type, orig_type;
2106 if (base)
2108 orig_type = TREE_TYPE (base);
2109 type = generic_type_for (orig_type);
2110 if (type != orig_type)
2112 base = fold_convert (type, base);
2113 step = fold_convert (type, step);
2117 for (i = 0; i < n_iv_cands (data); i++)
2119 cand = iv_cand (data, i);
2121 if (cand->pos != pos)
2122 continue;
2124 if (cand->incremented_at != incremented_at
2125 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2126 && cand->ainc_use != use))
2127 continue;
2129 if (!cand->iv)
2131 if (!base && !step)
2132 break;
2134 continue;
2137 if (!base && !step)
2138 continue;
2140 if (operand_equal_p (base, cand->iv->base, 0)
2141 && operand_equal_p (step, cand->iv->step, 0))
2142 break;
2145 if (i == n_iv_cands (data))
2147 cand = XCNEW (struct iv_cand);
2148 cand->id = i;
2150 if (!base && !step)
2151 cand->iv = NULL;
2152 else
2153 cand->iv = alloc_iv (base, step);
2155 cand->pos = pos;
2156 if (pos != IP_ORIGINAL && cand->iv)
2158 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2159 cand->var_after = cand->var_before;
2161 cand->important = important;
2162 cand->incremented_at = incremented_at;
2163 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2165 if (step
2166 && TREE_CODE (step) != INTEGER_CST)
2168 fd_ivopts_data = data;
2169 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2172 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2173 cand->ainc_use = use;
2174 else
2175 cand->ainc_use = NULL;
2177 if (dump_file && (dump_flags & TDF_DETAILS))
2178 dump_cand (dump_file, cand);
2181 if (important && !cand->important)
2183 cand->important = true;
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2188 if (use)
2190 bitmap_set_bit (use->related_cands, i);
2191 if (dump_file && (dump_flags & TDF_DETAILS))
2192 fprintf (dump_file, "Candidate %d is related to use %d\n",
2193 cand->id, use->id);
2196 return cand;
2199 /* Returns true if incrementing the induction variable at the end of the LOOP
2200 is allowed.
2202 The purpose is to avoid splitting latch edge with a biv increment, thus
2203 creating a jump, possibly confusing other optimization passes and leaving
2204 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2205 is not available (so we do not have a better alternative), or if the latch
2206 edge is already nonempty. */
2208 static bool
2209 allow_ip_end_pos_p (struct loop *loop)
2211 if (!ip_normal_pos (loop))
2212 return true;
2214 if (!empty_block_p (ip_end_pos (loop)))
2215 return true;
2217 return false;
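/* As a sketch: when the latch block already contains statements (for
   instance because some path through the loop body computes a value
   there), inserting the biv increment at IP_END adds no new jump, so it
   is allowed; with an empty latch we prefer IP_NORMAL to keep the latch
   edge free of code.  */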
2220 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2221 Important field is set to IMPORTANT. */
2223 static void
2224 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2225 bool important, struct iv_use *use)
2227 basic_block use_bb = gimple_bb (use->stmt);
2228 enum machine_mode mem_mode;
2229 unsigned HOST_WIDE_INT cstepi;
2231 /* If we insert the increment in any position other than the standard
2232 ones, we must ensure that it is incremented once per iteration.
2233 It must not be in an inner nested loop, or on one side of an if
2234 statement. */
2235 if (use_bb->loop_father != data->current_loop
2236 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2237 || stmt_could_throw_p (use->stmt)
2238 || !cst_and_fits_in_hwi (step))
2239 return;
2241 cstepi = int_cst_value (step);
2243 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2244 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2245 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2247 enum tree_code code = MINUS_EXPR;
2248 tree new_base;
2249 tree new_step = step;
2251 if (POINTER_TYPE_P (TREE_TYPE (base)))
2253 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2254 code = POINTER_PLUS_EXPR;
2256 else
2257 new_step = fold_convert (TREE_TYPE (base), new_step);
2258 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2259 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2260 use->stmt);
2262 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2263 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2265 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2266 use->stmt);
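/* Hypothetical example: on a target with HAVE_POST_INCREMENT, a use *p_1
   with step 4 accessing SImode memory (GET_MODE_SIZE == 4 == cstepi) gets
   an IP_AFTER_USE candidate above, so the access can later be rewritten
   using a post-increment address, like *p_1++, instead of a separate
   addition.  */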
2270 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2271 position to POS. If USE is not NULL, the candidate is set as related to
2272 it. The candidate computation is scheduled at all available positions. */
2274 static void
2275 add_candidate (struct ivopts_data *data,
2276 tree base, tree step, bool important, struct iv_use *use)
2278 if (ip_normal_pos (data->current_loop))
2279 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2280 if (ip_end_pos (data->current_loop)
2281 && allow_ip_end_pos_p (data->current_loop))
2282 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2284 if (use != NULL && use->type == USE_ADDRESS)
2285 add_autoinc_candidates (data, base, step, important, use);
2288 /* Add a standard "0 + 1 * iteration" iv candidate for a
2289 type with SIZE bits. */
2291 static void
2292 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2293 unsigned int size)
2295 tree type = lang_hooks.types.type_for_size (size, true);
2296 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2297 true, NULL);
2300 /* Adds standard iv candidates. */
2302 static void
2303 add_standard_iv_candidates (struct ivopts_data *data)
2305 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2307 /* The same for a double-integer type if it is still fast enough. */
2308 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2309 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
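/* For example, on a target with 32-bit int and 64-bit words, this adds
   the "0 + 1 * iteration" candidate in both a 32-bit and a 64-bit
   unsigned type.  */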
2313 /* Adds candidates based on the old induction variable IV. */
2315 static void
2316 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2318 gimple phi;
2319 tree def;
2320 struct iv_cand *cand;
2322 add_candidate (data, iv->base, iv->step, true, NULL);
2324 /* The same, but with initial value zero. */
2325 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2326 add_candidate (data, size_int (0), iv->step, true, NULL);
2327 else
2328 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2329 iv->step, true, NULL);
2331 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2332 if (gimple_code (phi) == GIMPLE_PHI)
2334 /* Additionally record the possibility of leaving the original iv
2335 untouched. */
2336 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2337 cand = add_candidate_1 (data,
2338 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2339 SSA_NAME_DEF_STMT (def));
2340 cand->var_before = iv->ssa_name;
2341 cand->var_after = def;
2345 /* Adds candidates based on the old induction variables. */
2347 static void
2348 add_old_ivs_candidates (struct ivopts_data *data)
2350 unsigned i;
2351 struct iv *iv;
2352 bitmap_iterator bi;
2354 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2356 iv = ver_info (data, i)->iv;
2357 if (iv && iv->biv_p && !integer_zerop (iv->step))
2358 add_old_iv_candidates (data, iv);
2362 /* Adds candidates based on the value of the induction variable IV and USE. */
2364 static void
2365 add_iv_value_candidates (struct ivopts_data *data,
2366 struct iv *iv, struct iv_use *use)
2368 unsigned HOST_WIDE_INT offset;
2369 tree base;
2370 tree basetype;
2372 add_candidate (data, iv->base, iv->step, false, use);
2374 /* The same, but with initial value zero. Make such a variable important,
2375 since it is generic enough that many uses may possibly be based
2376 on it. */
2377 basetype = TREE_TYPE (iv->base);
2378 if (POINTER_TYPE_P (basetype))
2379 basetype = sizetype;
2380 add_candidate (data, build_int_cst (basetype, 0),
2381 iv->step, true, use);
2383 /* Third, try removing the constant offset. Make sure to even
2384 add a candidate for &a[0] vs. (T *)&a. */
2385 base = strip_offset (iv->base, &offset);
2386 if (offset
2387 || base != iv->base)
2388 add_candidate (data, base, iv->step, false, use);
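/* An illustrative sketch, assuming 4-byte array elements: for a use whose
   iv is {base = &a[3], step = 4}, the calls above add (up to the
   deduplication in add_candidate_1) the candidates

     &a[3] + 4 * i   -- the value of the iv itself
     0 + 4 * i       -- important; generic enough to be shared
     &a + 4 * i      -- the base with the constant offset 12 stripped.  */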
2391 /* Adds candidates based on the uses. */
2393 static void
2394 add_derived_ivs_candidates (struct ivopts_data *data)
2396 unsigned i;
2398 for (i = 0; i < n_iv_uses (data); i++)
2400 struct iv_use *use = iv_use (data, i);
2402 if (!use)
2403 continue;
2405 switch (use->type)
2407 case USE_NONLINEAR_EXPR:
2408 case USE_COMPARE:
2409 case USE_ADDRESS:
2410 /* Just add the ivs based on the value of the iv used here. */
2411 add_iv_value_candidates (data, use->iv, use);
2412 break;
2414 default:
2415 gcc_unreachable ();
2420 /* Record important candidates and add them to related_cands bitmaps
2421 if needed. */
2423 static void
2424 record_important_candidates (struct ivopts_data *data)
2426 unsigned i;
2427 struct iv_use *use;
2429 for (i = 0; i < n_iv_cands (data); i++)
2431 struct iv_cand *cand = iv_cand (data, i);
2433 if (cand->important)
2434 bitmap_set_bit (data->important_candidates, i);
2437 data->consider_all_candidates = (n_iv_cands (data)
2438 <= CONSIDER_ALL_CANDIDATES_BOUND);
2440 if (data->consider_all_candidates)
2442 /* We will not need "related_cands" bitmaps in this case,
2443 so release them to decrease peak memory consumption. */
2444 for (i = 0; i < n_iv_uses (data); i++)
2446 use = iv_use (data, i);
2447 BITMAP_FREE (use->related_cands);
2450 else
2452 /* Add important candidates to the related_cands bitmaps. */
2453 for (i = 0; i < n_iv_uses (data); i++)
2454 bitmap_ior_into (iv_use (data, i)->related_cands,
2455 data->important_candidates);
2459 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2460 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2461 we allocate a simple list for every use. */
2463 static void
2464 alloc_use_cost_map (struct ivopts_data *data)
2466 unsigned i, size, s, j;
2468 for (i = 0; i < n_iv_uses (data); i++)
2470 struct iv_use *use = iv_use (data, i);
2471 bitmap_iterator bi;
2473 if (data->consider_all_candidates)
2474 size = n_iv_cands (data);
2475 else
2477 s = 0;
2478 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2480 s++;
2483 /* Round up to a power of two, so that computing a modulus by it is fast. */
2484 for (size = 1; size < s; size <<= 1)
2485 continue;
2488 use->n_map_members = size;
2489 use->cost_map = XCNEWVEC (struct cost_pair, size);
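/* E.g. with five related candidates the loop above yields size == 8, the
   next power of two, so that set_use_iv_cost can reduce a candidate id
   into the table with a cheap bitwise AND instead of a division.  */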
2493 /* Returns a description of the computation cost of an expression whose
2494 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2496 static comp_cost
2497 new_cost (unsigned runtime, unsigned complexity)
2499 comp_cost cost;
2501 cost.cost = runtime;
2502 cost.complexity = complexity;
2504 return cost;
2507 /* Adds costs COST1 and COST2. */
2509 static comp_cost
2510 add_costs (comp_cost cost1, comp_cost cost2)
2512 cost1.cost += cost2.cost;
2513 cost1.complexity += cost2.complexity;
2515 return cost1;
2517 /* Subtracts cost COST2 from COST1. */
2519 static comp_cost
2520 sub_costs (comp_cost cost1, comp_cost cost2)
2522 cost1.cost -= cost2.cost;
2523 cost1.complexity -= cost2.complexity;
2525 return cost1;
2528 /* Returns a negative number if COST1 < COST2, a positive number if
2529 COST1 > COST2, and 0 if COST1 = COST2. */
2531 static int
2532 compare_costs (comp_cost cost1, comp_cost cost2)
2534 if (cost1.cost == cost2.cost)
2535 return cost1.complexity - cost2.complexity;
2537 return cost1.cost - cost2.cost;
2540 /* Returns true if COST is infinite. */
2542 static bool
2543 infinite_cost_p (comp_cost cost)
2545 return cost.cost == INFTY;
2548 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2549 on invariants DEPENDS_ON and that the value used in expressing it
2550 is VALUE. */
2552 static void
2553 set_use_iv_cost (struct ivopts_data *data,
2554 struct iv_use *use, struct iv_cand *cand,
2555 comp_cost cost, bitmap depends_on, tree value)
2557 unsigned i, s;
2559 if (infinite_cost_p (cost))
2561 BITMAP_FREE (depends_on);
2562 return;
2565 if (data->consider_all_candidates)
2567 use->cost_map[cand->id].cand = cand;
2568 use->cost_map[cand->id].cost = cost;
2569 use->cost_map[cand->id].depends_on = depends_on;
2570 use->cost_map[cand->id].value = value;
2571 return;
2574 /* n_map_members is a power of two, so this computes modulo. */
2575 s = cand->id & (use->n_map_members - 1);
2576 for (i = s; i < use->n_map_members; i++)
2577 if (!use->cost_map[i].cand)
2578 goto found;
2579 for (i = 0; i < s; i++)
2580 if (!use->cost_map[i].cand)
2581 goto found;
2583 gcc_unreachable ();
2585 found:
2586 use->cost_map[i].cand = cand;
2587 use->cost_map[i].cost = cost;
2588 use->cost_map[i].depends_on = depends_on;
2589 use->cost_map[i].value = value;
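/* Worked example of the probe above: with n_map_members == 8 and
   cand->id == 13, the start slot is 13 & 7 == 5, and we scan slots 5..7
   and then 0..4 for a free entry. Since infinite costs are filtered out
   at the top, at most n_map_members pairs are ever stored, so a free
   slot always exists.  */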
2592 /* Gets cost of (USE, CANDIDATE) pair. */
2594 static struct cost_pair *
2595 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2596 struct iv_cand *cand)
2598 unsigned i, s;
2599 struct cost_pair *ret;
2601 if (!cand)
2602 return NULL;
2604 if (data->consider_all_candidates)
2606 ret = use->cost_map + cand->id;
2607 if (!ret->cand)
2608 return NULL;
2610 return ret;
2613 /* n_map_members is a power of two, so this computes modulo. */
2614 s = cand->id & (use->n_map_members - 1);
2615 for (i = s; i < use->n_map_members; i++)
2616 if (use->cost_map[i].cand == cand)
2617 return use->cost_map + i;
2619 for (i = 0; i < s; i++)
2620 if (use->cost_map[i].cand == cand)
2621 return use->cost_map + i;
2623 return NULL;
2626 /* Returns an estimate of the cost of computing SEQ. */
2628 static unsigned
2629 seq_cost (rtx seq, bool speed)
2631 unsigned cost = 0;
2632 rtx set;
2634 for (; seq; seq = NEXT_INSN (seq))
2636 set = single_set (seq);
2637 if (set)
2638 cost += rtx_cost (set, SET, speed);
2639 else
2640 cost++;
2643 return cost;
2646 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2647 static rtx
2648 produce_memory_decl_rtl (tree obj, int *regno)
2650 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2651 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2652 rtx x;
2654 gcc_assert (obj);
2655 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2657 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2658 x = gen_rtx_SYMBOL_REF (address_mode, name);
2659 SET_SYMBOL_REF_DECL (x, obj);
2660 x = gen_rtx_MEM (DECL_MODE (obj), x);
2661 set_mem_addr_space (x, as);
2662 targetm.encode_section_info (obj, x, true);
2664 else
2666 x = gen_raw_REG (address_mode, (*regno)++);
2667 x = gen_rtx_MEM (DECL_MODE (obj), x);
2668 set_mem_addr_space (x, as);
2671 return x;
2674 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2675 walk_tree. DATA contains the actual fake register number. */
2677 static tree
2678 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2680 tree obj = NULL_TREE;
2681 rtx x = NULL_RTX;
2682 int *regno = (int *) data;
2684 switch (TREE_CODE (*expr_p))
2686 case ADDR_EXPR:
2687 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2688 handled_component_p (*expr_p);
2689 expr_p = &TREE_OPERAND (*expr_p, 0))
2690 continue;
2691 obj = *expr_p;
2692 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2693 x = produce_memory_decl_rtl (obj, regno);
2694 break;
2696 case SSA_NAME:
2697 *ws = 0;
2698 obj = SSA_NAME_VAR (*expr_p);
2699 if (!DECL_RTL_SET_P (obj))
2700 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2701 break;
2703 case VAR_DECL:
2704 case PARM_DECL:
2705 case RESULT_DECL:
2706 *ws = 0;
2707 obj = *expr_p;
2709 if (DECL_RTL_SET_P (obj))
2710 break;
2712 if (DECL_MODE (obj) == BLKmode)
2713 x = produce_memory_decl_rtl (obj, regno);
2714 else
2715 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2717 break;
2719 default:
2720 break;
2723 if (x)
2725 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2726 SET_DECL_RTL (obj, x);
2729 return NULL_TREE;
2732 /* Determines cost of the computation of EXPR. */
2734 static unsigned
2735 computation_cost (tree expr, bool speed)
2737 rtx seq, rslt;
2738 tree type = TREE_TYPE (expr);
2739 unsigned cost;
2740 /* Avoid using hard regs in ways which may be unsupported. */
2741 int regno = LAST_VIRTUAL_REGISTER + 1;
2742 struct cgraph_node *node = cgraph_node (current_function_decl);
2743 enum node_frequency real_frequency = node->frequency;
2745 node->frequency = NODE_FREQUENCY_NORMAL;
2746 crtl->maybe_hot_insn_p = speed;
2747 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2748 start_sequence ();
2749 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2750 seq = get_insns ();
2751 end_sequence ();
2752 default_rtl_profile ();
2753 node->frequency = real_frequency;
2755 cost = seq_cost (seq, speed);
2756 if (MEM_P (rslt))
2757 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2758 TYPE_ADDR_SPACE (type), speed);
2760 return cost;
2763 /* Returns variable containing the value of candidate CAND at statement AT. */
2765 static tree
2766 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2768 if (stmt_after_increment (loop, cand, stmt))
2769 return cand->var_after;
2770 else
2771 return cand->var_before;
2774 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2775 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2777 bool
2778 tree_int_cst_sign_bit (const_tree t)
2780 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2781 unsigned HOST_WIDE_INT w;
2783 if (bitno < HOST_BITS_PER_WIDE_INT)
2784 w = TREE_INT_CST_LOW (t);
2785 else
2787 w = TREE_INT_CST_HIGH (t);
2788 bitno -= HOST_BITS_PER_WIDE_INT;
2791 return (w >> bitno) & 1;
2794 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2795 same precision that is at least as wide as the precision of TYPE, stores
2796 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2797 type of A and B. */
2799 static tree
2800 determine_common_wider_type (tree *a, tree *b)
2802 tree wider_type = NULL;
2803 tree suba, subb;
2804 tree atype = TREE_TYPE (*a);
2806 if (CONVERT_EXPR_P (*a))
2808 suba = TREE_OPERAND (*a, 0);
2809 wider_type = TREE_TYPE (suba);
2810 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2811 return atype;
2813 else
2814 return atype;
2816 if (CONVERT_EXPR_P (*b))
2818 subb = TREE_OPERAND (*b, 0);
2819 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2820 return atype;
2822 else
2823 return atype;
2825 *a = suba;
2826 *b = subb;
2827 return wider_type;
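/* Illustration (hypothetical trees): if *A is (int) x and *B is (int) y
   with x and y of type long, both operands are replaced by their long
   subexpressions and long is returned, so the caller can fold their
   difference in the wider type. If only one operand is such a
   conversion, the type of A is returned unchanged.  */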
2830 /* Determines the expression by which USE is expressed from induction variable
2831 CAND at statement AT in LOOP. The expression is stored in a decomposed
2832 form into AFF. Returns false if USE cannot be expressed using CAND. */
2834 static bool
2835 get_computation_aff (struct loop *loop,
2836 struct iv_use *use, struct iv_cand *cand, gimple at,
2837 struct affine_tree_combination *aff)
2839 tree ubase = use->iv->base;
2840 tree ustep = use->iv->step;
2841 tree cbase = cand->iv->base;
2842 tree cstep = cand->iv->step, cstep_common;
2843 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2844 tree common_type, var;
2845 tree uutype;
2846 aff_tree cbase_aff, var_aff;
2847 double_int rat;
2849 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2851 /* We do not have enough precision to express the values of the use. */
2852 return false;
2855 var = var_at_stmt (loop, cand, at);
2856 uutype = unsigned_type_for (utype);
2858 /* If the conversion is not noop, perform it. */
2859 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2861 cstep = fold_convert (uutype, cstep);
2862 cbase = fold_convert (uutype, cbase);
2863 var = fold_convert (uutype, var);
2866 if (!constant_multiple_of (ustep, cstep, &rat))
2867 return false;
2869 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2870 type, we achieve better folding by computing their difference in this
2871 wider type, and cast the result to UUTYPE. We do not need to worry about
2872 overflows, as all the arithmetic will in the end be performed in UUTYPE
2873 anyway. */
2874 common_type = determine_common_wider_type (&ubase, &cbase);
2876 /* use = ubase - ratio * cbase + ratio * var. */
2877 tree_to_aff_combination (ubase, common_type, aff);
2878 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2879 tree_to_aff_combination (var, uutype, &var_aff);
2881 /* We need to shift the value if we are after the increment. */
2882 if (stmt_after_increment (loop, cand, at))
2884 aff_tree cstep_aff;
2886 if (common_type != uutype)
2887 cstep_common = fold_convert (common_type, cstep);
2888 else
2889 cstep_common = cstep;
2891 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2892 aff_combination_add (&cbase_aff, &cstep_aff);
2895 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2896 aff_combination_add (aff, &cbase_aff);
2897 if (common_type != uutype)
2898 aff_combination_convert (aff, uutype);
2900 aff_combination_scale (&var_aff, rat);
2901 aff_combination_add (aff, &var_aff);
2903 return true;
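/* A small worked example (hypothetical ivs): for a use with
   {base = &a[0], step = 4} and a candidate with {base = 0, step = 1},
   constant_multiple_of yields rat == 4 and the combination built above is

     use = &a[0] - 4 * 0 + 4 * var = &a[0] + 4 * var,

   i.e. the address is recovered by scaling the generic counter.  */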
2906 /* Determines the expression by which USE is expressed from induction variable
2907 CAND at statement AT in LOOP. The computation is unshared. */
2909 static tree
2910 get_computation_at (struct loop *loop,
2911 struct iv_use *use, struct iv_cand *cand, gimple at)
2913 aff_tree aff;
2914 tree type = TREE_TYPE (use->iv->base);
2916 if (!get_computation_aff (loop, use, cand, at, &aff))
2917 return NULL_TREE;
2918 unshare_aff_combination (&aff);
2919 return fold_convert (type, aff_combination_to_tree (&aff));
2922 /* Determines the expression by which USE is expressed from induction variable
2923 CAND in LOOP. The computation is unshared. */
2925 static tree
2926 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2928 return get_computation_at (loop, use, cand, use->stmt);
2931 /* Returns cost of addition in MODE. */
2933 static unsigned
2934 add_cost (enum machine_mode mode, bool speed)
2936 static unsigned costs[NUM_MACHINE_MODES];
2937 rtx seq;
2938 unsigned cost;
2940 if (costs[mode])
2941 return costs[mode];
2943 start_sequence ();
2944 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2945 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2946 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2947 NULL_RTX);
2948 seq = get_insns ();
2949 end_sequence ();
2951 cost = seq_cost (seq, speed);
2952 if (!cost)
2953 cost = 1;
2955 costs[mode] = cost;
2957 if (dump_file && (dump_flags & TDF_DETAILS))
2958 fprintf (dump_file, "Addition in %s costs %d\n",
2959 GET_MODE_NAME (mode), cost);
2960 return cost;
2963 /* Entry in a hashtable of already known costs for multiplication. */
2964 struct mbc_entry
2966 HOST_WIDE_INT cst; /* The constant to multiply by. */
2967 enum machine_mode mode; /* In mode. */
2968 unsigned cost; /* The cost. */
2971 /* Computes the hash value for ENTRY. */
2973 static hashval_t
2974 mbc_entry_hash (const void *entry)
2976 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2978 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2981 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2983 static int
2984 mbc_entry_eq (const void *entry1, const void *entry2)
2986 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2987 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2989 return (e1->mode == e2->mode
2990 && e1->cst == e2->cst);
2993 /* Returns cost of multiplication by constant CST in MODE. */
2995 unsigned
2996 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2998 static htab_t costs;
2999 struct mbc_entry **cached, act;
3000 rtx seq;
3001 unsigned cost;
3003 if (!costs)
3004 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3006 act.mode = mode;
3007 act.cst = cst;
3008 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3009 if (*cached)
3010 return (*cached)->cost;
3012 *cached = XNEW (struct mbc_entry);
3013 (*cached)->mode = mode;
3014 (*cached)->cst = cst;
3016 start_sequence ();
3017 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3018 gen_int_mode (cst, mode), NULL_RTX, 0);
3019 seq = get_insns ();
3020 end_sequence ();
3022 cost = seq_cost (seq, speed);
3024 if (dump_file && (dump_flags & TDF_DETAILS))
3025 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3026 (int) cst, GET_MODE_NAME (mode), cost);
3028 (*cached)->cost = cost;
3030 return cost;
3033 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3034 validity for a memory reference accessing memory of mode MODE in
3035 address space AS. */
3037 DEF_VEC_P (sbitmap);
3038 DEF_VEC_ALLOC_P (sbitmap, heap);
3040 bool
3041 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3042 addr_space_t as)
3044 #define MAX_RATIO 128
3045 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3046 static VEC (sbitmap, heap) *valid_mult_list;
3047 sbitmap valid_mult;
3049 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3050 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3052 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3053 if (!valid_mult)
3055 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3056 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3057 rtx addr;
3058 HOST_WIDE_INT i;
3060 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3061 sbitmap_zero (valid_mult);
3062 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3063 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3065 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3066 if (memory_address_addr_space_p (mode, addr, as))
3067 SET_BIT (valid_mult, i + MAX_RATIO);
3070 if (dump_file && (dump_flags & TDF_DETAILS))
3072 fprintf (dump_file, " allowed multipliers:");
3073 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3074 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3075 fprintf (dump_file, " %d", (int) i);
3076 fprintf (dump_file, "\n");
3077 fprintf (dump_file, "\n");
3080 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3083 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3084 return false;
3086 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
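/* On an x86-like target one would expect the probe above to accept the
   ratios 1, 2, 4 and 8 that the hardware addressing modes support; the
   exact set is whatever memory_address_addr_space_p admits, and is shown
   in the "allowed multipliers" dump.  */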
3089 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3090 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3091 variable is omitted. Compute the cost for a memory reference that accesses
3092 a memory location of mode MEM_MODE in address space AS.
3094 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3095 size of MEM_MODE / RATIO) is available. To make this determination, we
3096 look at the size of the increment to be made, which is given in CSTEP.
3097 CSTEP may be zero if the step is unknown.
3098 STMT_AFTER_INC is true iff the statement we're looking at is after the
3099 increment of the original biv.
3101 TODO -- there must be some better way. This all is quite crude. */
3103 typedef struct
3105 HOST_WIDE_INT min_offset, max_offset;
3106 unsigned costs[2][2][2][2];
3107 } *address_cost_data;
3109 DEF_VEC_P (address_cost_data);
3110 DEF_VEC_ALLOC_P (address_cost_data, heap);
3112 static comp_cost
3113 get_address_cost (bool symbol_present, bool var_present,
3114 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3115 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3116 addr_space_t as, bool speed,
3117 bool stmt_after_inc, bool *may_autoinc)
3119 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3120 static VEC(address_cost_data, heap) *address_cost_data_list;
3121 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3122 address_cost_data data;
3123 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3124 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3125 unsigned cost, acost, complexity;
3126 bool offset_p, ratio_p, autoinc;
3127 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3128 unsigned HOST_WIDE_INT mask;
3129 unsigned bits;
3131 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3132 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3133 data_index + 1);
3135 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3136 if (!data)
3138 HOST_WIDE_INT i;
3139 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3140 HOST_WIDE_INT rat, off;
3141 int old_cse_not_expected;
3142 unsigned sym_p, var_p, off_p, rat_p, add_c;
3143 rtx seq, addr, base;
3144 rtx reg0, reg1;
3146 data = (address_cost_data) xcalloc (1, sizeof (*data));
3148 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3150 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3151 for (i = start; i <= 1 << 20; i <<= 1)
3153 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3154 if (!memory_address_addr_space_p (mem_mode, addr, as))
3155 break;
3157 data->max_offset = i == start ? 0 : i >> 1;
3158 off = data->max_offset;
3160 for (i = start; i <= 1 << 20; i <<= 1)
3162 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3163 if (!memory_address_addr_space_p (mem_mode, addr, as))
3164 break;
3166 data->min_offset = i == start ? 0 : -(i >> 1);
3168 if (dump_file && (dump_flags & TDF_DETAILS))
3170 fprintf (dump_file, "get_address_cost:\n");
3171 fprintf (dump_file, " min offset %s %d\n",
3172 GET_MODE_NAME (mem_mode),
3173 (int) data->min_offset);
3174 fprintf (dump_file, " max offset %s %d\n",
3175 GET_MODE_NAME (mem_mode),
3176 (int) data->max_offset);
3179 rat = 1;
3180 for (i = 2; i <= MAX_RATIO; i++)
3181 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3183 rat = i;
3184 break;
3187 /* Compute the cost of various addressing modes. */
3188 acost = 0;
3189 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3190 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3192 if (HAVE_PRE_DECREMENT)
3194 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3195 has_predec[mem_mode]
3196 = memory_address_addr_space_p (mem_mode, addr, as);
3198 if (HAVE_POST_DECREMENT)
3200 addr = gen_rtx_POST_DEC (address_mode, reg0);
3201 has_postdec[mem_mode]
3202 = memory_address_addr_space_p (mem_mode, addr, as);
3204 if (HAVE_PRE_INCREMENT)
3206 addr = gen_rtx_PRE_INC (address_mode, reg0);
3207 has_preinc[mem_mode]
3208 = memory_address_addr_space_p (mem_mode, addr, as);
3210 if (HAVE_POST_INCREMENT)
3212 addr = gen_rtx_POST_INC (address_mode, reg0);
3213 has_postinc[mem_mode]
3214 = memory_address_addr_space_p (mem_mode, addr, as);
3216 for (i = 0; i < 16; i++)
3218 sym_p = i & 1;
3219 var_p = (i >> 1) & 1;
3220 off_p = (i >> 2) & 1;
3221 rat_p = (i >> 3) & 1;
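/* E.g. i == 13 (binary 1101) selects sym_p, off_p and rat_p but not
   var_p, i.e. it prices the address shape sym + cst + rat * index.  */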
3223 addr = reg0;
3224 if (rat_p)
3225 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3226 gen_int_mode (rat, address_mode));
3228 if (var_p)
3229 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3231 if (sym_p)
3233 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3234 /* ??? We can run into trouble with some backends by presenting
3235 it with symbols which haven't been properly passed through
3236 targetm.encode_section_info. By setting the local bit, we
3237 enhance the probability of things working. */
3238 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3240 if (off_p)
3241 base = gen_rtx_fmt_e (CONST, address_mode,
3242 gen_rtx_fmt_ee
3243 (PLUS, address_mode, base,
3244 gen_int_mode (off, address_mode)));
3246 else if (off_p)
3247 base = gen_int_mode (off, address_mode);
3248 else
3249 base = NULL_RTX;
3251 if (base)
3252 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3254 start_sequence ();
3255 /* To avoid splitting addressing modes, pretend that no cse will
3256 follow. */
3257 old_cse_not_expected = cse_not_expected;
3258 cse_not_expected = true;
3259 addr = memory_address_addr_space (mem_mode, addr, as);
3260 cse_not_expected = old_cse_not_expected;
3261 seq = get_insns ();
3262 end_sequence ();
3264 acost = seq_cost (seq, speed);
3265 acost += address_cost (addr, mem_mode, as, speed);
3267 if (!acost)
3268 acost = 1;
3269 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3272 /* On some targets, it is quite expensive to load a symbol into a register,
3273 which makes addresses that contain symbols look much more expensive.
3274 However, the symbol will have to be loaded in any case before the
3275 loop (and quite likely we have it in a register already), so it does not
3276 make much sense to penalize them too heavily. So make some final
3277 tweaks for the SYMBOL_PRESENT modes:
3279 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3280 var is cheaper, use this mode with small penalty.
3281 If VAR_PRESENT is true, try whether the mode with
3282 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3283 if this is the case, use it. */
3284 add_c = add_cost (address_mode, speed);
3285 for (i = 0; i < 8; i++)
3287 var_p = i & 1;
3288 off_p = (i >> 1) & 1;
3289 rat_p = (i >> 2) & 1;
3291 acost = data->costs[0][1][off_p][rat_p] + 1;
3292 if (var_p)
3293 acost += add_c;
3295 if (acost < data->costs[1][var_p][off_p][rat_p])
3296 data->costs[1][var_p][off_p][rat_p] = acost;
3299 if (dump_file && (dump_flags & TDF_DETAILS))
3301 fprintf (dump_file, "Address costs:\n");
3303 for (i = 0; i < 16; i++)
3305 sym_p = i & 1;
3306 var_p = (i >> 1) & 1;
3307 off_p = (i >> 2) & 1;
3308 rat_p = (i >> 3) & 1;
3310 fprintf (dump_file, " ");
3311 if (sym_p)
3312 fprintf (dump_file, "sym + ");
3313 if (var_p)
3314 fprintf (dump_file, "var + ");
3315 if (off_p)
3316 fprintf (dump_file, "cst + ");
3317 if (rat_p)
3318 fprintf (dump_file, "rat * ");
3320 acost = data->costs[sym_p][var_p][off_p][rat_p];
3321 fprintf (dump_file, "index costs %d\n", acost);
3323 if (has_predec[mem_mode] || has_postdec[mem_mode]
3324 || has_preinc[mem_mode] || has_postinc[mem_mode])
3325 fprintf (dump_file, " May include autoinc/dec\n");
3326 fprintf (dump_file, "\n");
3329 VEC_replace (address_cost_data, address_cost_data_list,
3330 data_index, data);
3333 bits = GET_MODE_BITSIZE (address_mode);
3334 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3335 offset &= mask;
3336 if ((offset >> (bits - 1) & 1))
3337 offset |= ~mask;
3338 s_offset = offset;
3340 autoinc = false;
3341 msize = GET_MODE_SIZE (mem_mode);
3342 autoinc_offset = offset;
3343 if (stmt_after_inc)
3344 autoinc_offset += ratio * cstep;
3345 if (symbol_present || var_present || ratio != 1)
3346 autoinc = false;
3347 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3348 && msize == cstep)
3349 || (has_postdec[mem_mode] && autoinc_offset == 0
3350 && msize == -cstep)
3351 || (has_preinc[mem_mode] && autoinc_offset == msize
3352 && msize == cstep)
3353 || (has_predec[mem_mode] && autoinc_offset == -msize
3354 && msize == -cstep))
3355 autoinc = true;
3357 cost = 0;
3358 offset_p = (s_offset != 0
3359 && data->min_offset <= s_offset
3360 && s_offset <= data->max_offset);
3361 ratio_p = (ratio != 1
3362 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3364 if (ratio != 1 && !ratio_p)
3365 cost += multiply_by_cost (ratio, address_mode, speed);
3367 if (s_offset && !offset_p && !symbol_present)
3368 cost += add_cost (address_mode, speed);
3370 if (may_autoinc)
3371 *may_autoinc = autoinc;
3372 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3373 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3374 return new_cost (cost + acost, complexity);
3377 /* Estimates cost of forcing expression EXPR into a variable. */
3379 static comp_cost
3380 force_expr_to_var_cost (tree expr, bool speed)
3382 static bool costs_initialized = false;
3383 static unsigned integer_cost [2];
3384 static unsigned symbol_cost [2];
3385 static unsigned address_cost [2];
3386 tree op0, op1;
3387 comp_cost cost0, cost1, cost;
3388 enum machine_mode mode;
3390 if (!costs_initialized)
3392 tree type = build_pointer_type (integer_type_node);
3393 tree var, addr;
3394 rtx x;
3395 int i;
3397 var = create_tmp_var_raw (integer_type_node, "test_var");
3398 TREE_STATIC (var) = 1;
3399 x = produce_memory_decl_rtl (var, NULL);
3400 SET_DECL_RTL (var, x);
3402 addr = build1 (ADDR_EXPR, type, var);
3405 for (i = 0; i < 2; i++)
3407 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3408 2000), i);
3410 symbol_cost[i] = computation_cost (addr, i) + 1;
3412 address_cost[i]
3413 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3414 addr,
3415 build_int_cst (sizetype, 2000)), i) + 1;
3416 if (dump_file && (dump_flags & TDF_DETAILS))
3418 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3419 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3420 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3421 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3422 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3423 fprintf (dump_file, "\n");
3427 costs_initialized = true;
3430 STRIP_NOPS (expr);
3432 if (SSA_VAR_P (expr))
3433 return zero_cost;
3435 if (is_gimple_min_invariant (expr))
3437 if (TREE_CODE (expr) == INTEGER_CST)
3438 return new_cost (integer_cost [speed], 0);
3440 if (TREE_CODE (expr) == ADDR_EXPR)
3442 tree obj = TREE_OPERAND (expr, 0);
3444 if (TREE_CODE (obj) == VAR_DECL
3445 || TREE_CODE (obj) == PARM_DECL
3446 || TREE_CODE (obj) == RESULT_DECL)
3447 return new_cost (symbol_cost [speed], 0);
3450 return new_cost (address_cost [speed], 0);
3453 switch (TREE_CODE (expr))
3455 case POINTER_PLUS_EXPR:
3456 case PLUS_EXPR:
3457 case MINUS_EXPR:
3458 case MULT_EXPR:
3459 op0 = TREE_OPERAND (expr, 0);
3460 op1 = TREE_OPERAND (expr, 1);
3461 STRIP_NOPS (op0);
3462 STRIP_NOPS (op1);
3464 if (is_gimple_val (op0))
3465 cost0 = zero_cost;
3466 else
3467 cost0 = force_expr_to_var_cost (op0, speed);
3469 if (is_gimple_val (op1))
3470 cost1 = zero_cost;
3471 else
3472 cost1 = force_expr_to_var_cost (op1, speed);
3474 break;
3476 case NEGATE_EXPR:
3477 op0 = TREE_OPERAND (expr, 0);
3478 STRIP_NOPS (op0);
3479 op1 = NULL_TREE;
3481 if (is_gimple_val (op0))
3482 cost0 = zero_cost;
3483 else
3484 cost0 = force_expr_to_var_cost (op0, speed);
3486 cost1 = zero_cost;
3487 break;
3489 default:
3490 /* Just an arbitrary value, FIXME. */
3491 return new_cost (target_spill_cost[speed], 0);
3494 mode = TYPE_MODE (TREE_TYPE (expr));
3495 switch (TREE_CODE (expr))
3497 case POINTER_PLUS_EXPR:
3498 case PLUS_EXPR:
3499 case MINUS_EXPR:
3500 case NEGATE_EXPR:
3501 cost = new_cost (add_cost (mode, speed), 0);
3502 break;
3504 case MULT_EXPR:
3505 if (cst_and_fits_in_hwi (op0))
3506 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3507 else if (cst_and_fits_in_hwi (op1))
3508 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3509 else
3510 return new_cost (target_spill_cost [speed], 0);
3511 break;
3513 default:
3514 gcc_unreachable ();
3517 cost = add_costs (cost, cost0);
3518 cost = add_costs (cost, cost1);
3520 /* Bound the cost by target_spill_cost. The parts of complicated
3521 computations often are either loop invariant or at least can
3522 be shared between several iv uses, so letting this grow without
3523 limits would not give reasonable results. */
3524 if (cost.cost > (int) target_spill_cost [speed])
3525 cost.cost = target_spill_cost [speed];
3527 return cost;
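/* Illustrative cost, assuming hypothetical SSA names a_1 and b_2: for the
   expression a_1 + b_2 * 4, the SSA operands themselves are free, so the
   total is add_cost (mode, speed) for the addition plus
   multiply_by_cost (4, mode, speed) for the constant multiplication,
   capped by target_spill_cost above.  */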
3530 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3531 invariants the computation depends on. */
3533 static comp_cost
3534 force_var_cost (struct ivopts_data *data,
3535 tree expr, bitmap *depends_on)
3537 if (depends_on)
3539 fd_ivopts_data = data;
3540 walk_tree (&expr, find_depends, depends_on, NULL);
3543 return force_expr_to_var_cost (expr, data->speed);
3546 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3547 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3548 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3549 invariants the computation depends on. */
3551 static comp_cost
3552 split_address_cost (struct ivopts_data *data,
3553 tree addr, bool *symbol_present, bool *var_present,
3554 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3556 tree core;
3557 HOST_WIDE_INT bitsize;
3558 HOST_WIDE_INT bitpos;
3559 tree toffset;
3560 enum machine_mode mode;
3561 int unsignedp, volatilep;
3563 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3564 &unsignedp, &volatilep, false);
3566 if (toffset != 0
3567 || bitpos % BITS_PER_UNIT != 0
3568 || TREE_CODE (core) != VAR_DECL)
3570 *symbol_present = false;
3571 *var_present = true;
3572 fd_ivopts_data = data;
3573 walk_tree (&addr, find_depends, depends_on, NULL);
3574 return new_cost (target_spill_cost[data->speed], 0);
3577 *offset += bitpos / BITS_PER_UNIT;
3578 if (TREE_STATIC (core)
3579 || DECL_EXTERNAL (core))
3581 *symbol_present = true;
3582 *var_present = false;
3583 return zero_cost;
3586 *symbol_present = false;
3587 *var_present = true;
3588 return zero_cost;
3591 /* Estimates cost of expressing difference of addresses E1 - E2 as
3592 var + symbol + offset. The value of offset is added to OFFSET,
3593 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3594 part is missing. DEPENDS_ON is a set of the invariants the computation
3595 depends on. */
3597 static comp_cost
3598 ptr_difference_cost (struct ivopts_data *data,
3599 tree e1, tree e2, bool *symbol_present, bool *var_present,
3600 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3602 HOST_WIDE_INT diff = 0;
3603 aff_tree aff_e1, aff_e2;
3604 tree type;
3606 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3608 if (ptr_difference_const (e1, e2, &diff))
3610 *offset += diff;
3611 *symbol_present = false;
3612 *var_present = false;
3613 return zero_cost;
3616 if (integer_zerop (e2))
3617 return split_address_cost (data, TREE_OPERAND (e1, 0),
3618 symbol_present, var_present, offset, depends_on);
3620 *symbol_present = false;
3621 *var_present = true;
3623 type = signed_type_for (TREE_TYPE (e1));
3624 tree_to_aff_combination (e1, type, &aff_e1);
3625 tree_to_aff_combination (e2, type, &aff_e2);
3626 aff_combination_scale (&aff_e2, double_int_minus_one);
3627 aff_combination_add (&aff_e1, &aff_e2);
3629 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3632 /* Estimates cost of expressing difference E1 - E2 as
3633 var + symbol + offset. The value of offset is added to OFFSET,
3634 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3635 part is missing. DEPENDS_ON is a set of the invariants the computation
3636 depends on. */
3638 static comp_cost
3639 difference_cost (struct ivopts_data *data,
3640 tree e1, tree e2, bool *symbol_present, bool *var_present,
3641 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3643 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3644 unsigned HOST_WIDE_INT off1, off2;
3645 aff_tree aff_e1, aff_e2;
3646 tree type;
3648 e1 = strip_offset (e1, &off1);
3649 e2 = strip_offset (e2, &off2);
3650 *offset += off1 - off2;
3652 STRIP_NOPS (e1);
3653 STRIP_NOPS (e2);
3655 if (TREE_CODE (e1) == ADDR_EXPR)
3656 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3657 offset, depends_on);
3658 *symbol_present = false;
3660 if (operand_equal_p (e1, e2, 0))
3662 *var_present = false;
3663 return zero_cost;
3666 *var_present = true;
3668 if (integer_zerop (e2))
3669 return force_var_cost (data, e1, depends_on);
3671 if (integer_zerop (e1))
3673 comp_cost cost = force_var_cost (data, e2, depends_on);
3674 cost.cost += multiply_by_cost (-1, mode, data->speed);
3675 return cost;
3678 type = signed_type_for (TREE_TYPE (e1));
3679 tree_to_aff_combination (e1, type, &aff_e1);
3680 tree_to_aff_combination (e2, type, &aff_e2);
3681 aff_combination_scale (&aff_e2, double_int_minus_one);
3682 aff_combination_add (&aff_e1, &aff_e2);
3684 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3687 /* Determines the cost of the computation by which USE is expressed
3688 from induction variable CAND. If ADDRESS_P is true, we just need
3689 to create an address from it, otherwise we want to get it into a
3690 register. A set of invariants we depend on is stored in
3691 DEPENDS_ON. AT is the statement at which the value is computed.
3692 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3693 addressing is likely. */
3695 static comp_cost
3696 get_computation_cost_at (struct ivopts_data *data,
3697 struct iv_use *use, struct iv_cand *cand,
3698 bool address_p, bitmap *depends_on, gimple at,
3699 bool *can_autoinc)
3701 tree ubase = use->iv->base, ustep = use->iv->step;
3702 tree cbase, cstep;
3703 tree utype = TREE_TYPE (ubase), ctype;
3704 unsigned HOST_WIDE_INT cstepi, offset = 0;
3705 HOST_WIDE_INT ratio, aratio;
3706 bool var_present, symbol_present, stmt_is_after_inc;
3707 comp_cost cost;
3708 double_int rat;
3709 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3711 *depends_on = NULL;
3713 /* Only consider real candidates. */
3714 if (!cand->iv)
3715 return infinite_cost;
3717 cbase = cand->iv->base;
3718 cstep = cand->iv->step;
3719 ctype = TREE_TYPE (cbase);
3721 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3723 /* We do not have enough precision to express the values of the use. */
3724 return infinite_cost;
3727 if (address_p)
3729 /* Do not try to express address of an object with computation based
3730 on the address of a different object. This may cause problems in rtl-level
3731 alias analysis (which does not expect this to happen, as it is
3732 illegal in C), and would be unlikely to be useful
3733 anyway. */
3734 if (use->iv->base_object
3735 && cand->iv->base_object
3736 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3737 return infinite_cost;
3740 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3742 /* TODO -- add direct handling of this case. */
3743 goto fallback;
3746 /* CSTEPI is removed from the offset in case the statement is after the
3747 increment. If the step is not constant, we use zero instead.
3748 This is a bit imprecise (there is the extra addition), but
3749 redundancy elimination is likely to transform the code so that
3750 it uses the value of the variable before the increment anyway,
3751 so this is not too unrealistic. */
3752 if (cst_and_fits_in_hwi (cstep))
3753 cstepi = int_cst_value (cstep);
3754 else
3755 cstepi = 0;
3757 if (!constant_multiple_of (ustep, cstep, &rat))
3758 return infinite_cost;
3760 if (double_int_fits_in_shwi_p (rat))
3761 ratio = double_int_to_shwi (rat);
3762 else
3763 return infinite_cost;
3765 STRIP_NOPS (cbase);
3766 ctype = TREE_TYPE (cbase);
3768 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3769 or ratio == 1, it is better to handle this like
3771 ubase - ratio * cbase + ratio * var
3773 (this also holds in the case ratio == -1; TODO). */
3775 if (cst_and_fits_in_hwi (cbase))
3777 offset = - ratio * int_cst_value (cbase);
3778 cost = difference_cost (data,
3779 ubase, build_int_cst (utype, 0),
3780 &symbol_present, &var_present, &offset,
3781 depends_on);
3783 else if (ratio == 1)
3785 cost = difference_cost (data,
3786 ubase, cbase,
3787 &symbol_present, &var_present, &offset,
3788 depends_on);
3790 else if (address_p
3791 && !POINTER_TYPE_P (ctype)
3792 && multiplier_allowed_in_address_p
3793 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3794 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3796 cbase
3797 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3798 cost = difference_cost (data,
3799 ubase, cbase,
3800 &symbol_present, &var_present, &offset,
3801 depends_on);
3803 else
3805 cost = force_var_cost (data, cbase, depends_on);
3806 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3807 cost = add_costs (cost,
3808 difference_cost (data,
3809 ubase, build_int_cst (utype, 0),
3810 &symbol_present, &var_present,
3811 &offset, depends_on));
3814 /* If we are after the increment, the value of the candidate is higher by
3815 one iteration. */
3816 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3817 if (stmt_is_after_inc)
3818 offset -= ratio * cstepi;
3820 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3821 (symbol/var1/const parts may be omitted). If we are looking for an
3822 address, find the cost of addressing this. */
3823 if (address_p)
3824 return add_costs (cost,
3825 get_address_cost (symbol_present, var_present,
3826 offset, ratio, cstepi,
3827 TYPE_MODE (TREE_TYPE (utype)),
3828 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3829 speed, stmt_is_after_inc,
3830 can_autoinc));
3832 /* Otherwise estimate the costs for computing the expression. */
3833 if (!symbol_present && !var_present && !offset)
3835 if (ratio != 1)
3836 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3837 return cost;
3840 /* Symbol + offset should be compile-time computable so consider that they
3841 are added once to the variable, if present. */
3842 if (var_present && (symbol_present || offset))
3843 cost.cost += add_cost (TYPE_MODE (ctype), speed)
3844 / AVG_LOOP_NITER (data->current_loop);
3846 /* Having an offset does not affect the runtime cost in case it is added to
3847 a symbol, but it increases complexity. */
3848 if (offset)
3849 cost.complexity++;
3851 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3853 aratio = ratio > 0 ? ratio : -ratio;
3854 if (aratio != 1)
3855 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3857 fallback:
3858 if (can_autoinc)
3859 *can_autoinc = false;
3862 /* Just get the expression, expand it and measure the cost. */
3863 tree comp = get_computation_at (data->current_loop, use, cand, at);
3865 if (!comp)
3866 return infinite_cost;
3868 if (address_p)
3869 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3871 return new_cost (computation_cost (comp, speed), 0);
3875 /* Determines the cost of the computation by which USE is expressed
3876 from induction variable CAND. If ADDRESS_P is true, we just need
3877 to create an address from it, otherwise we want to get it into a
3878 register. A set of invariants we depend on is stored in
3879 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3880 autoinc addressing is likely. */
3882 static comp_cost
3883 get_computation_cost (struct ivopts_data *data,
3884 struct iv_use *use, struct iv_cand *cand,
3885 bool address_p, bitmap *depends_on, bool *can_autoinc)
3887 return get_computation_cost_at (data,
3888 use, cand, address_p, depends_on, use->stmt,
3889 can_autoinc);
3892 /* Determines cost of basing replacement of USE on CAND in a generic
3893 expression. */
3895 static bool
3896 determine_use_iv_cost_generic (struct ivopts_data *data,
3897 struct iv_use *use, struct iv_cand *cand)
3899 bitmap depends_on;
3900 comp_cost cost;
3902 /* The simple case first -- if we need to express the value of the preserved
3903 original biv, the cost is 0. This also prevents us from counting the
3904 cost of increment twice -- once at this use and once in the cost of
3905 the candidate. */
3906 if (cand->pos == IP_ORIGINAL
3907 && cand->incremented_at == use->stmt)
3909 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3910 return true;
3913 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3914 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3916 return !infinite_cost_p (cost);
3919 /* Determines cost of basing replacement of USE on CAND in an address. */
3921 static bool
3922 determine_use_iv_cost_address (struct ivopts_data *data,
3923 struct iv_use *use, struct iv_cand *cand)
3925 bitmap depends_on;
3926 bool can_autoinc;
3927 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3928 &can_autoinc);
3930 if (cand->ainc_use == use)
3932 if (can_autoinc)
3933 cost.cost -= cand->cost_step;
3934 /* If we generated the candidate solely for exploiting autoincrement
3935 opportunities, and it turns out it can't be used, set the cost to
3936 infinity to make sure we ignore it. */
3937 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3938 cost = infinite_cost;
3940 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3942 return !infinite_cost_p (cost);
3945 /* Computes value of candidate CAND at position AT in iteration NITER, and
3946 stores it to VAL. */
3948 static void
3949 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3950 aff_tree *val)
3952 aff_tree step, delta, nit;
3953 struct iv *iv = cand->iv;
3954 tree type = TREE_TYPE (iv->base);
3955 tree steptype = type;
3956 if (POINTER_TYPE_P (type))
3957 steptype = sizetype;
3959 tree_to_aff_combination (iv->step, steptype, &step);
3960 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3961 aff_combination_convert (&nit, steptype);
3962 aff_combination_mult (&nit, &step, &delta);
3963 if (stmt_after_increment (loop, cand, at))
3964 aff_combination_add (&delta, &step);
3966 tree_to_aff_combination (iv->base, type, val);
3967 aff_combination_add (val, &delta);
3970 /* Returns the period of induction variable IV. */
3972 static tree
3973 iv_period (struct iv *iv)
3975 tree step = iv->step, period, type;
3976 tree pow2div;
3978 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3980 /* The period of the iv is the type range divided by gcd (step, type range).
3981 Since the type range is a power of two, it suffices to determine the
3982 maximum power of two that divides the step. */
3983 pow2div = num_ending_zeros (step);
3984 type = unsigned_type_for (TREE_TYPE (step));
3986 period = build_low_bits_mask (type,
3987 (TYPE_PRECISION (type)
3988 - tree_low_cst (pow2div, 1)));
3990 return period;
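/* Worked example: for a 32-bit iv with step 4, num_ending_zeros is 2 and
   the period is the low-bits mask of 32 - 2 == 30 bits, i.e. 0x3fffffff;
   the iv takes 2^30 distinct values before it wraps back to its initial
   one.  */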
3993 /* Returns the comparison operator used when eliminating the iv USE. */
3995 static enum tree_code
3996 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3998 struct loop *loop = data->current_loop;
3999 basic_block ex_bb;
4000 edge exit;
4002 ex_bb = gimple_bb (use->stmt);
4003 exit = EDGE_SUCC (ex_bb, 0);
4004 if (flow_bb_inside_loop_p (loop, exit->dest))
4005 exit = EDGE_SUCC (ex_bb, 1);
4007 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4010 /* Check whether it is possible to express the condition in USE by comparison
4011 of candidate CAND. If so, store the value to compare against in BOUND. */
4013 static bool
4014 may_eliminate_iv (struct ivopts_data *data,
4015 struct iv_use *use, struct iv_cand *cand, tree *bound)
4017 basic_block ex_bb;
4018 edge exit;
4019 tree nit, period;
4020 struct loop *loop = data->current_loop;
4021 aff_tree bnd;
4023 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4024 return false;
4026 /* For now this works only for exits that dominate the loop latch.
4027 TODO: extend to other conditions inside loop body. */
4028 ex_bb = gimple_bb (use->stmt);
4029 if (use->stmt != last_stmt (ex_bb)
4030 || gimple_code (use->stmt) != GIMPLE_COND
4031 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4032 return false;
4034 exit = EDGE_SUCC (ex_bb, 0);
4035 if (flow_bb_inside_loop_p (loop, exit->dest))
4036 exit = EDGE_SUCC (ex_bb, 1);
4037 if (flow_bb_inside_loop_p (loop, exit->dest))
4038 return false;
4040 nit = niter_for_exit (data, exit);
4041 if (!nit)
4042 return false;
4044 /* Determine whether we can use the variable to test the exit condition.
4045 This is the case iff the period of the induction variable is greater
4046 than the number of iterations for which the exit condition is true. */
4047 period = iv_period (cand->iv);
4049 /* If the number of iterations is constant, compare against it directly. */
4050 if (TREE_CODE (nit) == INTEGER_CST)
4052 if (!tree_int_cst_lt (nit, period))
4053 return false;
4056 /* If not, and if this is the only possible exit of the loop, see whether
4057 we can get a conservative estimate of the number of iterations of the
4058 entire loop and compare against that instead. */
4059 else if (loop_only_exit_p (loop, exit))
4061 double_int period_value, max_niter;
4062 if (!estimated_loop_iterations (loop, true, &max_niter))
4063 return false;
4064 period_value = tree_to_double_int (period);
4065 if (double_int_ucmp (max_niter, period_value) >= 0)
4066 return false;
4069 /* Otherwise, punt. */
4070 else
4071 return false;
4073 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4075 *bound = aff_combination_to_tree (&bnd);
4076 /* It is unlikely that computing the number of iterations using division
4077 would be more profitable than keeping the original induction variable. */
4078 if (expression_expensive_p (*bound))
4079 return false;
4080 return true;
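/* A sketch of the rewrite this enables (hypothetical loop): if an exit
   tests i_1 < n_2 and a candidate pointer p_3 advances by 4 each
   iteration, the exit condition may be replaced by comparing p_3 against
   the value cand_value_at computes for iteration NIT, e.g.
   p_3 != base + 4 * n_2, after which i_1 itself can become dead.  */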
4083 /* Determines cost of basing replacement of USE on CAND in a condition. */
4085 static bool
4086 determine_use_iv_cost_condition (struct ivopts_data *data,
4087 struct iv_use *use, struct iv_cand *cand)
4089 tree bound = NULL_TREE;
4090 struct iv *cmp_iv;
4091 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4092 comp_cost elim_cost, express_cost, cost;
4093 bool ok;
4094 tree *control_var, *bound_cst;
4096 /* Only consider real candidates. */
4097 if (!cand->iv)
4099 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4100 return false;
4103 /* Try iv elimination. */
4104 if (may_eliminate_iv (data, use, cand, &bound))
4106 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4107 /* The bound is a loop invariant, so it will be computed only
4108 once. */
4109 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
4111 else
4112 elim_cost = infinite_cost;
4114 /* Try expressing the original giv. If it is compared with an invariant,
4115 note that we cannot get rid of it. */
4116 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4117 NULL, &cmp_iv);
4118 gcc_assert (ok);
4120 /* When the condition is a comparison of the candidate IV against
4121 zero, prefer this IV.
4123 TODO: The constant that we're subtracting from the cost should
4124 be target-dependent. This information should be added to the
4125 target costs for each backend. */
4126 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4127 && integer_zerop (*bound_cst)
4128 && (operand_equal_p (*control_var, cand->var_after, 0)
4129 || operand_equal_p (*control_var, cand->var_before, 0)))
4130 elim_cost.cost -= 1;
4132 express_cost = get_computation_cost (data, use, cand, false,
4133 &depends_on_express, NULL);
4134 fd_ivopts_data = data;
4135 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4137 /* Choose the better approach, preferring the eliminated IV. */
4138 if (compare_costs (elim_cost, express_cost) <= 0)
4140 cost = elim_cost;
4141 depends_on = depends_on_elim;
4142 depends_on_elim = NULL;
4144 else
4146 cost = express_cost;
4147 depends_on = depends_on_express;
4148 depends_on_express = NULL;
4149 bound = NULL_TREE;
4152 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4154 if (depends_on_elim)
4155 BITMAP_FREE (depends_on_elim);
4156 if (depends_on_express)
4157 BITMAP_FREE (depends_on_express);
4159 return !infinite_cost_p (cost);
4162 /* Determines cost of basing replacement of USE on CAND. Returns false
4163 if USE cannot be based on CAND. */
4165 static bool
4166 determine_use_iv_cost (struct ivopts_data *data,
4167 struct iv_use *use, struct iv_cand *cand)
4169 switch (use->type)
4171 case USE_NONLINEAR_EXPR:
4172 return determine_use_iv_cost_generic (data, use, cand);
4174 case USE_ADDRESS:
4175 return determine_use_iv_cost_address (data, use, cand);
4177 case USE_COMPARE:
4178 return determine_use_iv_cost_condition (data, use, cand);
4180 default:
4181 gcc_unreachable ();
4185 /* Return true if get_computation_cost indicates that autoincrement is
4186 a possibility for the pair of USE and CAND, false otherwise. */
4188 static bool
4189 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4190 struct iv_cand *cand)
4192 bitmap depends_on;
4193 bool can_autoinc;
4194 comp_cost cost;
4196 if (use->type != USE_ADDRESS)
4197 return false;
4199 cost = get_computation_cost (data, use, cand, true, &depends_on,
4200 &can_autoinc);
4202 BITMAP_FREE (depends_on);
4204 return !infinite_cost_p (cost) && can_autoinc;
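/* For illustration: on a target with post-increment addressing, a use

     *p = x;  p = p + 1;

   can be covered by a single "*p++ = x" style access, so pairing the
   address use with the candidate's increment costs nothing extra.  */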
4207 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4208 use that allows autoincrement, and set their AINC_USE if possible. */
4210 static void
4211 set_autoinc_for_original_candidates (struct ivopts_data *data)
4213 unsigned i, j;
4215 for (i = 0; i < n_iv_cands (data); i++)
4217 struct iv_cand *cand = iv_cand (data, i);
4218 struct iv_use *closest = NULL;
4219 if (cand->pos != IP_ORIGINAL)
4220 continue;
4221 for (j = 0; j < n_iv_uses (data); j++)
4223 struct iv_use *use = iv_use (data, j);
4224 unsigned uid = gimple_uid (use->stmt);
4225 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4226 || uid > gimple_uid (cand->incremented_at))
4227 continue;
4228 if (closest == NULL || uid > gimple_uid (closest->stmt))
4229 closest = use;
4231 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4232 continue;
4233 cand->ainc_use = closest;
4237 /* Finds the candidates for the induction variables. */
4239 static void
4240 find_iv_candidates (struct ivopts_data *data)
4242 /* Add commonly used ivs. */
4243 add_standard_iv_candidates (data);
4245 /* Add old induction variables. */
4246 add_old_ivs_candidates (data);
4248 /* Add induction variables derived from uses. */
4249 add_derived_ivs_candidates (data);
4251 set_autoinc_for_original_candidates (data);
4253 /* Record the important candidates. */
4254 record_important_candidates (data);
4257 /* Determines costs of basing the use of the iv on an iv candidate. */
4259 static void
4260 determine_use_iv_costs (struct ivopts_data *data)
4262 unsigned i, j;
4263 struct iv_use *use;
4264 struct iv_cand *cand;
4265 bitmap to_clear = BITMAP_ALLOC (NULL);
4267 alloc_use_cost_map (data);
4269 for (i = 0; i < n_iv_uses (data); i++)
4271 use = iv_use (data, i);
4273 if (data->consider_all_candidates)
4275 for (j = 0; j < n_iv_cands (data); j++)
4277 cand = iv_cand (data, j);
4278 determine_use_iv_cost (data, use, cand);
4281 else
4283 bitmap_iterator bi;
4285 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4287 cand = iv_cand (data, j);
4288 if (!determine_use_iv_cost (data, use, cand))
4289 bitmap_set_bit (to_clear, j);
4292 /* Remove the candidates for which the cost is infinite from
4293 the list of related candidates. */
4294 bitmap_and_compl_into (use->related_cands, to_clear);
4295 bitmap_clear (to_clear);
4299 BITMAP_FREE (to_clear);
4301 if (dump_file && (dump_flags & TDF_DETAILS))
4303 fprintf (dump_file, "Use-candidate costs:\n");
4305 for (i = 0; i < n_iv_uses (data); i++)
4307 use = iv_use (data, i);
4309 fprintf (dump_file, "Use %d:\n", i);
4310 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4311 for (j = 0; j < use->n_map_members; j++)
4313 if (!use->cost_map[j].cand
4314 || infinite_cost_p (use->cost_map[j].cost))
4315 continue;
4317 fprintf (dump_file, " %d\t%d\t%d\t",
4318 use->cost_map[j].cand->id,
4319 use->cost_map[j].cost.cost,
4320 use->cost_map[j].cost.complexity);
4321 if (use->cost_map[j].depends_on)
4322 bitmap_print (dump_file,
4323 use->cost_map[j].depends_on, "","");
4324 fprintf (dump_file, "\n");
4327 fprintf (dump_file, "\n");
4329 fprintf (dump_file, "\n");
4333 /* Determines cost of the candidate CAND. */
4335 static void
4336 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4338 comp_cost cost_base;
4339 unsigned cost, cost_step;
4340 tree base;
4342 if (!cand->iv)
4344 cand->cost = 0;
4345 return;
4348 /* There are two costs associated with the candidate -- its increment
4349 and its initialization.  The second is almost negligible for any loop
4350 that rolls enough, so we give it only a small weight.  */
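/* A worked example with made-up costs: if forcing the base into a
   register costs 4 and an addition costs 1, then with an assumed average
   of 5 iterations the computation below yields 1 + 4 / 5 = 1 (integer
   division), i.e. the initialization is almost amortized away.  */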
4352 base = cand->iv->base;
4353 cost_base = force_var_cost (data, base, NULL);
4354 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4356 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4358 /* Prefer the original ivs unless we may gain something by replacing them.
4359 The reason is to make debugging simpler; this is not relevant for
4360 artificial ivs created by other optimization passes. */
4361 if (cand->pos != IP_ORIGINAL
4362 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4363 cost++;
4365 /* Prefer not to insert statements into latch unless there are some
4366 already (so that we do not create unnecessary jumps). */
4367 if (cand->pos == IP_END
4368 && empty_block_p (ip_end_pos (data->current_loop)))
4369 cost++;
4371 cand->cost = cost;
4372 cand->cost_step = cost_step;
4375 /* Determines costs of computation of the candidates. */
4377 static void
4378 determine_iv_costs (struct ivopts_data *data)
4380 unsigned i;
4382 if (dump_file && (dump_flags & TDF_DETAILS))
4384 fprintf (dump_file, "Candidate costs:\n");
4385 fprintf (dump_file, " cand\tcost\n");
4388 for (i = 0; i < n_iv_cands (data); i++)
4390 struct iv_cand *cand = iv_cand (data, i);
4392 determine_iv_cost (data, cand);
4394 if (dump_file && (dump_flags & TDF_DETAILS))
4395 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4398 if (dump_file && (dump_flags & TDF_DETAILS))
4399 fprintf (dump_file, "\n");
4402 /* Calculates cost for having SIZE induction variables. */
4404 static unsigned
4405 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4407 /* We add size to the cost, so that we prefer eliminating ivs
4408 if possible. */
4409 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4412 /* For each size of the induction variable set determine the penalty. */
4414 static void
4415 determine_set_costs (struct ivopts_data *data)
4417 unsigned j, n;
4418 gimple phi;
4419 gimple_stmt_iterator psi;
4420 tree op;
4421 struct loop *loop = data->current_loop;
4422 bitmap_iterator bi;
4424 /* We use the following model (definitely improvable, especially the
4425 cost function -- TODO):
4427 We estimate the number of registers available (using MD data), name it A.
4429 We estimate the number of registers used by the loop, name it U. This
4430 number is obtained as the number of loop phi nodes (not counting virtual
4431 registers and bivs) + the number of variables from outside of the loop.
4433 We set a reserve R (free regs that are used for temporary computations,
4434 etc.). For now the reserve is a constant 3.
4436 Let I be the number of induction variables.
4438 -- if U + I + R <= A, the cost is I * SMALL_COST (just so we do not
4439 encourage creating a lot of ivs without a reason).
4440 -- if A - R < U + I <= A, the cost is I * PRES_COST
4441 -- if U + I > A, the cost is I * PRES_COST and
4442 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
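/* A worked example with made-up numbers: with A = 16 available
   registers, reserve R = 3 and U = 10 registers used by the loop,
   up to I = 3 ivs stay below the A - R threshold and are cheap;
   for I = 4 .. 6 each iv pays the higher PRES_COST; and from I = 7
   on the model additionally charges for the expected spills.  */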
4444 if (dump_file && (dump_flags & TDF_DETAILS))
4446 fprintf (dump_file, "Global costs:\n");
4447 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4448 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4449 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4452 n = 0;
4453 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4455 phi = gsi_stmt (psi);
4456 op = PHI_RESULT (phi);
4458 if (!is_gimple_reg (op))
4459 continue;
4461 if (get_iv (data, op))
4462 continue;
4464 n++;
4467 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4469 struct version_info *info = ver_info (data, j);
4471 if (info->inv_id && info->has_nonlin_use)
4472 n++;
4475 data->regs_used = n;
4476 if (dump_file && (dump_flags & TDF_DETAILS))
4477 fprintf (dump_file, " regs_used %d\n", n);
4479 if (dump_file && (dump_flags & TDF_DETAILS))
4481 fprintf (dump_file, " cost for size:\n");
4482 fprintf (dump_file, " ivs\tcost\n");
4483 for (j = 0; j <= 2 * target_avail_regs; j++)
4484 fprintf (dump_file, " %d\t%d\n", j,
4485 ivopts_global_cost_for_size (data, j));
4486 fprintf (dump_file, "\n");
4490 /* Returns true if A is a cheaper cost pair than B. */
4492 static bool
4493 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4495 int cmp;
4497 if (!a)
4498 return false;
4500 if (!b)
4501 return true;
4503 cmp = compare_costs (a->cost, b->cost);
4504 if (cmp < 0)
4505 return true;
4507 if (cmp > 0)
4508 return false;
4510 /* In case the costs are the same, prefer the cheaper candidate. */
4511 if (a->cand->cost < b->cand->cost)
4512 return true;
4514 return false;
4517 /* Computes the cost field of IVS structure. */
4519 static void
4520 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4522 comp_cost cost = ivs->cand_use_cost;
4523 cost.cost += ivs->cand_cost;
4524 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4526 ivs->cost = cost;
4529 /* Remove the invariants in set INVS from set IVS. */
4531 static void
4532 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4534 bitmap_iterator bi;
4535 unsigned iid;
4537 if (!invs)
4538 return;
4540 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4542 ivs->n_invariant_uses[iid]--;
4543 if (ivs->n_invariant_uses[iid] == 0)
4544 ivs->n_regs--;
4548 /* Set USE not to be expressed by any candidate in IVS. */
4550 static void
4551 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4552 struct iv_use *use)
4554 unsigned uid = use->id, cid;
4555 struct cost_pair *cp;
4557 cp = ivs->cand_for_use[uid];
4558 if (!cp)
4559 return;
4560 cid = cp->cand->id;
4562 ivs->bad_uses++;
4563 ivs->cand_for_use[uid] = NULL;
4564 ivs->n_cand_uses[cid]--;
4566 if (ivs->n_cand_uses[cid] == 0)
4568 bitmap_clear_bit (ivs->cands, cid);
4569 /* Do not count the pseudocandidates. */
4570 if (cp->cand->iv)
4571 ivs->n_regs--;
4572 ivs->n_cands--;
4573 ivs->cand_cost -= cp->cand->cost;
4575 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4578 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4580 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4581 iv_ca_recount_cost (data, ivs);
4584 /* Add invariants in set INVS to set IVS. */
4586 static void
4587 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4589 bitmap_iterator bi;
4590 unsigned iid;
4592 if (!invs)
4593 return;
4595 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4597 ivs->n_invariant_uses[iid]++;
4598 if (ivs->n_invariant_uses[iid] == 1)
4599 ivs->n_regs++;
4603 /* Set cost pair for USE in set IVS to CP. */
4605 static void
4606 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4607 struct iv_use *use, struct cost_pair *cp)
4609 unsigned uid = use->id, cid;
4611 if (ivs->cand_for_use[uid] == cp)
4612 return;
4614 if (ivs->cand_for_use[uid])
4615 iv_ca_set_no_cp (data, ivs, use);
4617 if (cp)
4619 cid = cp->cand->id;
4621 ivs->bad_uses--;
4622 ivs->cand_for_use[uid] = cp;
4623 ivs->n_cand_uses[cid]++;
4624 if (ivs->n_cand_uses[cid] == 1)
4626 bitmap_set_bit (ivs->cands, cid);
4627 /* Do not count the pseudocandidates. */
4628 if (cp->cand->iv)
4629 ivs->n_regs++;
4630 ivs->n_cands++;
4631 ivs->cand_cost += cp->cand->cost;
4633 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4636 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4637 iv_ca_set_add_invariants (ivs, cp->depends_on);
4638 iv_ca_recount_cost (data, ivs);
4642 /* Extend set IVS by expressing USE by some of the candidates in it
4643 if possible. */
4645 static void
4646 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4647 struct iv_use *use)
4649 struct cost_pair *best_cp = NULL, *cp;
4650 bitmap_iterator bi;
4651 unsigned i;
4653 gcc_assert (ivs->upto >= use->id);
4655 if (ivs->upto == use->id)
4657 ivs->upto++;
4658 ivs->bad_uses++;
4661 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4663 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4665 if (cheaper_cost_pair (cp, best_cp))
4666 best_cp = cp;
4669 iv_ca_set_cp (data, ivs, use, best_cp);
4672 /* Get cost for assignment IVS. */
4674 static comp_cost
4675 iv_ca_cost (struct iv_ca *ivs)
4677 /* This was a conditional expression but it triggered a bug in
4678 Sun C 5.5. */
4679 if (ivs->bad_uses)
4680 return infinite_cost;
4681 else
4682 return ivs->cost;
4685 /* Returns true if all dependences of CP are among invariants in IVS. */
4687 static bool
4688 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4690 unsigned i;
4691 bitmap_iterator bi;
4693 if (!cp->depends_on)
4694 return true;
4696 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4698 if (ivs->n_invariant_uses[i] == 0)
4699 return false;
4702 return true;
4705 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP,
4706 and chains it before NEXT_CHANGE. */
4708 static struct iv_ca_delta *
4709 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4710 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4712 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4714 change->use = use;
4715 change->old_cp = old_cp;
4716 change->new_cp = new_cp;
4717 change->next_change = next_change;
4719 return change;
4722 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4723 are rewritten. */
4725 static struct iv_ca_delta *
4726 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4728 struct iv_ca_delta *last;
4730 if (!l2)
4731 return l1;
4733 if (!l1)
4734 return l2;
4736 for (last = l1; last->next_change; last = last->next_change)
4737 continue;
4738 last->next_change = l2;
4740 return l1;
4743 /* Returns the cost pair by which USE is expressed in IVS. */
4745 static struct cost_pair *
4746 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4748 return ivs->cand_for_use[use->id];
4751 /* Reverse the list of changes DELTA, forming its inverse. */
4753 static struct iv_ca_delta *
4754 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4756 struct iv_ca_delta *act, *next, *prev = NULL;
4757 struct cost_pair *tmp;
4759 for (act = delta; act; act = next)
4761 next = act->next_change;
4762 act->next_change = prev;
4763 prev = act;
4765 tmp = act->old_cp;
4766 act->old_cp = act->new_cp;
4767 act->new_cp = tmp;
4770 return prev;
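/* For illustration (hypothetical uses and cost pairs): reversing

     [use1: a -> b] -> [use2: c -> d]

   yields

     [use2: d -> c] -> [use1: b -> a]

   so committing the reversed list undoes the original changes in the
   opposite order.  */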
4773 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4774 reverted instead. */
4776 static void
4777 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4778 struct iv_ca_delta *delta, bool forward)
4780 struct cost_pair *from, *to;
4781 struct iv_ca_delta *act;
4783 if (!forward)
4784 delta = iv_ca_delta_reverse (delta);
4786 for (act = delta; act; act = act->next_change)
4788 from = act->old_cp;
4789 to = act->new_cp;
4790 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4791 iv_ca_set_cp (data, ivs, act->use, to);
4794 if (!forward)
4795 iv_ca_delta_reverse (delta);
4798 /* Returns true if CAND is used in IVS. */
4800 static bool
4801 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4803 return ivs->n_cand_uses[cand->id] > 0;
4806 /* Returns number of induction variable candidates in the set IVS. */
4808 static unsigned
4809 iv_ca_n_cands (struct iv_ca *ivs)
4811 return ivs->n_cands;
4814 /* Free the list of changes DELTA. */
4816 static void
4817 iv_ca_delta_free (struct iv_ca_delta **delta)
4819 struct iv_ca_delta *act, *next;
4821 for (act = *delta; act; act = next)
4823 next = act->next_change;
4824 free (act);
4827 *delta = NULL;
4830 /* Allocates a new iv candidate assignment. */
4832 static struct iv_ca *
4833 iv_ca_new (struct ivopts_data *data)
4835 struct iv_ca *nw = XNEW (struct iv_ca);
4837 nw->upto = 0;
4838 nw->bad_uses = 0;
4839 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4840 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4841 nw->cands = BITMAP_ALLOC (NULL);
4842 nw->n_cands = 0;
4843 nw->n_regs = 0;
4844 nw->cand_use_cost = zero_cost;
4845 nw->cand_cost = 0;
4846 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4847 nw->cost = zero_cost;
4849 return nw;
4852 /* Free memory occupied by the set IVS. */
4854 static void
4855 iv_ca_free (struct iv_ca **ivs)
4857 free ((*ivs)->cand_for_use);
4858 free ((*ivs)->n_cand_uses);
4859 BITMAP_FREE ((*ivs)->cands);
4860 free ((*ivs)->n_invariant_uses);
4861 free (*ivs);
4862 *ivs = NULL;
4865 /* Dumps IVS to FILE. */
4867 static void
4868 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4870 const char *pref = " invariants ";
4871 unsigned i;
4872 comp_cost cost = iv_ca_cost (ivs);
4874 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4875 bitmap_print (file, ivs->cands, " candidates ","\n");
4877 for (i = 1; i <= data->max_inv_id; i++)
4878 if (ivs->n_invariant_uses[i])
4880 fprintf (file, "%s%d", pref, i);
4881 pref = ", ";
4883 fprintf (file, "\n");
4886 /* Try changing the candidate in IVS to CAND for each use.  Return the cost
4887 of the new set, and store the differences in DELTA.  The number of
4888 induction variables in the new set is stored in N_IVS. */
4890 static comp_cost
4891 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4892 struct iv_cand *cand, struct iv_ca_delta **delta,
4893 unsigned *n_ivs)
4895 unsigned i;
4896 comp_cost cost;
4897 struct iv_use *use;
4898 struct cost_pair *old_cp, *new_cp;
4900 *delta = NULL;
4901 for (i = 0; i < ivs->upto; i++)
4903 use = iv_use (data, i);
4904 old_cp = iv_ca_cand_for_use (ivs, use);
4906 if (old_cp
4907 && old_cp->cand == cand)
4908 continue;
4910 new_cp = get_use_iv_cost (data, use, cand);
4911 if (!new_cp)
4912 continue;
4914 if (!iv_ca_has_deps (ivs, new_cp))
4915 continue;
4917 if (!cheaper_cost_pair (new_cp, old_cp))
4918 continue;
4920 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4923 iv_ca_delta_commit (data, ivs, *delta, true);
4924 cost = iv_ca_cost (ivs);
4925 if (n_ivs)
4926 *n_ivs = iv_ca_n_cands (ivs);
4927 iv_ca_delta_commit (data, ivs, *delta, false);
4929 return cost;
4932 /* Try narrowing set IVS by removing CAND. Return the cost of
4933 the new set and store the differences in DELTA. */
4935 static comp_cost
4936 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4937 struct iv_cand *cand, struct iv_ca_delta **delta)
4939 unsigned i, ci;
4940 struct iv_use *use;
4941 struct cost_pair *old_cp, *new_cp, *cp;
4942 bitmap_iterator bi;
4943 struct iv_cand *cnd;
4944 comp_cost cost;
4946 *delta = NULL;
4947 for (i = 0; i < n_iv_uses (data); i++)
4949 use = iv_use (data, i);
4951 old_cp = iv_ca_cand_for_use (ivs, use);
4952 if (old_cp->cand != cand)
4953 continue;
4955 new_cp = NULL;
4957 if (data->consider_all_candidates)
4959 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4961 if (ci == cand->id)
4962 continue;
4964 cnd = iv_cand (data, ci);
4966 cp = get_use_iv_cost (data, use, cnd);
4967 if (!cp)
4968 continue;
4969 if (!iv_ca_has_deps (ivs, cp))
4970 continue;
4972 if (!cheaper_cost_pair (cp, new_cp))
4973 continue;
4975 new_cp = cp;
4978 else
4980 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4982 if (ci == cand->id)
4983 continue;
4985 cnd = iv_cand (data, ci);
4987 cp = get_use_iv_cost (data, use, cnd);
4988 if (!cp)
4989 continue;
4990 if (!iv_ca_has_deps (ivs, cp))
4991 continue;
4993 if (!cheaper_cost_pair (cp, new_cp))
4994 continue;
4996 new_cp = cp;
5000 if (!new_cp)
5002 iv_ca_delta_free (delta);
5003 return infinite_cost;
5006 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5009 iv_ca_delta_commit (data, ivs, *delta, true);
5010 cost = iv_ca_cost (ivs);
5011 iv_ca_delta_commit (data, ivs, *delta, false);
5013 return cost;
5016 /* Try optimizing the set of candidates IVS by removing candidates other
5017 than EXCEPT_CAND from it.  Return cost of the new set, and store
5018 differences in DELTA. */
5020 static comp_cost
5021 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5022 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5024 bitmap_iterator bi;
5025 struct iv_ca_delta *act_delta, *best_delta;
5026 unsigned i;
5027 comp_cost best_cost, acost;
5028 struct iv_cand *cand;
5030 best_delta = NULL;
5031 best_cost = iv_ca_cost (ivs);
5033 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5035 cand = iv_cand (data, i);
5037 if (cand == except_cand)
5038 continue;
5040 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5042 if (compare_costs (acost, best_cost) < 0)
5044 best_cost = acost;
5045 iv_ca_delta_free (&best_delta);
5046 best_delta = act_delta;
5048 else
5049 iv_ca_delta_free (&act_delta);
5052 if (!best_delta)
5054 *delta = NULL;
5055 return best_cost;
5058 /* Recurse to possibly remove other unnecessary ivs. */
5059 iv_ca_delta_commit (data, ivs, best_delta, true);
5060 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5061 iv_ca_delta_commit (data, ivs, best_delta, false);
5062 *delta = iv_ca_delta_join (best_delta, *delta);
5063 return best_cost;
5066 /* Tries to extend the set IVS in the best possible way in order
5067 to express USE. */
5069 static bool
5070 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5071 struct iv_use *use)
5073 comp_cost best_cost, act_cost;
5074 unsigned i;
5075 bitmap_iterator bi;
5076 struct iv_cand *cand;
5077 struct iv_ca_delta *best_delta = NULL, *act_delta;
5078 struct cost_pair *cp;
5080 iv_ca_add_use (data, ivs, use);
5081 best_cost = iv_ca_cost (ivs);
5083 cp = iv_ca_cand_for_use (ivs, use);
5084 if (cp)
5086 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5087 iv_ca_set_no_cp (data, ivs, use);
5090 /* First try important candidates not based on any memory object. Only if
5091 this fails, try the specific ones. Rationale -- in loops with many
5092 variables the best choice often is to use just one generic biv. If we
5093 added many ivs specific to the uses here, the optimization algorithm later
5094 would be likely to get stuck in a local minimum, thus causing us to create
5095 too many ivs.  Going from few ivs to more seems more likely to be
5096 successful -- starting from few ivs, replacing an expensive use by a
5097 specific iv should always be a win. */
5098 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5100 cand = iv_cand (data, i);
5102 if (cand->iv->base_object != NULL_TREE)
5103 continue;
5105 if (iv_ca_cand_used_p (ivs, cand))
5106 continue;
5108 cp = get_use_iv_cost (data, use, cand);
5109 if (!cp)
5110 continue;
5112 iv_ca_set_cp (data, ivs, use, cp);
5113 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5114 iv_ca_set_no_cp (data, ivs, use);
5115 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5117 if (compare_costs (act_cost, best_cost) < 0)
5119 best_cost = act_cost;
5121 iv_ca_delta_free (&best_delta);
5122 best_delta = act_delta;
5124 else
5125 iv_ca_delta_free (&act_delta);
5128 if (infinite_cost_p (best_cost))
5130 for (i = 0; i < use->n_map_members; i++)
5132 cp = use->cost_map + i;
5133 cand = cp->cand;
5134 if (!cand)
5135 continue;
5137 /* Already tried this. */
5138 if (cand->important && cand->iv->base_object == NULL_TREE)
5139 continue;
5141 if (iv_ca_cand_used_p (ivs, cand))
5142 continue;
5144 act_delta = NULL;
5145 iv_ca_set_cp (data, ivs, use, cp);
5146 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5147 iv_ca_set_no_cp (data, ivs, use);
5148 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5149 cp, act_delta);
5151 if (compare_costs (act_cost, best_cost) < 0)
5153 best_cost = act_cost;
5155 if (best_delta)
5156 iv_ca_delta_free (&best_delta);
5157 best_delta = act_delta;
5159 else
5160 iv_ca_delta_free (&act_delta);
5164 iv_ca_delta_commit (data, ivs, best_delta, true);
5165 iv_ca_delta_free (&best_delta);
5167 return !infinite_cost_p (best_cost);
5170 /* Finds an initial assignment of candidates to uses. */
5172 static struct iv_ca *
5173 get_initial_solution (struct ivopts_data *data)
5175 struct iv_ca *ivs = iv_ca_new (data);
5176 unsigned i;
5178 for (i = 0; i < n_iv_uses (data); i++)
5179 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5181 iv_ca_free (&ivs);
5182 return NULL;
5185 return ivs;
5188 /* Tries to improve set of induction variables IVS. */
5190 static bool
5191 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5193 unsigned i, n_ivs;
5194 comp_cost acost, best_cost = iv_ca_cost (ivs);
5195 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5196 struct iv_cand *cand;
5198 /* Try extending the set of induction variables by one. */
5199 for (i = 0; i < n_iv_cands (data); i++)
5201 cand = iv_cand (data, i);
5203 if (iv_ca_cand_used_p (ivs, cand))
5204 continue;
5206 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5207 if (!act_delta)
5208 continue;
5210 /* If we successfully added the candidate and the set is small enough,
5211 try optimizing it by removing other candidates. */
5212 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5214 iv_ca_delta_commit (data, ivs, act_delta, true);
5215 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5216 iv_ca_delta_commit (data, ivs, act_delta, false);
5217 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5220 if (compare_costs (acost, best_cost) < 0)
5222 best_cost = acost;
5223 iv_ca_delta_free (&best_delta);
5224 best_delta = act_delta;
5226 else
5227 iv_ca_delta_free (&act_delta);
5230 if (!best_delta)
5232 /* Try removing the candidates from the set instead. */
5233 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5235 /* Nothing more we can do. */
5236 if (!best_delta)
5237 return false;
5240 iv_ca_delta_commit (data, ivs, best_delta, true);
5241 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5242 iv_ca_delta_free (&best_delta);
5243 return true;
5246 /* Attempts to find the optimal set of induction variables.  We use a simple
5247 greedy heuristic -- we try to replace at most one candidate in the selected
5248 solution and remove the unused ivs while this improves the cost. */
5250 static struct iv_ca *
5251 find_optimal_iv_set (struct ivopts_data *data)
5253 unsigned i;
5254 struct iv_ca *set;
5255 struct iv_use *use;
5257 /* Get the initial solution. */
5258 set = get_initial_solution (data);
5259 if (!set)
5261 if (dump_file && (dump_flags & TDF_DETAILS))
5262 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5263 return NULL;
5266 if (dump_file && (dump_flags & TDF_DETAILS))
5268 fprintf (dump_file, "Initial set of candidates:\n");
5269 iv_ca_dump (data, dump_file, set);
5272 while (try_improve_iv_set (data, set))
5274 if (dump_file && (dump_flags & TDF_DETAILS))
5276 fprintf (dump_file, "Improved to:\n");
5277 iv_ca_dump (data, dump_file, set);
5281 if (dump_file && (dump_flags & TDF_DETAILS))
5283 comp_cost cost = iv_ca_cost (set);
5284 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5287 for (i = 0; i < n_iv_uses (data); i++)
5289 use = iv_use (data, i);
5290 use->selected = iv_ca_cand_for_use (set, use)->cand;
5293 return set;
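/* A hypothetical -fdump-tree-ivopts-details excerpt (numbers made up)
   showing the shape of the output produced above:

     Initial set of candidates:
       cost 23 (complexity 1)
       candidates 1, 5
     Improved to:
       cost 17 (complexity 0)
       candidates 5
     Final cost 17 (complexity 0)

   The greedy loop keeps applying the single best replacement until no
   delta lowers the cost any further.  */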
5296 /* Creates a new induction variable corresponding to CAND. */
5298 static void
5299 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5301 gimple_stmt_iterator incr_pos;
5302 tree base;
5303 bool after = false;
5305 if (!cand->iv)
5306 return;
5308 switch (cand->pos)
5310 case IP_NORMAL:
5311 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5312 break;
5314 case IP_END:
5315 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5316 after = true;
5317 break;
5319 case IP_AFTER_USE:
5320 after = true;
5321 /* fall through */
5322 case IP_BEFORE_USE:
5323 incr_pos = gsi_for_stmt (cand->incremented_at);
5324 break;
5326 case IP_ORIGINAL:
5327 /* Mark that the iv is preserved. */
5328 name_info (data, cand->var_before)->preserve_biv = true;
5329 name_info (data, cand->var_after)->preserve_biv = true;
5331 /* Rewrite the increment so that it uses var_before directly. */
5332 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5334 return;
5337 gimple_add_tmp_var (cand->var_before);
5338 add_referenced_var (cand->var_before);
5340 base = unshare_expr (cand->iv->base);
5342 create_iv (base, unshare_expr (cand->iv->step),
5343 cand->var_before, data->current_loop,
5344 &incr_pos, after, &cand->var_before, &cand->var_after);
5347 /* Creates new induction variables described in SET. */
5349 static void
5350 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5352 unsigned i;
5353 struct iv_cand *cand;
5354 bitmap_iterator bi;
5356 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5358 cand = iv_cand (data, i);
5359 create_new_iv (data, cand);
5364 /* Rewrites USE (definition of iv used in a nonlinear expression)
5365 using candidate CAND. */
5367 static void
5368 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5369 struct iv_use *use, struct iv_cand *cand)
5371 tree comp;
5372 tree op, tgt;
5373 gimple ass;
5374 gimple_stmt_iterator bsi;
5376 /* An important special case -- if we are asked to express the value of
5377 the original iv by itself, just exit; there is no need to
5378 introduce a new computation (that might also need casting the
5379 variable to unsigned and back). */
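/* E.g. for the original biv increment (SSA names purely illustrative)

     i_14 = i_8 + 1;

   expressed by its own candidate, the statement can stay exactly as it
   is; otherwise it is rebuilt below as var_before + step.  */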
5380 if (cand->pos == IP_ORIGINAL
5381 && cand->incremented_at == use->stmt)
5383 tree step, ctype, utype;
5384 enum tree_code incr_code = PLUS_EXPR, old_code;
5386 gcc_assert (is_gimple_assign (use->stmt));
5387 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5389 step = cand->iv->step;
5390 ctype = TREE_TYPE (step);
5391 utype = TREE_TYPE (cand->var_after);
5392 if (TREE_CODE (step) == NEGATE_EXPR)
5394 incr_code = MINUS_EXPR;
5395 step = TREE_OPERAND (step, 0);
5398 /* Check whether we may leave the computation unchanged.
5399 This is the case only if it does not rely on other
5400 computations in the loop -- otherwise, the computation
5401 we rely upon may be removed in remove_unused_ivs,
5402 thus leading to ICE. */
5403 old_code = gimple_assign_rhs_code (use->stmt);
5404 if (old_code == PLUS_EXPR
5405 || old_code == MINUS_EXPR
5406 || old_code == POINTER_PLUS_EXPR)
5408 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5409 op = gimple_assign_rhs2 (use->stmt);
5410 else if (old_code != MINUS_EXPR
5411 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5412 op = gimple_assign_rhs1 (use->stmt);
5413 else
5414 op = NULL_TREE;
5416 else
5417 op = NULL_TREE;
5419 if (op
5420 && (TREE_CODE (op) == INTEGER_CST
5421 || operand_equal_p (op, step, 0)))
5422 return;
5424 /* Otherwise, add the necessary computations to express
5425 the iv. */
5426 op = fold_convert (ctype, cand->var_before);
5427 comp = fold_convert (utype,
5428 build2 (incr_code, ctype, op,
5429 unshare_expr (step)));
5431 else
5433 comp = get_computation (data->current_loop, use, cand);
5434 gcc_assert (comp != NULL_TREE);
5437 switch (gimple_code (use->stmt))
5439 case GIMPLE_PHI:
5440 tgt = PHI_RESULT (use->stmt);
5442 /* If we should keep the biv, do not replace it. */
5443 if (name_info (data, tgt)->preserve_biv)
5444 return;
5446 bsi = gsi_after_labels (gimple_bb (use->stmt));
5447 break;
5449 case GIMPLE_ASSIGN:
5450 tgt = gimple_assign_lhs (use->stmt);
5451 bsi = gsi_for_stmt (use->stmt);
5452 break;
5454 default:
5455 gcc_unreachable ();
5458 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5459 true, GSI_SAME_STMT);
5461 if (gimple_code (use->stmt) == GIMPLE_PHI)
5463 ass = gimple_build_assign (tgt, op);
5464 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5466 bsi = gsi_for_stmt (use->stmt);
5467 remove_phi_node (&bsi, false);
5469 else
5471 gimple_assign_set_rhs_from_tree (&bsi, op);
5472 use->stmt = gsi_stmt (bsi);
5476 /* Replaces ssa name in index IDX by its basic variable. Callback for
5477 for_each_index. */
5479 static bool
5480 idx_remove_ssa_names (tree base, tree *idx,
5481 void *data ATTRIBUTE_UNUSED)
5483 tree *op;
5485 if (TREE_CODE (*idx) == SSA_NAME)
5486 *idx = SSA_NAME_VAR (*idx);
5488 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5490 op = &TREE_OPERAND (base, 2);
5491 if (*op
5492 && TREE_CODE (*op) == SSA_NAME)
5493 *op = SSA_NAME_VAR (*op);
5494 op = &TREE_OPERAND (base, 3);
5495 if (*op
5496 && TREE_CODE (*op) == SSA_NAME)
5497 *op = SSA_NAME_VAR (*op);
5500 return true;
5503 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5505 static tree
5506 unshare_and_remove_ssa_names (tree ref)
5508 ref = unshare_expr (ref);
5509 for_each_index (&ref, idx_remove_ssa_names, NULL);
5511 return ref;
5514 /* Copies the reference information from OLD_REF to NEW_REF. */
5516 static void
5517 copy_ref_info (tree new_ref, tree old_ref)
5519 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5520 copy_mem_ref_info (new_ref, old_ref);
5521 else
5523 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5524 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5525 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5529 /* Rewrites USE (address that is an iv) using candidate CAND. */
5531 static void
5532 rewrite_use_address (struct ivopts_data *data,
5533 struct iv_use *use, struct iv_cand *cand)
5535 aff_tree aff;
5536 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5537 tree base_hint = NULL_TREE;
5538 tree ref;
5539 bool ok;
5541 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5542 gcc_assert (ok);
5543 unshare_aff_combination (&aff);
5545 /* To avoid undefined overflow problems, all IV candidates use unsigned
5546 integer types. The drawback is that this makes it impossible for
5547 create_mem_ref to distinguish an IV that is based on a memory object
5548 from one that represents simply an offset.
5550 To work around this problem, we pass a hint to create_mem_ref that
5551 indicates which variable (if any) in aff is an IV based on a memory
5552 object. Note that we only consider the candidate. If this is not
5553 based on an object, the base of the reference is in some subexpression
5554 of the use -- but these will use pointer types, so they are recognized
5555 by the create_mem_ref heuristics anyway. */
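/* For illustration: if the use is a[i] and CAND's iv was created for the
   memory object "a", the candidate's value at USE->stmt is passed as the
   hint, letting create_mem_ref tell an object-based IV apart from one
   that is merely an offset.  */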
5556 if (cand->iv->base_object)
5557 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
5559 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
5560 data->speed);
5561 copy_ref_info (ref, *use->op_p);
5562 *use->op_p = ref;
5565 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5566 candidate CAND. */
5568 static void
5569 rewrite_use_compare (struct ivopts_data *data,
5570 struct iv_use *use, struct iv_cand *cand)
5572 tree comp, *var_p, op, bound;
5573 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5574 enum tree_code compare;
5575 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5576 bool ok;
5578 bound = cp->value;
5579 if (bound)
5581 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5582 tree var_type = TREE_TYPE (var);
5583 gimple_seq stmts;
5585 compare = iv_elimination_compare (data, use);
5586 bound = unshare_expr (fold_convert (var_type, bound));
5587 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5588 if (stmts)
5589 gsi_insert_seq_on_edge_immediate (
5590 loop_preheader_edge (data->current_loop),
5591 stmts);
5593 gimple_cond_set_lhs (use->stmt, var);
5594 gimple_cond_set_code (use->stmt, compare);
5595 gimple_cond_set_rhs (use->stmt, op);
5596 return;
5599 /* The induction variable elimination failed; just express the original
5600 giv. */
5601 comp = get_computation (data->current_loop, use, cand);
5602 gcc_assert (comp != NULL_TREE);
5604 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5605 gcc_assert (ok);
5607 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5608 true, GSI_SAME_STMT);
5611 /* Rewrites USE using candidate CAND. */
5613 static void
5614 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5616 switch (use->type)
5618 case USE_NONLINEAR_EXPR:
5619 rewrite_use_nonlinear_expr (data, use, cand);
5620 break;
5622 case USE_ADDRESS:
5623 rewrite_use_address (data, use, cand);
5624 break;
5626 case USE_COMPARE:
5627 rewrite_use_compare (data, use, cand);
5628 break;
5630 default:
5631 gcc_unreachable ();
5634 update_stmt (use->stmt);
5637 /* Rewrite the uses using the selected induction variables. */
5639 static void
5640 rewrite_uses (struct ivopts_data *data)
5642 unsigned i;
5643 struct iv_cand *cand;
5644 struct iv_use *use;
5646 for (i = 0; i < n_iv_uses (data); i++)
5648 use = iv_use (data, i);
5649 cand = use->selected;
5650 gcc_assert (cand);
5652 rewrite_use (data, use, cand);
5656 /* Removes the ivs that are not used after rewriting. */
5658 static void
5659 remove_unused_ivs (struct ivopts_data *data)
5661 unsigned j;
5662 bitmap_iterator bi;
5663 bitmap toremove = BITMAP_ALLOC (NULL);
5665 /* Figure out an order in which to release SSA DEFs so that we don't
5666 release something that we'd have to propagate into a debug stmt
5667 afterwards. */
5668 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5670 struct version_info *info;
5672 info = ver_info (data, j);
5673 if (info->iv
5674 && !integer_zerop (info->iv->step)
5675 && !info->inv_id
5676 && !info->iv->have_use_for
5677 && !info->preserve_biv)
5678 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5681 release_defs_bitset (toremove);
5683 BITMAP_FREE (toremove);
5686 /* Frees data allocated by the optimization of a single loop. */
5688 static void
5689 free_loop_data (struct ivopts_data *data)
5691 unsigned i, j;
5692 bitmap_iterator bi;
5693 tree obj;
5695 if (data->niters)
5697 pointer_map_destroy (data->niters);
5698 data->niters = NULL;
5701 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5703 struct version_info *info;
5705 info = ver_info (data, i);
5706 if (info->iv)
5707 free (info->iv);
5708 info->iv = NULL;
5709 info->has_nonlin_use = false;
5710 info->preserve_biv = false;
5711 info->inv_id = 0;
5713 bitmap_clear (data->relevant);
5714 bitmap_clear (data->important_candidates);
5716 for (i = 0; i < n_iv_uses (data); i++)
5718 struct iv_use *use = iv_use (data, i);
5720 free (use->iv);
5721 BITMAP_FREE (use->related_cands);
5722 for (j = 0; j < use->n_map_members; j++)
5723 if (use->cost_map[j].depends_on)
5724 BITMAP_FREE (use->cost_map[j].depends_on);
5725 free (use->cost_map);
5726 free (use);
5728 VEC_truncate (iv_use_p, data->iv_uses, 0);
5730 for (i = 0; i < n_iv_cands (data); i++)
5732 struct iv_cand *cand = iv_cand (data, i);
5734 if (cand->iv)
5735 free (cand->iv);
5736 if (cand->depends_on)
5737 BITMAP_FREE (cand->depends_on);
5738 free (cand);
5740 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5742 if (data->version_info_size < num_ssa_names)
5744 data->version_info_size = 2 * num_ssa_names;
5745 free (data->version_info);
5746 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5749 data->max_inv_id = 0;
5751 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5752 SET_DECL_RTL (obj, NULL_RTX);
5754 VEC_truncate (tree, decl_rtl_to_reset, 0);
5757 /* Finalizes data structures used by the iv optimization pass. */
5760 static void
5761 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5763 free_loop_data (data);
5764 free (data->version_info);
5765 BITMAP_FREE (data->relevant);
5766 BITMAP_FREE (data->important_candidates);
5768 VEC_free (tree, heap, decl_rtl_to_reset);
5769 VEC_free (iv_use_p, heap, data->iv_uses);
5770 VEC_free (iv_cand_p, heap, data->iv_candidates);
5773 /* Optimizes the LOOP. Returns true if anything changed. */
5775 static bool
5776 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5778 bool changed = false;
5779 struct iv_ca *iv_ca;
5780 edge exit;
5781 basic_block *body;
5783 gcc_assert (!data->niters);
5784 data->current_loop = loop;
5785 data->speed = optimize_loop_for_speed_p (loop);
5787 if (dump_file && (dump_flags & TDF_DETAILS))
5789 fprintf (dump_file, "Processing loop %d\n", loop->num);
5791 exit = single_dom_exit (loop);
5792 if (exit)
5794 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5795 exit->src->index, exit->dest->index);
5796 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5797 fprintf (dump_file, "\n");
5800 fprintf (dump_file, "\n");
5803 body = get_loop_body (loop);
5804 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5805 free (body);
5807 /* Determine for each ssa name whether it behaves as an induction
5808 variable in some loop. */
5809 if (!find_induction_variables (data))
5810 goto finish;
5812 /* Finds interesting uses (item 1). */
5813 find_interesting_uses (data);
5814 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5815 goto finish;
5817 /* Finds candidates for the induction variables (item 2). */
5818 find_iv_candidates (data);
5820 /* Calculates the costs (item 3, part 1). */
5821 determine_iv_costs (data);
5822 determine_use_iv_costs (data);
5823 determine_set_costs (data);
5825 /* Find the optimal set of induction variables (item 3, part 2). */
5826 iv_ca = find_optimal_iv_set (data);
5827 if (!iv_ca)
5828 goto finish;
5829 changed = true;
5831 /* Create the new induction variables (item 4, part 1). */
5832 create_new_ivs (data, iv_ca);
5833 iv_ca_free (&iv_ca);
5835 /* Rewrite the uses (item 4, part 2). */
5836 rewrite_uses (data);
5838 /* Remove the ivs that are unused after rewriting. */
5839 remove_unused_ivs (data);
5841 /* We have changed the structure of induction variables; it might happen
5842 that definitions in the scev database refer to some of them that were
5843 eliminated. */
5844 scev_reset ();
5846 finish:
5847 free_loop_data (data);
5849 return changed;
5852 /* Main entry point. Optimizes induction variables in loops. */
5854 void
5855 tree_ssa_iv_optimize (void)
5857 struct loop *loop;
5858 struct ivopts_data data;
5859 loop_iterator li;
5861 tree_ssa_iv_optimize_init (&data);
5863 /* Optimize the loops starting with the innermost ones. */
5864 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5866 if (dump_file && (dump_flags & TDF_DETAILS))
5867 flow_loop_dump (loop, dump_file, NULL, 1);
5869 tree_ssa_iv_optimize_loop (&data, loop);
5872 tree_ssa_iv_optimize_finalize (&data);