Daily bump.
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob7bcb9810e7a0c53ea80e9995ac7c58007cc1bdca
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "tree.h"
69 #include "rtl.h"
70 #include "tm_p.h"
71 #include "hard-reg-set.h"
72 #include "basic-block.h"
73 #include "output.h"
74 #include "diagnostic.h"
75 #include "tree-flow.h"
76 #include "tree-dump.h"
77 #include "timevar.h"
78 #include "cfgloop.h"
79 #include "varray.h"
80 #include "expr.h"
81 #include "tree-pass.h"
82 #include "ggc.h"
83 #include "insn-config.h"
84 #include "recog.h"
85 #include "pointer-set.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
93 #include "target.h"
95 /* The infinite cost. */
96 #define INFTY 10000000
98 /* The expected number of loop iterations. TODO -- use profiling instead of
99 this. */
100 #define AVG_LOOP_NITER(LOOP) 5
103 /* Representation of the induction variable. */
104 struct iv
106 tree base; /* Initial value of the iv. */
107 tree base_object; /* A memory object to that the induction variable points. */
108 tree step; /* Step of the iv (constant only). */
109 tree ssa_name; /* The ssa name with the value. */
110 bool biv_p; /* Is it a biv? */
111 bool have_use_for; /* Do we already have a use for it? */
112 unsigned use_id; /* The identifier in the use if it is the case. */
115 /* Per-ssa version information (induction variable descriptions, etc.). */
116 struct version_info
118 tree name; /* The ssa name. */
119 struct iv *iv; /* Induction variable description. */
120 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 unsigned inv_id; /* Id of an invariant. */
123 bool preserve_biv; /* For the original biv, whether to preserve it. */
126 /* Types of uses. */
127 enum use_type
129 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
130 USE_ADDRESS, /* Use in an address. */
131 USE_COMPARE /* Use is a compare. */
134 /* Cost of a computation. */
135 typedef struct
137 unsigned cost; /* The runtime cost. */
138 unsigned complexity; /* The estimate of the complexity of the code for
139 the computation (in no concrete units --
140 complexity field should be larger for more
141 complex expressions and addressing modes). */
142 } comp_cost;
144 static const comp_cost zero_cost = {0, 0};
145 static const comp_cost infinite_cost = {INFTY, INFTY};
147 /* The candidate - cost pair. */
148 struct cost_pair
150 struct iv_cand *cand; /* The candidate. */
151 comp_cost cost; /* The cost. */
152 bitmap depends_on; /* The list of invariants that have to be
153 preserved. */
154 tree value; /* For final value elimination, the expression for
155 the final value of the iv. For iv elimination,
156 the new bound to compare with. */
159 /* Use. */
160 struct iv_use
162 unsigned id; /* The id of the use. */
163 enum use_type type; /* Type of the use. */
164 struct iv *iv; /* The induction variable it is based on. */
165 tree stmt; /* Statement in that it occurs. */
166 tree *op_p; /* The place where it occurs. */
167 bitmap related_cands; /* The set of "related" iv candidates, plus the common
168 important ones. */
170 unsigned n_map_members; /* Number of candidates in the cost_map list. */
171 struct cost_pair *cost_map;
172 /* The costs wrto the iv candidates. */
174 struct iv_cand *selected;
175 /* The selected candidate. */
178 /* The position where the iv is computed. */
179 enum iv_position
181 IP_NORMAL, /* At the end, just before the exit condition. */
182 IP_END, /* At the end of the latch block. */
183 IP_ORIGINAL /* The original biv. */
186 /* The induction variable candidate. */
187 struct iv_cand
189 unsigned id; /* The number of the candidate. */
190 bool important; /* Whether this is an "important" candidate, i.e. such
191 that it should be considered by all uses. */
192 enum iv_position pos; /* Where it is computed. */
193 tree incremented_at; /* For original biv, the statement where it is
194 incremented. */
195 tree var_before; /* The variable used for it before increment. */
196 tree var_after; /* The variable used for it after increment. */
197 struct iv *iv; /* The value of the candidate. NULL for
198 "pseudocandidate" used to indicate the possibility
199 to replace the final value of an iv by direct
200 computation of the value. */
201 unsigned cost; /* Cost of the candidate. */
202 bitmap depends_on; /* The list of invariants that are used in step of the
203 biv. */
206 /* The data used by the induction variable optimizations. */
208 typedef struct iv_use *iv_use_p;
209 DEF_VEC_P(iv_use_p);
210 DEF_VEC_ALLOC_P(iv_use_p,heap);
212 typedef struct iv_cand *iv_cand_p;
213 DEF_VEC_P(iv_cand_p);
214 DEF_VEC_ALLOC_P(iv_cand_p,heap);
216 struct ivopts_data
218 /* The currently optimized loop. */
219 struct loop *current_loop;
221 /* Number of registers used in it. */
222 unsigned regs_used;
224 /* Numbers of iterations for all exits of the current loop. */
225 struct pointer_map_t *niters;
227 /* The size of version_info array allocated. */
228 unsigned version_info_size;
230 /* The array of information for the ssa names. */
231 struct version_info *version_info;
233 /* The bitmap of indices in version_info whose value was changed. */
234 bitmap relevant;
236 /* The maximum invariant id. */
237 unsigned max_inv_id;
239 /* The uses of induction variables. */
240 VEC(iv_use_p,heap) *iv_uses;
242 /* The candidates. */
243 VEC(iv_cand_p,heap) *iv_candidates;
245 /* A bitmap of important candidates. */
246 bitmap important_candidates;
248 /* Whether to consider just related and important candidates when replacing a
249 use. */
250 bool consider_all_candidates;
253 /* An assignment of iv candidates to uses. */
255 struct iv_ca
257 /* The number of uses covered by the assignment. */
258 unsigned upto;
260 /* Number of uses that cannot be expressed by the candidates in the set. */
261 unsigned bad_uses;
263 /* Candidate assigned to a use, together with the related costs. */
264 struct cost_pair **cand_for_use;
266 /* Number of times each candidate is used. */
267 unsigned *n_cand_uses;
269 /* The candidates used. */
270 bitmap cands;
272 /* The number of candidates in the set. */
273 unsigned n_cands;
275 /* Total number of registers needed. */
276 unsigned n_regs;
278 /* Total cost of expressing uses. */
279 comp_cost cand_use_cost;
281 /* Total cost of candidates. */
282 unsigned cand_cost;
284 /* Number of times each invariant is used. */
285 unsigned *n_invariant_uses;
287 /* Total cost of the assignment. */
288 comp_cost cost;
291 /* Difference of two iv candidate assignments. */
293 struct iv_ca_delta
295 /* Changed use. */
296 struct iv_use *use;
298 /* An old assignment (for rollback purposes). */
299 struct cost_pair *old_cp;
301 /* A new assignment. */
302 struct cost_pair *new_cp;
304 /* Next change in the list. */
305 struct iv_ca_delta *next_change;
308 /* Bound on number of candidates below that all candidates are considered. */
310 #define CONSIDER_ALL_CANDIDATES_BOUND \
311 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
313 /* If there are more iv occurrences, we just give up (it is quite unlikely that
314 optimizing such a loop would help, and it would take ages). */
316 #define MAX_CONSIDERED_USES \
317 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
319 /* If there are at most this number of ivs in the set, try removing unnecessary
320 ivs from the set always. */
322 #define ALWAYS_PRUNE_CAND_SET_BOUND \
323 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
325 /* The list of trees for that the decl_rtl field must be reset is stored
326 here. */
328 static VEC(tree,heap) *decl_rtl_to_reset;
330 /* Number of uses recorded in DATA. */
332 static inline unsigned
333 n_iv_uses (struct ivopts_data *data)
335 return VEC_length (iv_use_p, data->iv_uses);
338 /* Ith use recorded in DATA. */
340 static inline struct iv_use *
341 iv_use (struct ivopts_data *data, unsigned i)
343 return VEC_index (iv_use_p, data->iv_uses, i);
346 /* Number of candidates recorded in DATA. */
348 static inline unsigned
349 n_iv_cands (struct ivopts_data *data)
351 return VEC_length (iv_cand_p, data->iv_candidates);
354 /* Ith candidate recorded in DATA. */
356 static inline struct iv_cand *
357 iv_cand (struct ivopts_data *data, unsigned i)
359 return VEC_index (iv_cand_p, data->iv_candidates, i);
362 /* The single loop exit if it dominates the latch, NULL otherwise. */
364 edge
365 single_dom_exit (struct loop *loop)
367 edge exit = single_exit (loop);
369 if (!exit)
370 return NULL;
372 if (!just_once_each_iteration_p (loop, exit->src))
373 return NULL;
375 return exit;
378 /* Dumps information about the induction variable IV to FILE. */
380 extern void dump_iv (FILE *, struct iv *);
381 void
382 dump_iv (FILE *file, struct iv *iv)
384 if (iv->ssa_name)
386 fprintf (file, "ssa name ");
387 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
388 fprintf (file, "\n");
391 fprintf (file, " type ");
392 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
393 fprintf (file, "\n");
395 if (iv->step)
397 fprintf (file, " base ");
398 print_generic_expr (file, iv->base, TDF_SLIM);
399 fprintf (file, "\n");
401 fprintf (file, " step ");
402 print_generic_expr (file, iv->step, TDF_SLIM);
403 fprintf (file, "\n");
405 else
407 fprintf (file, " invariant ");
408 print_generic_expr (file, iv->base, TDF_SLIM);
409 fprintf (file, "\n");
412 if (iv->base_object)
414 fprintf (file, " base object ");
415 print_generic_expr (file, iv->base_object, TDF_SLIM);
416 fprintf (file, "\n");
419 if (iv->biv_p)
420 fprintf (file, " is a biv\n");
423 /* Dumps information about the USE to FILE. */
425 extern void dump_use (FILE *, struct iv_use *);
426 void
427 dump_use (FILE *file, struct iv_use *use)
429 fprintf (file, "use %d\n", use->id);
431 switch (use->type)
433 case USE_NONLINEAR_EXPR:
434 fprintf (file, " generic\n");
435 break;
437 case USE_ADDRESS:
438 fprintf (file, " address\n");
439 break;
441 case USE_COMPARE:
442 fprintf (file, " compare\n");
443 break;
445 default:
446 gcc_unreachable ();
449 fprintf (file, " in statement ");
450 print_generic_expr (file, use->stmt, TDF_SLIM);
451 fprintf (file, "\n");
453 fprintf (file, " at position ");
454 if (use->op_p)
455 print_generic_expr (file, *use->op_p, TDF_SLIM);
456 fprintf (file, "\n");
458 dump_iv (file, use->iv);
460 if (use->related_cands)
462 fprintf (file, " related candidates ");
463 dump_bitmap (file, use->related_cands);
467 /* Dumps information about the uses to FILE. */
469 extern void dump_uses (FILE *, struct ivopts_data *);
470 void
471 dump_uses (FILE *file, struct ivopts_data *data)
473 unsigned i;
474 struct iv_use *use;
476 for (i = 0; i < n_iv_uses (data); i++)
478 use = iv_use (data, i);
480 dump_use (file, use);
481 fprintf (file, "\n");
485 /* Dumps information about induction variable candidate CAND to FILE. */
487 extern void dump_cand (FILE *, struct iv_cand *);
488 void
489 dump_cand (FILE *file, struct iv_cand *cand)
491 struct iv *iv = cand->iv;
493 fprintf (file, "candidate %d%s\n",
494 cand->id, cand->important ? " (important)" : "");
496 if (cand->depends_on)
498 fprintf (file, " depends on ");
499 dump_bitmap (file, cand->depends_on);
502 if (!iv)
504 fprintf (file, " final value replacement\n");
505 return;
508 switch (cand->pos)
510 case IP_NORMAL:
511 fprintf (file, " incremented before exit test\n");
512 break;
514 case IP_END:
515 fprintf (file, " incremented at end\n");
516 break;
518 case IP_ORIGINAL:
519 fprintf (file, " original biv\n");
520 break;
523 dump_iv (file, iv);
526 /* Returns the info for ssa version VER. */
528 static inline struct version_info *
529 ver_info (struct ivopts_data *data, unsigned ver)
531 return data->version_info + ver;
534 /* Returns the info for ssa name NAME. */
536 static inline struct version_info *
537 name_info (struct ivopts_data *data, tree name)
539 return ver_info (data, SSA_NAME_VERSION (name));
542 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
543 emitted in LOOP. */
545 static bool
546 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
548 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
550 gcc_assert (bb);
552 if (sbb == loop->latch)
553 return true;
555 if (sbb != bb)
556 return false;
558 return stmt == last_stmt (bb);
561 /* Returns true if STMT if after the place where the original induction
562 variable CAND is incremented. */
564 static bool
565 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
567 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
568 basic_block stmt_bb = bb_for_stmt (stmt);
569 block_stmt_iterator bsi;
571 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
572 return false;
574 if (stmt_bb != cand_bb)
575 return true;
577 /* Scan the block from the end, since the original ivs are usually
578 incremented at the end of the loop body. */
579 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
581 if (bsi_stmt (bsi) == cand->incremented_at)
582 return false;
583 if (bsi_stmt (bsi) == stmt)
584 return true;
588 /* Returns true if STMT if after the place where the induction variable
589 CAND is incremented in LOOP. */
591 static bool
592 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
594 switch (cand->pos)
596 case IP_END:
597 return false;
599 case IP_NORMAL:
600 return stmt_after_ip_normal_pos (loop, stmt);
602 case IP_ORIGINAL:
603 return stmt_after_ip_original_pos (cand, stmt);
605 default:
606 gcc_unreachable ();
610 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
612 static bool
613 abnormal_ssa_name_p (tree exp)
615 if (!exp)
616 return false;
618 if (TREE_CODE (exp) != SSA_NAME)
619 return false;
621 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
624 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
625 abnormal phi node. Callback for for_each_index. */
627 static bool
628 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
629 void *data ATTRIBUTE_UNUSED)
631 if (TREE_CODE (base) == ARRAY_REF)
633 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
634 return false;
635 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
636 return false;
639 return !abnormal_ssa_name_p (*index);
642 /* Returns true if EXPR contains a ssa name that occurs in an
643 abnormal phi node. */
645 bool
646 contains_abnormal_ssa_name_p (tree expr)
648 enum tree_code code;
649 enum tree_code_class codeclass;
651 if (!expr)
652 return false;
654 code = TREE_CODE (expr);
655 codeclass = TREE_CODE_CLASS (code);
657 if (code == SSA_NAME)
658 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
660 if (code == INTEGER_CST
661 || is_gimple_min_invariant (expr))
662 return false;
664 if (code == ADDR_EXPR)
665 return !for_each_index (&TREE_OPERAND (expr, 0),
666 idx_contains_abnormal_ssa_name_p,
667 NULL);
669 switch (codeclass)
671 case tcc_binary:
672 case tcc_comparison:
673 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
674 return true;
676 /* Fallthru. */
677 case tcc_unary:
678 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
679 return true;
681 break;
683 default:
684 gcc_unreachable ();
687 return false;
690 /* Returns tree describing number of iterations determined from
691 EXIT of DATA->current_loop, or NULL if something goes wrong. */
693 static tree
694 niter_for_exit (struct ivopts_data *data, edge exit)
696 struct tree_niter_desc desc;
697 tree niter;
698 void **slot;
700 if (!data->niters)
702 data->niters = pointer_map_create ();
703 slot = NULL;
705 else
706 slot = pointer_map_contains (data->niters, exit);
708 if (!slot)
710 /* Try to determine number of iterations. We must know it
711 unconditionally (i.e., without possibility of # of iterations
712 being zero). Also, we cannot safely work with ssa names that
713 appear in phi nodes on abnormal edges, so that we do not create
714 overlapping life ranges for them (PR 27283). */
715 if (number_of_iterations_exit (data->current_loop,
716 exit, &desc, true)
717 && integer_zerop (desc.may_be_zero)
718 && !contains_abnormal_ssa_name_p (desc.niter))
719 niter = desc.niter;
720 else
721 niter = NULL_TREE;
723 *pointer_map_insert (data->niters, exit) = niter;
725 else
726 niter = (tree) *slot;
728 return niter;
731 /* Returns tree describing number of iterations determined from
732 single dominating exit of DATA->current_loop, or NULL if something
733 goes wrong. */
735 static tree
736 niter_for_single_dom_exit (struct ivopts_data *data)
738 edge exit = single_dom_exit (data->current_loop);
740 if (!exit)
741 return NULL;
743 return niter_for_exit (data, exit);
746 /* Initializes data structures used by the iv optimization pass, stored
747 in DATA. */
749 static void
750 tree_ssa_iv_optimize_init (struct ivopts_data *data)
752 data->version_info_size = 2 * num_ssa_names;
753 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
754 data->relevant = BITMAP_ALLOC (NULL);
755 data->important_candidates = BITMAP_ALLOC (NULL);
756 data->max_inv_id = 0;
757 data->niters = NULL;
758 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
759 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
760 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
763 /* Returns a memory object to that EXPR points. In case we are able to
764 determine that it does not point to any such object, NULL is returned. */
766 static tree
767 determine_base_object (tree expr)
769 enum tree_code code = TREE_CODE (expr);
770 tree base, obj;
772 /* If this is a pointer casted to any type, we need to determine
773 the base object for the pointer; so handle conversions before
774 throwing away non-pointer expressions. */
775 if (TREE_CODE (expr) == NOP_EXPR
776 || TREE_CODE (expr) == CONVERT_EXPR)
777 return determine_base_object (TREE_OPERAND (expr, 0));
779 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
780 return NULL_TREE;
782 switch (code)
784 case INTEGER_CST:
785 return NULL_TREE;
787 case ADDR_EXPR:
788 obj = TREE_OPERAND (expr, 0);
789 base = get_base_address (obj);
791 if (!base)
792 return expr;
794 if (TREE_CODE (base) == INDIRECT_REF)
795 return determine_base_object (TREE_OPERAND (base, 0));
797 return fold_convert (ptr_type_node,
798 build_fold_addr_expr (base));
800 case POINTER_PLUS_EXPR:
801 return determine_base_object (TREE_OPERAND (expr, 0));
803 case PLUS_EXPR:
804 case MINUS_EXPR:
805 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
806 gcc_unreachable ();
808 default:
809 return fold_convert (ptr_type_node, expr);
813 /* Allocates an induction variable with given initial value BASE and step STEP
814 for loop LOOP. */
816 static struct iv *
817 alloc_iv (tree base, tree step)
819 struct iv *iv = XCNEW (struct iv);
820 gcc_assert (step != NULL_TREE);
822 iv->base = base;
823 iv->base_object = determine_base_object (base);
824 iv->step = step;
825 iv->biv_p = false;
826 iv->have_use_for = false;
827 iv->use_id = 0;
828 iv->ssa_name = NULL_TREE;
830 return iv;
833 /* Sets STEP and BASE for induction variable IV. */
835 static void
836 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
838 struct version_info *info = name_info (data, iv);
840 gcc_assert (!info->iv);
842 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
843 info->iv = alloc_iv (base, step);
844 info->iv->ssa_name = iv;
847 /* Finds induction variable declaration for VAR. */
849 static struct iv *
850 get_iv (struct ivopts_data *data, tree var)
852 basic_block bb;
853 tree type = TREE_TYPE (var);
855 if (!POINTER_TYPE_P (type)
856 && !INTEGRAL_TYPE_P (type))
857 return NULL;
859 if (!name_info (data, var)->iv)
861 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
863 if (!bb
864 || !flow_bb_inside_loop_p (data->current_loop, bb))
865 set_iv (data, var, var, build_int_cst (type, 0));
868 return name_info (data, var)->iv;
871 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
872 not define a simple affine biv with nonzero step. */
874 static tree
875 determine_biv_step (tree phi)
877 struct loop *loop = bb_for_stmt (phi)->loop_father;
878 tree name = PHI_RESULT (phi);
879 affine_iv iv;
881 if (!is_gimple_reg (name))
882 return NULL_TREE;
884 if (!simple_iv (loop, phi, name, &iv, true))
885 return NULL_TREE;
887 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
890 /* Finds basic ivs. */
892 static bool
893 find_bivs (struct ivopts_data *data)
895 tree phi, step, type, base;
896 bool found = false;
897 struct loop *loop = data->current_loop;
899 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
901 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
902 continue;
904 step = determine_biv_step (phi);
905 if (!step)
906 continue;
908 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
909 base = expand_simple_operations (base);
910 if (contains_abnormal_ssa_name_p (base)
911 || contains_abnormal_ssa_name_p (step))
912 continue;
914 type = TREE_TYPE (PHI_RESULT (phi));
915 base = fold_convert (type, base);
916 if (step)
917 step = fold_convert (type, step);
919 set_iv (data, PHI_RESULT (phi), base, step);
920 found = true;
923 return found;
926 /* Marks basic ivs. */
928 static void
929 mark_bivs (struct ivopts_data *data)
931 tree phi, var;
932 struct iv *iv, *incr_iv;
933 struct loop *loop = data->current_loop;
934 basic_block incr_bb;
936 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
938 iv = get_iv (data, PHI_RESULT (phi));
939 if (!iv)
940 continue;
942 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
943 incr_iv = get_iv (data, var);
944 if (!incr_iv)
945 continue;
947 /* If the increment is in the subloop, ignore it. */
948 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
949 if (incr_bb->loop_father != data->current_loop
950 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
951 continue;
953 iv->biv_p = true;
954 incr_iv->biv_p = true;
958 /* Checks whether STMT defines a linear induction variable and stores its
959 parameters to IV. */
961 static bool
962 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
964 tree lhs;
965 struct loop *loop = data->current_loop;
967 iv->base = NULL_TREE;
968 iv->step = NULL_TREE;
970 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
971 return false;
973 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
974 if (TREE_CODE (lhs) != SSA_NAME)
975 return false;
977 if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
978 return false;
979 iv->base = expand_simple_operations (iv->base);
981 if (contains_abnormal_ssa_name_p (iv->base)
982 || contains_abnormal_ssa_name_p (iv->step))
983 return false;
985 return true;
988 /* Finds general ivs in statement STMT. */
990 static void
991 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
993 affine_iv iv;
995 if (!find_givs_in_stmt_scev (data, stmt, &iv))
996 return;
998 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1001 /* Finds general ivs in basic block BB. */
1003 static void
1004 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1006 block_stmt_iterator bsi;
1008 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1009 find_givs_in_stmt (data, bsi_stmt (bsi));
1012 /* Finds general ivs. */
1014 static void
1015 find_givs (struct ivopts_data *data)
1017 struct loop *loop = data->current_loop;
1018 basic_block *body = get_loop_body_in_dom_order (loop);
1019 unsigned i;
1021 for (i = 0; i < loop->num_nodes; i++)
1022 find_givs_in_bb (data, body[i]);
1023 free (body);
1026 /* For each ssa name defined in LOOP determines whether it is an induction
1027 variable and if so, its initial value and step. */
1029 static bool
1030 find_induction_variables (struct ivopts_data *data)
1032 unsigned i;
1033 bitmap_iterator bi;
1035 if (!find_bivs (data))
1036 return false;
1038 find_givs (data);
1039 mark_bivs (data);
1041 if (dump_file && (dump_flags & TDF_DETAILS))
1043 tree niter = niter_for_single_dom_exit (data);
1045 if (niter)
1047 fprintf (dump_file, " number of iterations ");
1048 print_generic_expr (dump_file, niter, TDF_SLIM);
1049 fprintf (dump_file, "\n\n");
1052 fprintf (dump_file, "Induction variables:\n\n");
1054 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1056 if (ver_info (data, i)->iv)
1057 dump_iv (dump_file, ver_info (data, i)->iv);
1061 return true;
1064 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1066 static struct iv_use *
1067 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1068 tree stmt, enum use_type use_type)
1070 struct iv_use *use = XCNEW (struct iv_use);
1072 use->id = n_iv_uses (data);
1073 use->type = use_type;
1074 use->iv = iv;
1075 use->stmt = stmt;
1076 use->op_p = use_p;
1077 use->related_cands = BITMAP_ALLOC (NULL);
1079 /* To avoid showing ssa name in the dumps, if it was not reset by the
1080 caller. */
1081 iv->ssa_name = NULL_TREE;
1083 if (dump_file && (dump_flags & TDF_DETAILS))
1084 dump_use (dump_file, use);
1086 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1088 return use;
1091 /* Checks whether OP is a loop-level invariant and if so, records it.
1092 NONLINEAR_USE is true if the invariant is used in a way we do not
1093 handle specially. */
1095 static void
1096 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1098 basic_block bb;
1099 struct version_info *info;
1101 if (TREE_CODE (op) != SSA_NAME
1102 || !is_gimple_reg (op))
1103 return;
1105 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1106 if (bb
1107 && flow_bb_inside_loop_p (data->current_loop, bb))
1108 return;
1110 info = name_info (data, op);
1111 info->name = op;
1112 info->has_nonlin_use |= nonlinear_use;
1113 if (!info->inv_id)
1114 info->inv_id = ++data->max_inv_id;
1115 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1118 /* Checks whether the use OP is interesting and if so, records it. */
1120 static struct iv_use *
1121 find_interesting_uses_op (struct ivopts_data *data, tree op)
1123 struct iv *iv;
1124 struct iv *civ;
1125 tree stmt;
1126 struct iv_use *use;
1128 if (TREE_CODE (op) != SSA_NAME)
1129 return NULL;
1131 iv = get_iv (data, op);
1132 if (!iv)
1133 return NULL;
1135 if (iv->have_use_for)
1137 use = iv_use (data, iv->use_id);
1139 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1140 return use;
1143 if (integer_zerop (iv->step))
1145 record_invariant (data, op, true);
1146 return NULL;
1148 iv->have_use_for = true;
1150 civ = XNEW (struct iv);
1151 *civ = *iv;
1153 stmt = SSA_NAME_DEF_STMT (op);
1154 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1155 || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1157 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1158 iv->use_id = use->id;
1160 return use;
1163 /* Given a condition *COND_P, checks whether it is a compare of an induction
1164 variable and an invariant. If this is the case, CONTROL_VAR is set
1165 to location of the iv, BOUND to the location of the invariant,
1166 IV_VAR and IV_BOUND are set to the corresponding induction variable
1167 descriptions, and true is returned. If this is not the case,
1168 CONTROL_VAR and BOUND are set to the arguments of the condition and
1169 false is returned. */
1171 static bool
1172 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1173 tree **control_var, tree **bound,
1174 struct iv **iv_var, struct iv **iv_bound)
1176 /* The nodes returned when COND has just one operand. Note that you should
1177 not modify anything in BOUND or IV_BOUND because of this. */
1178 static struct iv const_iv;
1179 static tree zero;
1180 tree cond = *cond_p;
1181 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1182 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1183 bool ret = false;
1185 zero = integer_zero_node;
1186 const_iv.step = integer_zero_node;
1188 if (TREE_CODE (cond) == SSA_NAME)
1190 op0 = cond_p;
1191 iv0 = get_iv (data, cond);
1192 ret = (iv0 && !integer_zerop (iv0->step));
1193 goto end;
1196 if (!COMPARISON_CLASS_P (cond))
1198 op0 = cond_p;
1199 goto end;
1202 op0 = &TREE_OPERAND (cond, 0);
1203 op1 = &TREE_OPERAND (cond, 1);
1204 if (TREE_CODE (*op0) == SSA_NAME)
1205 iv0 = get_iv (data, *op0);
1206 if (TREE_CODE (*op1) == SSA_NAME)
1207 iv1 = get_iv (data, *op1);
1209 /* Exactly one of the compared values must be an iv, and the other one must
1210 be an invariant. */
1211 if (!iv0 || !iv1)
1212 goto end;
1214 if (integer_zerop (iv0->step))
1216 /* Control variable may be on the other side. */
1217 tmp_op = op0; op0 = op1; op1 = tmp_op;
1218 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1220 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1222 end:
1223 if (control_var)
1224 *control_var = op0;;
1225 if (iv_var)
1226 *iv_var = iv0;;
1227 if (bound)
1228 *bound = op1;
1229 if (iv_bound)
1230 *iv_bound = iv1;
1232 return ret;
1235 /* Checks whether the condition *COND_P in STMT is interesting
1236 and if so, records it. */
1238 static void
1239 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1241 tree *var_p, *bound_p;
1242 struct iv *var_iv, *civ;
1244 if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1246 find_interesting_uses_op (data, *var_p);
1247 find_interesting_uses_op (data, *bound_p);
1248 return;
1251 civ = XNEW (struct iv);
1252 *civ = *var_iv;
1253 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1256 /* Returns true if expression EXPR is obviously invariant in LOOP,
1257 i.e. if all its operands are defined outside of the LOOP. */
1259 bool
1260 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1262 basic_block def_bb;
1263 unsigned i, len;
1265 if (is_gimple_min_invariant (expr))
1266 return true;
1268 if (TREE_CODE (expr) == SSA_NAME)
1270 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1271 if (def_bb
1272 && flow_bb_inside_loop_p (loop, def_bb))
1273 return false;
1275 return true;
1278 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1279 return false;
1281 len = TREE_OPERAND_LENGTH (expr);
1282 for (i = 0; i < len; i++)
1283 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1284 return false;
1286 return true;
1289 /* Cumulates the steps of indices into DATA and replaces their values with the
1290 initial ones. Returns false when the value of the index cannot be determined.
1291 Callback for for_each_index. */
1293 struct ifs_ivopts_data
1295 struct ivopts_data *ivopts_data;
1296 tree stmt;
1297 tree step;
1300 static bool
1301 idx_find_step (tree base, tree *idx, void *data)
1303 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1304 struct iv *iv;
1305 tree step, iv_base, iv_step, lbound, off;
1306 struct loop *loop = dta->ivopts_data->current_loop;
1308 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1309 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1310 return false;
1312 /* If base is a component ref, require that the offset of the reference
1313 be invariant. */
1314 if (TREE_CODE (base) == COMPONENT_REF)
1316 off = component_ref_field_offset (base);
1317 return expr_invariant_in_loop_p (loop, off);
1320 /* If base is array, first check whether we will be able to move the
1321 reference out of the loop (in order to take its address in strength
1322 reduction). In order for this to work we need both lower bound
1323 and step to be loop invariants. */
1324 if (TREE_CODE (base) == ARRAY_REF)
1326 step = array_ref_element_size (base);
1327 lbound = array_ref_low_bound (base);
1329 if (!expr_invariant_in_loop_p (loop, step)
1330 || !expr_invariant_in_loop_p (loop, lbound))
1331 return false;
1334 if (TREE_CODE (*idx) != SSA_NAME)
1335 return true;
1337 iv = get_iv (dta->ivopts_data, *idx);
1338 if (!iv)
1339 return false;
1341 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1342 *&x[0], which is not folded and does not trigger the
1343 ARRAY_REF path below. */
1344 *idx = iv->base;
1346 if (integer_zerop (iv->step))
1347 return true;
1349 if (TREE_CODE (base) == ARRAY_REF)
1351 step = array_ref_element_size (base);
1353 /* We only handle addresses whose step is an integer constant. */
1354 if (TREE_CODE (step) != INTEGER_CST)
1355 return false;
1357 else
1358 /* The step for pointer arithmetics already is 1 byte. */
1359 step = build_int_cst (sizetype, 1);
1361 iv_base = iv->base;
1362 iv_step = iv->step;
1363 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1364 sizetype, &iv_base, &iv_step, dta->stmt,
1365 false))
1367 /* The index might wrap. */
1368 return false;
1371 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1372 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1374 return true;
1377 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1378 object is passed to it in DATA. */
1380 static bool
1381 idx_record_use (tree base, tree *idx,
1382 void *vdata)
1384 struct ivopts_data *data = (struct ivopts_data *) vdata;
1385 find_interesting_uses_op (data, *idx);
1386 if (TREE_CODE (base) == ARRAY_REF)
1388 find_interesting_uses_op (data, array_ref_element_size (base));
1389 find_interesting_uses_op (data, array_ref_low_bound (base));
1391 return true;
1394 /* If we can prove that TOP = cst * BOT for some constant cst,
1395 store cst to MUL and return true. Otherwise return false.
1396 The returned value is always sign-extended, regardless of the
1397 signedness of TOP and BOT. */
1399 static bool
1400 constant_multiple_of (tree top, tree bot, double_int *mul)
1402 tree mby;
1403 enum tree_code code;
1404 double_int res, p0, p1;
1405 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1407 STRIP_NOPS (top);
1408 STRIP_NOPS (bot);
1410 if (operand_equal_p (top, bot, 0))
1412 *mul = double_int_one;
1413 return true;
1416 code = TREE_CODE (top);
1417 switch (code)
1419 case MULT_EXPR:
1420 mby = TREE_OPERAND (top, 1);
1421 if (TREE_CODE (mby) != INTEGER_CST)
1422 return false;
1424 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1425 return false;
1427 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1428 precision);
1429 return true;
1431 case PLUS_EXPR:
1432 case MINUS_EXPR:
1433 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1434 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1435 return false;
1437 if (code == MINUS_EXPR)
1438 p1 = double_int_neg (p1);
1439 *mul = double_int_sext (double_int_add (p0, p1), precision);
1440 return true;
1442 case INTEGER_CST:
1443 if (TREE_CODE (bot) != INTEGER_CST)
1444 return false;
1446 p0 = double_int_sext (tree_to_double_int (top), precision);
1447 p1 = double_int_sext (tree_to_double_int (bot), precision);
1448 if (double_int_zero_p (p1))
1449 return false;
1450 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1451 precision);
1452 return double_int_zero_p (res);
1454 default:
1455 return false;
1459 /* Returns true if memory reference REF with step STEP may be unaligned. */
1461 static bool
1462 may_be_unaligned_p (tree ref, tree step)
1464 tree base;
1465 tree base_type;
1466 HOST_WIDE_INT bitsize;
1467 HOST_WIDE_INT bitpos;
1468 tree toffset;
1469 enum machine_mode mode;
1470 int unsignedp, volatilep;
1471 unsigned base_align;
1473 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1474 thus they are not misaligned. */
1475 if (TREE_CODE (ref) == TARGET_MEM_REF)
1476 return false;
1478 /* The test below is basically copy of what expr.c:normal_inner_ref
1479 does to check whether the object must be loaded by parts when
1480 STRICT_ALIGNMENT is true. */
1481 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1482 &unsignedp, &volatilep, true);
1483 base_type = TREE_TYPE (base);
1484 base_align = TYPE_ALIGN (base_type);
1486 if (mode != BLKmode)
1488 double_int mul;
1489 tree al = build_int_cst (TREE_TYPE (step),
1490 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1492 if (base_align < GET_MODE_ALIGNMENT (mode)
1493 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1494 || bitpos % BITS_PER_UNIT != 0)
1495 return true;
1497 if (!constant_multiple_of (step, al, &mul))
1498 return true;
1501 return false;
1504 /* Return true if EXPR may be non-addressable. */
1506 static bool
1507 may_be_nonaddressable_p (tree expr)
1509 switch (TREE_CODE (expr))
1511 case TARGET_MEM_REF:
1512 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1513 target, thus they are always addressable. */
1514 return false;
1516 case COMPONENT_REF:
1517 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1518 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1520 case VIEW_CONVERT_EXPR:
1521 /* This kind of view-conversions may wrap non-addressable objects
1522 and make them look addressable. After some processing the
1523 non-addressability may be uncovered again, causing ADDR_EXPRs
1524 of inappropriate objects to be built. */
1525 if (AGGREGATE_TYPE_P (TREE_TYPE (expr))
1526 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0))))
1527 return true;
1529 /* ... fall through ... */
1531 case ARRAY_REF:
1532 case ARRAY_RANGE_REF:
1533 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1535 case CONVERT_EXPR:
1536 case NON_LVALUE_EXPR:
1537 case NOP_EXPR:
1538 return true;
1540 default:
1541 break;
1544 return false;
1547 /* Finds addresses in *OP_P inside STMT. */
1549 static void
1550 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1552 tree base = *op_p, step = build_int_cst (sizetype, 0);
1553 struct iv *civ;
1554 struct ifs_ivopts_data ifs_ivopts_data;
1556 /* Do not play with volatile memory references. A bit too conservative,
1557 perhaps, but safe. */
1558 if (stmt_ann (stmt)->has_volatile_ops)
1559 goto fail;
1561 /* Ignore bitfields for now. Not really something terribly complicated
1562 to handle. TODO. */
1563 if (TREE_CODE (base) == BIT_FIELD_REF)
1564 goto fail;
1566 base = unshare_expr (base);
1568 if (TREE_CODE (base) == TARGET_MEM_REF)
1570 tree type = build_pointer_type (TREE_TYPE (base));
1571 tree astep;
1573 if (TMR_BASE (base)
1574 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1576 civ = get_iv (data, TMR_BASE (base));
1577 if (!civ)
1578 goto fail;
1580 TMR_BASE (base) = civ->base;
1581 step = civ->step;
1583 if (TMR_INDEX (base)
1584 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1586 civ = get_iv (data, TMR_INDEX (base));
1587 if (!civ)
1588 goto fail;
1590 TMR_INDEX (base) = civ->base;
1591 astep = civ->step;
1593 if (astep)
1595 if (TMR_STEP (base))
1596 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1598 step = fold_build2 (PLUS_EXPR, type, step, astep);
1602 if (integer_zerop (step))
1603 goto fail;
1604 base = tree_mem_ref_addr (type, base);
1606 else
1608 ifs_ivopts_data.ivopts_data = data;
1609 ifs_ivopts_data.stmt = stmt;
1610 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1611 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1612 || integer_zerop (ifs_ivopts_data.step))
1613 goto fail;
1614 step = ifs_ivopts_data.step;
1616 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1617 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1619 /* Check that the base expression is addressable. This needs
1620 to be done after substituting bases of IVs into it. */
1621 if (may_be_nonaddressable_p (base))
1622 goto fail;
1624 /* Moreover, on strict alignment platforms, check that it is
1625 sufficiently aligned. */
1626 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1627 goto fail;
1629 base = build_fold_addr_expr (base);
1631 /* Substituting bases of IVs into the base expression might
1632 have caused folding opportunities. */
1633 if (TREE_CODE (base) == ADDR_EXPR)
1635 tree *ref = &TREE_OPERAND (base, 0);
1636 while (handled_component_p (*ref))
1637 ref = &TREE_OPERAND (*ref, 0);
1638 if (TREE_CODE (*ref) == INDIRECT_REF)
1639 *ref = fold_indirect_ref (*ref);
1643 civ = alloc_iv (base, step);
1644 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1645 return;
1647 fail:
1648 for_each_index (op_p, idx_record_use, data);
1651 /* Finds and records invariants used in STMT. */
1653 static void
1654 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1656 ssa_op_iter iter;
1657 use_operand_p use_p;
1658 tree op;
1660 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1662 op = USE_FROM_PTR (use_p);
1663 record_invariant (data, op, false);
1667 /* Finds interesting uses of induction variables in the statement STMT. */
1669 static void
1670 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1672 struct iv *iv;
1673 tree op, lhs, rhs;
1674 ssa_op_iter iter;
1675 use_operand_p use_p;
1677 find_invariants_stmt (data, stmt);
1679 if (TREE_CODE (stmt) == COND_EXPR)
1681 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1682 return;
1685 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1687 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1688 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1690 if (TREE_CODE (lhs) == SSA_NAME)
1692 /* If the statement defines an induction variable, the uses are not
1693 interesting by themselves. */
1695 iv = get_iv (data, lhs);
1697 if (iv && !integer_zerop (iv->step))
1698 return;
1701 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1703 case tcc_comparison:
1704 find_interesting_uses_cond (data, stmt,
1705 &GIMPLE_STMT_OPERAND (stmt, 1));
1706 return;
1708 case tcc_reference:
1709 find_interesting_uses_address (data, stmt,
1710 &GIMPLE_STMT_OPERAND (stmt, 1));
1711 if (REFERENCE_CLASS_P (lhs))
1712 find_interesting_uses_address (data, stmt,
1713 &GIMPLE_STMT_OPERAND (stmt, 0));
1714 return;
1716 default: ;
1719 if (REFERENCE_CLASS_P (lhs)
1720 && is_gimple_val (rhs))
1722 find_interesting_uses_address (data, stmt,
1723 &GIMPLE_STMT_OPERAND (stmt, 0));
1724 find_interesting_uses_op (data, rhs);
1725 return;
1728 /* TODO -- we should also handle address uses of type
1730 memory = call (whatever);
1734 call (memory). */
1737 if (TREE_CODE (stmt) == PHI_NODE
1738 && bb_for_stmt (stmt) == data->current_loop->header)
1740 lhs = PHI_RESULT (stmt);
1741 iv = get_iv (data, lhs);
1743 if (iv && !integer_zerop (iv->step))
1744 return;
1747 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1749 op = USE_FROM_PTR (use_p);
1751 if (TREE_CODE (op) != SSA_NAME)
1752 continue;
1754 iv = get_iv (data, op);
1755 if (!iv)
1756 continue;
1758 find_interesting_uses_op (data, op);
1762 /* Finds interesting uses of induction variables outside of loops
1763 on loop exit edge EXIT. */
1765 static void
1766 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1768 tree phi, def;
1770 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1772 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1773 if (is_gimple_reg (def))
1774 find_interesting_uses_op (data, def);
1778 /* Finds uses of the induction variables that are interesting. */
1780 static void
1781 find_interesting_uses (struct ivopts_data *data)
1783 basic_block bb;
1784 block_stmt_iterator bsi;
1785 tree phi;
1786 basic_block *body = get_loop_body (data->current_loop);
1787 unsigned i;
1788 struct version_info *info;
1789 edge e;
1791 if (dump_file && (dump_flags & TDF_DETAILS))
1792 fprintf (dump_file, "Uses:\n\n");
1794 for (i = 0; i < data->current_loop->num_nodes; i++)
1796 edge_iterator ei;
1797 bb = body[i];
1799 FOR_EACH_EDGE (e, ei, bb->succs)
1800 if (e->dest != EXIT_BLOCK_PTR
1801 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1802 find_interesting_uses_outside (data, e);
1804 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1805 find_interesting_uses_stmt (data, phi);
1806 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1807 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1810 if (dump_file && (dump_flags & TDF_DETAILS))
1812 bitmap_iterator bi;
1814 fprintf (dump_file, "\n");
1816 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1818 info = ver_info (data, i);
1819 if (info->inv_id)
1821 fprintf (dump_file, " ");
1822 print_generic_expr (dump_file, info->name, TDF_SLIM);
1823 fprintf (dump_file, " is invariant (%d)%s\n",
1824 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1828 fprintf (dump_file, "\n");
1831 free (body);
1834 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1835 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1836 we are at the top-level of the processed address. */
1838 static tree
1839 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1840 unsigned HOST_WIDE_INT *offset)
1842 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1843 enum tree_code code;
1844 tree type, orig_type = TREE_TYPE (expr);
1845 unsigned HOST_WIDE_INT off0, off1, st;
1846 tree orig_expr = expr;
1848 STRIP_NOPS (expr);
1850 type = TREE_TYPE (expr);
1851 code = TREE_CODE (expr);
1852 *offset = 0;
1854 switch (code)
1856 case INTEGER_CST:
1857 if (!cst_and_fits_in_hwi (expr)
1858 || integer_zerop (expr))
1859 return orig_expr;
1861 *offset = int_cst_value (expr);
1862 return build_int_cst (orig_type, 0);
1864 case POINTER_PLUS_EXPR:
1865 case PLUS_EXPR:
1866 case MINUS_EXPR:
1867 op0 = TREE_OPERAND (expr, 0);
1868 op1 = TREE_OPERAND (expr, 1);
1870 op0 = strip_offset_1 (op0, false, false, &off0);
1871 op1 = strip_offset_1 (op1, false, false, &off1);
1873 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1874 if (op0 == TREE_OPERAND (expr, 0)
1875 && op1 == TREE_OPERAND (expr, 1))
1876 return orig_expr;
1878 if (integer_zerop (op1))
1879 expr = op0;
1880 else if (integer_zerop (op0))
1882 if (code == MINUS_EXPR)
1883 expr = fold_build1 (NEGATE_EXPR, type, op1);
1884 else
1885 expr = op1;
1887 else
1888 expr = fold_build2 (code, type, op0, op1);
1890 return fold_convert (orig_type, expr);
1892 case ARRAY_REF:
1893 if (!inside_addr)
1894 return orig_expr;
1896 step = array_ref_element_size (expr);
1897 if (!cst_and_fits_in_hwi (step))
1898 break;
1900 st = int_cst_value (step);
1901 op1 = TREE_OPERAND (expr, 1);
1902 op1 = strip_offset_1 (op1, false, false, &off1);
1903 *offset = off1 * st;
1905 if (top_compref
1906 && integer_zerop (op1))
1908 /* Strip the component reference completely. */
1909 op0 = TREE_OPERAND (expr, 0);
1910 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1911 *offset += off0;
1912 return op0;
1914 break;
1916 case COMPONENT_REF:
1917 if (!inside_addr)
1918 return orig_expr;
1920 tmp = component_ref_field_offset (expr);
1921 if (top_compref
1922 && cst_and_fits_in_hwi (tmp))
1924 /* Strip the component reference completely. */
1925 op0 = TREE_OPERAND (expr, 0);
1926 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1927 *offset = off0 + int_cst_value (tmp);
1928 return op0;
1930 break;
1932 case ADDR_EXPR:
1933 op0 = TREE_OPERAND (expr, 0);
1934 op0 = strip_offset_1 (op0, true, true, &off0);
1935 *offset += off0;
1937 if (op0 == TREE_OPERAND (expr, 0))
1938 return orig_expr;
1940 expr = build_fold_addr_expr (op0);
1941 return fold_convert (orig_type, expr);
1943 case INDIRECT_REF:
1944 inside_addr = false;
1945 break;
1947 default:
1948 return orig_expr;
1951 /* Default handling of expressions for that we want to recurse into
1952 the first operand. */
1953 op0 = TREE_OPERAND (expr, 0);
1954 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1955 *offset += off0;
1957 if (op0 == TREE_OPERAND (expr, 0)
1958 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1959 return orig_expr;
1961 expr = copy_node (expr);
1962 TREE_OPERAND (expr, 0) = op0;
1963 if (op1)
1964 TREE_OPERAND (expr, 1) = op1;
1966 /* Inside address, we might strip the top level component references,
1967 thus changing type of the expression. Handling of ADDR_EXPR
1968 will fix that. */
1969 expr = fold_convert (orig_type, expr);
1971 return expr;
1974 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1976 static tree
1977 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1979 return strip_offset_1 (expr, false, false, offset);
1982 /* Returns variant of TYPE that can be used as base for different uses.
1983 We return unsigned type with the same precision, which avoids problems
1984 with overflows. */
1986 static tree
1987 generic_type_for (tree type)
1989 if (POINTER_TYPE_P (type))
1990 return unsigned_type_for (type);
1992 if (TYPE_UNSIGNED (type))
1993 return type;
1995 return unsigned_type_for (type);
1998 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1999 the bitmap to that we should store it. */
2001 static struct ivopts_data *fd_ivopts_data;
2002 static tree
2003 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2005 bitmap *depends_on = (bitmap *) data;
2006 struct version_info *info;
2008 if (TREE_CODE (*expr_p) != SSA_NAME)
2009 return NULL_TREE;
2010 info = name_info (fd_ivopts_data, *expr_p);
2012 if (!info->inv_id || info->has_nonlin_use)
2013 return NULL_TREE;
2015 if (!*depends_on)
2016 *depends_on = BITMAP_ALLOC (NULL);
2017 bitmap_set_bit (*depends_on, info->inv_id);
2019 return NULL_TREE;
2022 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2023 position to POS. If USE is not NULL, the candidate is set as related to
2024 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2025 replacement of the final value of the iv by a direct computation. */
2027 static struct iv_cand *
2028 add_candidate_1 (struct ivopts_data *data,
2029 tree base, tree step, bool important, enum iv_position pos,
2030 struct iv_use *use, tree incremented_at)
2032 unsigned i;
2033 struct iv_cand *cand = NULL;
2034 tree type, orig_type;
2036 if (base)
2038 orig_type = TREE_TYPE (base);
2039 type = generic_type_for (orig_type);
2040 if (type != orig_type)
2042 base = fold_convert (type, base);
2043 step = fold_convert (type, step);
2047 for (i = 0; i < n_iv_cands (data); i++)
2049 cand = iv_cand (data, i);
2051 if (cand->pos != pos)
2052 continue;
2054 if (cand->incremented_at != incremented_at)
2055 continue;
2057 if (!cand->iv)
2059 if (!base && !step)
2060 break;
2062 continue;
2065 if (!base && !step)
2066 continue;
2068 if (operand_equal_p (base, cand->iv->base, 0)
2069 && operand_equal_p (step, cand->iv->step, 0))
2070 break;
2073 if (i == n_iv_cands (data))
2075 cand = XCNEW (struct iv_cand);
2076 cand->id = i;
2078 if (!base && !step)
2079 cand->iv = NULL;
2080 else
2081 cand->iv = alloc_iv (base, step);
2083 cand->pos = pos;
2084 if (pos != IP_ORIGINAL && cand->iv)
2086 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2087 cand->var_after = cand->var_before;
2089 cand->important = important;
2090 cand->incremented_at = incremented_at;
2091 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2093 if (step
2094 && TREE_CODE (step) != INTEGER_CST)
2096 fd_ivopts_data = data;
2097 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2100 if (dump_file && (dump_flags & TDF_DETAILS))
2101 dump_cand (dump_file, cand);
2104 if (important && !cand->important)
2106 cand->important = true;
2107 if (dump_file && (dump_flags & TDF_DETAILS))
2108 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2111 if (use)
2113 bitmap_set_bit (use->related_cands, i);
2114 if (dump_file && (dump_flags & TDF_DETAILS))
2115 fprintf (dump_file, "Candidate %d is related to use %d\n",
2116 cand->id, use->id);
2119 return cand;
2122 /* Returns true if incrementing the induction variable at the end of the LOOP
2123 is allowed.
2125 The purpose is to avoid splitting latch edge with a biv increment, thus
2126 creating a jump, possibly confusing other optimization passes and leaving
2127 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2128 is not available (so we do not have a better alternative), or if the latch
2129 edge is already nonempty. */
2131 static bool
2132 allow_ip_end_pos_p (struct loop *loop)
2134 if (!ip_normal_pos (loop))
2135 return true;
2137 if (!empty_block_p (ip_end_pos (loop)))
2138 return true;
2140 return false;
2143 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2144 position to POS. If USE is not NULL, the candidate is set as related to
2145 it. The candidate computation is scheduled on all available positions. */
2147 static void
2148 add_candidate (struct ivopts_data *data,
2149 tree base, tree step, bool important, struct iv_use *use)
2151 if (ip_normal_pos (data->current_loop))
2152 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2153 if (ip_end_pos (data->current_loop)
2154 && allow_ip_end_pos_p (data->current_loop))
2155 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2158 /* Add a standard "0 + 1 * iteration" iv candidate for a
2159 type with SIZE bits. */
2161 static void
2162 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2163 unsigned int size)
2165 tree type = lang_hooks.types.type_for_size (size, true);
2166 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2167 true, NULL);
2170 /* Adds standard iv candidates. */
2172 static void
2173 add_standard_iv_candidates (struct ivopts_data *data)
2175 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2177 /* The same for a double-integer type if it is still fast enough. */
2178 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2179 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2183 /* Adds candidates bases on the old induction variable IV. */
2185 static void
2186 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2188 tree phi, def;
2189 struct iv_cand *cand;
2191 add_candidate (data, iv->base, iv->step, true, NULL);
2193 /* The same, but with initial value zero. */
2194 add_candidate (data,
2195 build_int_cst (TREE_TYPE (iv->base), 0),
2196 iv->step, true, NULL);
2198 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2199 if (TREE_CODE (phi) == PHI_NODE)
2201 /* Additionally record the possibility of leaving the original iv
2202 untouched. */
2203 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2204 cand = add_candidate_1 (data,
2205 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2206 SSA_NAME_DEF_STMT (def));
2207 cand->var_before = iv->ssa_name;
2208 cand->var_after = def;
2212 /* Adds candidates based on the old induction variables. */
2214 static void
2215 add_old_ivs_candidates (struct ivopts_data *data)
2217 unsigned i;
2218 struct iv *iv;
2219 bitmap_iterator bi;
2221 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2223 iv = ver_info (data, i)->iv;
2224 if (iv && iv->biv_p && !integer_zerop (iv->step))
2225 add_old_iv_candidates (data, iv);
2229 /* Adds candidates based on the value of the induction variable IV and USE. */
2231 static void
2232 add_iv_value_candidates (struct ivopts_data *data,
2233 struct iv *iv, struct iv_use *use)
2235 unsigned HOST_WIDE_INT offset;
2236 tree base;
2238 add_candidate (data, iv->base, iv->step, false, use);
2240 /* The same, but with initial value zero. Make such variable important,
2241 since it is generic enough so that possibly many uses may be based
2242 on it. */
2243 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2244 iv->step, true, use);
2246 /* Third, try removing the constant offset. */
2247 base = strip_offset (iv->base, &offset);
2248 if (offset)
2249 add_candidate (data, base, iv->step, false, use);
2252 /* Adds candidates based on the uses. */
2254 static void
2255 add_derived_ivs_candidates (struct ivopts_data *data)
2257 unsigned i;
2259 for (i = 0; i < n_iv_uses (data); i++)
2261 struct iv_use *use = iv_use (data, i);
2263 if (!use)
2264 continue;
2266 switch (use->type)
2268 case USE_NONLINEAR_EXPR:
2269 case USE_COMPARE:
2270 case USE_ADDRESS:
2271 /* Just add the ivs based on the value of the iv used here. */
2272 add_iv_value_candidates (data, use->iv, use);
2273 break;
2275 default:
2276 gcc_unreachable ();
2281 /* Record important candidates and add them to related_cands bitmaps
2282 if needed. */
2284 static void
2285 record_important_candidates (struct ivopts_data *data)
2287 unsigned i;
2288 struct iv_use *use;
2290 for (i = 0; i < n_iv_cands (data); i++)
2292 struct iv_cand *cand = iv_cand (data, i);
2294 if (cand->important)
2295 bitmap_set_bit (data->important_candidates, i);
2298 data->consider_all_candidates = (n_iv_cands (data)
2299 <= CONSIDER_ALL_CANDIDATES_BOUND);
2301 if (data->consider_all_candidates)
2303 /* We will not need "related_cands" bitmaps in this case,
2304 so release them to decrease peak memory consumption. */
2305 for (i = 0; i < n_iv_uses (data); i++)
2307 use = iv_use (data, i);
2308 BITMAP_FREE (use->related_cands);
2311 else
2313 /* Add important candidates to the related_cands bitmaps. */
2314 for (i = 0; i < n_iv_uses (data); i++)
2315 bitmap_ior_into (iv_use (data, i)->related_cands,
2316 data->important_candidates);
2320 /* Finds the candidates for the induction variables. */
2322 static void
2323 find_iv_candidates (struct ivopts_data *data)
2325 /* Add commonly used ivs. */
2326 add_standard_iv_candidates (data);
2328 /* Add old induction variables. */
2329 add_old_ivs_candidates (data);
2331 /* Add induction variables derived from uses. */
2332 add_derived_ivs_candidates (data);
2334 /* Record the important candidates. */
2335 record_important_candidates (data);
2338 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2339 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2340 we allocate a simple list to every use. */
2342 static void
2343 alloc_use_cost_map (struct ivopts_data *data)
2345 unsigned i, size, s, j;
2347 for (i = 0; i < n_iv_uses (data); i++)
2349 struct iv_use *use = iv_use (data, i);
2350 bitmap_iterator bi;
2352 if (data->consider_all_candidates)
2353 size = n_iv_cands (data);
2354 else
2356 s = 0;
2357 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2359 s++;
2362 /* Round up to the power of two, so that moduling by it is fast. */
2363 for (size = 1; size < s; size <<= 1)
2364 continue;
2367 use->n_map_members = size;
2368 use->cost_map = XCNEWVEC (struct cost_pair, size);
2372 /* Returns description of computation cost of expression whose runtime
2373 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2375 static comp_cost
2376 new_cost (unsigned runtime, unsigned complexity)
2378 comp_cost cost;
2380 cost.cost = runtime;
2381 cost.complexity = complexity;
2383 return cost;
2386 /* Adds costs COST1 and COST2. */
2388 static comp_cost
2389 add_costs (comp_cost cost1, comp_cost cost2)
2391 cost1.cost += cost2.cost;
2392 cost1.complexity += cost2.complexity;
2394 return cost1;
2396 /* Subtracts costs COST1 and COST2. */
2398 static comp_cost
2399 sub_costs (comp_cost cost1, comp_cost cost2)
2401 cost1.cost -= cost2.cost;
2402 cost1.complexity -= cost2.complexity;
2404 return cost1;
2407 /* Returns a negative number if COST1 < COST2, a positive number if
2408 COST1 > COST2, and 0 if COST1 = COST2. */
2410 static int
2411 compare_costs (comp_cost cost1, comp_cost cost2)
2413 if (cost1.cost == cost2.cost)
2414 return cost1.complexity - cost2.complexity;
2416 return cost1.cost - cost2.cost;
2419 /* Returns true if COST is infinite. */
2421 static bool
2422 infinite_cost_p (comp_cost cost)
2424 return cost.cost == INFTY;
2427 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2428 on invariants DEPENDS_ON and that the value used in expressing it
2429 is VALUE.*/
2431 static void
2432 set_use_iv_cost (struct ivopts_data *data,
2433 struct iv_use *use, struct iv_cand *cand,
2434 comp_cost cost, bitmap depends_on, tree value)
2436 unsigned i, s;
2438 if (infinite_cost_p (cost))
2440 BITMAP_FREE (depends_on);
2441 return;
2444 if (data->consider_all_candidates)
2446 use->cost_map[cand->id].cand = cand;
2447 use->cost_map[cand->id].cost = cost;
2448 use->cost_map[cand->id].depends_on = depends_on;
2449 use->cost_map[cand->id].value = value;
2450 return;
2453 /* n_map_members is a power of two, so this computes modulo. */
2454 s = cand->id & (use->n_map_members - 1);
2455 for (i = s; i < use->n_map_members; i++)
2456 if (!use->cost_map[i].cand)
2457 goto found;
2458 for (i = 0; i < s; i++)
2459 if (!use->cost_map[i].cand)
2460 goto found;
2462 gcc_unreachable ();
2464 found:
2465 use->cost_map[i].cand = cand;
2466 use->cost_map[i].cost = cost;
2467 use->cost_map[i].depends_on = depends_on;
2468 use->cost_map[i].value = value;
2471 /* Gets cost of (USE, CANDIDATE) pair. */
2473 static struct cost_pair *
2474 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2475 struct iv_cand *cand)
2477 unsigned i, s;
2478 struct cost_pair *ret;
2480 if (!cand)
2481 return NULL;
2483 if (data->consider_all_candidates)
2485 ret = use->cost_map + cand->id;
2486 if (!ret->cand)
2487 return NULL;
2489 return ret;
2492 /* n_map_members is a power of two, so this computes modulo. */
2493 s = cand->id & (use->n_map_members - 1);
2494 for (i = s; i < use->n_map_members; i++)
2495 if (use->cost_map[i].cand == cand)
2496 return use->cost_map + i;
2498 for (i = 0; i < s; i++)
2499 if (use->cost_map[i].cand == cand)
2500 return use->cost_map + i;
2502 return NULL;
2505 /* Returns estimate on cost of computing SEQ. */
2507 static unsigned
2508 seq_cost (rtx seq)
2510 unsigned cost = 0;
2511 rtx set;
2513 for (; seq; seq = NEXT_INSN (seq))
2515 set = single_set (seq);
2516 if (set)
2517 cost += rtx_cost (set, SET);
2518 else
2519 cost++;
2522 return cost;
2525 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2526 static rtx
2527 produce_memory_decl_rtl (tree obj, int *regno)
2529 rtx x;
2531 gcc_assert (obj);
2532 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2534 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2535 x = gen_rtx_SYMBOL_REF (Pmode, name);
2536 SET_SYMBOL_REF_DECL (x, obj);
2537 x = gen_rtx_MEM (DECL_MODE (obj), x);
2538 targetm.encode_section_info (obj, x, true);
2540 else
2542 x = gen_raw_REG (Pmode, (*regno)++);
2543 x = gen_rtx_MEM (DECL_MODE (obj), x);
2546 return x;
2549 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2550 walk_tree. DATA contains the actual fake register number. */
2552 static tree
2553 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2555 tree obj = NULL_TREE;
2556 rtx x = NULL_RTX;
2557 int *regno = (int *) data;
2559 switch (TREE_CODE (*expr_p))
2561 case ADDR_EXPR:
2562 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2563 handled_component_p (*expr_p);
2564 expr_p = &TREE_OPERAND (*expr_p, 0))
2565 continue;
2566 obj = *expr_p;
2567 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2568 x = produce_memory_decl_rtl (obj, regno);
2569 break;
2571 case SSA_NAME:
2572 *ws = 0;
2573 obj = SSA_NAME_VAR (*expr_p);
2574 if (!DECL_RTL_SET_P (obj))
2575 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2576 break;
2578 case VAR_DECL:
2579 case PARM_DECL:
2580 case RESULT_DECL:
2581 *ws = 0;
2582 obj = *expr_p;
2584 if (DECL_RTL_SET_P (obj))
2585 break;
2587 if (DECL_MODE (obj) == BLKmode)
2588 x = produce_memory_decl_rtl (obj, regno);
2589 else
2590 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2592 break;
2594 default:
2595 break;
2598 if (x)
2600 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2601 SET_DECL_RTL (obj, x);
2604 return NULL_TREE;
2607 /* Determines cost of the computation of EXPR. */
2609 static unsigned
2610 computation_cost (tree expr)
2612 rtx seq, rslt;
2613 tree type = TREE_TYPE (expr);
2614 unsigned cost;
2615 /* Avoid using hard regs in ways which may be unsupported. */
2616 int regno = LAST_VIRTUAL_REGISTER + 1;
2618 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2619 start_sequence ();
2620 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2621 seq = get_insns ();
2622 end_sequence ();
2624 cost = seq_cost (seq);
2625 if (MEM_P (rslt))
2626 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2628 return cost;
2631 /* Returns variable containing the value of candidate CAND at statement AT. */
2633 static tree
2634 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2636 if (stmt_after_increment (loop, cand, stmt))
2637 return cand->var_after;
2638 else
2639 return cand->var_before;
2642 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2643 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2646 tree_int_cst_sign_bit (const_tree t)
2648 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2649 unsigned HOST_WIDE_INT w;
2651 if (bitno < HOST_BITS_PER_WIDE_INT)
2652 w = TREE_INT_CST_LOW (t);
2653 else
2655 w = TREE_INT_CST_HIGH (t);
2656 bitno -= HOST_BITS_PER_WIDE_INT;
2659 return (w >> bitno) & 1;
2662 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2663 same precision that is at least as wide as the precision of TYPE, stores
2664 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2665 type of A and B. */
2667 static tree
2668 determine_common_wider_type (tree *a, tree *b)
2670 tree wider_type = NULL;
2671 tree suba, subb;
2672 tree atype = TREE_TYPE (*a);
2674 if ((TREE_CODE (*a) == NOP_EXPR
2675 || TREE_CODE (*a) == CONVERT_EXPR))
2677 suba = TREE_OPERAND (*a, 0);
2678 wider_type = TREE_TYPE (suba);
2679 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2680 return atype;
2682 else
2683 return atype;
2685 if ((TREE_CODE (*b) == NOP_EXPR
2686 || TREE_CODE (*b) == CONVERT_EXPR))
2688 subb = TREE_OPERAND (*b, 0);
2689 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2690 return atype;
2692 else
2693 return atype;
2695 *a = suba;
2696 *b = subb;
2697 return wider_type;
2700 /* Determines the expression by that USE is expressed from induction variable
2701 CAND at statement AT in LOOP. The expression is stored in a decomposed
2702 form into AFF. Returns false if USE cannot be expressed using CAND. */
2704 static bool
2705 get_computation_aff (struct loop *loop,
2706 struct iv_use *use, struct iv_cand *cand, tree at,
2707 struct affine_tree_combination *aff)
2709 tree ubase = use->iv->base;
2710 tree ustep = use->iv->step;
2711 tree cbase = cand->iv->base;
2712 tree cstep = cand->iv->step, cstep_common;
2713 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2714 tree common_type, var;
2715 tree uutype;
2716 aff_tree cbase_aff, var_aff;
2717 double_int rat;
2719 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2721 /* We do not have a precision to express the values of use. */
2722 return false;
2725 var = var_at_stmt (loop, cand, at);
2726 uutype = unsigned_type_for (utype);
2728 /* If the conversion is not noop, perform it. */
2729 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2731 cstep = fold_convert (uutype, cstep);
2732 cbase = fold_convert (uutype, cbase);
2733 var = fold_convert (uutype, var);
2736 if (!constant_multiple_of (ustep, cstep, &rat))
2737 return false;
2739 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2740 type, we achieve better folding by computing their difference in this
2741 wider type, and cast the result to UUTYPE. We do not need to worry about
2742 overflows, as all the arithmetics will in the end be performed in UUTYPE
2743 anyway. */
2744 common_type = determine_common_wider_type (&ubase, &cbase);
2746 /* use = ubase - ratio * cbase + ratio * var. */
2747 tree_to_aff_combination (ubase, common_type, aff);
2748 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2749 tree_to_aff_combination (var, uutype, &var_aff);
2751 /* We need to shift the value if we are after the increment. */
2752 if (stmt_after_increment (loop, cand, at))
2754 aff_tree cstep_aff;
2756 if (common_type != uutype)
2757 cstep_common = fold_convert (common_type, cstep);
2758 else
2759 cstep_common = cstep;
2761 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2762 aff_combination_add (&cbase_aff, &cstep_aff);
2765 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2766 aff_combination_add (aff, &cbase_aff);
2767 if (common_type != uutype)
2768 aff_combination_convert (aff, uutype);
2770 aff_combination_scale (&var_aff, rat);
2771 aff_combination_add (aff, &var_aff);
2773 return true;
2776 /* Determines the expression by that USE is expressed from induction variable
2777 CAND at statement AT in LOOP. The computation is unshared. */
2779 static tree
2780 get_computation_at (struct loop *loop,
2781 struct iv_use *use, struct iv_cand *cand, tree at)
2783 aff_tree aff;
2784 tree type = TREE_TYPE (use->iv->base);
2786 if (!get_computation_aff (loop, use, cand, at, &aff))
2787 return NULL_TREE;
2788 unshare_aff_combination (&aff);
2789 return fold_convert (type, aff_combination_to_tree (&aff));
2792 /* Determines the expression by that USE is expressed from induction variable
2793 CAND in LOOP. The computation is unshared. */
2795 static tree
2796 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2798 return get_computation_at (loop, use, cand, use->stmt);
2801 /* Returns cost of addition in MODE. */
2803 static unsigned
2804 add_cost (enum machine_mode mode)
2806 static unsigned costs[NUM_MACHINE_MODES];
2807 rtx seq;
2808 unsigned cost;
2810 if (costs[mode])
2811 return costs[mode];
2813 start_sequence ();
2814 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2815 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2816 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2817 NULL_RTX);
2818 seq = get_insns ();
2819 end_sequence ();
2821 cost = seq_cost (seq);
2822 if (!cost)
2823 cost = 1;
2825 costs[mode] = cost;
2827 if (dump_file && (dump_flags & TDF_DETAILS))
2828 fprintf (dump_file, "Addition in %s costs %d\n",
2829 GET_MODE_NAME (mode), cost);
2830 return cost;
2833 /* Entry in a hashtable of already known costs for multiplication. */
2834 struct mbc_entry
2836 HOST_WIDE_INT cst; /* The constant to multiply by. */
2837 enum machine_mode mode; /* In mode. */
2838 unsigned cost; /* The cost. */
2841 /* Counts hash value for the ENTRY. */
2843 static hashval_t
2844 mbc_entry_hash (const void *entry)
2846 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2848 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2851 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2853 static int
2854 mbc_entry_eq (const void *entry1, const void *entry2)
2856 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2857 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2859 return (e1->mode == e2->mode
2860 && e1->cst == e2->cst);
2863 /* Returns cost of multiplication by constant CST in MODE. */
2865 unsigned
2866 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2868 static htab_t costs;
2869 struct mbc_entry **cached, act;
2870 rtx seq;
2871 unsigned cost;
2873 if (!costs)
2874 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2876 act.mode = mode;
2877 act.cst = cst;
2878 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2879 if (*cached)
2880 return (*cached)->cost;
2882 *cached = XNEW (struct mbc_entry);
2883 (*cached)->mode = mode;
2884 (*cached)->cst = cst;
2886 start_sequence ();
2887 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2888 gen_int_mode (cst, mode), NULL_RTX, 0);
2889 seq = get_insns ();
2890 end_sequence ();
2892 cost = seq_cost (seq);
2894 if (dump_file && (dump_flags & TDF_DETAILS))
2895 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2896 (int) cst, GET_MODE_NAME (mode), cost);
2898 (*cached)->cost = cost;
2900 return cost;
2903 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2904 validity for a memory reference accessing memory of mode MODE. */
2906 bool
2907 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2909 #define MAX_RATIO 128
2910 static sbitmap valid_mult[MAX_MACHINE_MODE];
2912 if (!valid_mult[mode])
2914 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2915 rtx addr;
2916 HOST_WIDE_INT i;
2918 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2919 sbitmap_zero (valid_mult[mode]);
2920 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2921 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2923 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2924 if (memory_address_p (mode, addr))
2925 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2928 if (dump_file && (dump_flags & TDF_DETAILS))
2930 fprintf (dump_file, " allowed multipliers:");
2931 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2932 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2933 fprintf (dump_file, " %d", (int) i);
2934 fprintf (dump_file, "\n");
2935 fprintf (dump_file, "\n");
2939 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2940 return false;
2942 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2945 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2946 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2947 variable is omitted. Compute the cost for a memory reference that accesses
2948 a memory location of mode MEM_MODE.
2950 TODO -- there must be some better way. This all is quite crude. */
2952 static comp_cost
2953 get_address_cost (bool symbol_present, bool var_present,
2954 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2955 enum machine_mode mem_mode)
2957 static bool initialized[MAX_MACHINE_MODE];
2958 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2959 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2960 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2961 unsigned cost, acost, complexity;
2962 bool offset_p, ratio_p;
2963 HOST_WIDE_INT s_offset;
2964 unsigned HOST_WIDE_INT mask;
2965 unsigned bits;
2967 if (!initialized[mem_mode])
2969 HOST_WIDE_INT i;
2970 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2971 int old_cse_not_expected;
2972 unsigned sym_p, var_p, off_p, rat_p, add_c;
2973 rtx seq, addr, base;
2974 rtx reg0, reg1;
2976 initialized[mem_mode] = true;
2978 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2980 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2981 for (i = start; i <= 1 << 20; i <<= 1)
2983 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2984 if (!memory_address_p (mem_mode, addr))
2985 break;
2987 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2988 off[mem_mode] = max_offset[mem_mode];
2990 for (i = start; i <= 1 << 20; i <<= 1)
2992 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2993 if (!memory_address_p (mem_mode, addr))
2994 break;
2996 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2998 if (dump_file && (dump_flags & TDF_DETAILS))
3000 fprintf (dump_file, "get_address_cost:\n");
3001 fprintf (dump_file, " min offset %s %d\n",
3002 GET_MODE_NAME (mem_mode),
3003 (int) min_offset[mem_mode]);
3004 fprintf (dump_file, " max offset %s %d\n",
3005 GET_MODE_NAME (mem_mode),
3006 (int) max_offset[mem_mode]);
3009 rat[mem_mode] = 1;
3010 for (i = 2; i <= MAX_RATIO; i++)
3011 if (multiplier_allowed_in_address_p (i, mem_mode))
3013 rat[mem_mode] = i;
3014 break;
3017 /* Compute the cost of various addressing modes. */
3018 acost = 0;
3019 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3020 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3022 for (i = 0; i < 16; i++)
3024 sym_p = i & 1;
3025 var_p = (i >> 1) & 1;
3026 off_p = (i >> 2) & 1;
3027 rat_p = (i >> 3) & 1;
3029 addr = reg0;
3030 if (rat_p)
3031 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3032 gen_int_mode (rat[mem_mode], Pmode));
3034 if (var_p)
3035 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3037 if (sym_p)
3039 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3040 /* ??? We can run into trouble with some backends by presenting
3041 it with symbols which havn't been properly passed through
3042 targetm.encode_section_info. By setting the local bit, we
3043 enhance the probability of things working. */
3044 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3046 if (off_p)
3047 base = gen_rtx_fmt_e (CONST, Pmode,
3048 gen_rtx_fmt_ee (PLUS, Pmode,
3049 base,
3050 gen_int_mode (off[mem_mode],
3051 Pmode)));
3053 else if (off_p)
3054 base = gen_int_mode (off[mem_mode], Pmode);
3055 else
3056 base = NULL_RTX;
3058 if (base)
3059 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3061 start_sequence ();
3062 /* To avoid splitting addressing modes, pretend that no cse will
3063 follow. */
3064 old_cse_not_expected = cse_not_expected;
3065 cse_not_expected = true;
3066 addr = memory_address (mem_mode, addr);
3067 cse_not_expected = old_cse_not_expected;
3068 seq = get_insns ();
3069 end_sequence ();
3071 acost = seq_cost (seq);
3072 acost += address_cost (addr, mem_mode);
3074 if (!acost)
3075 acost = 1;
3076 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3079 /* On some targets, it is quite expensive to load symbol to a register,
3080 which makes addresses that contain symbols look much more expensive.
3081 However, the symbol will have to be loaded in any case before the
3082 loop (and quite likely we have it in register already), so it does not
3083 make much sense to penalize them too heavily. So make some final
3084 tweaks for the SYMBOL_PRESENT modes:
3086 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3087 var is cheaper, use this mode with small penalty.
3088 If VAR_PRESENT is true, try whether the mode with
3089 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3090 if this is the case, use it. */
3091 add_c = add_cost (Pmode);
3092 for (i = 0; i < 8; i++)
3094 var_p = i & 1;
3095 off_p = (i >> 1) & 1;
3096 rat_p = (i >> 2) & 1;
3098 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3099 if (var_p)
3100 acost += add_c;
3102 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3103 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3106 if (dump_file && (dump_flags & TDF_DETAILS))
3108 fprintf (dump_file, "Address costs:\n");
3110 for (i = 0; i < 16; i++)
3112 sym_p = i & 1;
3113 var_p = (i >> 1) & 1;
3114 off_p = (i >> 2) & 1;
3115 rat_p = (i >> 3) & 1;
3117 fprintf (dump_file, " ");
3118 if (sym_p)
3119 fprintf (dump_file, "sym + ");
3120 if (var_p)
3121 fprintf (dump_file, "var + ");
3122 if (off_p)
3123 fprintf (dump_file, "cst + ");
3124 if (rat_p)
3125 fprintf (dump_file, "rat * ");
3127 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3128 fprintf (dump_file, "index costs %d\n", acost);
3130 fprintf (dump_file, "\n");
3134 bits = GET_MODE_BITSIZE (Pmode);
3135 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3136 offset &= mask;
3137 if ((offset >> (bits - 1) & 1))
3138 offset |= ~mask;
3139 s_offset = offset;
3141 cost = 0;
3142 offset_p = (s_offset != 0
3143 && min_offset[mem_mode] <= s_offset
3144 && s_offset <= max_offset[mem_mode]);
3145 ratio_p = (ratio != 1
3146 && multiplier_allowed_in_address_p (ratio, mem_mode));
3148 if (ratio != 1 && !ratio_p)
3149 cost += multiply_by_cost (ratio, Pmode);
3151 if (s_offset && !offset_p && !symbol_present)
3152 cost += add_cost (Pmode);
3154 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3155 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3156 return new_cost (cost + acost, complexity);
3159 /* Estimates cost of forcing expression EXPR into a variable. */
3161 static comp_cost
3162 force_expr_to_var_cost (tree expr)
3164 static bool costs_initialized = false;
3165 static unsigned integer_cost;
3166 static unsigned symbol_cost;
3167 static unsigned address_cost;
3168 tree op0, op1;
3169 comp_cost cost0, cost1, cost;
3170 enum machine_mode mode;
3172 if (!costs_initialized)
3174 tree type = build_pointer_type (integer_type_node);
3175 tree var, addr;
3176 rtx x;
3178 var = create_tmp_var_raw (integer_type_node, "test_var");
3179 TREE_STATIC (var) = 1;
3180 x = produce_memory_decl_rtl (var, NULL);
3181 SET_DECL_RTL (var, x);
3183 integer_cost = computation_cost (build_int_cst (integer_type_node,
3184 2000));
3186 addr = build1 (ADDR_EXPR, type, var);
3187 symbol_cost = computation_cost (addr) + 1;
3189 address_cost
3190 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3191 addr,
3192 build_int_cst (sizetype, 2000))) + 1;
3193 if (dump_file && (dump_flags & TDF_DETAILS))
3195 fprintf (dump_file, "force_expr_to_var_cost:\n");
3196 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3197 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3198 fprintf (dump_file, " address %d\n", (int) address_cost);
3199 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3200 fprintf (dump_file, "\n");
3203 costs_initialized = true;
3206 STRIP_NOPS (expr);
3208 if (SSA_VAR_P (expr))
3209 return zero_cost;
3211 if (TREE_INVARIANT (expr))
3213 if (TREE_CODE (expr) == INTEGER_CST)
3214 return new_cost (integer_cost, 0);
3216 if (TREE_CODE (expr) == ADDR_EXPR)
3218 tree obj = TREE_OPERAND (expr, 0);
3220 if (TREE_CODE (obj) == VAR_DECL
3221 || TREE_CODE (obj) == PARM_DECL
3222 || TREE_CODE (obj) == RESULT_DECL)
3223 return new_cost (symbol_cost, 0);
3226 return new_cost (address_cost, 0);
3229 switch (TREE_CODE (expr))
3231 case POINTER_PLUS_EXPR:
3232 case PLUS_EXPR:
3233 case MINUS_EXPR:
3234 case MULT_EXPR:
3235 op0 = TREE_OPERAND (expr, 0);
3236 op1 = TREE_OPERAND (expr, 1);
3237 STRIP_NOPS (op0);
3238 STRIP_NOPS (op1);
3240 if (is_gimple_val (op0))
3241 cost0 = zero_cost;
3242 else
3243 cost0 = force_expr_to_var_cost (op0);
3245 if (is_gimple_val (op1))
3246 cost1 = zero_cost;
3247 else
3248 cost1 = force_expr_to_var_cost (op1);
3250 break;
3252 default:
3253 /* Just an arbitrary value, FIXME. */
3254 return new_cost (target_spill_cost, 0);
3257 mode = TYPE_MODE (TREE_TYPE (expr));
3258 switch (TREE_CODE (expr))
3260 case POINTER_PLUS_EXPR:
3261 case PLUS_EXPR:
3262 case MINUS_EXPR:
3263 cost = new_cost (add_cost (mode), 0);
3264 break;
3266 case MULT_EXPR:
3267 if (cst_and_fits_in_hwi (op0))
3268 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3269 else if (cst_and_fits_in_hwi (op1))
3270 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3271 else
3272 return new_cost (target_spill_cost, 0);
3273 break;
3275 default:
3276 gcc_unreachable ();
3279 cost = add_costs (cost, cost0);
3280 cost = add_costs (cost, cost1);
3282 /* Bound the cost by target_spill_cost. The parts of complicated
3283 computations often are either loop invariant or at least can
3284 be shared between several iv uses, so letting this grow without
3285 limits would not give reasonable results. */
3286 if (cost.cost > target_spill_cost)
3287 cost.cost = target_spill_cost;
3289 return cost;
3292 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3293 invariants the computation depends on. */
3295 static comp_cost
3296 force_var_cost (struct ivopts_data *data,
3297 tree expr, bitmap *depends_on)
3299 if (depends_on)
3301 fd_ivopts_data = data;
3302 walk_tree (&expr, find_depends, depends_on, NULL);
3305 return force_expr_to_var_cost (expr);
3308 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3309 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3310 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3311 invariants the computation depends on. */
3313 static comp_cost
3314 split_address_cost (struct ivopts_data *data,
3315 tree addr, bool *symbol_present, bool *var_present,
3316 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3318 tree core;
3319 HOST_WIDE_INT bitsize;
3320 HOST_WIDE_INT bitpos;
3321 tree toffset;
3322 enum machine_mode mode;
3323 int unsignedp, volatilep;
3325 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3326 &unsignedp, &volatilep, false);
3328 if (toffset != 0
3329 || bitpos % BITS_PER_UNIT != 0
3330 || TREE_CODE (core) != VAR_DECL)
3332 *symbol_present = false;
3333 *var_present = true;
3334 fd_ivopts_data = data;
3335 walk_tree (&addr, find_depends, depends_on, NULL);
3336 return new_cost (target_spill_cost, 0);
3339 *offset += bitpos / BITS_PER_UNIT;
3340 if (TREE_STATIC (core)
3341 || DECL_EXTERNAL (core))
3343 *symbol_present = true;
3344 *var_present = false;
3345 return zero_cost;
3348 *symbol_present = false;
3349 *var_present = true;
3350 return zero_cost;
3353 /* Estimates cost of expressing difference of addresses E1 - E2 as
3354 var + symbol + offset. The value of offset is added to OFFSET,
3355 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3356 part is missing. DEPENDS_ON is a set of the invariants the computation
3357 depends on. */
3359 static comp_cost
3360 ptr_difference_cost (struct ivopts_data *data,
3361 tree e1, tree e2, bool *symbol_present, bool *var_present,
3362 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3364 HOST_WIDE_INT diff = 0;
3365 comp_cost cost;
3367 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3369 if (ptr_difference_const (e1, e2, &diff))
3371 *offset += diff;
3372 *symbol_present = false;
3373 *var_present = false;
3374 return zero_cost;
3377 if (integer_zerop (e2))
3378 return split_address_cost (data, TREE_OPERAND (e1, 0),
3379 symbol_present, var_present, offset, depends_on);
3381 *symbol_present = false;
3382 *var_present = true;
3384 cost = force_var_cost (data, e1, depends_on);
3385 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3386 cost.cost += add_cost (Pmode);
3388 return cost;
3391 /* Estimates cost of expressing difference E1 - E2 as
3392 var + symbol + offset. The value of offset is added to OFFSET,
3393 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3394 part is missing. DEPENDS_ON is a set of the invariants the computation
3395 depends on. */
3397 static comp_cost
3398 difference_cost (struct ivopts_data *data,
3399 tree e1, tree e2, bool *symbol_present, bool *var_present,
3400 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3402 comp_cost cost;
3403 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3404 unsigned HOST_WIDE_INT off1, off2;
3406 e1 = strip_offset (e1, &off1);
3407 e2 = strip_offset (e2, &off2);
3408 *offset += off1 - off2;
3410 STRIP_NOPS (e1);
3411 STRIP_NOPS (e2);
3413 if (TREE_CODE (e1) == ADDR_EXPR)
3414 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3415 depends_on);
3416 *symbol_present = false;
3418 if (operand_equal_p (e1, e2, 0))
3420 *var_present = false;
3421 return zero_cost;
3423 *var_present = true;
3424 if (integer_zerop (e2))
3425 return force_var_cost (data, e1, depends_on);
3427 if (integer_zerop (e1))
3429 cost = force_var_cost (data, e2, depends_on);
3430 cost.cost += multiply_by_cost (-1, mode);
3432 return cost;
3435 cost = force_var_cost (data, e1, depends_on);
3436 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3437 cost.cost += add_cost (mode);
3439 return cost;
3442 /* Determines the cost of the computation by that USE is expressed
3443 from induction variable CAND. If ADDRESS_P is true, we just need
3444 to create an address from it, otherwise we want to get it into
3445 register. A set of invariants we depend on is stored in
3446 DEPENDS_ON. AT is the statement at that the value is computed. */
3448 static comp_cost
3449 get_computation_cost_at (struct ivopts_data *data,
3450 struct iv_use *use, struct iv_cand *cand,
3451 bool address_p, bitmap *depends_on, tree at)
3453 tree ubase = use->iv->base, ustep = use->iv->step;
3454 tree cbase, cstep;
3455 tree utype = TREE_TYPE (ubase), ctype;
3456 unsigned HOST_WIDE_INT cstepi, offset = 0;
3457 HOST_WIDE_INT ratio, aratio;
3458 bool var_present, symbol_present;
3459 comp_cost cost;
3460 unsigned n_sums;
3461 double_int rat;
3463 *depends_on = NULL;
3465 /* Only consider real candidates. */
3466 if (!cand->iv)
3467 return infinite_cost;
3469 cbase = cand->iv->base;
3470 cstep = cand->iv->step;
3471 ctype = TREE_TYPE (cbase);
3473 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3475 /* We do not have a precision to express the values of use. */
3476 return infinite_cost;
3479 if (address_p)
3481 /* Do not try to express address of an object with computation based
3482 on address of a different object. This may cause problems in rtl
3483 level alias analysis (that does not expect this to be happening,
3484 as this is illegal in C), and would be unlikely to be useful
3485 anyway. */
3486 if (use->iv->base_object
3487 && cand->iv->base_object
3488 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3489 return infinite_cost;
3492 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3494 /* TODO -- add direct handling of this case. */
3495 goto fallback;
3498 /* CSTEPI is removed from the offset in case statement is after the
3499 increment. If the step is not constant, we use zero instead.
3500 This is a bit imprecise (there is the extra addition), but
3501 redundancy elimination is likely to transform the code so that
3502 it uses value of the variable before increment anyway,
3503 so it is not that much unrealistic. */
3504 if (cst_and_fits_in_hwi (cstep))
3505 cstepi = int_cst_value (cstep);
3506 else
3507 cstepi = 0;
3509 if (!constant_multiple_of (ustep, cstep, &rat))
3510 return infinite_cost;
3512 if (double_int_fits_in_shwi_p (rat))
3513 ratio = double_int_to_shwi (rat);
3514 else
3515 return infinite_cost;
3517 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3518 or ratio == 1, it is better to handle this like
3520 ubase - ratio * cbase + ratio * var
3522 (also holds in the case ratio == -1, TODO. */
3524 if (cst_and_fits_in_hwi (cbase))
3526 offset = - ratio * int_cst_value (cbase);
3527 cost = difference_cost (data,
3528 ubase, build_int_cst (utype, 0),
3529 &symbol_present, &var_present, &offset,
3530 depends_on);
3532 else if (ratio == 1)
3534 cost = difference_cost (data,
3535 ubase, cbase,
3536 &symbol_present, &var_present, &offset,
3537 depends_on);
3539 else
3541 cost = force_var_cost (data, cbase, depends_on);
3542 cost.cost += add_cost (TYPE_MODE (ctype));
3543 cost = add_costs (cost,
3544 difference_cost (data,
3545 ubase, build_int_cst (utype, 0),
3546 &symbol_present, &var_present,
3547 &offset, depends_on));
3550 /* If we are after the increment, the value of the candidate is higher by
3551 one iteration. */
3552 if (stmt_after_increment (data->current_loop, cand, at))
3553 offset -= ratio * cstepi;
3555 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3556 (symbol/var/const parts may be omitted). If we are looking for an address,
3557 find the cost of addressing this. */
3558 if (address_p)
3559 return add_costs (cost, get_address_cost (symbol_present, var_present,
3560 offset, ratio,
3561 TYPE_MODE (TREE_TYPE (*use->op_p))));
3563 /* Otherwise estimate the costs for computing the expression. */
3564 aratio = ratio > 0 ? ratio : -ratio;
3565 if (!symbol_present && !var_present && !offset)
3567 if (ratio != 1)
3568 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3570 return cost;
3573 if (aratio != 1)
3574 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3576 n_sums = 1;
3577 if (var_present
3578 /* Symbol + offset should be compile-time computable. */
3579 && (symbol_present || offset))
3580 n_sums++;
3582 /* Having offset does not affect runtime cost in case it is added to
3583 symbol, but it increases complexity. */
3584 if (offset)
3585 cost.complexity++;
3587 cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3588 return cost;
3590 fallback:
3592 /* Just get the expression, expand it and measure the cost. */
3593 tree comp = get_computation_at (data->current_loop, use, cand, at);
3595 if (!comp)
3596 return infinite_cost;
3598 if (address_p)
3599 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3601 return new_cost (computation_cost (comp), 0);
3605 /* Determines the cost of the computation by that USE is expressed
3606 from induction variable CAND. If ADDRESS_P is true, we just need
3607 to create an address from it, otherwise we want to get it into
3608 register. A set of invariants we depend on is stored in
3609 DEPENDS_ON. */
3611 static comp_cost
3612 get_computation_cost (struct ivopts_data *data,
3613 struct iv_use *use, struct iv_cand *cand,
3614 bool address_p, bitmap *depends_on)
3616 return get_computation_cost_at (data,
3617 use, cand, address_p, depends_on, use->stmt);
3620 /* Determines cost of basing replacement of USE on CAND in a generic
3621 expression. */
3623 static bool
3624 determine_use_iv_cost_generic (struct ivopts_data *data,
3625 struct iv_use *use, struct iv_cand *cand)
3627 bitmap depends_on;
3628 comp_cost cost;
3630 /* The simple case first -- if we need to express value of the preserved
3631 original biv, the cost is 0. This also prevents us from counting the
3632 cost of increment twice -- once at this use and once in the cost of
3633 the candidate. */
3634 if (cand->pos == IP_ORIGINAL
3635 && cand->incremented_at == use->stmt)
3637 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3638 return true;
3641 cost = get_computation_cost (data, use, cand, false, &depends_on);
3642 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3644 return !infinite_cost_p (cost);
3647 /* Determines cost of basing replacement of USE on CAND in an address. */
3649 static bool
3650 determine_use_iv_cost_address (struct ivopts_data *data,
3651 struct iv_use *use, struct iv_cand *cand)
3653 bitmap depends_on;
3654 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3656 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3658 return !infinite_cost_p (cost);
3661 /* Computes value of candidate CAND at position AT in iteration NITER, and
3662 stores it to VAL. */
3664 static void
3665 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3666 aff_tree *val)
3668 aff_tree step, delta, nit;
3669 struct iv *iv = cand->iv;
3670 tree type = TREE_TYPE (iv->base);
3672 tree_to_aff_combination (iv->step, type, &step);
3673 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3674 aff_combination_convert (&nit, type);
3675 aff_combination_mult (&nit, &step, &delta);
3676 if (stmt_after_increment (loop, cand, at))
3677 aff_combination_add (&delta, &step);
3679 tree_to_aff_combination (iv->base, type, val);
3680 aff_combination_add (val, &delta);
3683 /* Returns period of induction variable iv. */
3685 static tree
3686 iv_period (struct iv *iv)
3688 tree step = iv->step, period, type;
3689 tree pow2div;
3691 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3693 /* Period of the iv is gcd (step, type range). Since type range is power
3694 of two, it suffices to determine the maximum power of two that divides
3695 step. */
3696 pow2div = num_ending_zeros (step);
3697 type = unsigned_type_for (TREE_TYPE (step));
3699 period = build_low_bits_mask (type,
3700 (TYPE_PRECISION (type)
3701 - tree_low_cst (pow2div, 1)));
3703 return period;
3706 /* Returns the comparison operator used when eliminating the iv USE. */
3708 static enum tree_code
3709 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3711 struct loop *loop = data->current_loop;
3712 basic_block ex_bb;
3713 edge exit;
3715 ex_bb = bb_for_stmt (use->stmt);
3716 exit = EDGE_SUCC (ex_bb, 0);
3717 if (flow_bb_inside_loop_p (loop, exit->dest))
3718 exit = EDGE_SUCC (ex_bb, 1);
3720 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3723 /* Check whether it is possible to express the condition in USE by comparison
3724 of candidate CAND. If so, store the value compared with to BOUND. */
3726 static bool
3727 may_eliminate_iv (struct ivopts_data *data,
3728 struct iv_use *use, struct iv_cand *cand, tree *bound)
3730 basic_block ex_bb;
3731 edge exit;
3732 tree nit, period;
3733 struct loop *loop = data->current_loop;
3734 aff_tree bnd;
3735 double_int period_value, max_niter;
3737 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3738 return false;
3740 /* For now works only for exits that dominate the loop latch. TODO -- extend
3741 for other conditions inside loop body. */
3742 ex_bb = bb_for_stmt (use->stmt);
3743 if (use->stmt != last_stmt (ex_bb)
3744 || TREE_CODE (use->stmt) != COND_EXPR)
3745 return false;
3746 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3747 return false;
3749 exit = EDGE_SUCC (ex_bb, 0);
3750 if (flow_bb_inside_loop_p (loop, exit->dest))
3751 exit = EDGE_SUCC (ex_bb, 1);
3752 if (flow_bb_inside_loop_p (loop, exit->dest))
3753 return false;
3755 nit = niter_for_exit (data, exit);
3756 if (!nit)
3757 return false;
3759 /* Determine whether we may use the variable to test whether niter iterations
3760 elapsed. This is the case iff the period of the induction variable is
3761 greater than the number of iterations. */
3762 period = iv_period (cand->iv);
3763 if (!period)
3764 return false;
3766 /* Compare the period with the estimate on the number of iterations of the
3767 loop. */
3768 if (!estimated_loop_iterations (loop, true, &max_niter))
3769 return false;
3770 period_value = tree_to_double_int (period);
3771 if (double_int_ucmp (period_value, max_niter) <= 0)
3772 return false;
3774 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3775 *bound = aff_combination_to_tree (&bnd);
3776 return true;
3779 /* Determines cost of basing replacement of USE on CAND in a condition. */
3781 static bool
3782 determine_use_iv_cost_condition (struct ivopts_data *data,
3783 struct iv_use *use, struct iv_cand *cand)
3785 tree bound = NULL_TREE;
3786 struct iv *cmp_iv;
3787 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3788 comp_cost elim_cost, express_cost, cost;
3789 bool ok;
3791 /* Only consider real candidates. */
3792 if (!cand->iv)
3794 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3795 return false;
3798 /* Try iv elimination. */
3799 if (may_eliminate_iv (data, use, cand, &bound))
3801 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3802 /* The bound is a loop invariant, so it will be only computed
3803 once. */
3804 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3806 else
3807 elim_cost = infinite_cost;
3809 /* Try expressing the original giv. If it is compared with an invariant,
3810 note that we cannot get rid of it. */
3811 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3812 gcc_assert (ok);
3814 express_cost = get_computation_cost (data, use, cand, false,
3815 &depends_on_express);
3816 fd_ivopts_data = data;
3817 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3819 /* Choose the better approach. */
3820 if (compare_costs (elim_cost, express_cost) < 0)
3822 cost = elim_cost;
3823 depends_on = depends_on_elim;
3824 depends_on_elim = NULL;
3826 else
3828 cost = express_cost;
3829 depends_on = depends_on_express;
3830 depends_on_express = NULL;
3831 bound = NULL_TREE;
3834 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3836 if (depends_on_elim)
3837 BITMAP_FREE (depends_on_elim);
3838 if (depends_on_express)
3839 BITMAP_FREE (depends_on_express);
3841 return !infinite_cost_p (cost);
3844 /* Determines cost of basing replacement of USE on CAND. Returns false
3845 if USE cannot be based on CAND. */
3847 static bool
3848 determine_use_iv_cost (struct ivopts_data *data,
3849 struct iv_use *use, struct iv_cand *cand)
3851 switch (use->type)
3853 case USE_NONLINEAR_EXPR:
3854 return determine_use_iv_cost_generic (data, use, cand);
3856 case USE_ADDRESS:
3857 return determine_use_iv_cost_address (data, use, cand);
3859 case USE_COMPARE:
3860 return determine_use_iv_cost_condition (data, use, cand);
3862 default:
3863 gcc_unreachable ();
3867 /* Determines costs of basing the use of the iv on an iv candidate. */
3869 static void
3870 determine_use_iv_costs (struct ivopts_data *data)
3872 unsigned i, j;
3873 struct iv_use *use;
3874 struct iv_cand *cand;
3875 bitmap to_clear = BITMAP_ALLOC (NULL);
3877 alloc_use_cost_map (data);
3879 for (i = 0; i < n_iv_uses (data); i++)
3881 use = iv_use (data, i);
3883 if (data->consider_all_candidates)
3885 for (j = 0; j < n_iv_cands (data); j++)
3887 cand = iv_cand (data, j);
3888 determine_use_iv_cost (data, use, cand);
3891 else
3893 bitmap_iterator bi;
3895 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3897 cand = iv_cand (data, j);
3898 if (!determine_use_iv_cost (data, use, cand))
3899 bitmap_set_bit (to_clear, j);
3902 /* Remove the candidates for that the cost is infinite from
3903 the list of related candidates. */
3904 bitmap_and_compl_into (use->related_cands, to_clear);
3905 bitmap_clear (to_clear);
3909 BITMAP_FREE (to_clear);
3911 if (dump_file && (dump_flags & TDF_DETAILS))
3913 fprintf (dump_file, "Use-candidate costs:\n");
3915 for (i = 0; i < n_iv_uses (data); i++)
3917 use = iv_use (data, i);
3919 fprintf (dump_file, "Use %d:\n", i);
3920 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
3921 for (j = 0; j < use->n_map_members; j++)
3923 if (!use->cost_map[j].cand
3924 || infinite_cost_p (use->cost_map[j].cost))
3925 continue;
3927 fprintf (dump_file, " %d\t%d\t%d\t",
3928 use->cost_map[j].cand->id,
3929 use->cost_map[j].cost.cost,
3930 use->cost_map[j].cost.complexity);
3931 if (use->cost_map[j].depends_on)
3932 bitmap_print (dump_file,
3933 use->cost_map[j].depends_on, "","");
3934 fprintf (dump_file, "\n");
3937 fprintf (dump_file, "\n");
3939 fprintf (dump_file, "\n");
3943 /* Determines cost of the candidate CAND. */
3945 static void
3946 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3948 comp_cost cost_base;
3949 unsigned cost, cost_step;
3950 tree base;
3952 if (!cand->iv)
3954 cand->cost = 0;
3955 return;
3958 /* There are two costs associated with the candidate -- its increment
3959 and its initialization. The second is almost negligible for any loop
3960 that rolls enough, so we take it just very little into account. */
3962 base = cand->iv->base;
3963 cost_base = force_var_cost (data, base, NULL);
3964 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3966 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
3968 /* Prefer the original ivs unless we may gain something by replacing it.
3969 The reason is to makee debugging simpler; so this is not relevant for
3970 artificial ivs created by other optimization passes. */
3971 if (cand->pos != IP_ORIGINAL
3972 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3973 cost++;
3975 /* Prefer not to insert statements into latch unless there are some
3976 already (so that we do not create unnecessary jumps). */
3977 if (cand->pos == IP_END
3978 && empty_block_p (ip_end_pos (data->current_loop)))
3979 cost++;
3981 cand->cost = cost;
3984 /* Determines costs of computation of the candidates. */
3986 static void
3987 determine_iv_costs (struct ivopts_data *data)
3989 unsigned i;
3991 if (dump_file && (dump_flags & TDF_DETAILS))
3993 fprintf (dump_file, "Candidate costs:\n");
3994 fprintf (dump_file, " cand\tcost\n");
3997 for (i = 0; i < n_iv_cands (data); i++)
3999 struct iv_cand *cand = iv_cand (data, i);
4001 determine_iv_cost (data, cand);
4003 if (dump_file && (dump_flags & TDF_DETAILS))
4004 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4007 if (dump_file && (dump_flags & TDF_DETAILS))
4008 fprintf (dump_file, "\n");
4011 /* Calculates cost for having SIZE induction variables. */
4013 static unsigned
4014 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4016 /* We add size to the cost, so that we prefer eliminating ivs
4017 if possible. */
4018 return size + estimate_reg_pressure_cost (size, data->regs_used);
4021 /* For each size of the induction variable set determine the penalty. */
4023 static void
4024 determine_set_costs (struct ivopts_data *data)
4026 unsigned j, n;
4027 tree phi, op;
4028 struct loop *loop = data->current_loop;
4029 bitmap_iterator bi;
4031 /* We use the following model (definitely improvable, especially the
4032 cost function -- TODO):
4034 We estimate the number of registers available (using MD data), name it A.
4036 We estimate the number of registers used by the loop, name it U. This
4037 number is obtained as the number of loop phi nodes (not counting virtual
4038 registers and bivs) + the number of variables from outside of the loop.
4040 We set a reserve R (free regs that are used for temporary computations,
4041 etc.). For now the reserve is a constant 3.
4043 Let I be the number of induction variables.
4045 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4046 make a lot of ivs without a reason).
4047 -- if A - R < U + I <= A, the cost is I * PRES_COST
4048 -- if U + I > A, the cost is I * PRES_COST and
4049 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4051 if (dump_file && (dump_flags & TDF_DETAILS))
4053 fprintf (dump_file, "Global costs:\n");
4054 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4055 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost);
4056 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4059 n = 0;
4060 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4062 op = PHI_RESULT (phi);
4064 if (!is_gimple_reg (op))
4065 continue;
4067 if (get_iv (data, op))
4068 continue;
4070 n++;
4073 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4075 struct version_info *info = ver_info (data, j);
4077 if (info->inv_id && info->has_nonlin_use)
4078 n++;
4081 data->regs_used = n;
4082 if (dump_file && (dump_flags & TDF_DETAILS))
4083 fprintf (dump_file, " regs_used %d\n", n);
4085 if (dump_file && (dump_flags & TDF_DETAILS))
4087 fprintf (dump_file, " cost for size:\n");
4088 fprintf (dump_file, " ivs\tcost\n");
4089 for (j = 0; j <= 2 * target_avail_regs; j++)
4090 fprintf (dump_file, " %d\t%d\n", j,
4091 ivopts_global_cost_for_size (data, j));
4092 fprintf (dump_file, "\n");
4096 /* Returns true if A is a cheaper cost pair than B. */
4098 static bool
4099 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4101 int cmp;
4103 if (!a)
4104 return false;
4106 if (!b)
4107 return true;
4109 cmp = compare_costs (a->cost, b->cost);
4110 if (cmp < 0)
4111 return true;
4113 if (cmp > 0)
4114 return false;
4116 /* In case the costs are the same, prefer the cheaper candidate. */
4117 if (a->cand->cost < b->cand->cost)
4118 return true;
4120 return false;
4123 /* Computes the cost field of IVS structure. */
4125 static void
4126 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4128 comp_cost cost = ivs->cand_use_cost;
4129 cost.cost += ivs->cand_cost;
4130 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4132 ivs->cost = cost;
4135 /* Remove invariants in set INVS to set IVS. */
4137 static void
4138 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4140 bitmap_iterator bi;
4141 unsigned iid;
4143 if (!invs)
4144 return;
4146 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4148 ivs->n_invariant_uses[iid]--;
4149 if (ivs->n_invariant_uses[iid] == 0)
4150 ivs->n_regs--;
4154 /* Set USE not to be expressed by any candidate in IVS. */
4156 static void
4157 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4158 struct iv_use *use)
4160 unsigned uid = use->id, cid;
4161 struct cost_pair *cp;
4163 cp = ivs->cand_for_use[uid];
4164 if (!cp)
4165 return;
4166 cid = cp->cand->id;
4168 ivs->bad_uses++;
4169 ivs->cand_for_use[uid] = NULL;
4170 ivs->n_cand_uses[cid]--;
4172 if (ivs->n_cand_uses[cid] == 0)
4174 bitmap_clear_bit (ivs->cands, cid);
4175 /* Do not count the pseudocandidates. */
4176 if (cp->cand->iv)
4177 ivs->n_regs--;
4178 ivs->n_cands--;
4179 ivs->cand_cost -= cp->cand->cost;
4181 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4184 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4186 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4187 iv_ca_recount_cost (data, ivs);
4190 /* Add invariants in set INVS to set IVS. */
4192 static void
4193 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4195 bitmap_iterator bi;
4196 unsigned iid;
4198 if (!invs)
4199 return;
4201 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4203 ivs->n_invariant_uses[iid]++;
4204 if (ivs->n_invariant_uses[iid] == 1)
4205 ivs->n_regs++;
4209 /* Set cost pair for USE in set IVS to CP. */
4211 static void
4212 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4213 struct iv_use *use, struct cost_pair *cp)
4215 unsigned uid = use->id, cid;
4217 if (ivs->cand_for_use[uid] == cp)
4218 return;
4220 if (ivs->cand_for_use[uid])
4221 iv_ca_set_no_cp (data, ivs, use);
4223 if (cp)
4225 cid = cp->cand->id;
4227 ivs->bad_uses--;
4228 ivs->cand_for_use[uid] = cp;
4229 ivs->n_cand_uses[cid]++;
4230 if (ivs->n_cand_uses[cid] == 1)
4232 bitmap_set_bit (ivs->cands, cid);
4233 /* Do not count the pseudocandidates. */
4234 if (cp->cand->iv)
4235 ivs->n_regs++;
4236 ivs->n_cands++;
4237 ivs->cand_cost += cp->cand->cost;
4239 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4242 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4243 iv_ca_set_add_invariants (ivs, cp->depends_on);
4244 iv_ca_recount_cost (data, ivs);
4248 /* Extend set IVS by expressing USE by some of the candidates in it
4249 if possible. */
4251 static void
4252 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4253 struct iv_use *use)
4255 struct cost_pair *best_cp = NULL, *cp;
4256 bitmap_iterator bi;
4257 unsigned i;
4259 gcc_assert (ivs->upto >= use->id);
4261 if (ivs->upto == use->id)
4263 ivs->upto++;
4264 ivs->bad_uses++;
4267 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4269 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4271 if (cheaper_cost_pair (cp, best_cp))
4272 best_cp = cp;
4275 iv_ca_set_cp (data, ivs, use, best_cp);
4278 /* Get cost for assignment IVS. */
4280 static comp_cost
4281 iv_ca_cost (struct iv_ca *ivs)
4283 return (ivs->bad_uses ? infinite_cost : ivs->cost);
4286 /* Returns true if all dependences of CP are among invariants in IVS. */
4288 static bool
4289 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4291 unsigned i;
4292 bitmap_iterator bi;
4294 if (!cp->depends_on)
4295 return true;
4297 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4299 if (ivs->n_invariant_uses[i] == 0)
4300 return false;
4303 return true;
4306 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4307 it before NEXT_CHANGE. */
4309 static struct iv_ca_delta *
4310 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4311 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4313 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4315 change->use = use;
4316 change->old_cp = old_cp;
4317 change->new_cp = new_cp;
4318 change->next_change = next_change;
4320 return change;
4323 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4324 are rewritten. */
4326 static struct iv_ca_delta *
4327 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4329 struct iv_ca_delta *last;
4331 if (!l2)
4332 return l1;
4334 if (!l1)
4335 return l2;
4337 for (last = l1; last->next_change; last = last->next_change)
4338 continue;
4339 last->next_change = l2;
4341 return l1;
4344 /* Returns candidate by that USE is expressed in IVS. */
4346 static struct cost_pair *
4347 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4349 return ivs->cand_for_use[use->id];
4352 /* Reverse the list of changes DELTA, forming the inverse to it. */
4354 static struct iv_ca_delta *
4355 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4357 struct iv_ca_delta *act, *next, *prev = NULL;
4358 struct cost_pair *tmp;
4360 for (act = delta; act; act = next)
4362 next = act->next_change;
4363 act->next_change = prev;
4364 prev = act;
4366 tmp = act->old_cp;
4367 act->old_cp = act->new_cp;
4368 act->new_cp = tmp;
4371 return prev;
4374 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4375 reverted instead. */
4377 static void
4378 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4379 struct iv_ca_delta *delta, bool forward)
4381 struct cost_pair *from, *to;
4382 struct iv_ca_delta *act;
4384 if (!forward)
4385 delta = iv_ca_delta_reverse (delta);
4387 for (act = delta; act; act = act->next_change)
4389 from = act->old_cp;
4390 to = act->new_cp;
4391 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4392 iv_ca_set_cp (data, ivs, act->use, to);
4395 if (!forward)
4396 iv_ca_delta_reverse (delta);
4399 /* Returns true if CAND is used in IVS. */
4401 static bool
4402 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4404 return ivs->n_cand_uses[cand->id] > 0;
4407 /* Returns number of induction variable candidates in the set IVS. */
4409 static unsigned
4410 iv_ca_n_cands (struct iv_ca *ivs)
4412 return ivs->n_cands;
4415 /* Free the list of changes DELTA. */
4417 static void
4418 iv_ca_delta_free (struct iv_ca_delta **delta)
4420 struct iv_ca_delta *act, *next;
4422 for (act = *delta; act; act = next)
4424 next = act->next_change;
4425 free (act);
4428 *delta = NULL;
4431 /* Allocates new iv candidates assignment. */
4433 static struct iv_ca *
4434 iv_ca_new (struct ivopts_data *data)
4436 struct iv_ca *nw = XNEW (struct iv_ca);
4438 nw->upto = 0;
4439 nw->bad_uses = 0;
4440 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4441 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4442 nw->cands = BITMAP_ALLOC (NULL);
4443 nw->n_cands = 0;
4444 nw->n_regs = 0;
4445 nw->cand_use_cost = zero_cost;
4446 nw->cand_cost = 0;
4447 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4448 nw->cost = zero_cost;
4450 return nw;
4453 /* Free memory occupied by the set IVS. */
4455 static void
4456 iv_ca_free (struct iv_ca **ivs)
4458 free ((*ivs)->cand_for_use);
4459 free ((*ivs)->n_cand_uses);
4460 BITMAP_FREE ((*ivs)->cands);
4461 free ((*ivs)->n_invariant_uses);
4462 free (*ivs);
4463 *ivs = NULL;
4466 /* Dumps IVS to FILE. */
4468 static void
4469 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4471 const char *pref = " invariants ";
4472 unsigned i;
4473 comp_cost cost = iv_ca_cost (ivs);
4475 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4476 bitmap_print (file, ivs->cands, " candidates ","\n");
4478 for (i = 1; i <= data->max_inv_id; i++)
4479 if (ivs->n_invariant_uses[i])
4481 fprintf (file, "%s%d", pref, i);
4482 pref = ", ";
4484 fprintf (file, "\n");
4487 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4488 new set, and store differences in DELTA. Number of induction variables
4489 in the new set is stored to N_IVS. */
4491 static comp_cost
4492 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4493 struct iv_cand *cand, struct iv_ca_delta **delta,
4494 unsigned *n_ivs)
4496 unsigned i;
4497 comp_cost cost;
4498 struct iv_use *use;
4499 struct cost_pair *old_cp, *new_cp;
4501 *delta = NULL;
4502 for (i = 0; i < ivs->upto; i++)
4504 use = iv_use (data, i);
4505 old_cp = iv_ca_cand_for_use (ivs, use);
4507 if (old_cp
4508 && old_cp->cand == cand)
4509 continue;
4511 new_cp = get_use_iv_cost (data, use, cand);
4512 if (!new_cp)
4513 continue;
4515 if (!iv_ca_has_deps (ivs, new_cp))
4516 continue;
4518 if (!cheaper_cost_pair (new_cp, old_cp))
4519 continue;
4521 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4524 iv_ca_delta_commit (data, ivs, *delta, true);
4525 cost = iv_ca_cost (ivs);
4526 if (n_ivs)
4527 *n_ivs = iv_ca_n_cands (ivs);
4528 iv_ca_delta_commit (data, ivs, *delta, false);
4530 return cost;
4533 /* Try narrowing set IVS by removing CAND. Return the cost of
4534 the new set and store the differences in DELTA. */
4536 static comp_cost
4537 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4538 struct iv_cand *cand, struct iv_ca_delta **delta)
4540 unsigned i, ci;
4541 struct iv_use *use;
4542 struct cost_pair *old_cp, *new_cp, *cp;
4543 bitmap_iterator bi;
4544 struct iv_cand *cnd;
4545 comp_cost cost;
4547 *delta = NULL;
4548 for (i = 0; i < n_iv_uses (data); i++)
4550 use = iv_use (data, i);
4552 old_cp = iv_ca_cand_for_use (ivs, use);
4553 if (old_cp->cand != cand)
4554 continue;
4556 new_cp = NULL;
4558 if (data->consider_all_candidates)
4560 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4562 if (ci == cand->id)
4563 continue;
4565 cnd = iv_cand (data, ci);
4567 cp = get_use_iv_cost (data, use, cnd);
4568 if (!cp)
4569 continue;
4570 if (!iv_ca_has_deps (ivs, cp))
4571 continue;
4573 if (!cheaper_cost_pair (cp, new_cp))
4574 continue;
4576 new_cp = cp;
4579 else
4581 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4583 if (ci == cand->id)
4584 continue;
4586 cnd = iv_cand (data, ci);
4588 cp = get_use_iv_cost (data, use, cnd);
4589 if (!cp)
4590 continue;
4591 if (!iv_ca_has_deps (ivs, cp))
4592 continue;
4594 if (!cheaper_cost_pair (cp, new_cp))
4595 continue;
4597 new_cp = cp;
4601 if (!new_cp)
4603 iv_ca_delta_free (delta);
4604 return infinite_cost;
4607 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4610 iv_ca_delta_commit (data, ivs, *delta, true);
4611 cost = iv_ca_cost (ivs);
4612 iv_ca_delta_commit (data, ivs, *delta, false);
4614 return cost;
4617 /* Try optimizing the set of candidates IVS by removing candidates different
4618 from to EXCEPT_CAND from it. Return cost of the new set, and store
4619 differences in DELTA. */
4621 static comp_cost
4622 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4623 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4625 bitmap_iterator bi;
4626 struct iv_ca_delta *act_delta, *best_delta;
4627 unsigned i;
4628 comp_cost best_cost, acost;
4629 struct iv_cand *cand;
4631 best_delta = NULL;
4632 best_cost = iv_ca_cost (ivs);
4634 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4636 cand = iv_cand (data, i);
4638 if (cand == except_cand)
4639 continue;
4641 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4643 if (compare_costs (acost, best_cost) < 0)
4645 best_cost = acost;
4646 iv_ca_delta_free (&best_delta);
4647 best_delta = act_delta;
4649 else
4650 iv_ca_delta_free (&act_delta);
4653 if (!best_delta)
4655 *delta = NULL;
4656 return best_cost;
4659 /* Recurse to possibly remove other unnecessary ivs. */
4660 iv_ca_delta_commit (data, ivs, best_delta, true);
4661 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4662 iv_ca_delta_commit (data, ivs, best_delta, false);
4663 *delta = iv_ca_delta_join (best_delta, *delta);
4664 return best_cost;
4667 /* Tries to extend the sets IVS in the best possible way in order
4668 to express the USE. */
4670 static bool
4671 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4672 struct iv_use *use)
4674 comp_cost best_cost, act_cost;
4675 unsigned i;
4676 bitmap_iterator bi;
4677 struct iv_cand *cand;
4678 struct iv_ca_delta *best_delta = NULL, *act_delta;
4679 struct cost_pair *cp;
4681 iv_ca_add_use (data, ivs, use);
4682 best_cost = iv_ca_cost (ivs);
4684 cp = iv_ca_cand_for_use (ivs, use);
4685 if (cp)
4687 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4688 iv_ca_set_no_cp (data, ivs, use);
4691 /* First try important candidates not based on any memory object. Only if
4692 this fails, try the specific ones. Rationale -- in loops with many
4693 variables the best choice often is to use just one generic biv. If we
4694 added here many ivs specific to the uses, the optimization algorithm later
4695 would be likely to get stuck in a local minimum, thus causing us to create
4696 too many ivs. The approach from few ivs to more seems more likely to be
4697 successful -- starting from few ivs, replacing an expensive use by a
4698 specific iv should always be a win. */
4699 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4701 cand = iv_cand (data, i);
4703 if (cand->iv->base_object != NULL_TREE)
4704 continue;
4706 if (iv_ca_cand_used_p (ivs, cand))
4707 continue;
4709 cp = get_use_iv_cost (data, use, cand);
4710 if (!cp)
4711 continue;
4713 iv_ca_set_cp (data, ivs, use, cp);
4714 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4715 iv_ca_set_no_cp (data, ivs, use);
4716 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4718 if (compare_costs (act_cost, best_cost) < 0)
4720 best_cost = act_cost;
4722 iv_ca_delta_free (&best_delta);
4723 best_delta = act_delta;
4725 else
4726 iv_ca_delta_free (&act_delta);
4729 if (infinite_cost_p (best_cost))
4731 for (i = 0; i < use->n_map_members; i++)
4733 cp = use->cost_map + i;
4734 cand = cp->cand;
4735 if (!cand)
4736 continue;
4738 /* Already tried this. */
4739 if (cand->important && cand->iv->base_object == NULL_TREE)
4740 continue;
4742 if (iv_ca_cand_used_p (ivs, cand))
4743 continue;
4745 act_delta = NULL;
4746 iv_ca_set_cp (data, ivs, use, cp);
4747 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4748 iv_ca_set_no_cp (data, ivs, use);
4749 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4750 cp, act_delta);
4752 if (compare_costs (act_cost, best_cost) < 0)
4754 best_cost = act_cost;
4756 if (best_delta)
4757 iv_ca_delta_free (&best_delta);
4758 best_delta = act_delta;
4760 else
4761 iv_ca_delta_free (&act_delta);
4765 iv_ca_delta_commit (data, ivs, best_delta, true);
4766 iv_ca_delta_free (&best_delta);
4768 return !infinite_cost_p (best_cost);
4771 /* Finds an initial assignment of candidates to uses. */
4773 static struct iv_ca *
4774 get_initial_solution (struct ivopts_data *data)
4776 struct iv_ca *ivs = iv_ca_new (data);
4777 unsigned i;
4779 for (i = 0; i < n_iv_uses (data); i++)
4780 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4782 iv_ca_free (&ivs);
4783 return NULL;
4786 return ivs;
4789 /* Tries to improve set of induction variables IVS. */
4791 static bool
4792 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4794 unsigned i, n_ivs;
4795 comp_cost acost, best_cost = iv_ca_cost (ivs);
4796 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4797 struct iv_cand *cand;
4799 /* Try extending the set of induction variables by one. */
4800 for (i = 0; i < n_iv_cands (data); i++)
4802 cand = iv_cand (data, i);
4804 if (iv_ca_cand_used_p (ivs, cand))
4805 continue;
4807 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4808 if (!act_delta)
4809 continue;
4811 /* If we successfully added the candidate and the set is small enough,
4812 try optimizing it by removing other candidates. */
4813 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4815 iv_ca_delta_commit (data, ivs, act_delta, true);
4816 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4817 iv_ca_delta_commit (data, ivs, act_delta, false);
4818 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4821 if (compare_costs (acost, best_cost) < 0)
4823 best_cost = acost;
4824 iv_ca_delta_free (&best_delta);
4825 best_delta = act_delta;
4827 else
4828 iv_ca_delta_free (&act_delta);
4831 if (!best_delta)
4833 /* Try removing the candidates from the set instead. */
4834 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4836 /* Nothing more we can do. */
4837 if (!best_delta)
4838 return false;
4841 iv_ca_delta_commit (data, ivs, best_delta, true);
4842 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4843 iv_ca_delta_free (&best_delta);
4844 return true;
4847 /* Attempts to find the optimal set of induction variables. We do simple
4848 greedy heuristic -- we try to replace at most one candidate in the selected
4849 solution and remove the unused ivs while this improves the cost. */
4851 static struct iv_ca *
4852 find_optimal_iv_set (struct ivopts_data *data)
4854 unsigned i;
4855 struct iv_ca *set;
4856 struct iv_use *use;
4858 /* Get the initial solution. */
4859 set = get_initial_solution (data);
4860 if (!set)
4862 if (dump_file && (dump_flags & TDF_DETAILS))
4863 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4864 return NULL;
4867 if (dump_file && (dump_flags & TDF_DETAILS))
4869 fprintf (dump_file, "Initial set of candidates:\n");
4870 iv_ca_dump (data, dump_file, set);
4873 while (try_improve_iv_set (data, set))
4875 if (dump_file && (dump_flags & TDF_DETAILS))
4877 fprintf (dump_file, "Improved to:\n");
4878 iv_ca_dump (data, dump_file, set);
4882 if (dump_file && (dump_flags & TDF_DETAILS))
4884 comp_cost cost = iv_ca_cost (set);
4885 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4888 for (i = 0; i < n_iv_uses (data); i++)
4890 use = iv_use (data, i);
4891 use->selected = iv_ca_cand_for_use (set, use)->cand;
4894 return set;
4897 /* Creates a new induction variable corresponding to CAND. */
4899 static void
4900 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4902 block_stmt_iterator incr_pos;
4903 tree base;
4904 bool after = false;
4906 if (!cand->iv)
4907 return;
4909 switch (cand->pos)
4911 case IP_NORMAL:
4912 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4913 break;
4915 case IP_END:
4916 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4917 after = true;
4918 break;
4920 case IP_ORIGINAL:
4921 /* Mark that the iv is preserved. */
4922 name_info (data, cand->var_before)->preserve_biv = true;
4923 name_info (data, cand->var_after)->preserve_biv = true;
4925 /* Rewrite the increment so that it uses var_before directly. */
4926 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4928 return;
4931 gimple_add_tmp_var (cand->var_before);
4932 add_referenced_var (cand->var_before);
4934 base = unshare_expr (cand->iv->base);
4936 create_iv (base, unshare_expr (cand->iv->step),
4937 cand->var_before, data->current_loop,
4938 &incr_pos, after, &cand->var_before, &cand->var_after);
4941 /* Creates new induction variables described in SET. */
4943 static void
4944 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4946 unsigned i;
4947 struct iv_cand *cand;
4948 bitmap_iterator bi;
4950 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4952 cand = iv_cand (data, i);
4953 create_new_iv (data, cand);
4957 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4958 is true, remove also the ssa name defined by the statement. */
4960 static void
4961 remove_statement (tree stmt, bool including_defined_name)
4963 if (TREE_CODE (stmt) == PHI_NODE)
4965 remove_phi_node (stmt, NULL_TREE, including_defined_name);
4967 else
4969 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4971 bsi_remove (&bsi, true);
4972 release_defs (stmt);
4976 /* Rewrites USE (definition of iv used in a nonlinear expression)
4977 using candidate CAND. */
4979 static void
4980 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4981 struct iv_use *use, struct iv_cand *cand)
4983 tree comp;
4984 tree op, tgt, ass;
4985 block_stmt_iterator bsi;
4987 /* An important special case -- if we are asked to express value of
4988 the original iv by itself, just exit; there is no need to
4989 introduce a new computation (that might also need casting the
4990 variable to unsigned and back). */
4991 if (cand->pos == IP_ORIGINAL
4992 && cand->incremented_at == use->stmt)
4994 tree step, ctype, utype;
4995 enum tree_code incr_code = PLUS_EXPR;
4997 gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4998 gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
5000 step = cand->iv->step;
5001 ctype = TREE_TYPE (step);
5002 utype = TREE_TYPE (cand->var_after);
5003 if (TREE_CODE (step) == NEGATE_EXPR)
5005 incr_code = MINUS_EXPR;
5006 step = TREE_OPERAND (step, 0);
5009 /* Check whether we may leave the computation unchanged.
5010 This is the case only if it does not rely on other
5011 computations in the loop -- otherwise, the computation
5012 we rely upon may be removed in remove_unused_ivs,
5013 thus leading to ICE. */
5014 op = GIMPLE_STMT_OPERAND (use->stmt, 1);
5015 if (TREE_CODE (op) == PLUS_EXPR
5016 || TREE_CODE (op) == MINUS_EXPR
5017 || TREE_CODE (op) == POINTER_PLUS_EXPR)
5019 if (TREE_OPERAND (op, 0) == cand->var_before)
5020 op = TREE_OPERAND (op, 1);
5021 else if (TREE_CODE (op) != MINUS_EXPR
5022 && TREE_OPERAND (op, 1) == cand->var_before)
5023 op = TREE_OPERAND (op, 0);
5024 else
5025 op = NULL_TREE;
5027 else
5028 op = NULL_TREE;
5030 if (op
5031 && (TREE_CODE (op) == INTEGER_CST
5032 || operand_equal_p (op, step, 0)))
5033 return;
5035 /* Otherwise, add the necessary computations to express
5036 the iv. */
5037 op = fold_convert (ctype, cand->var_before);
5038 comp = fold_convert (utype,
5039 build2 (incr_code, ctype, op,
5040 unshare_expr (step)));
5042 else
5044 comp = get_computation (data->current_loop, use, cand);
5045 gcc_assert (comp != NULL_TREE);
5048 switch (TREE_CODE (use->stmt))
5050 case PHI_NODE:
5051 tgt = PHI_RESULT (use->stmt);
5053 /* If we should keep the biv, do not replace it. */
5054 if (name_info (data, tgt)->preserve_biv)
5055 return;
5057 bsi = bsi_after_labels (bb_for_stmt (use->stmt));
5058 break;
5060 case GIMPLE_MODIFY_STMT:
5061 tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5062 bsi = bsi_for_stmt (use->stmt);
5063 break;
5065 default:
5066 gcc_unreachable ();
5069 op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5070 true, BSI_SAME_STMT);
5072 if (TREE_CODE (use->stmt) == PHI_NODE)
5074 ass = build_gimple_modify_stmt (tgt, op);
5075 bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
5076 remove_statement (use->stmt, false);
5077 SSA_NAME_DEF_STMT (tgt) = ass;
5079 else
5080 GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
5083 /* Replaces ssa name in index IDX by its basic variable. Callback for
5084 for_each_index. */
5086 static bool
5087 idx_remove_ssa_names (tree base, tree *idx,
5088 void *data ATTRIBUTE_UNUSED)
5090 tree *op;
5092 if (TREE_CODE (*idx) == SSA_NAME)
5093 *idx = SSA_NAME_VAR (*idx);
5095 if (TREE_CODE (base) == ARRAY_REF)
5097 op = &TREE_OPERAND (base, 2);
5098 if (*op
5099 && TREE_CODE (*op) == SSA_NAME)
5100 *op = SSA_NAME_VAR (*op);
5101 op = &TREE_OPERAND (base, 3);
5102 if (*op
5103 && TREE_CODE (*op) == SSA_NAME)
5104 *op = SSA_NAME_VAR (*op);
5107 return true;
5110 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5112 static tree
5113 unshare_and_remove_ssa_names (tree ref)
5115 ref = unshare_expr (ref);
5116 for_each_index (&ref, idx_remove_ssa_names, NULL);
5118 return ref;
5121 /* Extract the alias analysis info for the memory reference REF. There are
5122 several ways how this information may be stored and what precisely is
5123 its semantics depending on the type of the reference, but there always is
5124 somewhere hidden one _DECL node that is used to determine the set of
5125 virtual operands for the reference. The code below deciphers this jungle
5126 and extracts this single useful piece of information. */
5128 static tree
5129 get_ref_tag (tree ref, tree orig)
5131 tree var = get_base_address (ref);
5132 tree aref = NULL_TREE, tag, sv;
5133 HOST_WIDE_INT offset, size, maxsize;
5135 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5137 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5138 if (ref)
5139 break;
5142 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5143 return aref;
5145 if (!var)
5146 return NULL_TREE;
5148 if (TREE_CODE (var) == INDIRECT_REF)
5150 /* If the base is a dereference of a pointer, first check its name memory
5151 tag. If it does not have one, use its symbol memory tag. */
5152 var = TREE_OPERAND (var, 0);
5153 if (TREE_CODE (var) != SSA_NAME)
5154 return NULL_TREE;
5156 if (SSA_NAME_PTR_INFO (var))
5158 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5159 if (tag)
5160 return tag;
5163 var = SSA_NAME_VAR (var);
5164 tag = symbol_mem_tag (var);
5165 gcc_assert (tag != NULL_TREE);
5166 return tag;
5168 else
5170 if (!DECL_P (var))
5171 return NULL_TREE;
5173 tag = symbol_mem_tag (var);
5174 if (tag)
5175 return tag;
5177 return var;
5181 /* Copies the reference information from OLD_REF to NEW_REF. */
5183 static void
5184 copy_ref_info (tree new_ref, tree old_ref)
5186 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5187 copy_mem_ref_info (new_ref, old_ref);
5188 else
5190 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5191 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5195 /* Rewrites USE (address that is an iv) using candidate CAND. */
5197 static void
5198 rewrite_use_address (struct ivopts_data *data,
5199 struct iv_use *use, struct iv_cand *cand)
5201 aff_tree aff;
5202 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5203 tree ref;
5204 bool ok;
5206 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5207 gcc_assert (ok);
5208 unshare_aff_combination (&aff);
5210 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5211 copy_ref_info (ref, *use->op_p);
5212 *use->op_p = ref;
5215 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5216 candidate CAND. */
5218 static void
5219 rewrite_use_compare (struct ivopts_data *data,
5220 struct iv_use *use, struct iv_cand *cand)
5222 tree comp, *var_p, op, bound;
5223 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5224 enum tree_code compare;
5225 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5226 bool ok;
5228 bound = cp->value;
5229 if (bound)
5231 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5232 tree var_type = TREE_TYPE (var);
5234 compare = iv_elimination_compare (data, use);
5235 bound = unshare_expr (fold_convert (var_type, bound));
5236 op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5237 true, BSI_SAME_STMT);
5239 *use->op_p = build2 (compare, boolean_type_node, var, op);
5240 return;
5243 /* The induction variable elimination failed; just express the original
5244 giv. */
5245 comp = get_computation (data->current_loop, use, cand);
5246 gcc_assert (comp != NULL_TREE);
5248 ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5249 gcc_assert (ok);
5251 *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5252 true, BSI_SAME_STMT);
5255 /* Rewrites USE using candidate CAND. */
5257 static void
5258 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5260 push_stmt_changes (&use->stmt);
5262 switch (use->type)
5264 case USE_NONLINEAR_EXPR:
5265 rewrite_use_nonlinear_expr (data, use, cand);
5266 break;
5268 case USE_ADDRESS:
5269 rewrite_use_address (data, use, cand);
5270 break;
5272 case USE_COMPARE:
5273 rewrite_use_compare (data, use, cand);
5274 break;
5276 default:
5277 gcc_unreachable ();
5280 pop_stmt_changes (&use->stmt);
5283 /* Rewrite the uses using the selected induction variables. */
5285 static void
5286 rewrite_uses (struct ivopts_data *data)
5288 unsigned i;
5289 struct iv_cand *cand;
5290 struct iv_use *use;
5292 for (i = 0; i < n_iv_uses (data); i++)
5294 use = iv_use (data, i);
5295 cand = use->selected;
5296 gcc_assert (cand);
5298 rewrite_use (data, use, cand);
5302 /* Removes the ivs that are not used after rewriting. */
5304 static void
5305 remove_unused_ivs (struct ivopts_data *data)
5307 unsigned j;
5308 bitmap_iterator bi;
5310 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5312 struct version_info *info;
5314 info = ver_info (data, j);
5315 if (info->iv
5316 && !integer_zerop (info->iv->step)
5317 && !info->inv_id
5318 && !info->iv->have_use_for
5319 && !info->preserve_biv)
5320 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5324 /* Frees data allocated by the optimization of a single loop. */
5326 static void
5327 free_loop_data (struct ivopts_data *data)
5329 unsigned i, j;
5330 bitmap_iterator bi;
5331 tree obj;
5333 if (data->niters)
5335 pointer_map_destroy (data->niters);
5336 data->niters = NULL;
5339 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5341 struct version_info *info;
5343 info = ver_info (data, i);
5344 if (info->iv)
5345 free (info->iv);
5346 info->iv = NULL;
5347 info->has_nonlin_use = false;
5348 info->preserve_biv = false;
5349 info->inv_id = 0;
5351 bitmap_clear (data->relevant);
5352 bitmap_clear (data->important_candidates);
5354 for (i = 0; i < n_iv_uses (data); i++)
5356 struct iv_use *use = iv_use (data, i);
5358 free (use->iv);
5359 BITMAP_FREE (use->related_cands);
5360 for (j = 0; j < use->n_map_members; j++)
5361 if (use->cost_map[j].depends_on)
5362 BITMAP_FREE (use->cost_map[j].depends_on);
5363 free (use->cost_map);
5364 free (use);
5366 VEC_truncate (iv_use_p, data->iv_uses, 0);
5368 for (i = 0; i < n_iv_cands (data); i++)
5370 struct iv_cand *cand = iv_cand (data, i);
5372 if (cand->iv)
5373 free (cand->iv);
5374 if (cand->depends_on)
5375 BITMAP_FREE (cand->depends_on);
5376 free (cand);
5378 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5380 if (data->version_info_size < num_ssa_names)
5382 data->version_info_size = 2 * num_ssa_names;
5383 free (data->version_info);
5384 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5387 data->max_inv_id = 0;
5389 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5390 SET_DECL_RTL (obj, NULL_RTX);
5392 VEC_truncate (tree, decl_rtl_to_reset, 0);
5395 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5396 loop tree. */
5398 static void
5399 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5401 free_loop_data (data);
5402 free (data->version_info);
5403 BITMAP_FREE (data->relevant);
5404 BITMAP_FREE (data->important_candidates);
5406 VEC_free (tree, heap, decl_rtl_to_reset);
5407 VEC_free (iv_use_p, heap, data->iv_uses);
5408 VEC_free (iv_cand_p, heap, data->iv_candidates);
5411 /* Optimizes the LOOP. Returns true if anything changed. */
5413 static bool
5414 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5416 bool changed = false;
5417 struct iv_ca *iv_ca;
5418 edge exit;
5420 gcc_assert (!data->niters);
5421 data->current_loop = loop;
5423 if (dump_file && (dump_flags & TDF_DETAILS))
5425 fprintf (dump_file, "Processing loop %d\n", loop->num);
5427 exit = single_dom_exit (loop);
5428 if (exit)
5430 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5431 exit->src->index, exit->dest->index);
5432 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5433 fprintf (dump_file, "\n");
5436 fprintf (dump_file, "\n");
5439 /* For each ssa name determines whether it behaves as an induction variable
5440 in some loop. */
5441 if (!find_induction_variables (data))
5442 goto finish;
5444 /* Finds interesting uses (item 1). */
5445 find_interesting_uses (data);
5446 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5447 goto finish;
5449 /* Finds candidates for the induction variables (item 2). */
5450 find_iv_candidates (data);
5452 /* Calculates the costs (item 3, part 1). */
5453 determine_use_iv_costs (data);
5454 determine_iv_costs (data);
5455 determine_set_costs (data);
5457 /* Find the optimal set of induction variables (item 3, part 2). */
5458 iv_ca = find_optimal_iv_set (data);
5459 if (!iv_ca)
5460 goto finish;
5461 changed = true;
5463 /* Create the new induction variables (item 4, part 1). */
5464 create_new_ivs (data, iv_ca);
5465 iv_ca_free (&iv_ca);
5467 /* Rewrite the uses (item 4, part 2). */
5468 rewrite_uses (data);
5470 /* Remove the ivs that are unused after rewriting. */
5471 remove_unused_ivs (data);
5473 /* We have changed the structure of induction variables; it might happen
5474 that definitions in the scev database refer to some of them that were
5475 eliminated. */
5476 scev_reset ();
5478 finish:
5479 free_loop_data (data);
5481 return changed;
5484 /* Main entry point. Optimizes induction variables in loops. */
5486 void
5487 tree_ssa_iv_optimize (void)
5489 struct loop *loop;
5490 struct ivopts_data data;
5491 loop_iterator li;
5493 tree_ssa_iv_optimize_init (&data);
5495 /* Optimize the loops starting with the innermost ones. */
5496 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5498 if (dump_file && (dump_flags & TDF_DETAILS))
5499 flow_loop_dump (loop, dump_file, NULL, 1);
5501 tree_ssa_iv_optimize_loop (&data, loop);
5504 tree_ssa_iv_optimize_finalize (&data);