revert: re PR c++/35049 (g++.dg/conversion/simd3.C:12: error: invalid operands to...
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob02fe707fb3fd8c762c914de2cbb049842dcc4904
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "tree.h"
69 #include "rtl.h"
70 #include "tm_p.h"
71 #include "hard-reg-set.h"
72 #include "basic-block.h"
73 #include "output.h"
74 #include "diagnostic.h"
75 #include "tree-flow.h"
76 #include "tree-dump.h"
77 #include "timevar.h"
78 #include "cfgloop.h"
79 #include "varray.h"
80 #include "expr.h"
81 #include "tree-pass.h"
82 #include "ggc.h"
83 #include "insn-config.h"
84 #include "recog.h"
85 #include "pointer-set.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
93 #include "target.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
103 /* Representation of the induction variable. */
104 struct iv
106 tree base; /* Initial value of the iv. */
107 tree base_object; /* A memory object to that the induction variable points. */
108 tree step; /* Step of the iv (constant only). */
109 tree ssa_name; /* The ssa name with the value. */
110 bool biv_p; /* Is it a biv? */
111 bool have_use_for; /* Do we already have a use for it? */
112 unsigned use_id; /* The identifier in the use if it is the case. */
115 /* Per-ssa version information (induction variable descriptions, etc.). */
116 struct version_info
118 tree name; /* The ssa name. */
119 struct iv *iv; /* Induction variable description. */
120 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 unsigned inv_id; /* Id of an invariant. */
123 bool preserve_biv; /* For the original biv, whether to preserve it. */
126 /* Types of uses. */
127 enum use_type
129 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
130 USE_ADDRESS, /* Use in an address. */
131 USE_COMPARE /* Use is a compare. */
134 /* Cost of a computation. */
135 typedef struct
137 unsigned cost; /* The runtime cost. */
138 unsigned complexity; /* The estimate of the complexity of the code for
139 the computation (in no concrete units --
140 complexity field should be larger for more
141 complex expressions and addressing modes). */
142 } comp_cost;
144 static const comp_cost zero_cost = {0, 0};
145 static const comp_cost infinite_cost = {INFTY, INFTY};
147 /* The candidate - cost pair. */
148 struct cost_pair
150 struct iv_cand *cand; /* The candidate. */
151 comp_cost cost; /* The cost. */
152 bitmap depends_on; /* The list of invariants that have to be
153 preserved. */
154 tree value; /* For final value elimination, the expression for
155 the final value of the iv. For iv elimination,
156 the new bound to compare with. */
159 /* Use. */
160 struct iv_use
162 unsigned id; /* The id of the use. */
163 enum use_type type; /* Type of the use. */
164 struct iv *iv; /* The induction variable it is based on. */
165 tree stmt; /* Statement in that it occurs. */
166 tree *op_p; /* The place where it occurs. */
167 bitmap related_cands; /* The set of "related" iv candidates, plus the common
168 important ones. */
170 unsigned n_map_members; /* Number of candidates in the cost_map list. */
171 struct cost_pair *cost_map;
172 /* The costs wrto the iv candidates. */
174 struct iv_cand *selected;
175 /* The selected candidate. */
178 /* The position where the iv is computed. */
179 enum iv_position
181 IP_NORMAL, /* At the end, just before the exit condition. */
182 IP_END, /* At the end of the latch block. */
183 IP_ORIGINAL /* The original biv. */
186 /* The induction variable candidate. */
187 struct iv_cand
189 unsigned id; /* The number of the candidate. */
190 bool important; /* Whether this is an "important" candidate, i.e. such
191 that it should be considered by all uses. */
192 enum iv_position pos; /* Where it is computed. */
193 tree incremented_at; /* For original biv, the statement where it is
194 incremented. */
195 tree var_before; /* The variable used for it before increment. */
196 tree var_after; /* The variable used for it after increment. */
197 struct iv *iv; /* The value of the candidate. NULL for
198 "pseudocandidate" used to indicate the possibility
199 to replace the final value of an iv by direct
200 computation of the value. */
201 unsigned cost; /* Cost of the candidate. */
202 bitmap depends_on; /* The list of invariants that are used in step of the
203 biv. */
206 /* The data used by the induction variable optimizations. */
208 typedef struct iv_use *iv_use_p;
209 DEF_VEC_P(iv_use_p);
210 DEF_VEC_ALLOC_P(iv_use_p,heap);
212 typedef struct iv_cand *iv_cand_p;
213 DEF_VEC_P(iv_cand_p);
214 DEF_VEC_ALLOC_P(iv_cand_p,heap);
216 struct ivopts_data
218 /* The currently optimized loop. */
219 struct loop *current_loop;
221 /* Number of registers used in it. */
222 unsigned regs_used;
224 /* Numbers of iterations for all exits of the current loop. */
225 struct pointer_map_t *niters;
227 /* The size of version_info array allocated. */
228 unsigned version_info_size;
230 /* The array of information for the ssa names. */
231 struct version_info *version_info;
233 /* The bitmap of indices in version_info whose value was changed. */
234 bitmap relevant;
236 /* The maximum invariant id. */
237 unsigned max_inv_id;
239 /* The uses of induction variables. */
240 VEC(iv_use_p,heap) *iv_uses;
242 /* The candidates. */
243 VEC(iv_cand_p,heap) *iv_candidates;
245 /* A bitmap of important candidates. */
246 bitmap important_candidates;
248 /* Whether to consider just related and important candidates when replacing a
249 use. */
250 bool consider_all_candidates;
253 /* An assignment of iv candidates to uses. */
255 struct iv_ca
257 /* The number of uses covered by the assignment. */
258 unsigned upto;
260 /* Number of uses that cannot be expressed by the candidates in the set. */
261 unsigned bad_uses;
263 /* Candidate assigned to a use, together with the related costs. */
264 struct cost_pair **cand_for_use;
266 /* Number of times each candidate is used. */
267 unsigned *n_cand_uses;
269 /* The candidates used. */
270 bitmap cands;
272 /* The number of candidates in the set. */
273 unsigned n_cands;
275 /* Total number of registers needed. */
276 unsigned n_regs;
278 /* Total cost of expressing uses. */
279 comp_cost cand_use_cost;
281 /* Total cost of candidates. */
282 unsigned cand_cost;
284 /* Number of times each invariant is used. */
285 unsigned *n_invariant_uses;
287 /* Total cost of the assignment. */
288 comp_cost cost;
291 /* Difference of two iv candidate assignments. */
293 struct iv_ca_delta
295 /* Changed use. */
296 struct iv_use *use;
298 /* An old assignment (for rollback purposes). */
299 struct cost_pair *old_cp;
301 /* A new assignment. */
302 struct cost_pair *new_cp;
304 /* Next change in the list. */
305 struct iv_ca_delta *next_change;
308 /* Bound on number of candidates below that all candidates are considered. */
310 #define CONSIDER_ALL_CANDIDATES_BOUND \
311 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
313 /* If there are more iv occurrences, we just give up (it is quite unlikely that
314 optimizing such a loop would help, and it would take ages). */
316 #define MAX_CONSIDERED_USES \
317 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
319 /* If there are at most this number of ivs in the set, try removing unnecessary
320 ivs from the set always. */
322 #define ALWAYS_PRUNE_CAND_SET_BOUND \
323 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
325 /* The list of trees for that the decl_rtl field must be reset is stored
326 here. */
328 static VEC(tree,heap) *decl_rtl_to_reset;
330 /* Number of uses recorded in DATA. */
332 static inline unsigned
333 n_iv_uses (struct ivopts_data *data)
335 return VEC_length (iv_use_p, data->iv_uses);
338 /* Ith use recorded in DATA. */
340 static inline struct iv_use *
341 iv_use (struct ivopts_data *data, unsigned i)
343 return VEC_index (iv_use_p, data->iv_uses, i);
346 /* Number of candidates recorded in DATA. */
348 static inline unsigned
349 n_iv_cands (struct ivopts_data *data)
351 return VEC_length (iv_cand_p, data->iv_candidates);
354 /* Ith candidate recorded in DATA. */
356 static inline struct iv_cand *
357 iv_cand (struct ivopts_data *data, unsigned i)
359 return VEC_index (iv_cand_p, data->iv_candidates, i);
362 /* The single loop exit if it dominates the latch, NULL otherwise. */
364 edge
365 single_dom_exit (struct loop *loop)
367 edge exit = single_exit (loop);
369 if (!exit)
370 return NULL;
372 if (!just_once_each_iteration_p (loop, exit->src))
373 return NULL;
375 return exit;
378 /* Dumps information about the induction variable IV to FILE. */
380 extern void dump_iv (FILE *, struct iv *);
381 void
382 dump_iv (FILE *file, struct iv *iv)
384 if (iv->ssa_name)
386 fprintf (file, "ssa name ");
387 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
388 fprintf (file, "\n");
391 fprintf (file, " type ");
392 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
393 fprintf (file, "\n");
395 if (iv->step)
397 fprintf (file, " base ");
398 print_generic_expr (file, iv->base, TDF_SLIM);
399 fprintf (file, "\n");
401 fprintf (file, " step ");
402 print_generic_expr (file, iv->step, TDF_SLIM);
403 fprintf (file, "\n");
405 else
407 fprintf (file, " invariant ");
408 print_generic_expr (file, iv->base, TDF_SLIM);
409 fprintf (file, "\n");
412 if (iv->base_object)
414 fprintf (file, " base object ");
415 print_generic_expr (file, iv->base_object, TDF_SLIM);
416 fprintf (file, "\n");
419 if (iv->biv_p)
420 fprintf (file, " is a biv\n");
423 /* Dumps information about the USE to FILE. */
425 extern void dump_use (FILE *, struct iv_use *);
426 void
427 dump_use (FILE *file, struct iv_use *use)
429 fprintf (file, "use %d\n", use->id);
431 switch (use->type)
433 case USE_NONLINEAR_EXPR:
434 fprintf (file, " generic\n");
435 break;
437 case USE_ADDRESS:
438 fprintf (file, " address\n");
439 break;
441 case USE_COMPARE:
442 fprintf (file, " compare\n");
443 break;
445 default:
446 gcc_unreachable ();
449 fprintf (file, " in statement ");
450 print_generic_expr (file, use->stmt, TDF_SLIM);
451 fprintf (file, "\n");
453 fprintf (file, " at position ");
454 if (use->op_p)
455 print_generic_expr (file, *use->op_p, TDF_SLIM);
456 fprintf (file, "\n");
458 dump_iv (file, use->iv);
460 if (use->related_cands)
462 fprintf (file, " related candidates ");
463 dump_bitmap (file, use->related_cands);
467 /* Dumps information about the uses to FILE. */
469 extern void dump_uses (FILE *, struct ivopts_data *);
470 void
471 dump_uses (FILE *file, struct ivopts_data *data)
473 unsigned i;
474 struct iv_use *use;
476 for (i = 0; i < n_iv_uses (data); i++)
478 use = iv_use (data, i);
480 dump_use (file, use);
481 fprintf (file, "\n");
485 /* Dumps information about induction variable candidate CAND to FILE. */
487 extern void dump_cand (FILE *, struct iv_cand *);
488 void
489 dump_cand (FILE *file, struct iv_cand *cand)
491 struct iv *iv = cand->iv;
493 fprintf (file, "candidate %d%s\n",
494 cand->id, cand->important ? " (important)" : "");
496 if (cand->depends_on)
498 fprintf (file, " depends on ");
499 dump_bitmap (file, cand->depends_on);
502 if (!iv)
504 fprintf (file, " final value replacement\n");
505 return;
508 switch (cand->pos)
510 case IP_NORMAL:
511 fprintf (file, " incremented before exit test\n");
512 break;
514 case IP_END:
515 fprintf (file, " incremented at end\n");
516 break;
518 case IP_ORIGINAL:
519 fprintf (file, " original biv\n");
520 break;
523 dump_iv (file, iv);
526 /* Returns the info for ssa version VER. */
528 static inline struct version_info *
529 ver_info (struct ivopts_data *data, unsigned ver)
531 return data->version_info + ver;
534 /* Returns the info for ssa name NAME. */
536 static inline struct version_info *
537 name_info (struct ivopts_data *data, tree name)
539 return ver_info (data, SSA_NAME_VERSION (name));
542 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
543 emitted in LOOP. */
545 static bool
546 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
548 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
550 gcc_assert (bb);
552 if (sbb == loop->latch)
553 return true;
555 if (sbb != bb)
556 return false;
558 return stmt == last_stmt (bb);
561 /* Returns true if STMT if after the place where the original induction
562 variable CAND is incremented. */
564 static bool
565 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
567 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
568 basic_block stmt_bb = bb_for_stmt (stmt);
569 block_stmt_iterator bsi;
571 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
572 return false;
574 if (stmt_bb != cand_bb)
575 return true;
577 /* Scan the block from the end, since the original ivs are usually
578 incremented at the end of the loop body. */
579 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
581 if (bsi_stmt (bsi) == cand->incremented_at)
582 return false;
583 if (bsi_stmt (bsi) == stmt)
584 return true;
588 /* Returns true if STMT if after the place where the induction variable
589 CAND is incremented in LOOP. */
591 static bool
592 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
594 switch (cand->pos)
596 case IP_END:
597 return false;
599 case IP_NORMAL:
600 return stmt_after_ip_normal_pos (loop, stmt);
602 case IP_ORIGINAL:
603 return stmt_after_ip_original_pos (cand, stmt);
605 default:
606 gcc_unreachable ();
610 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
612 static bool
613 abnormal_ssa_name_p (tree exp)
615 if (!exp)
616 return false;
618 if (TREE_CODE (exp) != SSA_NAME)
619 return false;
621 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
624 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
625 abnormal phi node. Callback for for_each_index. */
627 static bool
628 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
629 void *data ATTRIBUTE_UNUSED)
631 if (TREE_CODE (base) == ARRAY_REF)
633 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
634 return false;
635 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
636 return false;
639 return !abnormal_ssa_name_p (*index);
642 /* Returns true if EXPR contains a ssa name that occurs in an
643 abnormal phi node. */
645 bool
646 contains_abnormal_ssa_name_p (tree expr)
648 enum tree_code code;
649 enum tree_code_class codeclass;
651 if (!expr)
652 return false;
654 code = TREE_CODE (expr);
655 codeclass = TREE_CODE_CLASS (code);
657 if (code == SSA_NAME)
658 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
660 if (code == INTEGER_CST
661 || is_gimple_min_invariant (expr))
662 return false;
664 if (code == ADDR_EXPR)
665 return !for_each_index (&TREE_OPERAND (expr, 0),
666 idx_contains_abnormal_ssa_name_p,
667 NULL);
669 switch (codeclass)
671 case tcc_binary:
672 case tcc_comparison:
673 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
674 return true;
676 /* Fallthru. */
677 case tcc_unary:
678 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
679 return true;
681 break;
683 default:
684 gcc_unreachable ();
687 return false;
690 /* Returns tree describing number of iterations determined from
691 EXIT of DATA->current_loop, or NULL if something goes wrong. */
693 static tree
694 niter_for_exit (struct ivopts_data *data, edge exit)
696 struct tree_niter_desc desc;
697 tree niter;
698 void **slot;
700 if (!data->niters)
702 data->niters = pointer_map_create ();
703 slot = NULL;
705 else
706 slot = pointer_map_contains (data->niters, exit);
708 if (!slot)
710 /* Try to determine number of iterations. We must know it
711 unconditionally (i.e., without possibility of # of iterations
712 being zero). Also, we cannot safely work with ssa names that
713 appear in phi nodes on abnormal edges, so that we do not create
714 overlapping life ranges for them (PR 27283). */
715 if (number_of_iterations_exit (data->current_loop,
716 exit, &desc, true)
717 && integer_zerop (desc.may_be_zero)
718 && !contains_abnormal_ssa_name_p (desc.niter))
719 niter = desc.niter;
720 else
721 niter = NULL_TREE;
723 *pointer_map_insert (data->niters, exit) = niter;
725 else
726 niter = (tree) *slot;
728 return niter;
731 /* Returns tree describing number of iterations determined from
732 single dominating exit of DATA->current_loop, or NULL if something
733 goes wrong. */
735 static tree
736 niter_for_single_dom_exit (struct ivopts_data *data)
738 edge exit = single_dom_exit (data->current_loop);
740 if (!exit)
741 return NULL;
743 return niter_for_exit (data, exit);
746 /* Initializes data structures used by the iv optimization pass, stored
747 in DATA. */
749 static void
750 tree_ssa_iv_optimize_init (struct ivopts_data *data)
752 data->version_info_size = 2 * num_ssa_names;
753 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
754 data->relevant = BITMAP_ALLOC (NULL);
755 data->important_candidates = BITMAP_ALLOC (NULL);
756 data->max_inv_id = 0;
757 data->niters = NULL;
758 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
759 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
760 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
763 /* Returns a memory object to that EXPR points. In case we are able to
764 determine that it does not point to any such object, NULL is returned. */
766 static tree
767 determine_base_object (tree expr)
769 enum tree_code code = TREE_CODE (expr);
770 tree base, obj;
772 /* If this is a pointer casted to any type, we need to determine
773 the base object for the pointer; so handle conversions before
774 throwing away non-pointer expressions. */
775 if (TREE_CODE (expr) == NOP_EXPR
776 || TREE_CODE (expr) == CONVERT_EXPR)
777 return determine_base_object (TREE_OPERAND (expr, 0));
779 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
780 return NULL_TREE;
782 switch (code)
784 case INTEGER_CST:
785 return NULL_TREE;
787 case ADDR_EXPR:
788 obj = TREE_OPERAND (expr, 0);
789 base = get_base_address (obj);
791 if (!base)
792 return expr;
794 if (TREE_CODE (base) == INDIRECT_REF)
795 return determine_base_object (TREE_OPERAND (base, 0));
797 return fold_convert (ptr_type_node,
798 build_fold_addr_expr (base));
800 case POINTER_PLUS_EXPR:
801 return determine_base_object (TREE_OPERAND (expr, 0));
803 case PLUS_EXPR:
804 case MINUS_EXPR:
805 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
806 gcc_unreachable ();
808 default:
809 return fold_convert (ptr_type_node, expr);
813 /* Allocates an induction variable with given initial value BASE and step STEP
814 for loop LOOP. */
816 static struct iv *
817 alloc_iv (tree base, tree step)
819 struct iv *iv = XCNEW (struct iv);
820 gcc_assert (step != NULL_TREE);
822 iv->base = base;
823 iv->base_object = determine_base_object (base);
824 iv->step = step;
825 iv->biv_p = false;
826 iv->have_use_for = false;
827 iv->use_id = 0;
828 iv->ssa_name = NULL_TREE;
830 return iv;
833 /* Sets STEP and BASE for induction variable IV. */
835 static void
836 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
838 struct version_info *info = name_info (data, iv);
840 gcc_assert (!info->iv);
842 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
843 info->iv = alloc_iv (base, step);
844 info->iv->ssa_name = iv;
847 /* Finds induction variable declaration for VAR. */
849 static struct iv *
850 get_iv (struct ivopts_data *data, tree var)
852 basic_block bb;
853 tree type = TREE_TYPE (var);
855 if (!POINTER_TYPE_P (type)
856 && !INTEGRAL_TYPE_P (type))
857 return NULL;
859 if (!name_info (data, var)->iv)
861 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
863 if (!bb
864 || !flow_bb_inside_loop_p (data->current_loop, bb))
865 set_iv (data, var, var, build_int_cst (type, 0));
868 return name_info (data, var)->iv;
871 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
872 not define a simple affine biv with nonzero step. */
874 static tree
875 determine_biv_step (tree phi)
877 struct loop *loop = bb_for_stmt (phi)->loop_father;
878 tree name = PHI_RESULT (phi);
879 affine_iv iv;
881 if (!is_gimple_reg (name))
882 return NULL_TREE;
884 if (!simple_iv (loop, phi, name, &iv, true))
885 return NULL_TREE;
887 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
890 /* Finds basic ivs. */
892 static bool
893 find_bivs (struct ivopts_data *data)
895 tree phi, step, type, base;
896 bool found = false;
897 struct loop *loop = data->current_loop;
899 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
901 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
902 continue;
904 step = determine_biv_step (phi);
905 if (!step)
906 continue;
908 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
909 base = expand_simple_operations (base);
910 if (contains_abnormal_ssa_name_p (base)
911 || contains_abnormal_ssa_name_p (step))
912 continue;
914 type = TREE_TYPE (PHI_RESULT (phi));
915 base = fold_convert (type, base);
916 if (step)
917 step = fold_convert (type, step);
919 set_iv (data, PHI_RESULT (phi), base, step);
920 found = true;
923 return found;
926 /* Marks basic ivs. */
928 static void
929 mark_bivs (struct ivopts_data *data)
931 tree phi, var;
932 struct iv *iv, *incr_iv;
933 struct loop *loop = data->current_loop;
934 basic_block incr_bb;
936 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
938 iv = get_iv (data, PHI_RESULT (phi));
939 if (!iv)
940 continue;
942 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
943 incr_iv = get_iv (data, var);
944 if (!incr_iv)
945 continue;
947 /* If the increment is in the subloop, ignore it. */
948 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
949 if (incr_bb->loop_father != data->current_loop
950 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
951 continue;
953 iv->biv_p = true;
954 incr_iv->biv_p = true;
958 /* Checks whether STMT defines a linear induction variable and stores its
959 parameters to IV. */
961 static bool
962 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
964 tree lhs;
965 struct loop *loop = data->current_loop;
967 iv->base = NULL_TREE;
968 iv->step = NULL_TREE;
970 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
971 return false;
973 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
974 if (TREE_CODE (lhs) != SSA_NAME)
975 return false;
977 if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
978 return false;
979 iv->base = expand_simple_operations (iv->base);
981 if (contains_abnormal_ssa_name_p (iv->base)
982 || contains_abnormal_ssa_name_p (iv->step))
983 return false;
985 return true;
988 /* Finds general ivs in statement STMT. */
990 static void
991 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
993 affine_iv iv;
995 if (!find_givs_in_stmt_scev (data, stmt, &iv))
996 return;
998 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1001 /* Finds general ivs in basic block BB. */
1003 static void
1004 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1006 block_stmt_iterator bsi;
1008 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1009 find_givs_in_stmt (data, bsi_stmt (bsi));
1012 /* Finds general ivs. */
1014 static void
1015 find_givs (struct ivopts_data *data)
1017 struct loop *loop = data->current_loop;
1018 basic_block *body = get_loop_body_in_dom_order (loop);
1019 unsigned i;
1021 for (i = 0; i < loop->num_nodes; i++)
1022 find_givs_in_bb (data, body[i]);
1023 free (body);
1026 /* For each ssa name defined in LOOP determines whether it is an induction
1027 variable and if so, its initial value and step. */
1029 static bool
1030 find_induction_variables (struct ivopts_data *data)
1032 unsigned i;
1033 bitmap_iterator bi;
1035 if (!find_bivs (data))
1036 return false;
1038 find_givs (data);
1039 mark_bivs (data);
1041 if (dump_file && (dump_flags & TDF_DETAILS))
1043 tree niter = niter_for_single_dom_exit (data);
1045 if (niter)
1047 fprintf (dump_file, " number of iterations ");
1048 print_generic_expr (dump_file, niter, TDF_SLIM);
1049 fprintf (dump_file, "\n\n");
1052 fprintf (dump_file, "Induction variables:\n\n");
1054 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1056 if (ver_info (data, i)->iv)
1057 dump_iv (dump_file, ver_info (data, i)->iv);
1061 return true;
1064 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1066 static struct iv_use *
1067 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1068 tree stmt, enum use_type use_type)
1070 struct iv_use *use = XCNEW (struct iv_use);
1072 use->id = n_iv_uses (data);
1073 use->type = use_type;
1074 use->iv = iv;
1075 use->stmt = stmt;
1076 use->op_p = use_p;
1077 use->related_cands = BITMAP_ALLOC (NULL);
1079 /* To avoid showing ssa name in the dumps, if it was not reset by the
1080 caller. */
1081 iv->ssa_name = NULL_TREE;
1083 if (dump_file && (dump_flags & TDF_DETAILS))
1084 dump_use (dump_file, use);
1086 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1088 return use;
1091 /* Checks whether OP is a loop-level invariant and if so, records it.
1092 NONLINEAR_USE is true if the invariant is used in a way we do not
1093 handle specially. */
1095 static void
1096 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1098 basic_block bb;
1099 struct version_info *info;
1101 if (TREE_CODE (op) != SSA_NAME
1102 || !is_gimple_reg (op))
1103 return;
1105 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1106 if (bb
1107 && flow_bb_inside_loop_p (data->current_loop, bb))
1108 return;
1110 info = name_info (data, op);
1111 info->name = op;
1112 info->has_nonlin_use |= nonlinear_use;
1113 if (!info->inv_id)
1114 info->inv_id = ++data->max_inv_id;
1115 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1118 /* Checks whether the use OP is interesting and if so, records it. */
1120 static struct iv_use *
1121 find_interesting_uses_op (struct ivopts_data *data, tree op)
1123 struct iv *iv;
1124 struct iv *civ;
1125 tree stmt;
1126 struct iv_use *use;
1128 if (TREE_CODE (op) != SSA_NAME)
1129 return NULL;
1131 iv = get_iv (data, op);
1132 if (!iv)
1133 return NULL;
1135 if (iv->have_use_for)
1137 use = iv_use (data, iv->use_id);
1139 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1140 return use;
1143 if (integer_zerop (iv->step))
1145 record_invariant (data, op, true);
1146 return NULL;
1148 iv->have_use_for = true;
1150 civ = XNEW (struct iv);
1151 *civ = *iv;
1153 stmt = SSA_NAME_DEF_STMT (op);
1154 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1155 || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1157 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1158 iv->use_id = use->id;
1160 return use;
1163 /* Given a condition *COND_P, checks whether it is a compare of an induction
1164 variable and an invariant. If this is the case, CONTROL_VAR is set
1165 to location of the iv, BOUND to the location of the invariant,
1166 IV_VAR and IV_BOUND are set to the corresponding induction variable
1167 descriptions, and true is returned. If this is not the case,
1168 CONTROL_VAR and BOUND are set to the arguments of the condition and
1169 false is returned. */
1171 static bool
1172 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1173 tree **control_var, tree **bound,
1174 struct iv **iv_var, struct iv **iv_bound)
1176 /* The nodes returned when COND has just one operand. Note that you should
1177 not modify anything in BOUND or IV_BOUND because of this. */
1178 static struct iv const_iv;
1179 static tree zero;
1180 tree cond = *cond_p;
1181 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1182 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1183 bool ret = false;
1185 zero = integer_zero_node;
1186 const_iv.step = integer_zero_node;
1188 if (TREE_CODE (cond) == SSA_NAME)
1190 op0 = cond_p;
1191 iv0 = get_iv (data, cond);
1192 ret = (iv0 && !integer_zerop (iv0->step));
1193 goto end;
1196 if (!COMPARISON_CLASS_P (cond))
1198 op0 = cond_p;
1199 goto end;
1202 op0 = &TREE_OPERAND (cond, 0);
1203 op1 = &TREE_OPERAND (cond, 1);
1204 if (TREE_CODE (*op0) == SSA_NAME)
1205 iv0 = get_iv (data, *op0);
1206 if (TREE_CODE (*op1) == SSA_NAME)
1207 iv1 = get_iv (data, *op1);
1209 /* Exactly one of the compared values must be an iv, and the other one must
1210 be an invariant. */
1211 if (!iv0 || !iv1)
1212 goto end;
1214 if (integer_zerop (iv0->step))
1216 /* Control variable may be on the other side. */
1217 tmp_op = op0; op0 = op1; op1 = tmp_op;
1218 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1220 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1222 end:
1223 if (control_var)
1224 *control_var = op0;;
1225 if (iv_var)
1226 *iv_var = iv0;;
1227 if (bound)
1228 *bound = op1;
1229 if (iv_bound)
1230 *iv_bound = iv1;
1232 return ret;
1235 /* Checks whether the condition *COND_P in STMT is interesting
1236 and if so, records it. */
1238 static void
1239 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1241 tree *var_p, *bound_p;
1242 struct iv *var_iv, *civ;
1244 if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1246 find_interesting_uses_op (data, *var_p);
1247 find_interesting_uses_op (data, *bound_p);
1248 return;
1251 civ = XNEW (struct iv);
1252 *civ = *var_iv;
1253 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1256 /* Returns true if expression EXPR is obviously invariant in LOOP,
1257 i.e. if all its operands are defined outside of the LOOP. LOOP
1258 should not be the function body. */
1260 bool
1261 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1263 basic_block def_bb;
1264 unsigned i, len;
1266 gcc_assert (loop_depth (loop) > 0);
1268 if (is_gimple_min_invariant (expr))
1269 return true;
1271 if (TREE_CODE (expr) == SSA_NAME)
1273 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1274 if (def_bb
1275 && flow_bb_inside_loop_p (loop, def_bb))
1276 return false;
1278 return true;
1281 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1282 return false;
1284 len = TREE_OPERAND_LENGTH (expr);
1285 for (i = 0; i < len; i++)
1286 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1287 return false;
1289 return true;
1292 /* Cumulates the steps of indices into DATA and replaces their values with the
1293 initial ones. Returns false when the value of the index cannot be determined.
1294 Callback for for_each_index. */
1296 struct ifs_ivopts_data
1298 struct ivopts_data *ivopts_data;
1299 tree stmt;
1300 tree step;
1303 static bool
1304 idx_find_step (tree base, tree *idx, void *data)
1306 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1307 struct iv *iv;
1308 tree step, iv_base, iv_step, lbound, off;
1309 struct loop *loop = dta->ivopts_data->current_loop;
1311 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1312 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1313 return false;
1315 /* If base is a component ref, require that the offset of the reference
1316 be invariant. */
1317 if (TREE_CODE (base) == COMPONENT_REF)
1319 off = component_ref_field_offset (base);
1320 return expr_invariant_in_loop_p (loop, off);
1323 /* If base is array, first check whether we will be able to move the
1324 reference out of the loop (in order to take its address in strength
1325 reduction). In order for this to work we need both lower bound
1326 and step to be loop invariants. */
1327 if (TREE_CODE (base) == ARRAY_REF)
1329 step = array_ref_element_size (base);
1330 lbound = array_ref_low_bound (base);
1332 if (!expr_invariant_in_loop_p (loop, step)
1333 || !expr_invariant_in_loop_p (loop, lbound))
1334 return false;
1337 if (TREE_CODE (*idx) != SSA_NAME)
1338 return true;
1340 iv = get_iv (dta->ivopts_data, *idx);
1341 if (!iv)
1342 return false;
1344 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1345 *&x[0], which is not folded and does not trigger the
1346 ARRAY_REF path below. */
1347 *idx = iv->base;
1349 if (integer_zerop (iv->step))
1350 return true;
1352 if (TREE_CODE (base) == ARRAY_REF)
1354 step = array_ref_element_size (base);
1356 /* We only handle addresses whose step is an integer constant. */
1357 if (TREE_CODE (step) != INTEGER_CST)
1358 return false;
1360 else
1361 /* The step for pointer arithmetics already is 1 byte. */
1362 step = build_int_cst (sizetype, 1);
1364 iv_base = iv->base;
1365 iv_step = iv->step;
1366 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1367 sizetype, &iv_base, &iv_step, dta->stmt,
1368 false))
1370 /* The index might wrap. */
1371 return false;
1374 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1375 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1377 return true;
1380 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1381 object is passed to it in DATA. */
1383 static bool
1384 idx_record_use (tree base, tree *idx,
1385 void *vdata)
1387 struct ivopts_data *data = (struct ivopts_data *) vdata;
1388 find_interesting_uses_op (data, *idx);
1389 if (TREE_CODE (base) == ARRAY_REF)
1391 find_interesting_uses_op (data, array_ref_element_size (base));
1392 find_interesting_uses_op (data, array_ref_low_bound (base));
1394 return true;
1397 /* If we can prove that TOP = cst * BOT for some constant cst,
1398 store cst to MUL and return true. Otherwise return false.
1399 The returned value is always sign-extended, regardless of the
1400 signedness of TOP and BOT. */
1402 static bool
1403 constant_multiple_of (tree top, tree bot, double_int *mul)
1405 tree mby;
1406 enum tree_code code;
1407 double_int res, p0, p1;
1408 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1410 STRIP_NOPS (top);
1411 STRIP_NOPS (bot);
1413 if (operand_equal_p (top, bot, 0))
1415 *mul = double_int_one;
1416 return true;
1419 code = TREE_CODE (top);
1420 switch (code)
1422 case MULT_EXPR:
1423 mby = TREE_OPERAND (top, 1);
1424 if (TREE_CODE (mby) != INTEGER_CST)
1425 return false;
1427 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1428 return false;
1430 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1431 precision);
1432 return true;
1434 case PLUS_EXPR:
1435 case MINUS_EXPR:
1436 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1437 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1438 return false;
1440 if (code == MINUS_EXPR)
1441 p1 = double_int_neg (p1);
1442 *mul = double_int_sext (double_int_add (p0, p1), precision);
1443 return true;
1445 case INTEGER_CST:
1446 if (TREE_CODE (bot) != INTEGER_CST)
1447 return false;
1449 p0 = double_int_sext (tree_to_double_int (top), precision);
1450 p1 = double_int_sext (tree_to_double_int (bot), precision);
1451 if (double_int_zero_p (p1))
1452 return false;
1453 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1454 precision);
1455 return double_int_zero_p (res);
1457 default:
1458 return false;
1462 /* Returns true if memory reference REF with step STEP may be unaligned. */
1464 static bool
1465 may_be_unaligned_p (tree ref, tree step)
1467 tree base;
1468 tree base_type;
1469 HOST_WIDE_INT bitsize;
1470 HOST_WIDE_INT bitpos;
1471 tree toffset;
1472 enum machine_mode mode;
1473 int unsignedp, volatilep;
1474 unsigned base_align;
1476 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1477 thus they are not misaligned. */
1478 if (TREE_CODE (ref) == TARGET_MEM_REF)
1479 return false;
1481 /* The test below is basically copy of what expr.c:normal_inner_ref
1482 does to check whether the object must be loaded by parts when
1483 STRICT_ALIGNMENT is true. */
1484 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1485 &unsignedp, &volatilep, true);
1486 base_type = TREE_TYPE (base);
1487 base_align = TYPE_ALIGN (base_type);
1489 if (mode != BLKmode)
1491 double_int mul;
1492 tree al = build_int_cst (TREE_TYPE (step),
1493 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1495 if (base_align < GET_MODE_ALIGNMENT (mode)
1496 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1497 || bitpos % BITS_PER_UNIT != 0)
1498 return true;
1500 if (!constant_multiple_of (step, al, &mul))
1501 return true;
1504 return false;
1507 /* Return true if EXPR may be non-addressable. */
1509 static bool
1510 may_be_nonaddressable_p (tree expr)
1512 switch (TREE_CODE (expr))
1514 case TARGET_MEM_REF:
1515 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1516 target, thus they are always addressable. */
1517 return false;
1519 case COMPONENT_REF:
1520 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1521 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1523 case VIEW_CONVERT_EXPR:
1524 /* This kind of view-conversions may wrap non-addressable objects
1525 and make them look addressable. After some processing the
1526 non-addressability may be uncovered again, causing ADDR_EXPRs
1527 of inappropriate objects to be built. */
1528 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1529 || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
1530 return true;
1532 /* ... fall through ... */
1534 case ARRAY_REF:
1535 case ARRAY_RANGE_REF:
1536 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1538 case CONVERT_EXPR:
1539 case NON_LVALUE_EXPR:
1540 case NOP_EXPR:
1541 return true;
1543 default:
1544 break;
1547 return false;
1550 /* Finds addresses in *OP_P inside STMT. */
1552 static void
1553 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1555 tree base = *op_p, step = build_int_cst (sizetype, 0);
1556 struct iv *civ;
1557 struct ifs_ivopts_data ifs_ivopts_data;
1559 /* Do not play with volatile memory references. A bit too conservative,
1560 perhaps, but safe. */
1561 if (stmt_ann (stmt)->has_volatile_ops)
1562 goto fail;
1564 /* Ignore bitfields for now. Not really something terribly complicated
1565 to handle. TODO. */
1566 if (TREE_CODE (base) == BIT_FIELD_REF)
1567 goto fail;
1569 base = unshare_expr (base);
1571 if (TREE_CODE (base) == TARGET_MEM_REF)
1573 tree type = build_pointer_type (TREE_TYPE (base));
1574 tree astep;
1576 if (TMR_BASE (base)
1577 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1579 civ = get_iv (data, TMR_BASE (base));
1580 if (!civ)
1581 goto fail;
1583 TMR_BASE (base) = civ->base;
1584 step = civ->step;
1586 if (TMR_INDEX (base)
1587 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1589 civ = get_iv (data, TMR_INDEX (base));
1590 if (!civ)
1591 goto fail;
1593 TMR_INDEX (base) = civ->base;
1594 astep = civ->step;
1596 if (astep)
1598 if (TMR_STEP (base))
1599 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1601 step = fold_build2 (PLUS_EXPR, type, step, astep);
1605 if (integer_zerop (step))
1606 goto fail;
1607 base = tree_mem_ref_addr (type, base);
1609 else
1611 ifs_ivopts_data.ivopts_data = data;
1612 ifs_ivopts_data.stmt = stmt;
1613 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1614 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1615 || integer_zerop (ifs_ivopts_data.step))
1616 goto fail;
1617 step = ifs_ivopts_data.step;
1619 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1620 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1622 /* Check that the base expression is addressable. This needs
1623 to be done after substituting bases of IVs into it. */
1624 if (may_be_nonaddressable_p (base))
1625 goto fail;
1627 /* Moreover, on strict alignment platforms, check that it is
1628 sufficiently aligned. */
1629 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1630 goto fail;
1632 base = build_fold_addr_expr (base);
1634 /* Substituting bases of IVs into the base expression might
1635 have caused folding opportunities. */
1636 if (TREE_CODE (base) == ADDR_EXPR)
1638 tree *ref = &TREE_OPERAND (base, 0);
1639 while (handled_component_p (*ref))
1640 ref = &TREE_OPERAND (*ref, 0);
1641 if (TREE_CODE (*ref) == INDIRECT_REF)
1642 *ref = fold_indirect_ref (*ref);
1646 civ = alloc_iv (base, step);
1647 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1648 return;
1650 fail:
1651 for_each_index (op_p, idx_record_use, data);
1654 /* Finds and records invariants used in STMT. */
1656 static void
1657 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1659 ssa_op_iter iter;
1660 use_operand_p use_p;
1661 tree op;
1663 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1665 op = USE_FROM_PTR (use_p);
1666 record_invariant (data, op, false);
1670 /* Finds interesting uses of induction variables in the statement STMT. */
1672 static void
1673 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1675 struct iv *iv;
1676 tree op, lhs, rhs;
1677 ssa_op_iter iter;
1678 use_operand_p use_p;
1680 find_invariants_stmt (data, stmt);
1682 if (TREE_CODE (stmt) == COND_EXPR)
1684 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1685 return;
1688 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1690 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1691 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1693 if (TREE_CODE (lhs) == SSA_NAME)
1695 /* If the statement defines an induction variable, the uses are not
1696 interesting by themselves. */
1698 iv = get_iv (data, lhs);
1700 if (iv && !integer_zerop (iv->step))
1701 return;
1704 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1706 case tcc_comparison:
1707 find_interesting_uses_cond (data, stmt,
1708 &GIMPLE_STMT_OPERAND (stmt, 1));
1709 return;
1711 case tcc_reference:
1712 find_interesting_uses_address (data, stmt,
1713 &GIMPLE_STMT_OPERAND (stmt, 1));
1714 if (REFERENCE_CLASS_P (lhs))
1715 find_interesting_uses_address (data, stmt,
1716 &GIMPLE_STMT_OPERAND (stmt, 0));
1717 return;
1719 default: ;
1722 if (REFERENCE_CLASS_P (lhs)
1723 && is_gimple_val (rhs))
1725 find_interesting_uses_address (data, stmt,
1726 &GIMPLE_STMT_OPERAND (stmt, 0));
1727 find_interesting_uses_op (data, rhs);
1728 return;
1731 /* TODO -- we should also handle address uses of type
1733 memory = call (whatever);
1737 call (memory). */
1740 if (TREE_CODE (stmt) == PHI_NODE
1741 && bb_for_stmt (stmt) == data->current_loop->header)
1743 lhs = PHI_RESULT (stmt);
1744 iv = get_iv (data, lhs);
1746 if (iv && !integer_zerop (iv->step))
1747 return;
1750 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1752 op = USE_FROM_PTR (use_p);
1754 if (TREE_CODE (op) != SSA_NAME)
1755 continue;
1757 iv = get_iv (data, op);
1758 if (!iv)
1759 continue;
1761 find_interesting_uses_op (data, op);
1765 /* Finds interesting uses of induction variables outside of loops
1766 on loop exit edge EXIT. */
1768 static void
1769 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1771 tree phi, def;
1773 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1775 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1776 if (is_gimple_reg (def))
1777 find_interesting_uses_op (data, def);
1781 /* Finds uses of the induction variables that are interesting. */
1783 static void
1784 find_interesting_uses (struct ivopts_data *data)
1786 basic_block bb;
1787 block_stmt_iterator bsi;
1788 tree phi;
1789 basic_block *body = get_loop_body (data->current_loop);
1790 unsigned i;
1791 struct version_info *info;
1792 edge e;
1794 if (dump_file && (dump_flags & TDF_DETAILS))
1795 fprintf (dump_file, "Uses:\n\n");
1797 for (i = 0; i < data->current_loop->num_nodes; i++)
1799 edge_iterator ei;
1800 bb = body[i];
1802 FOR_EACH_EDGE (e, ei, bb->succs)
1803 if (e->dest != EXIT_BLOCK_PTR
1804 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1805 find_interesting_uses_outside (data, e);
1807 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1808 find_interesting_uses_stmt (data, phi);
1809 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1810 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1813 if (dump_file && (dump_flags & TDF_DETAILS))
1815 bitmap_iterator bi;
1817 fprintf (dump_file, "\n");
1819 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1821 info = ver_info (data, i);
1822 if (info->inv_id)
1824 fprintf (dump_file, " ");
1825 print_generic_expr (dump_file, info->name, TDF_SLIM);
1826 fprintf (dump_file, " is invariant (%d)%s\n",
1827 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1831 fprintf (dump_file, "\n");
1834 free (body);
1837 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1838 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1839 we are at the top-level of the processed address. */
1841 static tree
1842 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1843 unsigned HOST_WIDE_INT *offset)
1845 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1846 enum tree_code code;
1847 tree type, orig_type = TREE_TYPE (expr);
1848 unsigned HOST_WIDE_INT off0, off1, st;
1849 tree orig_expr = expr;
1851 STRIP_NOPS (expr);
1853 type = TREE_TYPE (expr);
1854 code = TREE_CODE (expr);
1855 *offset = 0;
1857 switch (code)
1859 case INTEGER_CST:
1860 if (!cst_and_fits_in_hwi (expr)
1861 || integer_zerop (expr))
1862 return orig_expr;
1864 *offset = int_cst_value (expr);
1865 return build_int_cst (orig_type, 0);
1867 case POINTER_PLUS_EXPR:
1868 case PLUS_EXPR:
1869 case MINUS_EXPR:
1870 op0 = TREE_OPERAND (expr, 0);
1871 op1 = TREE_OPERAND (expr, 1);
1873 op0 = strip_offset_1 (op0, false, false, &off0);
1874 op1 = strip_offset_1 (op1, false, false, &off1);
1876 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1877 if (op0 == TREE_OPERAND (expr, 0)
1878 && op1 == TREE_OPERAND (expr, 1))
1879 return orig_expr;
1881 if (integer_zerop (op1))
1882 expr = op0;
1883 else if (integer_zerop (op0))
1885 if (code == MINUS_EXPR)
1886 expr = fold_build1 (NEGATE_EXPR, type, op1);
1887 else
1888 expr = op1;
1890 else
1891 expr = fold_build2 (code, type, op0, op1);
1893 return fold_convert (orig_type, expr);
1895 case ARRAY_REF:
1896 if (!inside_addr)
1897 return orig_expr;
1899 step = array_ref_element_size (expr);
1900 if (!cst_and_fits_in_hwi (step))
1901 break;
1903 st = int_cst_value (step);
1904 op1 = TREE_OPERAND (expr, 1);
1905 op1 = strip_offset_1 (op1, false, false, &off1);
1906 *offset = off1 * st;
1908 if (top_compref
1909 && integer_zerop (op1))
1911 /* Strip the component reference completely. */
1912 op0 = TREE_OPERAND (expr, 0);
1913 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1914 *offset += off0;
1915 return op0;
1917 break;
1919 case COMPONENT_REF:
1920 if (!inside_addr)
1921 return orig_expr;
1923 tmp = component_ref_field_offset (expr);
1924 if (top_compref
1925 && cst_and_fits_in_hwi (tmp))
1927 /* Strip the component reference completely. */
1928 op0 = TREE_OPERAND (expr, 0);
1929 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1930 *offset = off0 + int_cst_value (tmp);
1931 return op0;
1933 break;
1935 case ADDR_EXPR:
1936 op0 = TREE_OPERAND (expr, 0);
1937 op0 = strip_offset_1 (op0, true, true, &off0);
1938 *offset += off0;
1940 if (op0 == TREE_OPERAND (expr, 0))
1941 return orig_expr;
1943 expr = build_fold_addr_expr (op0);
1944 return fold_convert (orig_type, expr);
1946 case INDIRECT_REF:
1947 inside_addr = false;
1948 break;
1950 default:
1951 return orig_expr;
1954 /* Default handling of expressions for that we want to recurse into
1955 the first operand. */
1956 op0 = TREE_OPERAND (expr, 0);
1957 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1958 *offset += off0;
1960 if (op0 == TREE_OPERAND (expr, 0)
1961 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1962 return orig_expr;
1964 expr = copy_node (expr);
1965 TREE_OPERAND (expr, 0) = op0;
1966 if (op1)
1967 TREE_OPERAND (expr, 1) = op1;
1969 /* Inside address, we might strip the top level component references,
1970 thus changing type of the expression. Handling of ADDR_EXPR
1971 will fix that. */
1972 expr = fold_convert (orig_type, expr);
1974 return expr;
1977 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1979 static tree
1980 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1982 return strip_offset_1 (expr, false, false, offset);
1985 /* Returns variant of TYPE that can be used as base for different uses.
1986 We return unsigned type with the same precision, which avoids problems
1987 with overflows. */
1989 static tree
1990 generic_type_for (tree type)
1992 if (POINTER_TYPE_P (type))
1993 return unsigned_type_for (type);
1995 if (TYPE_UNSIGNED (type))
1996 return type;
1998 return unsigned_type_for (type);
2001 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2002 the bitmap to that we should store it. */
2004 static struct ivopts_data *fd_ivopts_data;
2005 static tree
2006 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2008 bitmap *depends_on = (bitmap *) data;
2009 struct version_info *info;
2011 if (TREE_CODE (*expr_p) != SSA_NAME)
2012 return NULL_TREE;
2013 info = name_info (fd_ivopts_data, *expr_p);
2015 if (!info->inv_id || info->has_nonlin_use)
2016 return NULL_TREE;
2018 if (!*depends_on)
2019 *depends_on = BITMAP_ALLOC (NULL);
2020 bitmap_set_bit (*depends_on, info->inv_id);
2022 return NULL_TREE;
2025 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2026 position to POS. If USE is not NULL, the candidate is set as related to
2027 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2028 replacement of the final value of the iv by a direct computation. */
2030 static struct iv_cand *
2031 add_candidate_1 (struct ivopts_data *data,
2032 tree base, tree step, bool important, enum iv_position pos,
2033 struct iv_use *use, tree incremented_at)
2035 unsigned i;
2036 struct iv_cand *cand = NULL;
2037 tree type, orig_type;
2039 if (base)
2041 orig_type = TREE_TYPE (base);
2042 type = generic_type_for (orig_type);
2043 if (type != orig_type)
2045 base = fold_convert (type, base);
2046 step = fold_convert (type, step);
2050 for (i = 0; i < n_iv_cands (data); i++)
2052 cand = iv_cand (data, i);
2054 if (cand->pos != pos)
2055 continue;
2057 if (cand->incremented_at != incremented_at)
2058 continue;
2060 if (!cand->iv)
2062 if (!base && !step)
2063 break;
2065 continue;
2068 if (!base && !step)
2069 continue;
2071 if (operand_equal_p (base, cand->iv->base, 0)
2072 && operand_equal_p (step, cand->iv->step, 0))
2073 break;
2076 if (i == n_iv_cands (data))
2078 cand = XCNEW (struct iv_cand);
2079 cand->id = i;
2081 if (!base && !step)
2082 cand->iv = NULL;
2083 else
2084 cand->iv = alloc_iv (base, step);
2086 cand->pos = pos;
2087 if (pos != IP_ORIGINAL && cand->iv)
2089 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2090 cand->var_after = cand->var_before;
2092 cand->important = important;
2093 cand->incremented_at = incremented_at;
2094 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2096 if (step
2097 && TREE_CODE (step) != INTEGER_CST)
2099 fd_ivopts_data = data;
2100 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2103 if (dump_file && (dump_flags & TDF_DETAILS))
2104 dump_cand (dump_file, cand);
2107 if (important && !cand->important)
2109 cand->important = true;
2110 if (dump_file && (dump_flags & TDF_DETAILS))
2111 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2114 if (use)
2116 bitmap_set_bit (use->related_cands, i);
2117 if (dump_file && (dump_flags & TDF_DETAILS))
2118 fprintf (dump_file, "Candidate %d is related to use %d\n",
2119 cand->id, use->id);
2122 return cand;
2125 /* Returns true if incrementing the induction variable at the end of the LOOP
2126 is allowed.
2128 The purpose is to avoid splitting latch edge with a biv increment, thus
2129 creating a jump, possibly confusing other optimization passes and leaving
2130 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2131 is not available (so we do not have a better alternative), or if the latch
2132 edge is already nonempty. */
2134 static bool
2135 allow_ip_end_pos_p (struct loop *loop)
2137 if (!ip_normal_pos (loop))
2138 return true;
2140 if (!empty_block_p (ip_end_pos (loop)))
2141 return true;
2143 return false;
2146 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2147 position to POS. If USE is not NULL, the candidate is set as related to
2148 it. The candidate computation is scheduled on all available positions. */
2150 static void
2151 add_candidate (struct ivopts_data *data,
2152 tree base, tree step, bool important, struct iv_use *use)
2154 if (ip_normal_pos (data->current_loop))
2155 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2156 if (ip_end_pos (data->current_loop)
2157 && allow_ip_end_pos_p (data->current_loop))
2158 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2161 /* Add a standard "0 + 1 * iteration" iv candidate for a
2162 type with SIZE bits. */
2164 static void
2165 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2166 unsigned int size)
2168 tree type = lang_hooks.types.type_for_size (size, true);
2169 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2170 true, NULL);
2173 /* Adds standard iv candidates. */
2175 static void
2176 add_standard_iv_candidates (struct ivopts_data *data)
2178 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2180 /* The same for a double-integer type if it is still fast enough. */
2181 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2182 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2186 /* Adds candidates bases on the old induction variable IV. */
2188 static void
2189 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2191 tree phi, def;
2192 struct iv_cand *cand;
2194 add_candidate (data, iv->base, iv->step, true, NULL);
2196 /* The same, but with initial value zero. */
2197 add_candidate (data,
2198 build_int_cst (TREE_TYPE (iv->base), 0),
2199 iv->step, true, NULL);
2201 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2202 if (TREE_CODE (phi) == PHI_NODE)
2204 /* Additionally record the possibility of leaving the original iv
2205 untouched. */
2206 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2207 cand = add_candidate_1 (data,
2208 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2209 SSA_NAME_DEF_STMT (def));
2210 cand->var_before = iv->ssa_name;
2211 cand->var_after = def;
2215 /* Adds candidates based on the old induction variables. */
2217 static void
2218 add_old_ivs_candidates (struct ivopts_data *data)
2220 unsigned i;
2221 struct iv *iv;
2222 bitmap_iterator bi;
2224 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2226 iv = ver_info (data, i)->iv;
2227 if (iv && iv->biv_p && !integer_zerop (iv->step))
2228 add_old_iv_candidates (data, iv);
2232 /* Adds candidates based on the value of the induction variable IV and USE. */
2234 static void
2235 add_iv_value_candidates (struct ivopts_data *data,
2236 struct iv *iv, struct iv_use *use)
2238 unsigned HOST_WIDE_INT offset;
2239 tree base;
2241 add_candidate (data, iv->base, iv->step, false, use);
2243 /* The same, but with initial value zero. Make such variable important,
2244 since it is generic enough so that possibly many uses may be based
2245 on it. */
2246 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2247 iv->step, true, use);
2249 /* Third, try removing the constant offset. */
2250 base = strip_offset (iv->base, &offset);
2251 if (offset)
2252 add_candidate (data, base, iv->step, false, use);
2255 /* Adds candidates based on the uses. */
2257 static void
2258 add_derived_ivs_candidates (struct ivopts_data *data)
2260 unsigned i;
2262 for (i = 0; i < n_iv_uses (data); i++)
2264 struct iv_use *use = iv_use (data, i);
2266 if (!use)
2267 continue;
2269 switch (use->type)
2271 case USE_NONLINEAR_EXPR:
2272 case USE_COMPARE:
2273 case USE_ADDRESS:
2274 /* Just add the ivs based on the value of the iv used here. */
2275 add_iv_value_candidates (data, use->iv, use);
2276 break;
2278 default:
2279 gcc_unreachable ();
2284 /* Record important candidates and add them to related_cands bitmaps
2285 if needed. */
2287 static void
2288 record_important_candidates (struct ivopts_data *data)
2290 unsigned i;
2291 struct iv_use *use;
2293 for (i = 0; i < n_iv_cands (data); i++)
2295 struct iv_cand *cand = iv_cand (data, i);
2297 if (cand->important)
2298 bitmap_set_bit (data->important_candidates, i);
2301 data->consider_all_candidates = (n_iv_cands (data)
2302 <= CONSIDER_ALL_CANDIDATES_BOUND);
2304 if (data->consider_all_candidates)
2306 /* We will not need "related_cands" bitmaps in this case,
2307 so release them to decrease peak memory consumption. */
2308 for (i = 0; i < n_iv_uses (data); i++)
2310 use = iv_use (data, i);
2311 BITMAP_FREE (use->related_cands);
2314 else
2316 /* Add important candidates to the related_cands bitmaps. */
2317 for (i = 0; i < n_iv_uses (data); i++)
2318 bitmap_ior_into (iv_use (data, i)->related_cands,
2319 data->important_candidates);
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
2341 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2342 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2343 we allocate a simple list to every use. */
2345 static void
2346 alloc_use_cost_map (struct ivopts_data *data)
2348 unsigned i, size, s, j;
2350 for (i = 0; i < n_iv_uses (data); i++)
2352 struct iv_use *use = iv_use (data, i);
2353 bitmap_iterator bi;
2355 if (data->consider_all_candidates)
2356 size = n_iv_cands (data);
2357 else
2359 s = 0;
2360 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2362 s++;
2365 /* Round up to the power of two, so that moduling by it is fast. */
2366 for (size = 1; size < s; size <<= 1)
2367 continue;
2370 use->n_map_members = size;
2371 use->cost_map = XCNEWVEC (struct cost_pair, size);
2375 /* Returns description of computation cost of expression whose runtime
2376 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2378 static comp_cost
2379 new_cost (unsigned runtime, unsigned complexity)
2381 comp_cost cost;
2383 cost.cost = runtime;
2384 cost.complexity = complexity;
2386 return cost;
2389 /* Adds costs COST1 and COST2. */
2391 static comp_cost
2392 add_costs (comp_cost cost1, comp_cost cost2)
2394 cost1.cost += cost2.cost;
2395 cost1.complexity += cost2.complexity;
2397 return cost1;
2399 /* Subtracts costs COST1 and COST2. */
2401 static comp_cost
2402 sub_costs (comp_cost cost1, comp_cost cost2)
2404 cost1.cost -= cost2.cost;
2405 cost1.complexity -= cost2.complexity;
2407 return cost1;
2410 /* Returns a negative number if COST1 < COST2, a positive number if
2411 COST1 > COST2, and 0 if COST1 = COST2. */
2413 static int
2414 compare_costs (comp_cost cost1, comp_cost cost2)
2416 if (cost1.cost == cost2.cost)
2417 return cost1.complexity - cost2.complexity;
2419 return cost1.cost - cost2.cost;
2422 /* Returns true if COST is infinite. */
2424 static bool
2425 infinite_cost_p (comp_cost cost)
2427 return cost.cost == INFTY;
2430 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2431 on invariants DEPENDS_ON and that the value used in expressing it
2432 is VALUE.*/
2434 static void
2435 set_use_iv_cost (struct ivopts_data *data,
2436 struct iv_use *use, struct iv_cand *cand,
2437 comp_cost cost, bitmap depends_on, tree value)
2439 unsigned i, s;
2441 if (infinite_cost_p (cost))
2443 BITMAP_FREE (depends_on);
2444 return;
2447 if (data->consider_all_candidates)
2449 use->cost_map[cand->id].cand = cand;
2450 use->cost_map[cand->id].cost = cost;
2451 use->cost_map[cand->id].depends_on = depends_on;
2452 use->cost_map[cand->id].value = value;
2453 return;
2456 /* n_map_members is a power of two, so this computes modulo. */
2457 s = cand->id & (use->n_map_members - 1);
2458 for (i = s; i < use->n_map_members; i++)
2459 if (!use->cost_map[i].cand)
2460 goto found;
2461 for (i = 0; i < s; i++)
2462 if (!use->cost_map[i].cand)
2463 goto found;
2465 gcc_unreachable ();
2467 found:
2468 use->cost_map[i].cand = cand;
2469 use->cost_map[i].cost = cost;
2470 use->cost_map[i].depends_on = depends_on;
2471 use->cost_map[i].value = value;
2474 /* Gets cost of (USE, CANDIDATE) pair.  Returns NULL if no finite cost
2475 was recorded for the pair by set_use_iv_cost. */
2476 static struct cost_pair *
2477 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2478 struct iv_cand *cand)
2480 unsigned i, s;
2481 struct cost_pair *ret;
2483 if (!cand)
2484 return NULL;
2486 if (data->consider_all_candidates)
/* Direct indexing by candidate id; an empty slot means the pair was
   rejected (infinite cost).  */
2488 ret = use->cost_map + cand->id;
2489 if (!ret->cand)
2490 return NULL;
2492 return ret;
2495 /* n_map_members is a power of two, so this computes modulo. */
2496 s = cand->id & (use->n_map_members - 1);
/* Linear probe mirroring the insertion order used in set_use_iv_cost.  */
2497 for (i = s; i < use->n_map_members; i++)
2498 if (use->cost_map[i].cand == cand)
2499 return use->cost_map + i;
2501 for (i = 0; i < s; i++)
2502 if (use->cost_map[i].cand == cand)
2503 return use->cost_map + i;
2505 return NULL;
2508 /* Returns estimate on cost of computing SEQ. */
2510 static unsigned
2511 seq_cost (rtx seq)
2513 unsigned cost = 0;
2514 rtx set;
2516 for (; seq; seq = NEXT_INSN (seq))
2518 set = single_set (seq);
2519 if (set)
2520 cost += rtx_cost (set, SET);
2521 else
2522 cost++;
2525 return cost;
2528 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.
2528b REGNO is a counter of fake pseudo registers, advanced for each
2528c non-static object.  */
2529 static rtx
2530 produce_memory_decl_rtl (tree obj, int *regno)
2532 rtx x;
2534 gcc_assert (obj);
2535 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
/* Static or external objects get a real symbolic address, run through
   the target hook so section info is encoded correctly.  */
2537 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2538 x = gen_rtx_SYMBOL_REF (Pmode, name);
2539 SET_SYMBOL_REF_DECL (x, obj);
2540 x = gen_rtx_MEM (DECL_MODE (obj), x);
2541 targetm.encode_section_info (obj, x, true);
2543 else
/* Other objects are addressed through a fresh fake register.  */
2545 x = gen_raw_REG (Pmode, (*regno)++);
2546 x = gen_rtx_MEM (DECL_MODE (obj), x);
2549 return x;
2552 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2553 walk_tree. DATA contains the actual fake register number. */
2555 static tree
2556 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2558 tree obj = NULL_TREE;
2559 rtx x = NULL_RTX;
2560 int *regno = (int *) data;
2562 switch (TREE_CODE (*expr_p))
2564 case ADDR_EXPR:
/* Strip handled components to find the base object whose address is
   taken; that base must live in memory.  */
2565 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2566 handled_component_p (*expr_p);
2567 expr_p = &TREE_OPERAND (*expr_p, 0))
2568 continue;
2569 obj = *expr_p;
2570 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2571 x = produce_memory_decl_rtl (obj, regno);
2572 break;
2574 case SSA_NAME:
/* *WS = 0 stops walk_tree from descending into this subtree.  */
2575 *ws = 0;
2576 obj = SSA_NAME_VAR (*expr_p);
2577 if (!DECL_RTL_SET_P (obj))
2578 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2579 break;
2581 case VAR_DECL:
2582 case PARM_DECL:
2583 case RESULT_DECL:
2584 *ws = 0;
2585 obj = *expr_p;
2587 if (DECL_RTL_SET_P (obj))
2588 break;
/* BLKmode objects cannot live in a register; give them a memory rtl.  */
2590 if (DECL_MODE (obj) == BLKmode)
2591 x = produce_memory_decl_rtl (obj, regno);
2592 else
2593 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2595 break;
2597 default:
2598 break;
/* Remember the decl so the fake DECL_RTL can be reset later (see
   decl_rtl_to_reset).  */
2601 if (x)
2603 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2604 SET_DECL_RTL (obj, x);
2607 return NULL_TREE;
2610 /* Determines cost of the computation of EXPR by expanding it to RTL in a
2611 scratch sequence and costing the resulting insns.  */
2612 static unsigned
2613 computation_cost (tree expr)
2615 rtx seq, rslt;
2616 tree type = TREE_TYPE (expr);
2617 unsigned cost;
2618 /* Avoid using hard regs in ways which may be unsupported. */
2619 int regno = LAST_VIRTUAL_REGISTER + 1;
/* Give every referenced decl a fake DECL_RTL so expansion can proceed
   outside of normal function context.  */
2621 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2622 start_sequence ();
2623 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2624 seq = get_insns ();
2625 end_sequence ();
2627 cost = seq_cost (seq);
/* If the result ended up in memory, loading it counts too.  */
2628 if (MEM_P (rslt))
2629 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2631 return cost;
2634 /* Returns variable containing the value of candidate CAND at statement AT. */
2636 static tree
2637 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2639 if (stmt_after_increment (loop, cand, stmt))
2640 return cand->var_after;
2641 else
2642 return cand->var_before;
2645 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2646 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2649 tree_int_cst_sign_bit (const_tree t)
2651 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2652 unsigned HOST_WIDE_INT w;
/* Pick the word of the double-word constant that contains BITNO and
   adjust the bit index into that word.  */
2654 if (bitno < HOST_BITS_PER_WIDE_INT)
2655 w = TREE_INT_CST_LOW (t);
2656 else
2658 w = TREE_INT_CST_HIGH (t);
2659 bitno -= HOST_BITS_PER_WIDE_INT;
2662 return (w >> bitno) & 1;
2665 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2666 same precision that is at least as wide as the precision of TYPE, stores
2667 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2668 type of A and B. */
2670 static tree
2671 determine_common_wider_type (tree *a, tree *b)
2673 tree wider_type = NULL;
2674 tree suba, subb;
2675 tree atype = TREE_TYPE (*a);
/* Both operands must be conversions, and the inner types must have
   matching precision not narrower than ATYPE; any failure falls back
   to ATYPE without modifying *A or *B.  */
2677 if ((TREE_CODE (*a) == NOP_EXPR
2678 || TREE_CODE (*a) == CONVERT_EXPR))
2680 suba = TREE_OPERAND (*a, 0);
2681 wider_type = TREE_TYPE (suba);
2682 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2683 return atype;
2685 else
2686 return atype;
2688 if ((TREE_CODE (*b) == NOP_EXPR
2689 || TREE_CODE (*b) == CONVERT_EXPR))
2691 subb = TREE_OPERAND (*b, 0);
2692 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2693 return atype;
2695 else
2696 return atype;
/* Success: strip both conversions and report the shared wider type.  */
2698 *a = suba;
2699 *b = subb;
2700 return wider_type;
2703 /* Determines the expression by that USE is expressed from induction variable
2704 CAND at statement AT in LOOP. The expression is stored in a decomposed
2705 form into AFF. Returns false if USE cannot be expressed using CAND. */
2707 static bool
2708 get_computation_aff (struct loop *loop,
2709 struct iv_use *use, struct iv_cand *cand, tree at,
2710 struct affine_tree_combination *aff)
2712 tree ubase = use->iv->base;
2713 tree ustep = use->iv->step;
2714 tree cbase = cand->iv->base;
2715 tree cstep = cand->iv->step, cstep_common;
2716 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2717 tree common_type, var;
2718 tree uutype;
2719 aff_tree cbase_aff, var_aff;
2720 double_int rat;
2722 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2724 /* We do not have a precision to express the values of use. */
2725 return false;
2728 var = var_at_stmt (loop, cand, at);
/* All arithmetic below is done in the unsigned variant of UTYPE to get
   well-defined wrap-around semantics.  */
2729 uutype = unsigned_type_for (utype);
2731 /* If the conversion is not noop, perform it. */
2732 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2734 cstep = fold_convert (uutype, cstep);
2735 cbase = fold_convert (uutype, cbase);
2736 var = fold_convert (uutype, var);
/* RAT is the constant such that ustep = RAT * cstep; without it USE
   cannot be derived from CAND.  */
2739 if (!constant_multiple_of (ustep, cstep, &rat))
2740 return false;
2742 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2743 type, we achieve better folding by computing their difference in this
2744 wider type, and cast the result to UUTYPE. We do not need to worry about
2745 overflows, as all the arithmetics will in the end be performed in UUTYPE
2746 anyway. */
2747 common_type = determine_common_wider_type (&ubase, &cbase);
2749 /* use = ubase - ratio * cbase + ratio * var. */
2750 tree_to_aff_combination (ubase, common_type, aff);
2751 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2752 tree_to_aff_combination (var, uutype, &var_aff);
2754 /* We need to shift the value if we are after the increment. */
2755 if (stmt_after_increment (loop, cand, at))
2757 aff_tree cstep_aff;
2759 if (common_type != uutype)
2760 cstep_common = fold_convert (common_type, cstep);
2761 else
2762 cstep_common = cstep;
2764 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2765 aff_combination_add (&cbase_aff, &cstep_aff);
/* Combine: aff = ubase - rat * cbase (converted to UUTYPE) + rat * var.  */
2768 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2769 aff_combination_add (aff, &cbase_aff);
2770 if (common_type != uutype)
2771 aff_combination_convert (aff, uutype);
2773 aff_combination_scale (&var_aff, rat);
2774 aff_combination_add (aff, &var_aff);
2776 return true;
2779 /* Determines the expression by that USE is expressed from induction variable
2780 CAND at statement AT in LOOP. The computation is unshared. */
2782 static tree
2783 get_computation_at (struct loop *loop,
2784 struct iv_use *use, struct iv_cand *cand, tree at)
2786 aff_tree aff;
2787 tree type = TREE_TYPE (use->iv->base);
2789 if (!get_computation_aff (loop, use, cand, at, &aff))
2790 return NULL_TREE;
2791 unshare_aff_combination (&aff);
2792 return fold_convert (type, aff_combination_to_tree (&aff));
2795 /* Determines the expression by that USE is expressed from induction variable
2796 CAND in LOOP. The computation is unshared. */
2798 static tree
2799 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2801 return get_computation_at (loop, use, cand, use->stmt);
2804 /* Returns cost of addition in MODE.  The result is computed once per
2805 mode and memoized in a static table.  */
2806 static unsigned
2807 add_cost (enum machine_mode mode)
2809 static unsigned costs[NUM_MACHINE_MODES];
2810 rtx seq;
2811 unsigned cost;
2813 if (costs[mode])
2814 return costs[mode];
/* Expand a PLUS of two fake pseudo registers in a scratch sequence and
   cost the emitted insns.  */
2816 start_sequence ();
2817 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2818 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2819 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2820 NULL_RTX);
2821 seq = get_insns ();
2822 end_sequence ();
2824 cost = seq_cost (seq);
/* A zero cost would also defeat the memoization test above; clamp to 1.  */
2825 if (!cost)
2826 cost = 1;
2828 costs[mode] = cost;
2830 if (dump_file && (dump_flags & TDF_DETAILS))
2831 fprintf (dump_file, "Addition in %s costs %d\n",
2832 GET_MODE_NAME (mode), cost);
2833 return cost;
2836 /* Entry in a hashtable of already known costs for multiplication.
2836b Entries are keyed by (CST, MODE); see the static cache in
2836c multiply_by_cost.  */
2837 struct mbc_entry
2839 HOST_WIDE_INT cst; /* The constant to multiply by. */
2840 enum machine_mode mode; /* In mode. */
2841 unsigned cost; /* The cost. */
2844 /* Counts hash value for the ENTRY. */
2846 static hashval_t
2847 mbc_entry_hash (const void *entry)
2849 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2851 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2854 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2856 static int
2857 mbc_entry_eq (const void *entry1, const void *entry2)
2859 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2860 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2862 return (e1->mode == e2->mode
2863 && e1->cst == e2->cst);
2866 /* Returns cost of multiplication by constant CST in MODE, memoized in a
2867 lazily created hash table keyed by (CST, MODE).  */
2868 unsigned
2869 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2871 static htab_t costs;
2872 struct mbc_entry **cached, act;
2873 rtx seq;
2874 unsigned cost;
2876 if (!costs)
2877 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2879 act.mode = mode;
2880 act.cst = cst;
/* INSERT reserves a slot; if it is already filled, the cost is known.  */
2881 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2882 if (*cached)
2883 return (*cached)->cost;
2885 *cached = XNEW (struct mbc_entry);
2886 (*cached)->mode = mode;
2887 (*cached)->cst = cst;
/* Expand the multiplication in a scratch sequence and cost the insns.  */
2889 start_sequence ();
2890 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2891 gen_int_mode (cst, mode), NULL_RTX, 0);
2892 seq = get_insns ();
2893 end_sequence ();
2895 cost = seq_cost (seq);
2897 if (dump_file && (dump_flags & TDF_DETAILS))
2898 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2899 (int) cst, GET_MODE_NAME (mode), cost);
2901 (*cached)->cost = cost;
2903 return cost;
2906 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2907 validity for a memory reference accessing memory of mode MODE. */
2909 bool
2910 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2912 #define MAX_RATIO 128
2913 static sbitmap valid_mult[MAX_MACHINE_MODE];
/* On first use for MODE, probe the target for every ratio in
   [-MAX_RATIO, MAX_RATIO] and cache the answers in a bitmap (bit
   index is ratio + MAX_RATIO).  */
2915 if (!valid_mult[mode])
2917 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2918 rtx addr;
2919 HOST_WIDE_INT i;
2921 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2922 sbitmap_zero (valid_mult[mode]);
2923 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2924 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2926 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2927 if (memory_address_p (mode, addr))
2928 SET_BIT (valid_mult[mode], i + MAX_RATIO)
2931 if (dump_file && (dump_flags & TDF_DETAILS))
2933 fprintf (dump_file, " allowed multipliers:");
2934 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2935 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2936 fprintf (dump_file, " %d", (int) i);
2937 fprintf (dump_file, "\n");
2938 fprintf (dump_file, "\n");
/* Ratios outside the probed range are conservatively rejected.  */
2942 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2943 return false;
2945 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2948 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2949 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2950 variable is omitted. Compute the cost for a memory reference that accesses
2951 a memory location of mode MEM_MODE.
2953 TODO -- there must be some better way. This all is quite crude. */
2955 static comp_cost
2956 get_address_cost (bool symbol_present, bool var_present,
2957 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2958 enum machine_mode mem_mode)
2960 static bool initialized[MAX_MACHINE_MODE];
2961 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2962 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2963 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2964 unsigned cost, acost, complexity;
2965 bool offset_p, ratio_p;
2966 HOST_WIDE_INT s_offset;
2967 unsigned HOST_WIDE_INT mask;
2968 unsigned bits;
/* One-time (per memory mode) probing of the target: valid offset range,
   a representative allowed multiplier, and the cost of each of the 16
   combinations of {symbol, var, offset, ratio} address parts.  */
2970 if (!initialized[mem_mode])
2972 HOST_WIDE_INT i;
2973 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2974 int old_cse_not_expected;
2975 unsigned sym_p, var_p, off_p, rat_p, add_c;
2976 rtx seq, addr, base;
2977 rtx reg0, reg1;
2979 initialized[mem_mode] = true;
2981 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
/* Find the largest power-of-two positive offset the target accepts.  */
2983 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2984 for (i = start; i <= 1 << 20; i <<= 1)
2986 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2987 if (!memory_address_p (mem_mode, addr))
2988 break;
2990 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2991 off[mem_mode] = max_offset[mem_mode];
/* And symmetrically for negative offsets.  */
2993 for (i = start; i <= 1 << 20; i <<= 1)
2995 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2996 if (!memory_address_p (mem_mode, addr))
2997 break;
2999 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3001 if (dump_file && (dump_flags & TDF_DETAILS))
3003 fprintf (dump_file, "get_address_cost:\n");
3004 fprintf (dump_file, " min offset %s %d\n",
3005 GET_MODE_NAME (mem_mode),
3006 (int) min_offset[mem_mode]);
3007 fprintf (dump_file, " max offset %s %d\n",
3008 GET_MODE_NAME (mem_mode),
3009 (int) max_offset[mem_mode]);
/* Pick the smallest multiplier > 1 the target allows in addresses.  */
3012 rat[mem_mode] = 1;
3013 for (i = 2; i <= MAX_RATIO; i++)
3014 if (multiplier_allowed_in_address_p (i, mem_mode))
3016 rat[mem_mode] = i;
3017 break;
3020 /* Compute the cost of various addressing modes. */
3021 acost = 0;
3022 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3023 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
/* I enumerates all 16 subsets of {sym, var, off, rat}; build a
   representative address of each shape and measure it.  */
3025 for (i = 0; i < 16; i++)
3027 sym_p = i & 1;
3028 var_p = (i >> 1) & 1;
3029 off_p = (i >> 2) & 1;
3030 rat_p = (i >> 3) & 1;
3032 addr = reg0;
3033 if (rat_p)
3034 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3035 gen_int_mode (rat[mem_mode], Pmode));
3037 if (var_p)
3038 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3040 if (sym_p)
3042 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3043 /* ??? We can run into trouble with some backends by presenting
3044 it with symbols which havn't been properly passed through
3045 targetm.encode_section_info. By setting the local bit, we
3046 enhance the probability of things working. */
3047 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3049 if (off_p)
3050 base = gen_rtx_fmt_e (CONST, Pmode,
3051 gen_rtx_fmt_ee (PLUS, Pmode,
3052 base,
3053 gen_int_mode (off[mem_mode],
3054 Pmode)));
3056 else if (off_p)
3057 base = gen_int_mode (off[mem_mode], Pmode);
3058 else
3059 base = NULL_RTX;
3061 if (base)
3062 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3064 start_sequence ();
3065 /* To avoid splitting addressing modes, pretend that no cse will
3066 follow. */
3067 old_cse_not_expected = cse_not_expected;
3068 cse_not_expected = true;
3069 addr = memory_address (mem_mode, addr);
3070 cse_not_expected = old_cse_not_expected;
3071 seq = get_insns ();
3072 end_sequence ();
3074 acost = seq_cost (seq);
3075 acost += address_cost (addr, mem_mode);
3077 if (!acost)
3078 acost = 1;
3079 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3082 /* On some targets, it is quite expensive to load symbol to a register,
3083 which makes addresses that contain symbols look much more expensive.
3084 However, the symbol will have to be loaded in any case before the
3085 loop (and quite likely we have it in register already), so it does not
3086 make much sense to penalize them too heavily. So make some final
3087 tweaks for the SYMBOL_PRESENT modes:
3089 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3090 var is cheaper, use this mode with small penalty.
3091 If VAR_PRESENT is true, try whether the mode with
3092 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3093 if this is the case, use it. */
3094 add_c = add_cost (Pmode);
3095 for (i = 0; i < 8; i++)
3097 var_p = i & 1;
3098 off_p = (i >> 1) & 1;
3099 rat_p = (i >> 2) & 1;
3101 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3102 if (var_p)
3103 acost += add_c;
3105 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3106 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3109 if (dump_file && (dump_flags & TDF_DETAILS))
3111 fprintf (dump_file, "Address costs:\n");
3113 for (i = 0; i < 16; i++)
3115 sym_p = i & 1;
3116 var_p = (i >> 1) & 1;
3117 off_p = (i >> 2) & 1;
3118 rat_p = (i >> 3) & 1;
3120 fprintf (dump_file, " ");
3121 if (sym_p)
3122 fprintf (dump_file, "sym + ");
3123 if (var_p)
3124 fprintf (dump_file, "var + ");
3125 if (off_p)
3126 fprintf (dump_file, "cst + ");
3127 if (rat_p)
3128 fprintf (dump_file, "rat * ");
3130 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3131 fprintf (dump_file, "index costs %d\n", acost);
3133 fprintf (dump_file, "\n");
/* Sign-extend OFFSET to the width of Pmode so the range check below
   works on the signed value.  */
3137 bits = GET_MODE_BITSIZE (Pmode);
3138 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3139 offset &= mask;
3140 if ((offset >> (bits - 1) & 1))
3141 offset |= ~mask;
3142 s_offset = offset;
3144 cost = 0;
3145 offset_p = (s_offset != 0
3146 && min_offset[mem_mode] <= s_offset
3147 && s_offset <= max_offset[mem_mode]);
3148 ratio_p = (ratio != 1
3149 && multiplier_allowed_in_address_p (ratio, mem_mode));
/* Parts that cannot be folded into the address must be computed
   separately; charge the extra multiplication/addition.  */
3151 if (ratio != 1 && !ratio_p)
3152 cost += multiply_by_cost (ratio, Pmode);
3154 if (s_offset && !offset_p && !symbol_present)
3155 cost += add_cost (Pmode);
3157 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3158 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3159 return new_cost (cost + acost, complexity);
3162 /* Estimates cost of forcing expression EXPR into a variable. */
3164 static comp_cost
3165 force_expr_to_var_cost (tree expr)
3167 static bool costs_initialized = false;
3168 static unsigned integer_cost;
3169 static unsigned symbol_cost;
3170 static unsigned address_cost;
3171 tree op0, op1;
3172 comp_cost cost0, cost1, cost;
3173 enum machine_mode mode;
/* On first call, measure the baseline costs of materializing an integer
   constant, a symbol address, and symbol + offset, using a dummy static
   variable.  */
3175 if (!costs_initialized)
3177 tree type = build_pointer_type (integer_type_node);
3178 tree var, addr;
3179 rtx x;
3181 var = create_tmp_var_raw (integer_type_node, "test_var");
3182 TREE_STATIC (var) = 1;
3183 x = produce_memory_decl_rtl (var, NULL);
3184 SET_DECL_RTL (var, x);
3186 integer_cost = computation_cost (build_int_cst (integer_type_node,
3187 2000));
3189 addr = build1 (ADDR_EXPR, type, var);
3190 symbol_cost = computation_cost (addr) + 1;
3192 address_cost
3193 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3194 addr,
3195 build_int_cst (sizetype, 2000))) + 1;
3196 if (dump_file && (dump_flags & TDF_DETAILS))
3198 fprintf (dump_file, "force_expr_to_var_cost:\n");
3199 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3200 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3201 fprintf (dump_file, " address %d\n", (int) address_cost);
3202 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3203 fprintf (dump_file, "\n");
3206 costs_initialized = true;
3209 STRIP_NOPS (expr);
/* SSA names and variables are already "in a variable"; no cost.  */
3211 if (SSA_VAR_P (expr))
3212 return zero_cost;
3214 if (TREE_INVARIANT (expr))
3216 if (TREE_CODE (expr) == INTEGER_CST)
3217 return new_cost (integer_cost, 0);
3219 if (TREE_CODE (expr) == ADDR_EXPR)
3221 tree obj = TREE_OPERAND (expr, 0);
3223 if (TREE_CODE (obj) == VAR_DECL
3224 || TREE_CODE (obj) == PARM_DECL
3225 || TREE_CODE (obj) == RESULT_DECL)
3226 return new_cost (symbol_cost, 0);
3229 return new_cost (address_cost, 0);
/* Recurse on the operands of the supported binary operations; anything
   else falls through to the spill-cost default.  */
3232 switch (TREE_CODE (expr))
3234 case POINTER_PLUS_EXPR:
3235 case PLUS_EXPR:
3236 case MINUS_EXPR:
3237 case MULT_EXPR:
3238 op0 = TREE_OPERAND (expr, 0);
3239 op1 = TREE_OPERAND (expr, 1);
3240 STRIP_NOPS (op0);
3241 STRIP_NOPS (op1);
3243 if (is_gimple_val (op0))
3244 cost0 = zero_cost;
3245 else
3246 cost0 = force_expr_to_var_cost (op0);
3248 if (is_gimple_val (op1))
3249 cost1 = zero_cost;
3250 else
3251 cost1 = force_expr_to_var_cost (op1);
3253 break;
3255 default:
3256 /* Just an arbitrary value, FIXME. */
3257 return new_cost (target_spill_cost, 0);
3260 mode = TYPE_MODE (TREE_TYPE (expr));
3261 switch (TREE_CODE (expr))
3263 case POINTER_PLUS_EXPR:
3264 case PLUS_EXPR:
3265 case MINUS_EXPR:
3266 cost = new_cost (add_cost (mode), 0);
3267 break;
3269 case MULT_EXPR:
/* Multiplication is only costed when one operand is a constant;
   a variable * variable product is treated as a spill.  */
3270 if (cst_and_fits_in_hwi (op0))
3271 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3272 else if (cst_and_fits_in_hwi (op1))
3273 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3274 else
3275 return new_cost (target_spill_cost, 0);
3276 break;
3278 default:
3279 gcc_unreachable ();
3282 cost = add_costs (cost, cost0);
3283 cost = add_costs (cost, cost1);
3285 /* Bound the cost by target_spill_cost. The parts of complicated
3286 computations often are either loop invariant or at least can
3287 be shared between several iv uses, so letting this grow without
3288 limits would not give reasonable results. */
3289 if (cost.cost > target_spill_cost)
3290 cost.cost = target_spill_cost;
3292 return cost;
3295 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3296 invariants the computation depends on. */
3298 static comp_cost
3299 force_var_cost (struct ivopts_data *data,
3300 tree expr, bitmap *depends_on)
3302 if (depends_on)
3304 fd_ivopts_data = data;
3305 walk_tree (&expr, find_depends, depends_on, NULL);
3308 return force_expr_to_var_cost (expr);
3311 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3312 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3313 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3314 invariants the computation depends on. */
3316 static comp_cost
3317 split_address_cost (struct ivopts_data *data,
3318 tree addr, bool *symbol_present, bool *var_present,
3319 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3321 tree core;
3322 HOST_WIDE_INT bitsize;
3323 HOST_WIDE_INT bitpos;
3324 tree toffset;
3325 enum machine_mode mode;
3326 int unsignedp, volatilep;
3328 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3329 &unsignedp, &volatilep, false);
/* Variable offset, a bit-field position, or a non-VAR_DECL base means
   the address cannot be split; treat it as an opaque variable and
   charge the spill cost.  */
3331 if (toffset != 0
3332 || bitpos % BITS_PER_UNIT != 0
3333 || TREE_CODE (core) != VAR_DECL)
3335 *symbol_present = false;
3336 *var_present = true;
3337 fd_ivopts_data = data;
3338 walk_tree (&addr, find_depends, depends_on, NULL);
3339 return new_cost (target_spill_cost, 0);
3342 *offset += bitpos / BITS_PER_UNIT;
/* Static/external decls become the symbol part, which is free here.  */
3343 if (TREE_STATIC (core)
3344 || DECL_EXTERNAL (core))
3346 *symbol_present = true;
3347 *var_present = false;
3348 return zero_cost;
3351 *symbol_present = false;
3352 *var_present = true;
3353 return zero_cost;
3356 /* Estimates cost of expressing difference of addresses E1 - E2 as
3357 var + symbol + offset. The value of offset is added to OFFSET,
3358 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3359 part is missing. DEPENDS_ON is a set of the invariants the computation
3360 depends on. */
3362 static comp_cost
3363 ptr_difference_cost (struct ivopts_data *data,
3364 tree e1, tree e2, bool *symbol_present, bool *var_present,
3365 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3367 HOST_WIDE_INT diff = 0;
3368 comp_cost cost;
3370 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
/* Best case: the difference folds to a compile-time constant.  */
3372 if (ptr_difference_const (e1, e2, &diff))
3374 *offset += diff;
3375 *symbol_present = false;
3376 *var_present = false;
3377 return zero_cost;
/* E1 - 0 reduces to splitting the address E1 itself.  */
3380 if (integer_zerop (e2))
3381 return split_address_cost (data, TREE_OPERAND (e1, 0),
3382 symbol_present, var_present, offset, depends_on);
/* Otherwise both addresses must be forced into variables and
   subtracted at runtime.  */
3384 *symbol_present = false;
3385 *var_present = true;
3387 cost = force_var_cost (data, e1, depends_on);
3388 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3389 cost.cost += add_cost (Pmode);
3391 return cost;
3394 /* Estimates cost of expressing difference E1 - E2 as
3395 var + symbol + offset. The value of offset is added to OFFSET,
3396 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3397 part is missing. DEPENDS_ON is a set of the invariants the computation
3398 depends on. */
3400 static comp_cost
3401 difference_cost (struct ivopts_data *data,
3402 tree e1, tree e2, bool *symbol_present, bool *var_present,
3403 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3405 comp_cost cost;
3406 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3407 unsigned HOST_WIDE_INT off1, off2;
/* Peel constant offsets off both operands; they are accumulated into
   *OFFSET and do not need runtime computation.  */
3409 e1 = strip_offset (e1, &off1);
3410 e2 = strip_offset (e2, &off2);
3411 *offset += off1 - off2;
3413 STRIP_NOPS (e1);
3414 STRIP_NOPS (e2);
/* Address differences are handled by the specialized routine.  */
3416 if (TREE_CODE (e1) == ADDR_EXPR)
3417 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3418 depends_on);
3419 *symbol_present = false;
3421 if (operand_equal_p (e1, e2, 0))
3423 *var_present = false;
3424 return zero_cost;
3426 *var_present = true;
3427 if (integer_zerop (e2))
3428 return force_var_cost (data, e1, depends_on);
/* 0 - E2 costs forcing E2 plus a negation (multiplication by -1).  */
3430 if (integer_zerop (e1))
3432 cost = force_var_cost (data, e2, depends_on);
3433 cost.cost += multiply_by_cost (-1, mode);
3435 return cost;
/* General case: force both operands and add the subtraction.  */
3438 cost = force_var_cost (data, e1, depends_on);
3439 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3440 cost.cost += add_cost (mode);
3442 return cost;
3445 /* Determines the cost of the computation by that USE is expressed
3446 from induction variable CAND. If ADDRESS_P is true, we just need
3447 to create an address from it, otherwise we want to get it into
3448 register. A set of invariants we depend on is stored in
3449 DEPENDS_ON. AT is the statement at that the value is computed. */
3451 static comp_cost
3452 get_computation_cost_at (struct ivopts_data *data,
3453 struct iv_use *use, struct iv_cand *cand,
3454 bool address_p, bitmap *depends_on, tree at)
3456 tree ubase = use->iv->base, ustep = use->iv->step;
3457 tree cbase, cstep;
3458 tree utype = TREE_TYPE (ubase), ctype;
3459 unsigned HOST_WIDE_INT cstepi, offset = 0;
3460 HOST_WIDE_INT ratio, aratio;
3461 bool var_present, symbol_present;
3462 comp_cost cost;
3463 unsigned n_sums;
3464 double_int rat;
3466 *depends_on = NULL;
3468 /* Only consider real candidates. */
3469 if (!cand->iv)
3470 return infinite_cost;
3472 cbase = cand->iv->base;
3473 cstep = cand->iv->step;
3474 ctype = TREE_TYPE (cbase);
3476 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3478 /* We do not have a precision to express the values of use. */
3479 return infinite_cost;
3482 if (address_p)
3484 /* Do not try to express address of an object with computation based
3485 on address of a different object. This may cause problems in rtl
3486 level alias analysis (that does not expect this to be happening,
3487 as this is illegal in C), and would be unlikely to be useful
3488 anyway. */
3489 if (use->iv->base_object
3490 && cand->iv->base_object
3491 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3492 return infinite_cost;
/* Mixed-precision computations take the slow fallback path that
   expands the full expression and measures it.  */
3495 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3497 /* TODO -- add direct handling of this case. */
3498 goto fallback;
3501 /* CSTEPI is removed from the offset in case statement is after the
3502 increment. If the step is not constant, we use zero instead.
3503 This is a bit imprecise (there is the extra addition), but
3504 redundancy elimination is likely to transform the code so that
3505 it uses value of the variable before increment anyway,
3506 so it is not that much unrealistic. */
3507 if (cst_and_fits_in_hwi (cstep))
3508 cstepi = int_cst_value (cstep);
3509 else
3510 cstepi = 0;
/* RATIO is the constant with ustep = RATIO * cstep; USE is not
   expressible from CAND without it.  */
3512 if (!constant_multiple_of (ustep, cstep, &rat))
3513 return infinite_cost;
3515 if (double_int_fits_in_shwi_p (rat))
3516 ratio = double_int_to_shwi (rat);
3517 else
3518 return infinite_cost;
3520 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3521 or ratio == 1, it is better to handle this like
3523 ubase - ratio * cbase + ratio * var
3525 (also holds in the case ratio == -1, TODO. */
3527 if (cst_and_fits_in_hwi (cbase))
3529 offset = - ratio * int_cst_value (cbase);
3530 cost = difference_cost (data,
3531 ubase, build_int_cst (utype, 0),
3532 &symbol_present, &var_present, &offset,
3533 depends_on);
3535 else if (ratio == 1)
3537 cost = difference_cost (data,
3538 ubase, cbase,
3539 &symbol_present, &var_present, &offset,
3540 depends_on);
3542 else
3544 cost = force_var_cost (data, cbase, depends_on);
3545 cost.cost += add_cost (TYPE_MODE (ctype));
3546 cost = add_costs (cost,
3547 difference_cost (data,
3548 ubase, build_int_cst (utype, 0),
3549 &symbol_present, &var_present,
3550 &offset, depends_on));
3553 /* If we are after the increment, the value of the candidate is higher by
3554 one iteration. */
3555 if (stmt_after_increment (data->current_loop, cand, at))
3556 offset -= ratio * cstepi;
3558 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3559 (symbol/var/const parts may be omitted). If we are looking for an address,
3560 find the cost of addressing this. */
3561 if (address_p)
3562 return add_costs (cost, get_address_cost (symbol_present, var_present,
3563 offset, ratio,
3564 TYPE_MODE (TREE_TYPE (*use->op_p))));
3566 /* Otherwise estimate the costs for computing the expression. */
3567 aratio = ratio > 0 ? ratio : -ratio;
3568 if (!symbol_present && !var_present && !offset)
3570 if (ratio != 1)
3571 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3573 return cost;
3576 if (aratio != 1)
3577 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
/* Count the additions needed to combine the remaining parts; a
   symbol + offset pair folds at compile time and needs no add.  */
3579 n_sums = 1;
3580 if (var_present
3581 /* Symbol + offset should be compile-time computable. */
3582 && (symbol_present || offset))
3583 n_sums++;
3585 /* Having offset does not affect runtime cost in case it is added to
3586 symbol, but it increases complexity. */
3587 if (offset)
3588 cost.complexity++;
3590 cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3591 return cost;
3593 fallback:
3595 /* Just get the expression, expand it and measure the cost. */
3596 tree comp = get_computation_at (data->current_loop, use, cand, at);
3598 if (!comp)
3599 return infinite_cost;
3601 if (address_p)
3602 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3604 return new_cost (computation_cost (comp), 0);
3608 /* Determines the cost of the computation by that USE is expressed
3609 from induction variable CAND. If ADDRESS_P is true, we just need
3610 to create an address from it, otherwise we want to get it into
3611 register. A set of invariants we depend on is stored in
3612 DEPENDS_ON. */
3614 static comp_cost
3615 get_computation_cost (struct ivopts_data *data,
3616 struct iv_use *use, struct iv_cand *cand,
3617 bool address_p, bitmap *depends_on)
3619 return get_computation_cost_at (data,
3620 use, cand, address_p, depends_on, use->stmt);
3623 /* Determines cost of basing replacement of USE on CAND in a generic
3624 expression.  Returns false when the pair is unusable.  */
3626 static bool
3627 determine_use_iv_cost_generic (struct ivopts_data *data,
3628 struct iv_use *use, struct iv_cand *cand)
3630 bitmap depends_on;
3631 comp_cost cost;
3633 /* The simple case first -- if we need to express value of the preserved
3634 original biv, the cost is 0. This also prevents us from counting the
3635 cost of increment twice -- once at this use and once in the cost of
3636 the candidate. */
3637 if (cand->pos == IP_ORIGINAL
3638 && cand->incremented_at == use->stmt)
3640 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3641 return true;
3644 cost = get_computation_cost (data, use, cand, false, &depends_on);
3645 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3647 return !infinite_cost_p (cost);
3650 /* Determines cost of basing replacement of USE on CAND in an address. */
3652 static bool
3653 determine_use_iv_cost_address (struct ivopts_data *data,
3654 struct iv_use *use, struct iv_cand *cand)
3656 bitmap depends_on;
3657 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3659 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3661 return !infinite_cost_p (cost);
3664 /* Computes value of candidate CAND at position AT in iteration NITER, and
3665 stores it to VAL. */
3667 static void
3668 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3669 aff_tree *val)
3671 aff_tree step, delta, nit;
3672 struct iv *iv = cand->iv;
3673 tree type = TREE_TYPE (iv->base);
3675 tree_to_aff_combination (iv->step, type, &step);
3676 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3677 aff_combination_convert (&nit, type);
3678 aff_combination_mult (&nit, &step, &delta);
3679 if (stmt_after_increment (loop, cand, at))
3680 aff_combination_add (&delta, &step);
3682 tree_to_aff_combination (iv->base, type, val);
3683 aff_combination_add (val, &delta);
3686 /* Returns period of induction variable iv. */
3688 static tree
3689 iv_period (struct iv *iv)
3691 tree step = iv->step, period, type;
3692 tree pow2div;
3694 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3696 /* Period of the iv is gcd (step, type range). Since type range is power
3697 of two, it suffices to determine the maximum power of two that divides
3698 step. */
3699 pow2div = num_ending_zeros (step);
3700 type = unsigned_type_for (TREE_TYPE (step));
3702 period = build_low_bits_mask (type,
3703 (TYPE_PRECISION (type)
3704 - tree_low_cst (pow2div, 1)));
3706 return period;
3709 /* Returns the comparison operator used when eliminating the iv USE. */
3711 static enum tree_code
3712 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3714 struct loop *loop = data->current_loop;
3715 basic_block ex_bb;
3716 edge exit;
3718 ex_bb = bb_for_stmt (use->stmt);
3719 exit = EDGE_SUCC (ex_bb, 0);
3720 if (flow_bb_inside_loop_p (loop, exit->dest))
3721 exit = EDGE_SUCC (ex_bb, 1);
3723 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3726 /* Check whether it is possible to express the condition in USE by comparison
3727 of candidate CAND. If so, store the value compared with to BOUND. */
3729 static bool
3730 may_eliminate_iv (struct ivopts_data *data,
3731 struct iv_use *use, struct iv_cand *cand, tree *bound)
3733 basic_block ex_bb;
3734 edge exit;
3735 tree nit, period;
3736 struct loop *loop = data->current_loop;
3737 aff_tree bnd;
3738 double_int period_value, max_niter;
/* Only candidates with a compile-time constant step have a computable
   period, which the elimination test below relies on.  */
3740 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3741 return false;
3743 /* For now works only for exits that dominate the loop latch. TODO -- extend
3744 for other conditions inside loop body. */
3745 ex_bb = bb_for_stmt (use->stmt);
3746 if (use->stmt != last_stmt (ex_bb)
3747 || TREE_CODE (use->stmt) != COND_EXPR)
3748 return false;
3749 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3750 return false;
/* Find the successor edge that leaves the loop; fail if both
   successors stay inside (then this condition is not a loop exit).  */
3752 exit = EDGE_SUCC (ex_bb, 0);
3753 if (flow_bb_inside_loop_p (loop, exit->dest))
3754 exit = EDGE_SUCC (ex_bb, 1);
3755 if (flow_bb_inside_loop_p (loop, exit->dest))
3756 return false;
3758 nit = niter_for_exit (data, exit);
3759 if (!nit)
3760 return false;
3762 /* Determine whether we may use the variable to test whether niter iterations
3763 elapsed. This is the case iff the period of the induction variable is
3764 greater than the number of iterations. */
3765 period = iv_period (cand->iv);
3766 if (!period)
3767 return false;
3769 /* Compare the period with the estimate on the number of iterations of the
3770 loop. */
3771 if (!estimated_loop_iterations (loop, true, &max_niter))
3772 return false;
3773 period_value = tree_to_double_int (period);
3774 if (double_int_ucmp (period_value, max_niter) <= 0)
3775 return false;
/* The candidate's value after NIT iterations becomes the bound the
   rewritten exit condition compares against.  */
3777 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3778 *bound = aff_combination_to_tree (&bnd);
3779 return true;
3782 /* Determines cost of basing replacement of USE on CAND in a condition. */
3784 static bool
3785 determine_use_iv_cost_condition (struct ivopts_data *data,
3786 struct iv_use *use, struct iv_cand *cand)
3788 tree bound = NULL_TREE;
3789 struct iv *cmp_iv;
3790 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3791 comp_cost elim_cost, express_cost, cost;
3792 bool ok;
3794 /* Only consider real candidates. */
3795 if (!cand->iv)
3797 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3798 return false;
3801 /* Try iv elimination. */
3802 if (may_eliminate_iv (data, use, cand, &bound))
3804 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3805 /* The bound is a loop invariant, so it will be only computed
3806 once. */
3807 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3809 else
3810 elim_cost = infinite_cost;
3812 /* Try expressing the original giv. If it is compared with an invariant,
3813 note that we cannot get rid of it. */
3814 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3815 gcc_assert (ok);
3817 express_cost = get_computation_cost (data, use, cand, false,
3818 &depends_on_express);
3819 fd_ivopts_data = data;
3820 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3822 /* Choose the better approach. */
/* The chosen dependency bitmap is handed over to set_use_iv_cost; the
   winner's pointer is cleared so the cleanup below frees only the loser.
   BOUND stays non-NULL only when elimination won.  */
3823 if (compare_costs (elim_cost, express_cost) < 0)
3825 cost = elim_cost;
3826 depends_on = depends_on_elim;
3827 depends_on_elim = NULL;
3829 else
3831 cost = express_cost;
3832 depends_on = depends_on_express;
3833 depends_on_express = NULL;
3834 bound = NULL_TREE;
3837 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3839 if (depends_on_elim)
3840 BITMAP_FREE (depends_on_elim);
3841 if (depends_on_express)
3842 BITMAP_FREE (depends_on_express);
3844 return !infinite_cost_p (cost);
3847 /* Determines cost of basing replacement of USE on CAND. Returns false
3848 if USE cannot be based on CAND. */
3850 static bool
3851 determine_use_iv_cost (struct ivopts_data *data,
3852 struct iv_use *use, struct iv_cand *cand)
3854 switch (use->type)
3856 case USE_NONLINEAR_EXPR:
3857 return determine_use_iv_cost_generic (data, use, cand);
3859 case USE_ADDRESS:
3860 return determine_use_iv_cost_address (data, use, cand);
3862 case USE_COMPARE:
3863 return determine_use_iv_cost_condition (data, use, cand);
3865 default:
3866 gcc_unreachable ();
3870 /* Determines costs of basing the use of the iv on an iv candidate. */
3872 static void
3873 determine_use_iv_costs (struct ivopts_data *data)
3875 unsigned i, j;
3876 struct iv_use *use;
3877 struct iv_cand *cand;
3878 bitmap to_clear = BITMAP_ALLOC (NULL);
3880 alloc_use_cost_map (data);
3882 for (i = 0; i < n_iv_uses (data); i++)
3884 use = iv_use (data, i);
/* With few candidates, evaluate each use against every candidate;
   otherwise restrict to the use's related candidates.  */
3886 if (data->consider_all_candidates)
3888 for (j = 0; j < n_iv_cands (data); j++)
3890 cand = iv_cand (data, j);
3891 determine_use_iv_cost (data, use, cand);
3894 else
3896 bitmap_iterator bi;
3898 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3900 cand = iv_cand (data, j);
3901 if (!determine_use_iv_cost (data, use, cand))
3902 bitmap_set_bit (to_clear, j);
3905 /* Remove the candidates for that the cost is infinite from
3906 the list of related candidates. */
3907 bitmap_and_compl_into (use->related_cands, to_clear);
3908 bitmap_clear (to_clear);
3912 BITMAP_FREE (to_clear);
3914 if (dump_file && (dump_flags & TDF_DETAILS))
3916 fprintf (dump_file, "Use-candidate costs:\n");
3918 for (i = 0; i < n_iv_uses (data); i++)
3920 use = iv_use (data, i);
3922 fprintf (dump_file, "Use %d:\n", i);
3923 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
3924 for (j = 0; j < use->n_map_members; j++)
/* Skip empty map slots and candidates with infinite cost.  */
3926 if (!use->cost_map[j].cand
3927 || infinite_cost_p (use->cost_map[j].cost))
3928 continue;
3930 fprintf (dump_file, " %d\t%d\t%d\t",
3931 use->cost_map[j].cand->id,
3932 use->cost_map[j].cost.cost,
3933 use->cost_map[j].cost.complexity);
3934 if (use->cost_map[j].depends_on)
3935 bitmap_print (dump_file,
3936 use->cost_map[j].depends_on, "","");
3937 fprintf (dump_file, "\n");
3940 fprintf (dump_file, "\n");
3942 fprintf (dump_file, "\n");
3946 /* Determines cost of the candidate CAND. */
3948 static void
3949 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3951 comp_cost cost_base;
3952 unsigned cost, cost_step;
3953 tree base;
3955 if (!cand->iv)
3957 cand->cost = 0;
3958 return;
3961 /* There are two costs associated with the candidate -- its increment
3962 and its initialization. The second is almost negligible for any loop
3963 that rolls enough, so we take it just very little into account. */
3965 base = cand->iv->base;
3966 cost_base = force_var_cost (data, base, NULL);
3967 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3969 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
3971 /* Prefer the original ivs unless we may gain something by replacing it.
3972 The reason is to makee debugging simpler; so this is not relevant for
3973 artificial ivs created by other optimization passes. */
3974 if (cand->pos != IP_ORIGINAL
3975 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3976 cost++;
3978 /* Prefer not to insert statements into latch unless there are some
3979 already (so that we do not create unnecessary jumps). */
3980 if (cand->pos == IP_END
3981 && empty_block_p (ip_end_pos (data->current_loop)))
3982 cost++;
3984 cand->cost = cost;
3987 /* Determines costs of computation of the candidates. */
3989 static void
3990 determine_iv_costs (struct ivopts_data *data)
3992 unsigned i;
3994 if (dump_file && (dump_flags & TDF_DETAILS))
3996 fprintf (dump_file, "Candidate costs:\n");
3997 fprintf (dump_file, " cand\tcost\n");
4000 for (i = 0; i < n_iv_cands (data); i++)
4002 struct iv_cand *cand = iv_cand (data, i);
4004 determine_iv_cost (data, cand);
4006 if (dump_file && (dump_flags & TDF_DETAILS))
4007 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4010 if (dump_file && (dump_flags & TDF_DETAILS))
4011 fprintf (dump_file, "\n");
4014 /* Calculates cost for having SIZE induction variables. */
4016 static unsigned
4017 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4019 /* We add size to the cost, so that we prefer eliminating ivs
4020 if possible. */
4021 return size + estimate_reg_pressure_cost (size, data->regs_used);
4024 /* For each size of the induction variable set determine the penalty. */
4026 static void
4027 determine_set_costs (struct ivopts_data *data)
4029 unsigned j, n;
4030 tree phi, op;
4031 struct loop *loop = data->current_loop;
4032 bitmap_iterator bi;
4034 /* We use the following model (definitely improvable, especially the
4035 cost function -- TODO):
4037 We estimate the number of registers available (using MD data), name it A.
4039 We estimate the number of registers used by the loop, name it U. This
4040 number is obtained as the number of loop phi nodes (not counting virtual
4041 registers and bivs) + the number of variables from outside of the loop.
4043 We set a reserve R (free regs that are used for temporary computations,
4044 etc.). For now the reserve is a constant 3.
4046 Let I be the number of induction variables.
4048 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4049 make a lot of ivs without a reason).
4050 -- if A - R < U + I <= A, the cost is I * PRES_COST
4051 -- if U + I > A, the cost is I * PRES_COST and
4052 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4054 if (dump_file && (dump_flags & TDF_DETAILS))
4056 fprintf (dump_file, "Global costs:\n");
4057 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4058 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost);
4059 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
/* Count the loop-header phi results that are gimple registers and not
   ivs handled by this pass: each occupies a register of its own.  */
4062 n = 0;
4063 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4065 op = PHI_RESULT (phi);
4067 if (!is_gimple_reg (op))
4068 continue;
4070 if (get_iv (data, op))
4071 continue;
4073 n++;
/* Loop invariants with nonlinear uses also need registers.  */
4076 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4078 struct version_info *info = ver_info (data, j);
4080 if (info->inv_id && info->has_nonlin_use)
4081 n++;
4084 data->regs_used = n;
4085 if (dump_file && (dump_flags & TDF_DETAILS))
4086 fprintf (dump_file, " regs_used %d\n", n);
4088 if (dump_file && (dump_flags & TDF_DETAILS))
4090 fprintf (dump_file, " cost for size:\n");
4091 fprintf (dump_file, " ivs\tcost\n");
4092 for (j = 0; j <= 2 * target_avail_regs; j++)
4093 fprintf (dump_file, " %d\t%d\n", j,
4094 ivopts_global_cost_for_size (data, j));
4095 fprintf (dump_file, "\n");
4099 /* Returns true if A is a cheaper cost pair than B. */
4101 static bool
4102 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4104 int cmp;
4106 if (!a)
4107 return false;
4109 if (!b)
4110 return true;
4112 cmp = compare_costs (a->cost, b->cost);
4113 if (cmp < 0)
4114 return true;
4116 if (cmp > 0)
4117 return false;
4119 /* In case the costs are the same, prefer the cheaper candidate. */
4120 if (a->cand->cost < b->cand->cost)
4121 return true;
4123 return false;
4126 /* Computes the cost field of IVS structure. */
4128 static void
4129 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4131 comp_cost cost = ivs->cand_use_cost;
4132 cost.cost += ivs->cand_cost;
4133 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4135 ivs->cost = cost;
4138 /* Remove invariants in set INVS to set IVS. */
4140 static void
4141 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4143 bitmap_iterator bi;
4144 unsigned iid;
4146 if (!invs)
4147 return;
4149 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4151 ivs->n_invariant_uses[iid]--;
4152 if (ivs->n_invariant_uses[iid] == 0)
4153 ivs->n_regs--;
4157 /* Set USE not to be expressed by any candidate in IVS. */
4159 static void
4160 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4161 struct iv_use *use)
4163 unsigned uid = use->id, cid;
4164 struct cost_pair *cp;
4166 cp = ivs->cand_for_use[uid];
4167 if (!cp)
4168 return;
4169 cid = cp->cand->id;
4171 ivs->bad_uses++;
4172 ivs->cand_for_use[uid] = NULL;
4173 ivs->n_cand_uses[cid]--;
4175 if (ivs->n_cand_uses[cid] == 0)
4177 bitmap_clear_bit (ivs->cands, cid);
4178 /* Do not count the pseudocandidates. */
4179 if (cp->cand->iv)
4180 ivs->n_regs--;
4181 ivs->n_cands--;
4182 ivs->cand_cost -= cp->cand->cost;
4184 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4187 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4189 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4190 iv_ca_recount_cost (data, ivs);
4193 /* Add invariants in set INVS to set IVS. */
4195 static void
4196 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4198 bitmap_iterator bi;
4199 unsigned iid;
4201 if (!invs)
4202 return;
4204 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4206 ivs->n_invariant_uses[iid]++;
4207 if (ivs->n_invariant_uses[iid] == 1)
4208 ivs->n_regs++;
4212 /* Set cost pair for USE in set IVS to CP. */
4214 static void
4215 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4216 struct iv_use *use, struct cost_pair *cp)
4218 unsigned uid = use->id, cid;
4220 if (ivs->cand_for_use[uid] == cp)
4221 return;
4223 if (ivs->cand_for_use[uid])
4224 iv_ca_set_no_cp (data, ivs, use);
4226 if (cp)
4228 cid = cp->cand->id;
4230 ivs->bad_uses--;
4231 ivs->cand_for_use[uid] = cp;
4232 ivs->n_cand_uses[cid]++;
4233 if (ivs->n_cand_uses[cid] == 1)
4235 bitmap_set_bit (ivs->cands, cid);
4236 /* Do not count the pseudocandidates. */
4237 if (cp->cand->iv)
4238 ivs->n_regs++;
4239 ivs->n_cands++;
4240 ivs->cand_cost += cp->cand->cost;
4242 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4245 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4246 iv_ca_set_add_invariants (ivs, cp->depends_on);
4247 iv_ca_recount_cost (data, ivs);
4251 /* Extend set IVS by expressing USE by some of the candidates in it
4252 if possible. */
4254 static void
4255 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4256 struct iv_use *use)
4258 struct cost_pair *best_cp = NULL, *cp;
4259 bitmap_iterator bi;
4260 unsigned i;
4262 gcc_assert (ivs->upto >= use->id);
4264 if (ivs->upto == use->id)
4266 ivs->upto++;
4267 ivs->bad_uses++;
4270 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4272 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4274 if (cheaper_cost_pair (cp, best_cp))
4275 best_cp = cp;
4278 iv_ca_set_cp (data, ivs, use, best_cp);
4281 /* Get cost for assignment IVS. */
4283 static comp_cost
4284 iv_ca_cost (struct iv_ca *ivs)
4286 return (ivs->bad_uses ? infinite_cost : ivs->cost);
4289 /* Returns true if all dependences of CP are among invariants in IVS. */
4291 static bool
4292 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4294 unsigned i;
4295 bitmap_iterator bi;
4297 if (!cp->depends_on)
4298 return true;
4300 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4302 if (ivs->n_invariant_uses[i] == 0)
4303 return false;
4306 return true;
4309 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4310 it before NEXT_CHANGE. */
4312 static struct iv_ca_delta *
4313 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4314 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4316 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4318 change->use = use;
4319 change->old_cp = old_cp;
4320 change->new_cp = new_cp;
4321 change->next_change = next_change;
4323 return change;
4326 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4327 are rewritten. */
4329 static struct iv_ca_delta *
4330 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4332 struct iv_ca_delta *last;
4334 if (!l2)
4335 return l1;
4337 if (!l1)
4338 return l2;
4340 for (last = l1; last->next_change; last = last->next_change)
4341 continue;
4342 last->next_change = l2;
4344 return l1;
4347 /* Returns candidate by that USE is expressed in IVS. */
4349 static struct cost_pair *
4350 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4352 return ivs->cand_for_use[use->id];
4355 /* Reverse the list of changes DELTA, forming the inverse to it. */
4357 static struct iv_ca_delta *
4358 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4360 struct iv_ca_delta *act, *next, *prev = NULL;
4361 struct cost_pair *tmp;
4363 for (act = delta; act; act = next)
4365 next = act->next_change;
4366 act->next_change = prev;
4367 prev = act;
4369 tmp = act->old_cp;
4370 act->old_cp = act->new_cp;
4371 act->new_cp = tmp;
4374 return prev;
4377 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4378 reverted instead. */
4380 static void
4381 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4382 struct iv_ca_delta *delta, bool forward)
4384 struct cost_pair *from, *to;
4385 struct iv_ca_delta *act;
4387 if (!forward)
4388 delta = iv_ca_delta_reverse (delta);
4390 for (act = delta; act; act = act->next_change)
4392 from = act->old_cp;
4393 to = act->new_cp;
4394 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4395 iv_ca_set_cp (data, ivs, act->use, to);
4398 if (!forward)
4399 iv_ca_delta_reverse (delta);
4402 /* Returns true if CAND is used in IVS. */
4404 static bool
4405 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4407 return ivs->n_cand_uses[cand->id] > 0;
4410 /* Returns number of induction variable candidates in the set IVS. */
4412 static unsigned
4413 iv_ca_n_cands (struct iv_ca *ivs)
4415 return ivs->n_cands;
4418 /* Free the list of changes DELTA. */
4420 static void
4421 iv_ca_delta_free (struct iv_ca_delta **delta)
4423 struct iv_ca_delta *act, *next;
4425 for (act = *delta; act; act = next)
4427 next = act->next_change;
4428 free (act);
4431 *delta = NULL;
4434 /* Allocates new iv candidates assignment. */
4436 static struct iv_ca *
4437 iv_ca_new (struct ivopts_data *data)
4439 struct iv_ca *nw = XNEW (struct iv_ca);
4441 nw->upto = 0;
4442 nw->bad_uses = 0;
4443 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4444 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4445 nw->cands = BITMAP_ALLOC (NULL);
4446 nw->n_cands = 0;
4447 nw->n_regs = 0;
4448 nw->cand_use_cost = zero_cost;
4449 nw->cand_cost = 0;
4450 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4451 nw->cost = zero_cost;
4453 return nw;
4456 /* Free memory occupied by the set IVS. */
4458 static void
4459 iv_ca_free (struct iv_ca **ivs)
4461 free ((*ivs)->cand_for_use);
4462 free ((*ivs)->n_cand_uses);
4463 BITMAP_FREE ((*ivs)->cands);
4464 free ((*ivs)->n_invariant_uses);
4465 free (*ivs);
4466 *ivs = NULL;
4469 /* Dumps IVS to FILE. */
4471 static void
4472 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4474 const char *pref = " invariants ";
4475 unsigned i;
4476 comp_cost cost = iv_ca_cost (ivs);
4478 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4479 bitmap_print (file, ivs->cands, " candidates ","\n");
4481 for (i = 1; i <= data->max_inv_id; i++)
4482 if (ivs->n_invariant_uses[i])
4484 fprintf (file, "%s%d", pref, i);
4485 pref = ", ";
4487 fprintf (file, "\n");
4490 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4491 new set, and store differences in DELTA. Number of induction variables
4492 in the new set is stored to N_IVS. */
4494 static comp_cost
4495 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4496 struct iv_cand *cand, struct iv_ca_delta **delta,
4497 unsigned *n_ivs)
4499 unsigned i;
4500 comp_cost cost;
4501 struct iv_use *use;
4502 struct cost_pair *old_cp, *new_cp;
4504 *delta = NULL;
4505 for (i = 0; i < ivs->upto; i++)
4507 use = iv_use (data, i);
4508 old_cp = iv_ca_cand_for_use (ivs, use);
4510 if (old_cp
4511 && old_cp->cand == cand)
4512 continue;
4514 new_cp = get_use_iv_cost (data, use, cand);
4515 if (!new_cp)
4516 continue;
4518 if (!iv_ca_has_deps (ivs, new_cp))
4519 continue;
4521 if (!cheaper_cost_pair (new_cp, old_cp))
4522 continue;
4524 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4527 iv_ca_delta_commit (data, ivs, *delta, true);
4528 cost = iv_ca_cost (ivs);
4529 if (n_ivs)
4530 *n_ivs = iv_ca_n_cands (ivs);
4531 iv_ca_delta_commit (data, ivs, *delta, false);
4533 return cost;
4536 /* Try narrowing set IVS by removing CAND. Return the cost of
4537 the new set and store the differences in DELTA. */
4539 static comp_cost
4540 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4541 struct iv_cand *cand, struct iv_ca_delta **delta)
4543 unsigned i, ci;
4544 struct iv_use *use;
4545 struct cost_pair *old_cp, *new_cp, *cp;
4546 bitmap_iterator bi;
4547 struct iv_cand *cnd;
4548 comp_cost cost;
4550 *delta = NULL;
/* For each use currently expressed by CAND, search the remaining
   candidates in IVS for the cheapest replacement.  */
4551 for (i = 0; i < n_iv_uses (data); i++)
4553 use = iv_use (data, i);
4555 old_cp = iv_ca_cand_for_use (ivs, use);
4556 if (old_cp->cand != cand)
4557 continue;
4559 new_cp = NULL;
/* The two branches differ only in the set of candidates scanned:
   all candidates in IVS, or just the use's related ones.  */
4561 if (data->consider_all_candidates)
4563 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4565 if (ci == cand->id)
4566 continue;
4568 cnd = iv_cand (data, ci);
4570 cp = get_use_iv_cost (data, use, cnd);
4571 if (!cp)
4572 continue;
4573 if (!iv_ca_has_deps (ivs, cp))
4574 continue;
4576 if (!cheaper_cost_pair (cp, new_cp))
4577 continue;
4579 new_cp = cp;
4582 else
4584 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4586 if (ci == cand->id)
4587 continue;
4589 cnd = iv_cand (data, ci);
4591 cp = get_use_iv_cost (data, use, cnd);
4592 if (!cp)
4593 continue;
4594 if (!iv_ca_has_deps (ivs, cp))
4595 continue;
4597 if (!cheaper_cost_pair (cp, new_cp))
4598 continue;
4600 new_cp = cp;
/* No replacement for this use -- CAND cannot be removed.  */
4604 if (!new_cp)
4606 iv_ca_delta_free (delta);
4607 return infinite_cost;
4610 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
/* Apply the changes temporarily to measure the cost, then revert.  */
4613 iv_ca_delta_commit (data, ivs, *delta, true);
4614 cost = iv_ca_cost (ivs);
4615 iv_ca_delta_commit (data, ivs, *delta, false);
4617 return cost;
4620 /* Try optimizing the set of candidates IVS by removing candidates different
4621 from to EXCEPT_CAND from it. Return cost of the new set, and store
4622 differences in DELTA. */
4624 static comp_cost
4625 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4626 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4628 bitmap_iterator bi;
4629 struct iv_ca_delta *act_delta, *best_delta;
4630 unsigned i;
4631 comp_cost best_cost, acost;
4632 struct iv_cand *cand;
4634 best_delta = NULL;
4635 best_cost = iv_ca_cost (ivs);
4637 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4639 cand = iv_cand (data, i);
4641 if (cand == except_cand)
4642 continue;
4644 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4646 if (compare_costs (acost, best_cost) < 0)
4648 best_cost = acost;
4649 iv_ca_delta_free (&best_delta);
4650 best_delta = act_delta;
4652 else
4653 iv_ca_delta_free (&act_delta);
4656 if (!best_delta)
4658 *delta = NULL;
4659 return best_cost;
4662 /* Recurse to possibly remove other unnecessary ivs. */
4663 iv_ca_delta_commit (data, ivs, best_delta, true);
4664 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4665 iv_ca_delta_commit (data, ivs, best_delta, false);
4666 *delta = iv_ca_delta_join (best_delta, *delta);
4667 return best_cost;
4670 /* Tries to extend the sets IVS in the best possible way in order
4671 to express the USE. */
4673 static bool
4674 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4675 struct iv_use *use)
4677 comp_cost best_cost, act_cost;
4678 unsigned i;
4679 bitmap_iterator bi;
4680 struct iv_cand *cand;
4681 struct iv_ca_delta *best_delta = NULL, *act_delta;
4682 struct cost_pair *cp;
4684 iv_ca_add_use (data, ivs, use);
4685 best_cost = iv_ca_cost (ivs);
/* Remember the assignment iv_ca_add_use found (if any) as the baseline
   delta, then detach it so alternatives are measured from scratch.  */
4687 cp = iv_ca_cand_for_use (ivs, use);
4688 if (cp)
4690 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4691 iv_ca_set_no_cp (data, ivs, use);
4694 /* First try important candidates not based on any memory object. Only if
4695 this fails, try the specific ones. Rationale -- in loops with many
4696 variables the best choice often is to use just one generic biv. If we
4697 added here many ivs specific to the uses, the optimization algorithm later
4698 would be likely to get stuck in a local minimum, thus causing us to create
4699 too many ivs. The approach from few ivs to more seems more likely to be
4700 successful -- starting from few ivs, replacing an expensive use by a
4701 specific iv should always be a win. */
4702 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4704 cand = iv_cand (data, i);
4706 if (cand->iv->base_object != NULL_TREE)
4707 continue;
4709 if (iv_ca_cand_used_p (ivs, cand))
4710 continue;
4712 cp = get_use_iv_cost (data, use, cand);
4713 if (!cp)
4714 continue;
/* Measure the cost with this candidate temporarily assigned, then
   revert; the delta keeps the tentative assignment.  */
4716 iv_ca_set_cp (data, ivs, use, cp);
4717 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4718 iv_ca_set_no_cp (data, ivs, use);
4719 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4721 if (compare_costs (act_cost, best_cost) < 0)
4723 best_cost = act_cost;
4725 iv_ca_delta_free (&best_delta);
4726 best_delta = act_delta;
4728 else
4729 iv_ca_delta_free (&act_delta);
/* Fall back to all candidates of the use only if no important generic
   candidate gave a finite cost.  */
4732 if (infinite_cost_p (best_cost))
4734 for (i = 0; i < use->n_map_members; i++)
4736 cp = use->cost_map + i;
4737 cand = cp->cand;
4738 if (!cand)
4739 continue;
4741 /* Already tried this. */
4742 if (cand->important && cand->iv->base_object == NULL_TREE)
4743 continue;
4745 if (iv_ca_cand_used_p (ivs, cand))
4746 continue;
4748 act_delta = NULL;
4749 iv_ca_set_cp (data, ivs, use, cp);
4750 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4751 iv_ca_set_no_cp (data, ivs, use);
4752 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4753 cp, act_delta);
4755 if (compare_costs (act_cost, best_cost) < 0)
4757 best_cost = act_cost;
4759 if (best_delta)
4760 iv_ca_delta_free (&best_delta);
4761 best_delta = act_delta;
4763 else
4764 iv_ca_delta_free (&act_delta);
/* Install the best alternative found.  */
4768 iv_ca_delta_commit (data, ivs, best_delta, true);
4769 iv_ca_delta_free (&best_delta);
4771 return !infinite_cost_p (best_cost);
4774 /* Finds an initial assignment of candidates to uses. */
4776 static struct iv_ca *
4777 get_initial_solution (struct ivopts_data *data)
4779 struct iv_ca *ivs = iv_ca_new (data);
4780 unsigned i;
4782 for (i = 0; i < n_iv_uses (data); i++)
4783 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4785 iv_ca_free (&ivs);
4786 return NULL;
4789 return ivs;
4792 /* Tries to improve set of induction variables IVS. */
4794 static bool
4795 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4797 unsigned i, n_ivs;
4798 comp_cost acost, best_cost = iv_ca_cost (ivs);
4799 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4800 struct iv_cand *cand;
4802 /* Try extending the set of induction variables by one. */
4803 for (i = 0; i < n_iv_cands (data); i++)
4805 cand = iv_cand (data, i);
4807 if (iv_ca_cand_used_p (ivs, cand))
4808 continue;
4810 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4811 if (!act_delta)
4812 continue;
4814 /* If we successfully added the candidate and the set is small enough,
4815 try optimizing it by removing other candidates. */
4816 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
/* The prune is evaluated on top of the temporarily committed
   extension; the two deltas are then joined into one change.  */
4818 iv_ca_delta_commit (data, ivs, act_delta, true);
4819 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4820 iv_ca_delta_commit (data, ivs, act_delta, false);
4821 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4824 if (compare_costs (acost, best_cost) < 0)
4826 best_cost = acost;
4827 iv_ca_delta_free (&best_delta);
4828 best_delta = act_delta;
4830 else
4831 iv_ca_delta_free (&act_delta);
4834 if (!best_delta)
4836 /* Try removing the candidates from the set instead. */
4837 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4839 /* Nothing more we can do. */
4840 if (!best_delta)
4841 return false;
/* Commit the winning change for real; its measured cost must match.  */
4844 iv_ca_delta_commit (data, ivs, best_delta, true);
4845 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4846 iv_ca_delta_free (&best_delta);
4847 return true;
4850 /* Attempts to find the optimal set of induction variables. We do simple
4851 greedy heuristic -- we try to replace at most one candidate in the selected
4852 solution and remove the unused ivs while this improves the cost. */
4854 static struct iv_ca *
4855 find_optimal_iv_set (struct ivopts_data *data)
4857 unsigned i;
4858 struct iv_ca *set;
4859 struct iv_use *use;
4861 /* Get the initial solution. */
4862 set = get_initial_solution (data);
4863 if (!set)
4865 if (dump_file && (dump_flags & TDF_DETAILS))
4866 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4867 return NULL;
4870 if (dump_file && (dump_flags & TDF_DETAILS))
4872 fprintf (dump_file, "Initial set of candidates:\n");
4873 iv_ca_dump (data, dump_file, set);
/* Iterate local improvements until a fixed point is reached.  */
4876 while (try_improve_iv_set (data, set))
4878 if (dump_file && (dump_flags & TDF_DETAILS))
4880 fprintf (dump_file, "Improved to:\n");
4881 iv_ca_dump (data, dump_file, set);
4885 if (dump_file && (dump_flags & TDF_DETAILS))
4887 comp_cost cost = iv_ca_cost (set);
4888 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
/* Record the chosen candidate on each use for the rewriting phase.  */
4891 for (i = 0; i < n_iv_uses (data); i++)
4893 use = iv_use (data, i);
4894 use->selected = iv_ca_cand_for_use (set, use)->cand;
4897 return set;
4900 /* Creates a new induction variable corresponding to CAND. */
4902 static void
4903 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4905 block_stmt_iterator incr_pos;
4906 tree base;
4907 bool after = false;
/* Pseudocandidates have no iv to materialize.  */
4909 if (!cand->iv)
4910 return;
/* Determine where the increment statement should be emitted.  */
4912 switch (cand->pos)
4914 case IP_NORMAL:
4915 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4916 break;
4918 case IP_END:
4919 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4920 after = true;
4921 break;
4923 case IP_ORIGINAL:
4924 /* Mark that the iv is preserved. */
4925 name_info (data, cand->var_before)->preserve_biv = true;
4926 name_info (data, cand->var_after)->preserve_biv = true;
4928 /* Rewrite the increment so that it uses var_before directly. */
4929 find_interesting_uses_op (data, cand->var_after)->selected = cand;
/* An original iv already exists; nothing to create.  */
4931 return;
4934 gimple_add_tmp_var (cand->var_before);
4935 add_referenced_var (cand->var_before);
4937 base = unshare_expr (cand->iv->base);
4939 create_iv (base, unshare_expr (cand->iv->step),
4940 cand->var_before, data->current_loop,
4941 &incr_pos, after, &cand->var_before, &cand->var_after);
4944 /* Creates new induction variables described in SET. */
4946 static void
4947 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4949 unsigned i;
4950 struct iv_cand *cand;
4951 bitmap_iterator bi;
4953 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4955 cand = iv_cand (data, i);
4956 create_new_iv (data, cand);
4960 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4961 is true, remove also the ssa name defined by the statement. */
4963 static void
4964 remove_statement (tree stmt, bool including_defined_name)
4966 if (TREE_CODE (stmt) == PHI_NODE)
4968 remove_phi_node (stmt, NULL_TREE, including_defined_name);
4970 else
4972 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4974 bsi_remove (&bsi, true);
4975 release_defs (stmt);
4979 /* Rewrites USE (definition of iv used in a nonlinear expression)
4980 using candidate CAND. */
4982 static void
4983 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4984 struct iv_use *use, struct iv_cand *cand)
4986 tree comp;
4987 tree op, tgt, ass;
4988 block_stmt_iterator bsi;
4990 /* An important special case -- if we are asked to express value of
4991 the original iv by itself, just exit; there is no need to
4992 introduce a new computation (that might also need casting the
4993 variable to unsigned and back). */
4994 if (cand->pos == IP_ORIGINAL
4995 && cand->incremented_at == use->stmt)
4997 tree step, ctype, utype;
4998 enum tree_code incr_code = PLUS_EXPR;
5000 gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
5001 gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
5003 step = cand->iv->step;
5004 ctype = TREE_TYPE (step);
5005 utype = TREE_TYPE (cand->var_after);
/* A negated step means the biv counts downward; express the increment
   as a MINUS_EXPR by the (un-negated) inner operand instead.  */
5006 if (TREE_CODE (step) == NEGATE_EXPR)
5008 incr_code = MINUS_EXPR;
5009 step = TREE_OPERAND (step, 0);
5012 /* Check whether we may leave the computation unchanged.
5013 This is the case only if it does not rely on other
5014 computations in the loop -- otherwise, the computation
5015 we rely upon may be removed in remove_unused_ivs,
5016 thus leading to ICE. */
/* OP ends up as the non-biv operand of the increment, or NULL_TREE if
   the RHS does not have the recognized var +- something shape.  */
5017 op = GIMPLE_STMT_OPERAND (use->stmt, 1);
5018 if (TREE_CODE (op) == PLUS_EXPR
5019 || TREE_CODE (op) == MINUS_EXPR
5020 || TREE_CODE (op) == POINTER_PLUS_EXPR)
5022 if (TREE_OPERAND (op, 0) == cand->var_before)
5023 op = TREE_OPERAND (op, 1)
5024 else if (TREE_CODE (op) != MINUS_EXPR
5025 && TREE_OPERAND (op, 1) == cand->var_before)
5026 op = TREE_OPERAND (op, 0);
5027 else
5028 op = NULL_TREE;
5030 else
5031 op = NULL_TREE;
/* Self-contained increments (by a constant or by the step itself) are
   safe to keep as they are.  */
5033 if (op
5034 && (TREE_CODE (op) == INTEGER_CST
5035 || operand_equal_p (op, step, 0)))
5036 return;
5038 /* Otherwise, add the necessary computations to express
5039 the iv. */
5040 op = fold_convert (ctype, cand->var_before);
5041 comp = fold_convert (utype,
5042 build2 (incr_code, ctype, op,
5043 unshare_expr (step)));
5045 else
5047 comp = get_computation (data->current_loop, use, cand);
5048 gcc_assert (comp != NULL_TREE);
/* Determine the SSA name being defined (TGT) and the insertion point for
   the gimplified form of COMP.  */
5051 switch (TREE_CODE (use->stmt))
5053 case PHI_NODE:
5054 tgt = PHI_RESULT (use->stmt);
5056 /* If we should keep the biv, do not replace it. */
5057 if (name_info (data, tgt)->preserve_biv)
5058 return;
5060 bsi = bsi_after_labels (bb_for_stmt (use->stmt));
5061 break;
5063 case GIMPLE_MODIFY_STMT:
5064 tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5065 bsi = bsi_for_stmt (use->stmt);
5066 break;
5068 default:
5069 gcc_unreachable ();
5072 op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5073 true, BSI_SAME_STMT);
/* A phi node is replaced wholesale by an assignment at the top of the
   block; for an ordinary modify statement only the RHS is swapped.  */
5075 if (TREE_CODE (use->stmt) == PHI_NODE)
5077 ass = build_gimple_modify_stmt (tgt, op);
5078 bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
5079 remove_statement (use->stmt, false);
5080 SSA_NAME_DEF_STMT (tgt) = ass;
5082 else
5083 GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
5086 /* Replaces ssa name in index IDX by its basic variable. Callback for
5087 for_each_index. */
5089 static bool
5090 idx_remove_ssa_names (tree base, tree *idx,
5091 void *data ATTRIBUTE_UNUSED)
5093 tree *op;
5095 if (TREE_CODE (*idx) == SSA_NAME)
5096 *idx = SSA_NAME_VAR (*idx);
5098 if (TREE_CODE (base) == ARRAY_REF)
5100 op = &TREE_OPERAND (base, 2);
5101 if (*op
5102 && TREE_CODE (*op) == SSA_NAME)
5103 *op = SSA_NAME_VAR (*op);
5104 op = &TREE_OPERAND (base, 3);
5105 if (*op
5106 && TREE_CODE (*op) == SSA_NAME)
5107 *op = SSA_NAME_VAR (*op);
5110 return true;
5113 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5115 static tree
5116 unshare_and_remove_ssa_names (tree ref)
5118 ref = unshare_expr (ref);
5119 for_each_index (&ref, idx_remove_ssa_names, NULL);
5121 return ref;
5124 /* Extract the alias analysis info for the memory reference REF. There are
5125 several ways how this information may be stored and what precisely is
5126 its semantics depending on the type of the reference, but there always is
5127 somewhere hidden one _DECL node that is used to determine the set of
5128 virtual operands for the reference. The code below deciphers this jungle
5129 and extracts this single useful piece of information. */
5131 static tree
5132 get_ref_tag (tree ref, tree orig)
5134 tree var = get_base_address (ref);
5135 tree aref = NULL_TREE, tag, sv;
5136 HOST_WIDE_INT offset, size, maxsize;
/* Walk the component chain of ORIG looking for a base whose variable has
   subvariables; such a base must be returned directly so the correct
   virtual operands are generated.  NOTE(review): the loop breaks on
   `ref', a parameter that is non-NULL for the visible caller
   (copy_ref_info), so at most one iteration ever runs; `aref' may have
   been intended -- confirm against upstream history.  */
5138 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5140 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5141 if (ref)
5142 break;
5145 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5146 return aref;
/* No single base object could be identified.  */
5148 if (!var)
5149 return NULL_TREE;
5151 if (TREE_CODE (var) == INDIRECT_REF)
5153 /* If the base is a dereference of a pointer, first check its name memory
5154 tag. If it does not have one, use its symbol memory tag. */
5155 var = TREE_OPERAND (var, 0);
5156 if (TREE_CODE (var) != SSA_NAME)
5157 return NULL_TREE;
5159 if (SSA_NAME_PTR_INFO (var))
5161 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5162 if (tag)
5163 return tag;
5166 var = SSA_NAME_VAR (var);
5167 tag = symbol_mem_tag (var);
5168 gcc_assert (tag != NULL_TREE);
5169 return tag;
5171 else
/* The base is a declaration: prefer its symbol memory tag, falling back
   to the declaration itself.  */
5173 if (!DECL_P (var))
5174 return NULL_TREE;
5176 tag = symbol_mem_tag (var);
5177 if (tag)
5178 return tag;
5180 return var;
5184 /* Copies the reference information from OLD_REF to NEW_REF. */
5186 static void
5187 copy_ref_info (tree new_ref, tree old_ref)
5189 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5190 copy_mem_ref_info (new_ref, old_ref);
5191 else
5193 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5194 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5198 /* Rewrites USE (address that is an iv) using candidate CAND. */
5200 static void
5201 rewrite_use_address (struct ivopts_data *data,
5202 struct iv_use *use, struct iv_cand *cand)
5204 aff_tree aff;
5205 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5206 tree ref;
5207 bool ok;
5209 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5210 gcc_assert (ok);
5211 unshare_aff_combination (&aff);
5213 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5214 copy_ref_info (ref, *use->op_p);
5215 *use->op_p = ref;
5218 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5219 candidate CAND. */
5221 static void
5222 rewrite_use_compare (struct ivopts_data *data,
5223 struct iv_use *use, struct iv_cand *cand)
5225 tree comp, *var_p, op, bound;
5226 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5227 enum tree_code compare;
5228 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5229 bool ok;
5231 bound = cp->value;
5232 if (bound)
5234 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5235 tree var_type = TREE_TYPE (var);
5237 compare = iv_elimination_compare (data, use);
5238 bound = unshare_expr (fold_convert (var_type, bound));
5239 op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5240 true, BSI_SAME_STMT);
5242 *use->op_p = build2 (compare, boolean_type_node, var, op);
5243 return;
5246 /* The induction variable elimination failed; just express the original
5247 giv. */
5248 comp = get_computation (data->current_loop, use, cand);
5249 gcc_assert (comp != NULL_TREE);
5251 ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5252 gcc_assert (ok);
5254 *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5255 true, BSI_SAME_STMT);
5258 /* Rewrites USE using candidate CAND. */
5260 static void
5261 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5263 push_stmt_changes (&use->stmt);
5265 switch (use->type)
5267 case USE_NONLINEAR_EXPR:
5268 rewrite_use_nonlinear_expr (data, use, cand);
5269 break;
5271 case USE_ADDRESS:
5272 rewrite_use_address (data, use, cand);
5273 break;
5275 case USE_COMPARE:
5276 rewrite_use_compare (data, use, cand);
5277 break;
5279 default:
5280 gcc_unreachable ();
5283 pop_stmt_changes (&use->stmt);
5286 /* Rewrite the uses using the selected induction variables. */
5288 static void
5289 rewrite_uses (struct ivopts_data *data)
5291 unsigned i;
5292 struct iv_cand *cand;
5293 struct iv_use *use;
5295 for (i = 0; i < n_iv_uses (data); i++)
5297 use = iv_use (data, i);
5298 cand = use->selected;
5299 gcc_assert (cand);
5301 rewrite_use (data, use, cand);
5305 /* Removes the ivs that are not used after rewriting. */
5307 static void
5308 remove_unused_ivs (struct ivopts_data *data)
5310 unsigned j;
5311 bitmap_iterator bi;
5313 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5315 struct version_info *info;
5317 info = ver_info (data, j);
5318 if (info->iv
5319 && !integer_zerop (info->iv->step)
5320 && !info->inv_id
5321 && !info->iv->have_use_for
5322 && !info->preserve_biv)
5323 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5327 /* Frees data allocated by the optimization of a single loop. */
5329 static void
5330 free_loop_data (struct ivopts_data *data)
5332 unsigned i, j;
5333 bitmap_iterator bi;
5334 tree obj;
5336 if (data->niters)
5338 pointer_map_destroy (data->niters);
5339 data->niters = NULL;
5342 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5344 struct version_info *info;
5346 info = ver_info (data, i);
5347 if (info->iv)
5348 free (info->iv);
5349 info->iv = NULL;
5350 info->has_nonlin_use = false;
5351 info->preserve_biv = false;
5352 info->inv_id = 0;
5354 bitmap_clear (data->relevant);
5355 bitmap_clear (data->important_candidates);
5357 for (i = 0; i < n_iv_uses (data); i++)
5359 struct iv_use *use = iv_use (data, i);
5361 free (use->iv);
5362 BITMAP_FREE (use->related_cands);
5363 for (j = 0; j < use->n_map_members; j++)
5364 if (use->cost_map[j].depends_on)
5365 BITMAP_FREE (use->cost_map[j].depends_on);
5366 free (use->cost_map);
5367 free (use);
5369 VEC_truncate (iv_use_p, data->iv_uses, 0);
5371 for (i = 0; i < n_iv_cands (data); i++)
5373 struct iv_cand *cand = iv_cand (data, i);
5375 if (cand->iv)
5376 free (cand->iv);
5377 if (cand->depends_on)
5378 BITMAP_FREE (cand->depends_on);
5379 free (cand);
5381 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5383 if (data->version_info_size < num_ssa_names)
5385 data->version_info_size = 2 * num_ssa_names;
5386 free (data->version_info);
5387 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5390 data->max_inv_id = 0;
5392 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5393 SET_DECL_RTL (obj, NULL_RTX);
5395 VEC_truncate (tree, decl_rtl_to_reset, 0);
5398 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5399 loop tree. */
5401 static void
5402 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5404 free_loop_data (data);
5405 free (data->version_info);
5406 BITMAP_FREE (data->relevant);
5407 BITMAP_FREE (data->important_candidates);
5409 VEC_free (tree, heap, decl_rtl_to_reset);
5410 VEC_free (iv_use_p, heap, data->iv_uses);
5411 VEC_free (iv_cand_p, heap, data->iv_candidates);
5414 /* Optimizes the LOOP. Returns true if anything changed. */
5416 static bool
5417 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5419 bool changed = false;
5420 struct iv_ca *iv_ca;
5421 edge exit;
/* Per-loop state in DATA must have been cleared by the previous
   invocation's free_loop_data.  */
5423 gcc_assert (!data->niters);
5424 data->current_loop = loop;
/* The exit lookup below is for dump output only; it runs just when
   detailed dumping is enabled.  */
5426 if (dump_file && (dump_flags & TDF_DETAILS))
5428 fprintf (dump_file, "Processing loop %d\n", loop->num);
5430 exit = single_dom_exit (loop);
5431 if (exit)
5433 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5434 exit->src->index, exit->dest->index);
5435 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5436 fprintf (dump_file, "\n");
5439 fprintf (dump_file, "\n");
5442 /* For each ssa name determines whether it behaves as an induction variable
5443 in some loop. */
5444 if (!find_induction_variables (data))
5445 goto finish;
5447 /* Finds interesting uses (item 1). */
5448 find_interesting_uses (data);
/* Bail out when there are too many uses -- the analysis below would be
   too expensive.  */
5449 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5450 goto finish;
5452 /* Finds candidates for the induction variables (item 2). */
5453 find_iv_candidates (data);
5455 /* Calculates the costs (item 3, part 1). */
5456 determine_use_iv_costs (data);
5457 determine_iv_costs (data);
5458 determine_set_costs (data);
5460 /* Find the optimal set of induction variables (item 3, part 2). */
5461 iv_ca = find_optimal_iv_set (data);
5462 if (!iv_ca)
5463 goto finish;
5464 changed = true;
5466 /* Create the new induction variables (item 4, part 1). */
5467 create_new_ivs (data, iv_ca);
5468 iv_ca_free (&iv_ca);
5470 /* Rewrite the uses (item 4, part 2). */
5471 rewrite_uses (data);
5473 /* Remove the ivs that are unused after rewriting. */
5474 remove_unused_ivs (data);
5476 /* We have changed the structure of induction variables; it might happen
5477 that definitions in the scev database refer to some of them that were
5478 eliminated. */
5479 scev_reset ();
5481 finish:
/* All paths release the per-loop data, successful or not.  */
5482 free_loop_data (data);
5484 return changed;
5487 /* Main entry point. Optimizes induction variables in loops. */
5489 void
5490 tree_ssa_iv_optimize (void)
5492 struct loop *loop;
5493 struct ivopts_data data;
5494 loop_iterator li;
5496 tree_ssa_iv_optimize_init (&data);
5498 /* Optimize the loops starting with the innermost ones. */
5499 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5501 if (dump_file && (dump_flags & TDF_DETAILS))
5502 flow_loop_dump (loop, dump_file, NULL, 1);
5504 tree_ssa_iv_optimize_loop (&data, loop);
5507 tree_ssa_iv_optimize_finalize (&data);