/* Provenance of this copy:
   2008-05-30  Vladimir Makarov  <vmakarov@redhat.com>
   [official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
   blob 0247a1e71f5ee0e430d28b70e9a9afa756dc6caa  */
/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "tree.h"
69 #include "rtl.h"
70 #include "tm_p.h"
71 #include "hard-reg-set.h"
72 #include "basic-block.h"
73 #include "output.h"
74 #include "diagnostic.h"
75 #include "tree-flow.h"
76 #include "tree-dump.h"
77 #include "timevar.h"
78 #include "cfgloop.h"
79 #include "varray.h"
80 #include "expr.h"
81 #include "tree-pass.h"
82 #include "ggc.h"
83 #include "insn-config.h"
84 #include "recog.h"
85 #include "pointer-set.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
93 #include "target.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
103 /* Representation of the induction variable. */
104 struct iv
106 tree base; /* Initial value of the iv. */
107 tree base_object; /* A memory object to that the induction variable points. */
108 tree step; /* Step of the iv (constant only). */
109 tree ssa_name; /* The ssa name with the value. */
110 bool biv_p; /* Is it a biv? */
111 bool have_use_for; /* Do we already have a use for it? */
112 unsigned use_id; /* The identifier in the use if it is the case. */
115 /* Per-ssa version information (induction variable descriptions, etc.). */
116 struct version_info
118 tree name; /* The ssa name. */
119 struct iv *iv; /* Induction variable description. */
120 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 unsigned inv_id; /* Id of an invariant. */
123 bool preserve_biv; /* For the original biv, whether to preserve it. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
134 /* Cost of a computation. */
135 typedef struct
137 unsigned cost; /* The runtime cost. */
138 unsigned complexity; /* The estimate of the complexity of the code for
139 the computation (in no concrete units --
140 complexity field should be larger for more
141 complex expressions and addressing modes). */
142 } comp_cost;
144 static const comp_cost zero_cost = {0, 0};
145 static const comp_cost infinite_cost = {INFTY, INFTY};
147 /* The candidate - cost pair. */
148 struct cost_pair
150 struct iv_cand *cand; /* The candidate. */
151 comp_cost cost; /* The cost. */
152 bitmap depends_on; /* The list of invariants that have to be
153 preserved. */
154 tree value; /* For final value elimination, the expression for
155 the final value of the iv. For iv elimination,
156 the new bound to compare with. */
159 /* Use. */
160 struct iv_use
162 unsigned id; /* The id of the use. */
163 enum use_type type; /* Type of the use. */
164 struct iv *iv; /* The induction variable it is based on. */
165 tree stmt; /* Statement in that it occurs. */
166 tree *op_p; /* The place where it occurs. */
167 bitmap related_cands; /* The set of "related" iv candidates, plus the common
168 important ones. */
170 unsigned n_map_members; /* Number of candidates in the cost_map list. */
171 struct cost_pair *cost_map;
172 /* The costs wrto the iv candidates. */
174 struct iv_cand *selected;
175 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
186 /* The induction variable candidate. */
187 struct iv_cand
189 unsigned id; /* The number of the candidate. */
190 bool important; /* Whether this is an "important" candidate, i.e. such
191 that it should be considered by all uses. */
192 enum iv_position pos; /* Where it is computed. */
193 tree incremented_at; /* For original biv, the statement where it is
194 incremented. */
195 tree var_before; /* The variable used for it before increment. */
196 tree var_after; /* The variable used for it after increment. */
197 struct iv *iv; /* The value of the candidate. NULL for
198 "pseudocandidate" used to indicate the possibility
199 to replace the final value of an iv by direct
200 computation of the value. */
201 unsigned cost; /* Cost of the candidate. */
202 bitmap depends_on; /* The list of invariants that are used in step of the
203 biv. */
206 /* The data used by the induction variable optimizations. */
208 typedef struct iv_use *iv_use_p;
209 DEF_VEC_P(iv_use_p);
210 DEF_VEC_ALLOC_P(iv_use_p,heap);
212 typedef struct iv_cand *iv_cand_p;
213 DEF_VEC_P(iv_cand_p);
214 DEF_VEC_ALLOC_P(iv_cand_p,heap);
216 struct ivopts_data
218 /* The currently optimized loop. */
219 struct loop *current_loop;
221 /* Number of registers used in it. */
222 unsigned regs_used;
224 /* Numbers of iterations for all exits of the current loop. */
225 struct pointer_map_t *niters;
227 /* The size of version_info array allocated. */
228 unsigned version_info_size;
230 /* The array of information for the ssa names. */
231 struct version_info *version_info;
233 /* The bitmap of indices in version_info whose value was changed. */
234 bitmap relevant;
236 /* The maximum invariant id. */
237 unsigned max_inv_id;
239 /* The uses of induction variables. */
240 VEC(iv_use_p,heap) *iv_uses;
242 /* The candidates. */
243 VEC(iv_cand_p,heap) *iv_candidates;
245 /* A bitmap of important candidates. */
246 bitmap important_candidates;
248 /* Whether to consider just related and important candidates when replacing a
249 use. */
250 bool consider_all_candidates;
253 /* An assignment of iv candidates to uses. */
255 struct iv_ca
257 /* The number of uses covered by the assignment. */
258 unsigned upto;
260 /* Number of uses that cannot be expressed by the candidates in the set. */
261 unsigned bad_uses;
263 /* Candidate assigned to a use, together with the related costs. */
264 struct cost_pair **cand_for_use;
266 /* Number of times each candidate is used. */
267 unsigned *n_cand_uses;
269 /* The candidates used. */
270 bitmap cands;
272 /* The number of candidates in the set. */
273 unsigned n_cands;
275 /* Total number of registers needed. */
276 unsigned n_regs;
278 /* Total cost of expressing uses. */
279 comp_cost cand_use_cost;
281 /* Total cost of candidates. */
282 unsigned cand_cost;
284 /* Number of times each invariant is used. */
285 unsigned *n_invariant_uses;
287 /* Total cost of the assignment. */
288 comp_cost cost;
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
308 /* Bound on number of candidates below that all candidates are considered. */
310 #define CONSIDER_ALL_CANDIDATES_BOUND \
311 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
313 /* If there are more iv occurrences, we just give up (it is quite unlikely that
314 optimizing such a loop would help, and it would take ages). */
316 #define MAX_CONSIDERED_USES \
317 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
319 /* If there are at most this number of ivs in the set, try removing unnecessary
320 ivs from the set always. */
322 #define ALWAYS_PRUNE_CAND_SET_BOUND \
323 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
325 /* The list of trees for that the decl_rtl field must be reset is stored
326 here. */
328 static VEC(tree,heap) *decl_rtl_to_reset;
330 /* Number of uses recorded in DATA. */
332 static inline unsigned
333 n_iv_uses (struct ivopts_data *data)
335 return VEC_length (iv_use_p, data->iv_uses);
338 /* Ith use recorded in DATA. */
340 static inline struct iv_use *
341 iv_use (struct ivopts_data *data, unsigned i)
343 return VEC_index (iv_use_p, data->iv_uses, i);
346 /* Number of candidates recorded in DATA. */
348 static inline unsigned
349 n_iv_cands (struct ivopts_data *data)
351 return VEC_length (iv_cand_p, data->iv_candidates);
354 /* Ith candidate recorded in DATA. */
356 static inline struct iv_cand *
357 iv_cand (struct ivopts_data *data, unsigned i)
359 return VEC_index (iv_cand_p, data->iv_candidates, i);
362 /* The single loop exit if it dominates the latch, NULL otherwise. */
364 edge
365 single_dom_exit (struct loop *loop)
367 edge exit = single_exit (loop);
369 if (!exit)
370 return NULL;
372 if (!just_once_each_iteration_p (loop, exit->src))
373 return NULL;
375 return exit;
378 /* Dumps information about the induction variable IV to FILE. */
380 extern void dump_iv (FILE *, struct iv *);
381 void
382 dump_iv (FILE *file, struct iv *iv)
384 if (iv->ssa_name)
386 fprintf (file, "ssa name ");
387 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
388 fprintf (file, "\n");
391 fprintf (file, " type ");
392 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
393 fprintf (file, "\n");
395 if (iv->step)
397 fprintf (file, " base ");
398 print_generic_expr (file, iv->base, TDF_SLIM);
399 fprintf (file, "\n");
401 fprintf (file, " step ");
402 print_generic_expr (file, iv->step, TDF_SLIM);
403 fprintf (file, "\n");
405 else
407 fprintf (file, " invariant ");
408 print_generic_expr (file, iv->base, TDF_SLIM);
409 fprintf (file, "\n");
412 if (iv->base_object)
414 fprintf (file, " base object ");
415 print_generic_expr (file, iv->base_object, TDF_SLIM);
416 fprintf (file, "\n");
419 if (iv->biv_p)
420 fprintf (file, " is a biv\n");
423 /* Dumps information about the USE to FILE. */
425 extern void dump_use (FILE *, struct iv_use *);
426 void
427 dump_use (FILE *file, struct iv_use *use)
429 fprintf (file, "use %d\n", use->id);
431 switch (use->type)
433 case USE_NONLINEAR_EXPR:
434 fprintf (file, " generic\n");
435 break;
437 case USE_ADDRESS:
438 fprintf (file, " address\n");
439 break;
441 case USE_COMPARE:
442 fprintf (file, " compare\n");
443 break;
445 default:
446 gcc_unreachable ();
449 fprintf (file, " in statement ");
450 print_generic_expr (file, use->stmt, TDF_SLIM);
451 fprintf (file, "\n");
453 fprintf (file, " at position ");
454 if (use->op_p)
455 print_generic_expr (file, *use->op_p, TDF_SLIM);
456 fprintf (file, "\n");
458 dump_iv (file, use->iv);
460 if (use->related_cands)
462 fprintf (file, " related candidates ");
463 dump_bitmap (file, use->related_cands);
467 /* Dumps information about the uses to FILE. */
469 extern void dump_uses (FILE *, struct ivopts_data *);
470 void
471 dump_uses (FILE *file, struct ivopts_data *data)
473 unsigned i;
474 struct iv_use *use;
476 for (i = 0; i < n_iv_uses (data); i++)
478 use = iv_use (data, i);
480 dump_use (file, use);
481 fprintf (file, "\n");
485 /* Dumps information about induction variable candidate CAND to FILE. */
487 extern void dump_cand (FILE *, struct iv_cand *);
488 void
489 dump_cand (FILE *file, struct iv_cand *cand)
491 struct iv *iv = cand->iv;
493 fprintf (file, "candidate %d%s\n",
494 cand->id, cand->important ? " (important)" : "");
496 if (cand->depends_on)
498 fprintf (file, " depends on ");
499 dump_bitmap (file, cand->depends_on);
502 if (!iv)
504 fprintf (file, " final value replacement\n");
505 return;
508 switch (cand->pos)
510 case IP_NORMAL:
511 fprintf (file, " incremented before exit test\n");
512 break;
514 case IP_END:
515 fprintf (file, " incremented at end\n");
516 break;
518 case IP_ORIGINAL:
519 fprintf (file, " original biv\n");
520 break;
523 dump_iv (file, iv);
526 /* Returns the info for ssa version VER. */
528 static inline struct version_info *
529 ver_info (struct ivopts_data *data, unsigned ver)
531 return data->version_info + ver;
534 /* Returns the info for ssa name NAME. */
536 static inline struct version_info *
537 name_info (struct ivopts_data *data, tree name)
539 return ver_info (data, SSA_NAME_VERSION (name));
542 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
543 emitted in LOOP. */
545 static bool
546 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
548 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
550 gcc_assert (bb);
552 if (sbb == loop->latch)
553 return true;
555 if (sbb != bb)
556 return false;
558 return stmt == last_stmt (bb);
561 /* Returns true if STMT if after the place where the original induction
562 variable CAND is incremented. */
564 static bool
565 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
567 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
568 basic_block stmt_bb = bb_for_stmt (stmt);
569 block_stmt_iterator bsi;
571 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
572 return false;
574 if (stmt_bb != cand_bb)
575 return true;
577 /* Scan the block from the end, since the original ivs are usually
578 incremented at the end of the loop body. */
579 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
581 if (bsi_stmt (bsi) == cand->incremented_at)
582 return false;
583 if (bsi_stmt (bsi) == stmt)
584 return true;
588 /* Returns true if STMT if after the place where the induction variable
589 CAND is incremented in LOOP. */
591 static bool
592 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
594 switch (cand->pos)
596 case IP_END:
597 return false;
599 case IP_NORMAL:
600 return stmt_after_ip_normal_pos (loop, stmt);
602 case IP_ORIGINAL:
603 return stmt_after_ip_original_pos (cand, stmt);
605 default:
606 gcc_unreachable ();
610 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
612 static bool
613 abnormal_ssa_name_p (tree exp)
615 if (!exp)
616 return false;
618 if (TREE_CODE (exp) != SSA_NAME)
619 return false;
621 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
624 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
625 abnormal phi node. Callback for for_each_index. */
627 static bool
628 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
629 void *data ATTRIBUTE_UNUSED)
631 if (TREE_CODE (base) == ARRAY_REF)
633 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
634 return false;
635 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
636 return false;
639 return !abnormal_ssa_name_p (*index);
642 /* Returns true if EXPR contains a ssa name that occurs in an
643 abnormal phi node. */
645 bool
646 contains_abnormal_ssa_name_p (tree expr)
648 enum tree_code code;
649 enum tree_code_class codeclass;
651 if (!expr)
652 return false;
654 code = TREE_CODE (expr);
655 codeclass = TREE_CODE_CLASS (code);
657 if (code == SSA_NAME)
658 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
660 if (code == INTEGER_CST
661 || is_gimple_min_invariant (expr))
662 return false;
664 if (code == ADDR_EXPR)
665 return !for_each_index (&TREE_OPERAND (expr, 0),
666 idx_contains_abnormal_ssa_name_p,
667 NULL);
669 switch (codeclass)
671 case tcc_binary:
672 case tcc_comparison:
673 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
674 return true;
676 /* Fallthru. */
677 case tcc_unary:
678 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
679 return true;
681 break;
683 default:
684 gcc_unreachable ();
687 return false;
690 /* Returns tree describing number of iterations determined from
691 EXIT of DATA->current_loop, or NULL if something goes wrong. */
693 static tree
694 niter_for_exit (struct ivopts_data *data, edge exit)
696 struct tree_niter_desc desc;
697 tree niter;
698 void **slot;
700 if (!data->niters)
702 data->niters = pointer_map_create ();
703 slot = NULL;
705 else
706 slot = pointer_map_contains (data->niters, exit);
708 if (!slot)
710 /* Try to determine number of iterations. We must know it
711 unconditionally (i.e., without possibility of # of iterations
712 being zero). Also, we cannot safely work with ssa names that
713 appear in phi nodes on abnormal edges, so that we do not create
714 overlapping life ranges for them (PR 27283). */
715 if (number_of_iterations_exit (data->current_loop,
716 exit, &desc, true)
717 && integer_zerop (desc.may_be_zero)
718 && !contains_abnormal_ssa_name_p (desc.niter))
719 niter = desc.niter;
720 else
721 niter = NULL_TREE;
723 *pointer_map_insert (data->niters, exit) = niter;
725 else
726 niter = (tree) *slot;
728 return niter;
731 /* Returns tree describing number of iterations determined from
732 single dominating exit of DATA->current_loop, or NULL if something
733 goes wrong. */
735 static tree
736 niter_for_single_dom_exit (struct ivopts_data *data)
738 edge exit = single_dom_exit (data->current_loop);
740 if (!exit)
741 return NULL;
743 return niter_for_exit (data, exit);
746 /* Initializes data structures used by the iv optimization pass, stored
747 in DATA. */
749 static void
750 tree_ssa_iv_optimize_init (struct ivopts_data *data)
752 data->version_info_size = 2 * num_ssa_names;
753 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
754 data->relevant = BITMAP_ALLOC (NULL);
755 data->important_candidates = BITMAP_ALLOC (NULL);
756 data->max_inv_id = 0;
757 data->niters = NULL;
758 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
759 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
760 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
763 /* Returns a memory object to that EXPR points. In case we are able to
764 determine that it does not point to any such object, NULL is returned. */
766 static tree
767 determine_base_object (tree expr)
769 enum tree_code code = TREE_CODE (expr);
770 tree base, obj;
772 /* If this is a pointer casted to any type, we need to determine
773 the base object for the pointer; so handle conversions before
774 throwing away non-pointer expressions. */
775 if (CONVERT_EXPR_P (expr))
776 return determine_base_object (TREE_OPERAND (expr, 0));
778 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
779 return NULL_TREE;
781 switch (code)
783 case INTEGER_CST:
784 return NULL_TREE;
786 case ADDR_EXPR:
787 obj = TREE_OPERAND (expr, 0);
788 base = get_base_address (obj);
790 if (!base)
791 return expr;
793 if (TREE_CODE (base) == INDIRECT_REF)
794 return determine_base_object (TREE_OPERAND (base, 0));
796 return fold_convert (ptr_type_node,
797 build_fold_addr_expr (base));
799 case POINTER_PLUS_EXPR:
800 return determine_base_object (TREE_OPERAND (expr, 0));
802 case PLUS_EXPR:
803 case MINUS_EXPR:
804 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
805 gcc_unreachable ();
807 default:
808 return fold_convert (ptr_type_node, expr);
812 /* Allocates an induction variable with given initial value BASE and step STEP
813 for loop LOOP. */
815 static struct iv *
816 alloc_iv (tree base, tree step)
818 struct iv *iv = XCNEW (struct iv);
819 gcc_assert (step != NULL_TREE);
821 iv->base = base;
822 iv->base_object = determine_base_object (base);
823 iv->step = step;
824 iv->biv_p = false;
825 iv->have_use_for = false;
826 iv->use_id = 0;
827 iv->ssa_name = NULL_TREE;
829 return iv;
832 /* Sets STEP and BASE for induction variable IV. */
834 static void
835 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
837 struct version_info *info = name_info (data, iv);
839 gcc_assert (!info->iv);
841 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
842 info->iv = alloc_iv (base, step);
843 info->iv->ssa_name = iv;
846 /* Finds induction variable declaration for VAR. */
848 static struct iv *
849 get_iv (struct ivopts_data *data, tree var)
851 basic_block bb;
852 tree type = TREE_TYPE (var);
854 if (!POINTER_TYPE_P (type)
855 && !INTEGRAL_TYPE_P (type))
856 return NULL;
858 if (!name_info (data, var)->iv)
860 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
862 if (!bb
863 || !flow_bb_inside_loop_p (data->current_loop, bb))
864 set_iv (data, var, var, build_int_cst (type, 0));
867 return name_info (data, var)->iv;
870 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
871 not define a simple affine biv with nonzero step. */
873 static tree
874 determine_biv_step (tree phi)
876 struct loop *loop = bb_for_stmt (phi)->loop_father;
877 tree name = PHI_RESULT (phi);
878 affine_iv iv;
880 if (!is_gimple_reg (name))
881 return NULL_TREE;
883 if (!simple_iv (loop, phi, name, &iv, true))
884 return NULL_TREE;
886 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
889 /* Finds basic ivs. */
891 static bool
892 find_bivs (struct ivopts_data *data)
894 tree phi, step, type, base;
895 bool found = false;
896 struct loop *loop = data->current_loop;
898 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
900 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
901 continue;
903 step = determine_biv_step (phi);
904 if (!step)
905 continue;
907 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
908 base = expand_simple_operations (base);
909 if (contains_abnormal_ssa_name_p (base)
910 || contains_abnormal_ssa_name_p (step))
911 continue;
913 type = TREE_TYPE (PHI_RESULT (phi));
914 base = fold_convert (type, base);
915 if (step)
917 if (POINTER_TYPE_P (type))
918 step = fold_convert (sizetype, step);
919 else
920 step = fold_convert (type, step);
923 set_iv (data, PHI_RESULT (phi), base, step);
924 found = true;
927 return found;
930 /* Marks basic ivs. */
932 static void
933 mark_bivs (struct ivopts_data *data)
935 tree phi, var;
936 struct iv *iv, *incr_iv;
937 struct loop *loop = data->current_loop;
938 basic_block incr_bb;
940 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
942 iv = get_iv (data, PHI_RESULT (phi));
943 if (!iv)
944 continue;
946 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
947 incr_iv = get_iv (data, var);
948 if (!incr_iv)
949 continue;
951 /* If the increment is in the subloop, ignore it. */
952 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
953 if (incr_bb->loop_father != data->current_loop
954 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
955 continue;
957 iv->biv_p = true;
958 incr_iv->biv_p = true;
962 /* Checks whether STMT defines a linear induction variable and stores its
963 parameters to IV. */
965 static bool
966 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
968 tree lhs;
969 struct loop *loop = data->current_loop;
971 iv->base = NULL_TREE;
972 iv->step = NULL_TREE;
974 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
975 return false;
977 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
978 if (TREE_CODE (lhs) != SSA_NAME)
979 return false;
981 if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
982 return false;
983 iv->base = expand_simple_operations (iv->base);
985 if (contains_abnormal_ssa_name_p (iv->base)
986 || contains_abnormal_ssa_name_p (iv->step))
987 return false;
989 return true;
992 /* Finds general ivs in statement STMT. */
994 static void
995 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
997 affine_iv iv;
999 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1000 return;
1002 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1005 /* Finds general ivs in basic block BB. */
1007 static void
1008 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1010 block_stmt_iterator bsi;
1012 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1013 find_givs_in_stmt (data, bsi_stmt (bsi));
1016 /* Finds general ivs. */
1018 static void
1019 find_givs (struct ivopts_data *data)
1021 struct loop *loop = data->current_loop;
1022 basic_block *body = get_loop_body_in_dom_order (loop);
1023 unsigned i;
1025 for (i = 0; i < loop->num_nodes; i++)
1026 find_givs_in_bb (data, body[i]);
1027 free (body);
1030 /* For each ssa name defined in LOOP determines whether it is an induction
1031 variable and if so, its initial value and step. */
1033 static bool
1034 find_induction_variables (struct ivopts_data *data)
1036 unsigned i;
1037 bitmap_iterator bi;
1039 if (!find_bivs (data))
1040 return false;
1042 find_givs (data);
1043 mark_bivs (data);
1045 if (dump_file && (dump_flags & TDF_DETAILS))
1047 tree niter = niter_for_single_dom_exit (data);
1049 if (niter)
1051 fprintf (dump_file, " number of iterations ");
1052 print_generic_expr (dump_file, niter, TDF_SLIM);
1053 fprintf (dump_file, "\n\n");
1056 fprintf (dump_file, "Induction variables:\n\n");
1058 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1060 if (ver_info (data, i)->iv)
1061 dump_iv (dump_file, ver_info (data, i)->iv);
1065 return true;
1068 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1070 static struct iv_use *
1071 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1072 tree stmt, enum use_type use_type)
1074 struct iv_use *use = XCNEW (struct iv_use);
1076 use->id = n_iv_uses (data);
1077 use->type = use_type;
1078 use->iv = iv;
1079 use->stmt = stmt;
1080 use->op_p = use_p;
1081 use->related_cands = BITMAP_ALLOC (NULL);
1083 /* To avoid showing ssa name in the dumps, if it was not reset by the
1084 caller. */
1085 iv->ssa_name = NULL_TREE;
1087 if (dump_file && (dump_flags & TDF_DETAILS))
1088 dump_use (dump_file, use);
1090 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1092 return use;
1095 /* Checks whether OP is a loop-level invariant and if so, records it.
1096 NONLINEAR_USE is true if the invariant is used in a way we do not
1097 handle specially. */
1099 static void
1100 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1102 basic_block bb;
1103 struct version_info *info;
1105 if (TREE_CODE (op) != SSA_NAME
1106 || !is_gimple_reg (op))
1107 return;
1109 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1110 if (bb
1111 && flow_bb_inside_loop_p (data->current_loop, bb))
1112 return;
1114 info = name_info (data, op);
1115 info->name = op;
1116 info->has_nonlin_use |= nonlinear_use;
1117 if (!info->inv_id)
1118 info->inv_id = ++data->max_inv_id;
1119 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1122 /* Checks whether the use OP is interesting and if so, records it. */
1124 static struct iv_use *
1125 find_interesting_uses_op (struct ivopts_data *data, tree op)
1127 struct iv *iv;
1128 struct iv *civ;
1129 tree stmt;
1130 struct iv_use *use;
1132 if (TREE_CODE (op) != SSA_NAME)
1133 return NULL;
1135 iv = get_iv (data, op);
1136 if (!iv)
1137 return NULL;
1139 if (iv->have_use_for)
1141 use = iv_use (data, iv->use_id);
1143 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1144 return use;
1147 if (integer_zerop (iv->step))
1149 record_invariant (data, op, true);
1150 return NULL;
1152 iv->have_use_for = true;
1154 civ = XNEW (struct iv);
1155 *civ = *iv;
1157 stmt = SSA_NAME_DEF_STMT (op);
1158 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1159 || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1161 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1162 iv->use_id = use->id;
1164 return use;
1167 /* Given a condition *COND_P, checks whether it is a compare of an induction
1168 variable and an invariant. If this is the case, CONTROL_VAR is set
1169 to location of the iv, BOUND to the location of the invariant,
1170 IV_VAR and IV_BOUND are set to the corresponding induction variable
1171 descriptions, and true is returned. If this is not the case,
1172 CONTROL_VAR and BOUND are set to the arguments of the condition and
1173 false is returned. */
1175 static bool
1176 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1177 tree **control_var, tree **bound,
1178 struct iv **iv_var, struct iv **iv_bound)
1180 /* The nodes returned when COND has just one operand. Note that you should
1181 not modify anything in BOUND or IV_BOUND because of this. */
1182 static struct iv const_iv;
1183 static tree zero;
1184 tree cond = *cond_p;
1185 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1186 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1187 bool ret = false;
1189 zero = integer_zero_node;
1190 const_iv.step = integer_zero_node;
1192 if (TREE_CODE (cond) == SSA_NAME)
1194 op0 = cond_p;
1195 iv0 = get_iv (data, cond);
1196 ret = (iv0 && !integer_zerop (iv0->step));
1197 goto end;
1200 if (!COMPARISON_CLASS_P (cond))
1202 op0 = cond_p;
1203 goto end;
1206 op0 = &TREE_OPERAND (cond, 0);
1207 op1 = &TREE_OPERAND (cond, 1);
1208 if (TREE_CODE (*op0) == SSA_NAME)
1209 iv0 = get_iv (data, *op0);
1210 if (TREE_CODE (*op1) == SSA_NAME)
1211 iv1 = get_iv (data, *op1);
1213 /* Exactly one of the compared values must be an iv, and the other one must
1214 be an invariant. */
1215 if (!iv0 || !iv1)
1216 goto end;
1218 if (integer_zerop (iv0->step))
1220 /* Control variable may be on the other side. */
1221 tmp_op = op0; op0 = op1; op1 = tmp_op;
1222 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1224 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1226 end:
1227 if (control_var)
1228 *control_var = op0;;
1229 if (iv_var)
1230 *iv_var = iv0;;
1231 if (bound)
1232 *bound = op1;
1233 if (iv_bound)
1234 *iv_bound = iv1;
1236 return ret;
1239 /* Checks whether the condition *COND_P in STMT is interesting
1240 and if so, records it. */
1242 static void
1243 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1245 tree *var_p, *bound_p;
1246 struct iv *var_iv, *civ;
1248 if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1250 find_interesting_uses_op (data, *var_p);
1251 find_interesting_uses_op (data, *bound_p);
1252 return;
1255 civ = XNEW (struct iv);
1256 *civ = *var_iv;
1257 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1260 /* Returns true if expression EXPR is obviously invariant in LOOP,
1261 i.e. if all its operands are defined outside of the LOOP. LOOP
1262 should not be the function body. */
1264 bool
1265 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1267 basic_block def_bb;
1268 unsigned i, len;
1270 gcc_assert (loop_depth (loop) > 0);
1272 if (is_gimple_min_invariant (expr))
1273 return true;
1275 if (TREE_CODE (expr) == SSA_NAME)
1277 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1278 if (def_bb
1279 && flow_bb_inside_loop_p (loop, def_bb))
1280 return false;
1282 return true;
1285 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1286 return false;
1288 len = TREE_OPERAND_LENGTH (expr);
1289 for (i = 0; i < len; i++)
1290 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1291 return false;
1293 return true;
1296 /* Cumulates the steps of indices into DATA and replaces their values with the
1297 initial ones. Returns false when the value of the index cannot be determined.
1298 Callback for for_each_index. */
1300 struct ifs_ivopts_data
1302 struct ivopts_data *ivopts_data; /* Per-loop ivopts data queried for ivs.  */
1303 tree stmt; /* Statement containing the reference (used for scev conversion).  */
1304 tree step; /* Accumulated byte step of all indices, in sizetype.  */
/* Callback for for_each_index: accumulates the byte step contributed by
   index *IDX of reference BASE into the ifs_ivopts_data passed in DATA,
   and replaces *IDX with the initial value of its iv.  Returns false when
   the step cannot be determined or the index might wrap.  */
1307 static bool
1308 idx_find_step (tree base, tree *idx, void *data)
1310 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1311 struct iv *iv;
1312 tree step, iv_base, iv_step, lbound, off;
1313 struct loop *loop = dta->ivopts_data->current_loop;
/* Give up on the special indirect-ref variants outright.  */
1315 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1316 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1317 return false;
1319 /* If base is a component ref, require that the offset of the reference
1320 be invariant. */
1321 if (TREE_CODE (base) == COMPONENT_REF)
1323 off = component_ref_field_offset (base);
1324 return expr_invariant_in_loop_p (loop, off);
1327 /* If base is array, first check whether we will be able to move the
1328 reference out of the loop (in order to take its address in strength
1329 reduction). In order for this to work we need both lower bound
1330 and step to be loop invariants. */
1331 if (TREE_CODE (base) == ARRAY_REF)
1333 step = array_ref_element_size (base);
1334 lbound = array_ref_low_bound (base);
1336 if (!expr_invariant_in_loop_p (loop, step)
1337 || !expr_invariant_in_loop_p (loop, lbound))
1338 return false;
/* A non-SSA index contributes no step; nothing more to do.  */
1341 if (TREE_CODE (*idx) != SSA_NAME)
1342 return true;
1344 iv = get_iv (dta->ivopts_data, *idx);
1345 if (!iv)
1346 return false;
1348 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1349 *&x[0], which is not folded and does not trigger the
1350 ARRAY_REF path below. */
/* Replace the index by the initial value of its iv.  */
1351 *idx = iv->base;
1353 if (integer_zerop (iv->step))
1354 return true;
1356 if (TREE_CODE (base) == ARRAY_REF)
1358 step = array_ref_element_size (base);
1360 /* We only handle addresses whose step is an integer constant. */
1361 if (TREE_CODE (step) != INTEGER_CST)
1362 return false;
1364 else
1365 /* The step for pointer arithmetics already is 1 byte. */
1366 step = build_int_cst (sizetype, 1);
/* Convert the iv to sizetype, proving along the way that the index
   arithmetic does not overflow in that type.  */
1368 iv_base = iv->base;
1369 iv_step = iv->step;
1370 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1371 sizetype, &iv_base, &iv_step, dta->stmt,
1372 false))
1374 /* The index might wrap. */
1375 return false;
/* Accumulate this index's byte contribution: element size * iv step.  */
1378 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1379 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1381 return true;
1384 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1385 object is passed to it in DATA. */
1387 static bool
1388 idx_record_use (tree base, tree *idx,
1389 void *vdata)
1391 struct ivopts_data *data = (struct ivopts_data *) vdata;
1392 find_interesting_uses_op (data, *idx);
1393 if (TREE_CODE (base) == ARRAY_REF)
1395 find_interesting_uses_op (data, array_ref_element_size (base));
1396 find_interesting_uses_op (data, array_ref_low_bound (base));
1398 return true;
1401 /* If we can prove that TOP = cst * BOT for some constant cst,
1402 store cst to MUL and return true. Otherwise return false.
1403 The returned value is always sign-extended, regardless of the
1404 signedness of TOP and BOT. */
1406 static bool
1407 constant_multiple_of (tree top, tree bot, double_int *mul)
1409 tree mby;
1410 enum tree_code code;
1411 double_int res, p0, p1;
1412 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1414 STRIP_NOPS (top);
1415 STRIP_NOPS (bot);
1417 if (operand_equal_p (top, bot, 0))
1419 *mul = double_int_one;
1420 return true;
1423 code = TREE_CODE (top);
1424 switch (code)
1426 case MULT_EXPR:
1427 mby = TREE_OPERAND (top, 1);
1428 if (TREE_CODE (mby) != INTEGER_CST)
1429 return false;
1431 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1432 return false;
1434 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1435 precision);
1436 return true;
1438 case PLUS_EXPR:
1439 case MINUS_EXPR:
1440 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1441 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1442 return false;
1444 if (code == MINUS_EXPR)
1445 p1 = double_int_neg (p1);
1446 *mul = double_int_sext (double_int_add (p0, p1), precision);
1447 return true;
1449 case INTEGER_CST:
1450 if (TREE_CODE (bot) != INTEGER_CST)
1451 return false;
1453 p0 = double_int_sext (tree_to_double_int (top), precision);
1454 p1 = double_int_sext (tree_to_double_int (bot), precision);
1455 if (double_int_zero_p (p1))
1456 return false;
1457 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1458 precision);
1459 return double_int_zero_p (res);
1461 default:
1462 return false;
1466 /* Returns true if memory reference REF with step STEP may be unaligned. */
1468 static bool
1469 may_be_unaligned_p (tree ref, tree step)
1471 tree base;
1472 tree base_type;
1473 HOST_WIDE_INT bitsize;
1474 HOST_WIDE_INT bitpos;
1475 tree toffset;
1476 enum machine_mode mode;
1477 int unsignedp, volatilep;
1478 unsigned base_align;
1480 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1481 thus they are not misaligned. */
1482 if (TREE_CODE (ref) == TARGET_MEM_REF)
1483 return false;
1485 /* The test below is basically copy of what expr.c:normal_inner_ref
1486 does to check whether the object must be loaded by parts when
1487 STRICT_ALIGNMENT is true. */
1488 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1489 &unsignedp, &volatilep, true);
1490 base_type = TREE_TYPE (base);
1491 base_align = TYPE_ALIGN (base_type);
1493 if (mode != BLKmode)
/* AL is the access-mode alignment expressed in bytes, in STEP's type.  */
1495 double_int mul;
1496 tree al = build_int_cst (TREE_TYPE (step),
1497 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
/* Unaligned if the base object is less aligned than the mode requires,
   or the bit position is not on a mode- and byte-aligned boundary.  */
1499 if (base_align < GET_MODE_ALIGNMENT (mode)
1500 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1501 || bitpos % BITS_PER_UNIT != 0)
1502 return true;
/* STEP must be a constant multiple of the mode alignment, otherwise a
   later iteration could land on a misaligned address.  The multiplier
   MUL itself is not used.  */
1504 if (!constant_multiple_of (step, al, &mul))
1505 return true;
1508 return false;
1511 /* Return true if EXPR may be non-addressable. */
1513 static bool
1514 may_be_nonaddressable_p (tree expr)
1516 switch (TREE_CODE (expr))
1518 case TARGET_MEM_REF:
1519 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1520 target, thus they are always addressable. */
1521 return false;
1523 case COMPONENT_REF:
/* Nonaddressable if the field itself is marked so, or if the containing
   object is.  */
1524 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1525 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1527 case VIEW_CONVERT_EXPR:
1528 /* This kind of view-conversions may wrap non-addressable objects
1529 and make them look addressable. After some processing the
1530 non-addressability may be uncovered again, causing ADDR_EXPRs
1531 of inappropriate objects to be built. */
1532 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1533 || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
1534 return true;
1536 /* ... fall through ... */
1538 case ARRAY_REF:
1539 case ARRAY_RANGE_REF:
/* Array accesses are addressable iff the underlying object is.  */
1540 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1542 CASE_CONVERT:
/* Be conservative: a conversion may hide a non-addressable object.  */
1543 return true;
1545 default:
1546 break;
/* Anything else (plain decls, indirect refs, ...) is treated as
   addressable.  */
1549 return false;
1552 /* Finds addresses in *OP_P inside STMT. */
1554 static void
1555 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1557 tree base = *op_p, step = build_int_cst (sizetype, 0);
1558 struct iv *civ;
1559 struct ifs_ivopts_data ifs_ivopts_data;
1561 /* Do not play with volatile memory references. A bit too conservative,
1562 perhaps, but safe. */
1563 if (stmt_ann (stmt)->has_volatile_ops
1564 goto fail;
1566 /* Ignore bitfields for now. Not really something terribly complicated
1567 to handle. TODO. */
1568 if (TREE_CODE (base) == BIT_FIELD_REF)
1569 goto fail;
/* Work on a private copy so we can substitute iv bases in place.  */
1571 base = unshare_expr (base);
1573 if (TREE_CODE (base) == TARGET_MEM_REF)
/* TARGET_MEM_REF: replace its SSA-name base/index parts by the initial
   values of their ivs and accumulate the total step.  */
1575 tree type = build_pointer_type (TREE_TYPE (base));
1576 tree astep;
1578 if (TMR_BASE (base)
1579 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1581 civ = get_iv (data, TMR_BASE (base));
1582 if (!civ)
1583 goto fail;
1585 TMR_BASE (base) = civ->base;
1586 step = civ->step;
1588 if (TMR_INDEX (base)
1589 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1591 civ = get_iv (data, TMR_INDEX (base));
1592 if (!civ)
1593 goto fail;
1595 TMR_INDEX (base) = civ->base;
1596 astep = civ->step;
1598 if (astep)
/* Index contribution is scaled by the TMR step, when present.  */
1600 if (TMR_STEP (base))
1601 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1603 step = fold_build2 (PLUS_EXPR, type, step, astep);
/* A zero total step means the address is loop-invariant here; not an
   interesting address use.  */
1607 if (integer_zerop (step))
1608 goto fail;
1609 base = tree_mem_ref_addr (type, base);
1611 else
/* Generic reference: let idx_find_step accumulate the step over all
   indices and substitute the iv initial values.  */
1613 ifs_ivopts_data.ivopts_data = data;
1614 ifs_ivopts_data.stmt = stmt;
1615 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1616 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1617 || integer_zerop (ifs_ivopts_data.step))
1618 goto fail;
1619 step = ifs_ivopts_data.step;
1621 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1622 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1624 /* Check that the base expression is addressable. This needs
1625 to be done after substituting bases of IVs into it. */
1626 if (may_be_nonaddressable_p (base))
1627 goto fail;
1629 /* Moreover, on strict alignment platforms, check that it is
1630 sufficiently aligned. */
1631 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1632 goto fail;
1634 base = build_fold_addr_expr (base);
1636 /* Substituting bases of IVs into the base expression might
1637 have caused folding opportunities. */
1638 if (TREE_CODE (base) == ADDR_EXPR)
1640 tree *ref = &TREE_OPERAND (base, 0);
1641 while (handled_component_p (*ref))
1642 ref = &TREE_OPERAND (*ref, 0);
1643 if (TREE_CODE (*ref) == INDIRECT_REF)
1644 *ref = fold_indirect_ref (*ref);
/* Record the address use with iv BASE + STEP * i.  */
1648 civ = alloc_iv (base, step);
1649 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1650 return;
1652 fail:
/* Fallback: no address iv; still record the plain uses of the indices.  */
1653 for_each_index (op_p, idx_record_use, data);
1656 /* Finds and records invariants used in STMT. */
1658 static void
1659 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1661 ssa_op_iter iter;
1662 use_operand_p use_p;
1663 tree op;
1665 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1667 op = USE_FROM_PTR (use_p);
1668 record_invariant (data, op, false);
1672 /* Finds interesting uses of induction variables in the statement STMT. */
1674 static void
1675 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1677 struct iv *iv;
1678 tree op, lhs, rhs;
1679 ssa_op_iter iter;
1680 use_operand_p use_p;
1682 find_invariants_stmt (data, stmt);
/* Conditions are handled by the dedicated comparison path.  */
1684 if (TREE_CODE (stmt) == COND_EXPR)
1686 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1687 return;
1690 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1692 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1693 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1695 if (TREE_CODE (lhs) == SSA_NAME)
1697 /* If the statement defines an induction variable, the uses are not
1698 interesting by themselves. */
1700 iv = get_iv (data, lhs);
1702 if (iv && !integer_zerop (iv->step))
1703 return;
/* Dispatch on the shape of the right-hand side.  */
1706 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1708 case tcc_comparison:
1709 find_interesting_uses_cond (data, stmt,
1710 &GIMPLE_STMT_OPERAND (stmt, 1));
1711 return;
1713 case tcc_reference:
/* A memory load; the store side is also an address use when the lhs
   is a reference.  */
1714 find_interesting_uses_address (data, stmt,
1715 &GIMPLE_STMT_OPERAND (stmt, 1));
1716 if (REFERENCE_CLASS_P (lhs))
1717 find_interesting_uses_address (data, stmt,
1718 &GIMPLE_STMT_OPERAND (stmt, 0));
1719 return;
1721 default: ;
/* A plain store of a gimple value: address use on the lhs, operand use
   of the stored value.  */
1724 if (REFERENCE_CLASS_P (lhs)
1725 && is_gimple_val (rhs))
1727 find_interesting_uses_address (data, stmt,
1728 &GIMPLE_STMT_OPERAND (stmt, 0));
1729 find_interesting_uses_op (data, rhs);
1730 return;
1733 /* TODO -- we should also handle address uses of type
1735 memory = call (whatever);
1739 call (memory). */
/* PHI nodes in the loop header that define a genuine iv need no
   further processing.  */
1742 if (TREE_CODE (stmt) == PHI_NODE
1743 && bb_for_stmt (stmt) == data->current_loop->header)
1745 lhs = PHI_RESULT (stmt);
1746 iv = get_iv (data, lhs);
1748 if (iv && !integer_zerop (iv->step))
1749 return;
/* Fallback: record every SSA operand that is an iv as a nonlinear use.  */
1752 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1754 op = USE_FROM_PTR (use_p);
1756 if (TREE_CODE (op) != SSA_NAME)
1757 continue;
1759 iv = get_iv (data, op);
1760 if (!iv)
1761 continue;
1763 find_interesting_uses_op (data, op);
1767 /* Finds interesting uses of induction variables outside of loops
1768 on loop exit edge EXIT. */
1770 static void
1771 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1773 tree phi, def;
1775 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1777 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1778 if (is_gimple_reg (def))
1779 find_interesting_uses_op (data, def);
1783 /* Finds uses of the induction variables that are interesting. */
1785 static void
1786 find_interesting_uses (struct ivopts_data *data)
1788 basic_block bb;
1789 block_stmt_iterator bsi;
1790 tree phi;
1791 basic_block *body = get_loop_body (data->current_loop);
1792 unsigned i;
1793 struct version_info *info;
1794 edge e;
1796 if (dump_file && (dump_flags & TDF_DETAILS))
1797 fprintf (dump_file, "Uses:\n\n");
/* Walk every block of the loop body.  */
1799 for (i = 0; i < data->current_loop->num_nodes; i++)
1801 edge_iterator ei;
1802 bb = body[i];
/* Edges leaving the loop carry uses that live past the loop.  */
1804 FOR_EACH_EDGE (e, ei, bb->succs)
1805 if (e->dest != EXIT_BLOCK_PTR
1806 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1807 find_interesting_uses_outside (data, e);
/* Then process PHI nodes and ordinary statements of the block.  */
1809 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1810 find_interesting_uses_stmt (data, phi);
1811 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1812 find_interesting_uses_stmt (data, bsi_stmt (bsi));
/* Dump the recorded invariants when detailed dumping is on.  */
1815 if (dump_file && (dump_flags & TDF_DETAILS))
1817 bitmap_iterator bi;
1819 fprintf (dump_file, "\n");
1821 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1823 info = ver_info (data, i);
1824 if (info->inv_id)
1826 fprintf (dump_file, " ");
1827 print_generic_expr (dump_file, info->name, TDF_SLIM);
1828 fprintf (dump_file, " is invariant (%d)%s\n",
1829 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1833 fprintf (dump_file, "\n");
/* get_loop_body allocated BODY; release it.  */
1836 free (body);
1839 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1840 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1841 we are at the top-level of the processed address. */
1843 static tree
1844 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1845 unsigned HOST_WIDE_INT *offset)
1847 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1848 enum tree_code code;
1849 tree type, orig_type = TREE_TYPE (expr);
1850 unsigned HOST_WIDE_INT off0, off1, st;
1851 tree orig_expr = expr;
1853 STRIP_NOPS (expr);
1855 type = TREE_TYPE (expr);
1856 code = TREE_CODE (expr);
1857 *offset = 0;
1859 switch (code)
1861 case INTEGER_CST:
/* A host-representable nonzero constant is moved entirely into
   *OFFSET; it is replaced by zero in the expression.  */
1862 if (!cst_and_fits_in_hwi (expr)
1863 || integer_zerop (expr))
1864 return orig_expr;
1866 *offset = int_cst_value (expr);
1867 return build_int_cst (orig_type, 0);
1869 case POINTER_PLUS_EXPR:
1870 case PLUS_EXPR:
1871 case MINUS_EXPR:
/* Strip both operands and combine their offsets; MINUS subtracts the
   second one.  */
1872 op0 = TREE_OPERAND (expr, 0);
1873 op1 = TREE_OPERAND (expr, 1);
1875 op0 = strip_offset_1 (op0, false, false, &off0);
1876 op1 = strip_offset_1 (op1, false, false, &off1);
1878 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1879 if (op0 == TREE_OPERAND (expr, 0)
1880 && op1 == TREE_OPERAND (expr, 1))
1881 return orig_expr;
/* Simplify when one operand became zero after stripping.  */
1883 if (integer_zerop (op1))
1884 expr = op0;
1885 else if (integer_zerop (op0))
1887 if (code == MINUS_EXPR)
1888 expr = fold_build1 (NEGATE_EXPR, type, op1);
1889 else
1890 expr = op1;
1892 else
1893 expr = fold_build2 (code, type, op0, op1);
1895 return fold_convert (orig_type, expr);
1897 case ARRAY_REF:
/* Only meaningful inside an address; a constant index times a constant
   element size becomes a byte offset.  */
1898 if (!inside_addr)
1899 return orig_expr;
1901 step = array_ref_element_size (expr);
1902 if (!cst_and_fits_in_hwi (step))
1903 break;
1905 st = int_cst_value (step);
1906 op1 = TREE_OPERAND (expr, 1);
1907 op1 = strip_offset_1 (op1, false, false, &off1);
1908 *offset = off1 * st;
1910 if (top_compref
1911 && integer_zerop (op1))
1913 /* Strip the component reference completely. */
1914 op0 = TREE_OPERAND (expr, 0);
1915 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1916 *offset += off0;
1917 return op0;
1919 break;
1921 case COMPONENT_REF:
1922 if (!inside_addr)
1923 return orig_expr;
/* Top-level field accesses with a constant byte offset can be stripped
   into *OFFSET entirely.  */
1925 tmp = component_ref_field_offset (expr);
1926 if (top_compref
1927 && cst_and_fits_in_hwi (tmp))
1929 /* Strip the component reference completely. */
1930 op0 = TREE_OPERAND (expr, 0);
1931 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1932 *offset = off0 + int_cst_value (tmp);
1933 return op0;
1935 break;
1937 case ADDR_EXPR:
/* Entering an address: recurse with INSIDE_ADDR and TOP_COMPREF set.  */
1938 op0 = TREE_OPERAND (expr, 0);
1939 op0 = strip_offset_1 (op0, true, true, &off0);
1940 *offset += off0;
1942 if (op0 == TREE_OPERAND (expr, 0))
1943 return orig_expr;
1945 expr = build_fold_addr_expr (op0);
1946 return fold_convert (orig_type, expr);
1948 case INDIRECT_REF:
/* Dereferencing leaves the address context.  */
1949 inside_addr = false;
1950 break;
1952 default:
1953 return orig_expr;
1956 /* Default handling of expressions for that we want to recurse into
1957 the first operand. */
1958 op0 = TREE_OPERAND (expr, 0);
1959 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1960 *offset += off0;
1962 if (op0 == TREE_OPERAND (expr, 0)
1963 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1964 return orig_expr;
/* Rebuild the node with the stripped operand(s).  */
1966 expr = copy_node (expr);
1967 TREE_OPERAND (expr, 0) = op0;
1968 if (op1)
1969 TREE_OPERAND (expr, 1) = op1;
1971 /* Inside address, we might strip the top level component references,
1972 thus changing type of the expression. Handling of ADDR_EXPR
1973 will fix that. */
1974 expr = fold_convert (orig_type, expr);
1976 return expr;
1979 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1981 static tree
1982 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1984 return strip_offset_1 (expr, false, false, offset);
1987 /* Returns variant of TYPE that can be used as base for different uses.
1988 We return unsigned type with the same precision, which avoids problems
1989 with overflows. */
1991 static tree
1992 generic_type_for (tree type)
1994 if (POINTER_TYPE_P (type))
1995 return unsigned_type_for (type);
1997 if (TYPE_UNSIGNED (type))
1998 return type;
2000 return unsigned_type_for (type);
2003 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2004 the bitmap to that we should store it. */
2006 static struct ivopts_data *fd_ivopts_data;
2007 static tree
2008 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2010 bitmap *depends_on = (bitmap *) data;
2011 struct version_info *info;
2013 if (TREE_CODE (*expr_p) != SSA_NAME)
2014 return NULL_TREE;
2015 info = name_info (fd_ivopts_data, *expr_p);
2017 if (!info->inv_id || info->has_nonlin_use)
2018 return NULL_TREE;
2020 if (!*depends_on)
2021 *depends_on = BITMAP_ALLOC (NULL);
2022 bitmap_set_bit (*depends_on, info->inv_id);
2024 return NULL_TREE;
2027 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2028 position to POS. If USE is not NULL, the candidate is set as related to
2029 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2030 replacement of the final value of the iv by a direct computation. */
2032 static struct iv_cand *
2033 add_candidate_1 (struct ivopts_data *data,
2034 tree base, tree step, bool important, enum iv_position pos,
2035 struct iv_use *use, tree incremented_at)
2037 unsigned i;
2038 struct iv_cand *cand = NULL;
2039 tree type, orig_type;
2041 if (base)
2043 orig_type = TREE_TYPE (base);
2044 type = generic_type_for (orig_type);
2045 /* Don't convert the base to the generic type for pointers as the generic
2046 type is an integer type with the same size as the pointer type. */
2047 if (type != orig_type && !POINTER_TYPE_P (orig_type))
2049 base = fold_convert (type, base);
2050 step = fold_convert (type, step);
/* Look for an existing candidate with the same position, increment
   statement, base and step, to avoid duplicates.  */
2054 for (i = 0; i < n_iv_cands (data); i++)
2056 cand = iv_cand (data, i);
2058 if (cand->pos != pos)
2059 continue;
2061 if (cand->incremented_at != incremented_at)
2062 continue;
/* Pseudocandidates (no iv) only match other pseudocandidates.  */
2064 if (!cand->iv)
2066 if (!base && !step)
2067 break;
2069 continue;
2072 if (!base && !step)
2073 continue;
2075 if (operand_equal_p (base, cand->iv->base, 0)
2076 && operand_equal_p (step, cand->iv->step, 0))
2077 break;
/* No match: create a new candidate.  */
2080 if (i == n_iv_cands (data))
2082 cand = XCNEW (struct iv_cand);
2083 cand->id = i;
2085 if (!base && !step)
2086 cand->iv = NULL;
2087 else
2088 cand->iv = alloc_iv (base, step);
2090 cand->pos = pos;
2091 if (pos != IP_ORIGINAL && cand->iv)
/* A fresh variable holds the candidate; before/after increment the
   value is the same variable except for IP_ORIGINAL candidates.  */
2093 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2094 cand->var_after = cand->var_before;
2096 cand->important = important;
2097 cand->incremented_at = incremented_at;
2098 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
/* A non-constant step introduces dependencies on the invariants it
   uses; collect them via walk_tree.  */
2100 if (step
2101 && TREE_CODE (step) != INTEGER_CST)
2103 fd_ivopts_data = data;
2104 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2107 if (dump_file && (dump_flags & TDF_DETAILS))
2108 dump_cand (dump_file, cand);
/* Promote an existing candidate to important if requested.  */
2111 if (important && !cand->important)
2113 cand->important = true;
2114 if (dump_file && (dump_flags & TDF_DETAILS))
2115 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2118 if (use)
2120 bitmap_set_bit (use->related_cands, i);
2121 if (dump_file && (dump_flags & TDF_DETAILS))
2122 fprintf (dump_file, "Candidate %d is related to use %d\n",
2123 cand->id, use->id);
2126 return cand;
2129 /* Returns true if incrementing the induction variable at the end of the LOOP
2130 is allowed.
2132 The purpose is to avoid splitting latch edge with a biv increment, thus
2133 creating a jump, possibly confusing other optimization passes and leaving
2134 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2135 is not available (so we do not have a better alternative), or if the latch
2136 edge is already nonempty. */
2138 static bool
2139 allow_ip_end_pos_p (struct loop *loop)
2141 if (!ip_normal_pos (loop))
2142 return true;
2144 if (!empty_block_p (ip_end_pos (loop)))
2145 return true;
2147 return false;
2150 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2151 position to POS. If USE is not NULL, the candidate is set as related to
2152 it. The candidate computation is scheduled on all available positions. */
2154 static void
2155 add_candidate (struct ivopts_data *data,
2156 tree base, tree step, bool important, struct iv_use *use)
2158 if (ip_normal_pos (data->current_loop))
2159 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2160 if (ip_end_pos (data->current_loop)
2161 && allow_ip_end_pos_p (data->current_loop))
2162 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2165 /* Add a standard "0 + 1 * iteration" iv candidate for a
2166 type with SIZE bits. */
2168 static void
2169 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2170 unsigned int size)
2172 tree type = lang_hooks.types.type_for_size (size, true);
2173 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2174 true, NULL);
2177 /* Adds standard iv candidates. */
2179 static void
2180 add_standard_iv_candidates (struct ivopts_data *data)
2182 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2184 /* The same for a double-integer type if it is still fast enough. */
2185 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2186 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2190 /* Adds candidates bases on the old induction variable IV. */
2192 static void
2193 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2195 tree phi, def;
2196 struct iv_cand *cand;
/* The iv as-is.  */
2198 add_candidate (data, iv->base, iv->step, true, NULL);
2200 /* The same, but with initial value zero. */
2201 add_candidate (data,
2202 build_int_cst (TREE_TYPE (iv->base), 0),
2203 iv->step, true, NULL);
2205 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2206 if (TREE_CODE (phi) == PHI_NODE)
2208 /* Additionally record the possibility of leaving the original iv
2209 untouched. */
2210 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2211 cand = add_candidate_1 (data,
2212 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2213 SSA_NAME_DEF_STMT (def));
/* For an IP_ORIGINAL candidate the pre/post-increment variables are
   the original PHI result and its latch argument.  */
2214 cand->var_before = iv->ssa_name;
2215 cand->var_after = def;
2219 /* Adds candidates based on the old induction variables. */
2221 static void
2222 add_old_ivs_candidates (struct ivopts_data *data)
2224 unsigned i;
2225 struct iv *iv;
2226 bitmap_iterator bi;
2228 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2230 iv = ver_info (data, i)->iv;
2231 if (iv && iv->biv_p && !integer_zerop (iv->step))
2232 add_old_iv_candidates (data, iv);
2236 /* Adds candidates based on the value of the induction variable IV and USE. */
2238 static void
2239 add_iv_value_candidates (struct ivopts_data *data,
2240 struct iv *iv, struct iv_use *use)
2242 unsigned HOST_WIDE_INT offset;
2243 tree base;
2244 tree basetype;
2246 add_candidate (data, iv->base, iv->step, false, use);
2248 /* The same, but with initial value zero. Make such variable important,
2249 since it is generic enough so that possibly many uses may be based
2250 on it. */
2251 basetype = TREE_TYPE (iv->base);
2252 if (POINTER_TYPE_P (basetype))
2253 basetype = sizetype;
2254 add_candidate (data, build_int_cst (basetype, 0),
2255 iv->step, true, use);
2257 /* Third, try removing the constant offset. */
2258 base = strip_offset (iv->base, &offset);
2259 if (offset)
2260 add_candidate (data, base, iv->step, false, use);
2263 /* Adds candidates based on the uses. */
2265 static void
2266 add_derived_ivs_candidates (struct ivopts_data *data)
2268 unsigned i;
2270 for (i = 0; i < n_iv_uses (data); i++)
2272 struct iv_use *use = iv_use (data, i);
2274 if (!use)
2275 continue;
2277 switch (use->type)
2279 case USE_NONLINEAR_EXPR:
2280 case USE_COMPARE:
2281 case USE_ADDRESS:
2282 /* Just add the ivs based on the value of the iv used here. */
2283 add_iv_value_candidates (data, use->iv, use);
2284 break;
2286 default:
2287 gcc_unreachable ();
2292 /* Record important candidates and add them to related_cands bitmaps
2293 if needed. */
2295 static void
2296 record_important_candidates (struct ivopts_data *data)
2298 unsigned i;
2299 struct iv_use *use;
2301 for (i = 0; i < n_iv_cands (data); i++)
2303 struct iv_cand *cand = iv_cand (data, i);
2305 if (cand->important)
2306 bitmap_set_bit (data->important_candidates, i);
2309 data->consider_all_candidates = (n_iv_cands (data)
2310 <= CONSIDER_ALL_CANDIDATES_BOUND);
2312 if (data->consider_all_candidates)
2314 /* We will not need "related_cands" bitmaps in this case,
2315 so release them to decrease peak memory consumption. */
2316 for (i = 0; i < n_iv_uses (data); i++)
2318 use = iv_use (data, i);
2319 BITMAP_FREE (use->related_cands);
2322 else
2324 /* Add important candidates to the related_cands bitmaps. */
2325 for (i = 0; i < n_iv_uses (data); i++)
2326 bitmap_ior_into (iv_use (data, i)->related_cands,
2327 data->important_candidates);
/* Finds the candidates for the induction variables.  Order matters: the
   standard and old ivs go in first, then use-derived ones, and finally the
   important candidates are recorded.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
2349 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2350 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2351 we allocate a simple list to every use. */
2353 static void
2354 alloc_use_cost_map (struct ivopts_data *data)
2356 unsigned i, size, s, j;
2358 for (i = 0; i < n_iv_uses (data); i++)
2360 struct iv_use *use = iv_use (data, i);
2361 bitmap_iterator bi;
2363 if (data->consider_all_candidates)
/* One slot per candidate, directly indexed by candidate id.  */
2364 size = n_iv_cands (data);
2365 else
/* Otherwise count the related candidates ...  */
2367 s = 0;
2368 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2370 s++;
2373 /* Round up to the power of two, so that moduling by it is fast. */
2374 for (size = 1; size < s; size <<= 1)
2375 continue;
/* The map is zero-initialized; a NULL cand marks an empty slot.  */
2378 use->n_map_members = size;
2379 use->cost_map = XCNEWVEC (struct cost_pair, size);
2383 /* Returns description of computation cost of expression whose runtime
2384 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2386 static comp_cost
2387 new_cost (unsigned runtime, unsigned complexity)
2389 comp_cost cost;
2391 cost.cost = runtime;
2392 cost.complexity = complexity;
2394 return cost;
2397 /* Adds costs COST1 and COST2. */
2399 static comp_cost
2400 add_costs (comp_cost cost1, comp_cost cost2)
2402 cost1.cost += cost2.cost;
2403 cost1.complexity += cost2.complexity;
2405 return cost1;
2407 /* Subtracts costs COST1 and COST2. */
2409 static comp_cost
2410 sub_costs (comp_cost cost1, comp_cost cost2)
2412 cost1.cost -= cost2.cost;
2413 cost1.complexity -= cost2.complexity;
2415 return cost1;
2418 /* Returns a negative number if COST1 < COST2, a positive number if
2419 COST1 > COST2, and 0 if COST1 = COST2. */
2421 static int
2422 compare_costs (comp_cost cost1, comp_cost cost2)
2424 if (cost1.cost == cost2.cost)
2425 return cost1.complexity - cost2.complexity;
2427 return cost1.cost - cost2.cost;
2430 /* Returns true if COST is infinite. */
2432 static bool
2433 infinite_cost_p (comp_cost cost)
2435 return cost.cost == INFTY;
2438 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2439 on invariants DEPENDS_ON and that the value used in expressing it
2440 is VALUE.*/
2442 static void
2443 set_use_iv_cost (struct ivopts_data *data,
2444 struct iv_use *use, struct iv_cand *cand,
2445 comp_cost cost, bitmap depends_on, tree value)
2447 unsigned i, s;
/* Infinite cost pairs are never stored; their absence encodes them.  */
2449 if (infinite_cost_p (cost))
2451 BITMAP_FREE (depends_on);
2452 return;
/* In the dense case the map is indexed directly by candidate id.  */
2455 if (data->consider_all_candidates)
2457 use->cost_map[cand->id].cand = cand;
2458 use->cost_map[cand->id].cost = cost;
2459 use->cost_map[cand->id].depends_on = depends_on;
2460 use->cost_map[cand->id].value = value;
2461 return;
2464 /* n_map_members is a power of two, so this computes modulo. */
/* Open addressing with linear probing: find the first free slot
   starting at the hashed position, wrapping around once.  */
2465 s = cand->id & (use->n_map_members - 1);
2466 for (i = s; i < use->n_map_members; i++)
2467 if (!use->cost_map[i].cand)
2468 goto found;
2469 for (i = 0; i < s; i++)
2470 if (!use->cost_map[i].cand)
2471 goto found;
/* The map is sized to hold all related candidates, so a slot must
   exist.  */
2473 gcc_unreachable ();
2475 found:
2476 use->cost_map[i].cand = cand;
2477 use->cost_map[i].cost = cost;
2478 use->cost_map[i].depends_on = depends_on;
2479 use->cost_map[i].value = value;
2482 /* Gets cost of (USE, CANDIDATE) pair. */
2484 static struct cost_pair *
2485 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2486 struct iv_cand *cand)
2488 unsigned i, s;
2489 struct cost_pair *ret;
2491 if (!cand)
2492 return NULL;
2494 if (data->consider_all_candidates)
2496 ret = use->cost_map + cand->id;
2497 if (!ret->cand)
2498 return NULL;
2500 return ret;
2503 /* n_map_members is a power of two, so this computes modulo. */
2504 s = cand->id & (use->n_map_members - 1);
2505 for (i = s; i < use->n_map_members; i++)
2506 if (use->cost_map[i].cand == cand)
2507 return use->cost_map + i;
2509 for (i = 0; i < s; i++)
2510 if (use->cost_map[i].cand == cand)
2511 return use->cost_map + i;
2513 return NULL;
2516 /* Returns estimate on cost of computing SEQ. */
2518 static unsigned
2519 seq_cost (rtx seq)
2521 unsigned cost = 0;
2522 rtx set;
2524 for (; seq; seq = NEXT_INSN (seq))
2526 set = single_set (seq);
2527 if (set)
2528 cost += rtx_cost (set, SET);
2529 else
2530 cost++;
2533 return cost;
2536 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2537 static rtx
2538 produce_memory_decl_rtl (tree obj, int *regno)
2540 rtx x;
2542 gcc_assert (obj);
2543 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2545 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2546 x = gen_rtx_SYMBOL_REF (Pmode, name);
2547 SET_SYMBOL_REF_DECL (x, obj);
2548 x = gen_rtx_MEM (DECL_MODE (obj), x);
2549 targetm.encode_section_info (obj, x, true);
2551 else
2553 x = gen_raw_REG (Pmode, (*regno)++);
2554 x = gen_rtx_MEM (DECL_MODE (obj), x);
2557 return x;
2560 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2561 walk_tree. DATA contains the actual fake register number. */
2563 static tree
2564 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2566 tree obj = NULL_TREE;
2567 rtx x = NULL_RTX;
2568 int *regno = (int *) data;
2570 switch (TREE_CODE (*expr_p))
2572 case ADDR_EXPR:
2573 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2574 handled_component_p (*expr_p);
2575 expr_p = &TREE_OPERAND (*expr_p, 0))
2576 continue;
2577 obj = *expr_p;
2578 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2579 x = produce_memory_decl_rtl (obj, regno);
2580 break;
2582 case SSA_NAME:
2583 *ws = 0;
2584 obj = SSA_NAME_VAR (*expr_p);
2585 if (!DECL_RTL_SET_P (obj))
2586 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2587 break;
2589 case VAR_DECL:
2590 case PARM_DECL:
2591 case RESULT_DECL:
2592 *ws = 0;
2593 obj = *expr_p;
2595 if (DECL_RTL_SET_P (obj))
2596 break;
2598 if (DECL_MODE (obj) == BLKmode)
2599 x = produce_memory_decl_rtl (obj, regno);
2600 else
2601 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2603 break;
2605 default:
2606 break;
2609 if (x)
2611 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2612 SET_DECL_RTL (obj, x);
2615 return NULL_TREE;
2618 /* Determines cost of the computation of EXPR. */
2620 static unsigned
2621 computation_cost (tree expr)
2623 rtx seq, rslt;
2624 tree type = TREE_TYPE (expr);
2625 unsigned cost;
2626 /* Avoid using hard regs in ways which may be unsupported. */
2627 int regno = LAST_VIRTUAL_REGISTER + 1;
2629 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2630 start_sequence ();
2631 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2632 seq = get_insns ();
2633 end_sequence ();
2635 cost = seq_cost (seq);
2636 if (MEM_P (rslt))
2637 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2639 return cost;
2642 /* Returns variable containing the value of candidate CAND at statement AT. */
2644 static tree
2645 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2647 if (stmt_after_increment (loop, cand, stmt))
2648 return cand->var_after;
2649 else
2650 return cand->var_before;
2653 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2654 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2657 tree_int_cst_sign_bit (const_tree t)
2659 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2660 unsigned HOST_WIDE_INT w;
2662 if (bitno < HOST_BITS_PER_WIDE_INT)
2663 w = TREE_INT_CST_LOW (t);
2664 else
2666 w = TREE_INT_CST_HIGH (t);
2667 bitno -= HOST_BITS_PER_WIDE_INT;
2670 return (w >> bitno) & 1;
2673 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2674 same precision that is at least as wide as the precision of TYPE, stores
2675 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2676 type of A and B. */
2678 static tree
2679 determine_common_wider_type (tree *a, tree *b)
2681 tree wider_type = NULL;
2682 tree suba, subb;
2683 tree atype = TREE_TYPE (*a);
2685 if (CONVERT_EXPR_P (*a))
2687 suba = TREE_OPERAND (*a, 0);
2688 wider_type = TREE_TYPE (suba);
2689 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2690 return atype;
2692 else
2693 return atype;
2695 if (CONVERT_EXPR_P (*b))
2697 subb = TREE_OPERAND (*b, 0);
2698 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2699 return atype;
2701 else
2702 return atype;
2704 *a = suba;
2705 *b = subb;
2706 return wider_type;
2709 /* Determines the expression by that USE is expressed from induction variable
2710 CAND at statement AT in LOOP. The expression is stored in a decomposed
2711 form into AFF. Returns false if USE cannot be expressed using CAND. */
2713 static bool
2714 get_computation_aff (struct loop *loop,
2715 struct iv_use *use, struct iv_cand *cand, tree at,
2716 struct affine_tree_combination *aff)
2718 tree ubase = use->iv->base;
2719 tree ustep = use->iv->step;
2720 tree cbase = cand->iv->base;
2721 tree cstep = cand->iv->step, cstep_common;
2722 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2723 tree common_type, var;
2724 tree uutype;
2725 aff_tree cbase_aff, var_aff;
2726 double_int rat;
2728 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2730 /* We do not have a precision to express the values of use. */
2731 return false;
2734 var = var_at_stmt (loop, cand, at);
2735 uutype = unsigned_type_for (utype);
2737 /* If the conversion is not noop, perform it. */
2738 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2740 cstep = fold_convert (uutype, cstep);
2741 cbase = fold_convert (uutype, cbase);
2742 var = fold_convert (uutype, var);
2745 if (!constant_multiple_of (ustep, cstep, &rat))
2746 return false;
2748 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2749 type, we achieve better folding by computing their difference in this
2750 wider type, and cast the result to UUTYPE. We do not need to worry about
2751 overflows, as all the arithmetics will in the end be performed in UUTYPE
2752 anyway. */
2753 common_type = determine_common_wider_type (&ubase, &cbase);
2755 /* use = ubase - ratio * cbase + ratio * var. */
2756 tree_to_aff_combination (ubase, common_type, aff);
2757 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2758 tree_to_aff_combination (var, uutype, &var_aff);
2760 /* We need to shift the value if we are after the increment. */
2761 if (stmt_after_increment (loop, cand, at))
2763 aff_tree cstep_aff;
2765 if (common_type != uutype)
2766 cstep_common = fold_convert (common_type, cstep);
2767 else
2768 cstep_common = cstep;
2770 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2771 aff_combination_add (&cbase_aff, &cstep_aff);
2774 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2775 aff_combination_add (aff, &cbase_aff);
2776 if (common_type != uutype)
2777 aff_combination_convert (aff, uutype);
2779 aff_combination_scale (&var_aff, rat);
2780 aff_combination_add (aff, &var_aff);
2782 return true;
2785 /* Determines the expression by that USE is expressed from induction variable
2786 CAND at statement AT in LOOP. The computation is unshared. */
2788 static tree
2789 get_computation_at (struct loop *loop,
2790 struct iv_use *use, struct iv_cand *cand, tree at)
2792 aff_tree aff;
2793 tree type = TREE_TYPE (use->iv->base);
2795 if (!get_computation_aff (loop, use, cand, at, &aff))
2796 return NULL_TREE;
2797 unshare_aff_combination (&aff);
2798 return fold_convert (type, aff_combination_to_tree (&aff));
2801 /* Determines the expression by that USE is expressed from induction variable
2802 CAND in LOOP. The computation is unshared. */
2804 static tree
2805 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2807 return get_computation_at (loop, use, cand, use->stmt);
2810 /* Returns cost of addition in MODE. */
2812 static unsigned
2813 add_cost (enum machine_mode mode)
2815 static unsigned costs[NUM_MACHINE_MODES];
2816 rtx seq;
2817 unsigned cost;
2819 if (costs[mode])
2820 return costs[mode];
2822 start_sequence ();
2823 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2824 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2825 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2826 NULL_RTX);
2827 seq = get_insns ();
2828 end_sequence ();
2830 cost = seq_cost (seq);
2831 if (!cost)
2832 cost = 1;
2834 costs[mode] = cost;
2836 if (dump_file && (dump_flags & TDF_DETAILS))
2837 fprintf (dump_file, "Addition in %s costs %d\n",
2838 GET_MODE_NAME (mode), cost);
2839 return cost;
2842 /* Entry in a hashtable of already known costs for multiplication. */
2843 struct mbc_entry
2845 HOST_WIDE_INT cst; /* The constant to multiply by. */
2846 enum machine_mode mode; /* In mode. */
2847 unsigned cost; /* The cost. */
2850 /* Counts hash value for the ENTRY. */
2852 static hashval_t
2853 mbc_entry_hash (const void *entry)
2855 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2857 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2860 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2862 static int
2863 mbc_entry_eq (const void *entry1, const void *entry2)
2865 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2866 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2868 return (e1->mode == e2->mode
2869 && e1->cst == e2->cst);
2872 /* Returns cost of multiplication by constant CST in MODE. */
2874 unsigned
2875 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2877 static htab_t costs;
2878 struct mbc_entry **cached, act;
2879 rtx seq;
2880 unsigned cost;
2882 if (!costs)
2883 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2885 act.mode = mode;
2886 act.cst = cst;
2887 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2888 if (*cached)
2889 return (*cached)->cost;
2891 *cached = XNEW (struct mbc_entry);
2892 (*cached)->mode = mode;
2893 (*cached)->cst = cst;
2895 start_sequence ();
2896 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2897 gen_int_mode (cst, mode), NULL_RTX, 0);
2898 seq = get_insns ();
2899 end_sequence ();
2901 cost = seq_cost (seq);
2903 if (dump_file && (dump_flags & TDF_DETAILS))
2904 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2905 (int) cst, GET_MODE_NAME (mode), cost);
2907 (*cached)->cost = cost;
2909 return cost;
2912 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2913 validity for a memory reference accessing memory of mode MODE. */
2915 bool
2916 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2918 #define MAX_RATIO 128
2919 static sbitmap valid_mult[MAX_MACHINE_MODE];
2921 if (!valid_mult[mode])
2923 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2924 rtx addr;
2925 HOST_WIDE_INT i;
2927 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2928 sbitmap_zero (valid_mult[mode]);
2929 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2930 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2932 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2933 if (memory_address_p (mode, addr))
2934 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2937 if (dump_file && (dump_flags & TDF_DETAILS))
2939 fprintf (dump_file, " allowed multipliers:");
2940 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2941 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2942 fprintf (dump_file, " %d", (int) i);
2943 fprintf (dump_file, "\n");
2944 fprintf (dump_file, "\n");
2948 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2949 return false;
2951 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2954 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2955 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2956 variable is omitted. Compute the cost for a memory reference that accesses
2957 a memory location of mode MEM_MODE.
2959 TODO -- there must be some better way. This all is quite crude. */
2961 static comp_cost
2962 get_address_cost (bool symbol_present, bool var_present,
2963 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2964 enum machine_mode mem_mode)
2966 static bool initialized[MAX_MACHINE_MODE];
2967 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2968 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2969 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2970 unsigned cost, acost, complexity;
2971 bool offset_p, ratio_p;
2972 HOST_WIDE_INT s_offset;
2973 unsigned HOST_WIDE_INT mask;
2974 unsigned bits;
2976 if (!initialized[mem_mode])
2978 HOST_WIDE_INT i;
2979 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2980 int old_cse_not_expected;
2981 unsigned sym_p, var_p, off_p, rat_p, add_c;
2982 rtx seq, addr, base;
2983 rtx reg0, reg1;
2985 initialized[mem_mode] = true;
2987 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2989 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2990 for (i = start; i <= 1 << 20; i <<= 1)
2992 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2993 if (!memory_address_p (mem_mode, addr))
2994 break;
2996 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2997 off[mem_mode] = max_offset[mem_mode];
2999 for (i = start; i <= 1 << 20; i <<= 1)
3001 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3002 if (!memory_address_p (mem_mode, addr))
3003 break;
3005 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3007 if (dump_file && (dump_flags & TDF_DETAILS))
3009 fprintf (dump_file, "get_address_cost:\n");
3010 fprintf (dump_file, " min offset %s %d\n",
3011 GET_MODE_NAME (mem_mode),
3012 (int) min_offset[mem_mode]);
3013 fprintf (dump_file, " max offset %s %d\n",
3014 GET_MODE_NAME (mem_mode),
3015 (int) max_offset[mem_mode]);
3018 rat[mem_mode] = 1;
3019 for (i = 2; i <= MAX_RATIO; i++)
3020 if (multiplier_allowed_in_address_p (i, mem_mode))
3022 rat[mem_mode] = i;
3023 break;
3026 /* Compute the cost of various addressing modes. */
3027 acost = 0;
3028 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3029 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3031 for (i = 0; i < 16; i++)
3033 sym_p = i & 1;
3034 var_p = (i >> 1) & 1;
3035 off_p = (i >> 2) & 1;
3036 rat_p = (i >> 3) & 1;
3038 addr = reg0;
3039 if (rat_p)
3040 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3041 gen_int_mode (rat[mem_mode], Pmode));
3043 if (var_p)
3044 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3046 if (sym_p)
3048 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3049 /* ??? We can run into trouble with some backends by presenting
3050 it with symbols which havn't been properly passed through
3051 targetm.encode_section_info. By setting the local bit, we
3052 enhance the probability of things working. */
3053 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3055 if (off_p)
3056 base = gen_rtx_fmt_e (CONST, Pmode,
3057 gen_rtx_fmt_ee (PLUS, Pmode,
3058 base,
3059 gen_int_mode (off[mem_mode],
3060 Pmode)));
3062 else if (off_p)
3063 base = gen_int_mode (off[mem_mode], Pmode);
3064 else
3065 base = NULL_RTX;
3067 if (base)
3068 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3070 start_sequence ();
3071 /* To avoid splitting addressing modes, pretend that no cse will
3072 follow. */
3073 old_cse_not_expected = cse_not_expected;
3074 cse_not_expected = true;
3075 addr = memory_address (mem_mode, addr);
3076 cse_not_expected = old_cse_not_expected;
3077 seq = get_insns ();
3078 end_sequence ();
3080 acost = seq_cost (seq);
3081 acost += address_cost (addr, mem_mode);
3083 if (!acost)
3084 acost = 1;
3085 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3088 /* On some targets, it is quite expensive to load symbol to a register,
3089 which makes addresses that contain symbols look much more expensive.
3090 However, the symbol will have to be loaded in any case before the
3091 loop (and quite likely we have it in register already), so it does not
3092 make much sense to penalize them too heavily. So make some final
3093 tweaks for the SYMBOL_PRESENT modes:
3095 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3096 var is cheaper, use this mode with small penalty.
3097 If VAR_PRESENT is true, try whether the mode with
3098 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3099 if this is the case, use it. */
3100 add_c = add_cost (Pmode);
3101 for (i = 0; i < 8; i++)
3103 var_p = i & 1;
3104 off_p = (i >> 1) & 1;
3105 rat_p = (i >> 2) & 1;
3107 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3108 if (var_p)
3109 acost += add_c;
3111 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3112 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3115 if (dump_file && (dump_flags & TDF_DETAILS))
3117 fprintf (dump_file, "Address costs:\n");
3119 for (i = 0; i < 16; i++)
3121 sym_p = i & 1;
3122 var_p = (i >> 1) & 1;
3123 off_p = (i >> 2) & 1;
3124 rat_p = (i >> 3) & 1;
3126 fprintf (dump_file, " ");
3127 if (sym_p)
3128 fprintf (dump_file, "sym + ");
3129 if (var_p)
3130 fprintf (dump_file, "var + ");
3131 if (off_p)
3132 fprintf (dump_file, "cst + ");
3133 if (rat_p)
3134 fprintf (dump_file, "rat * ");
3136 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3137 fprintf (dump_file, "index costs %d\n", acost);
3139 fprintf (dump_file, "\n");
3143 bits = GET_MODE_BITSIZE (Pmode);
3144 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3145 offset &= mask;
3146 if ((offset >> (bits - 1) & 1))
3147 offset |= ~mask;
3148 s_offset = offset;
3150 cost = 0;
3151 offset_p = (s_offset != 0
3152 && min_offset[mem_mode] <= s_offset
3153 && s_offset <= max_offset[mem_mode]);
3154 ratio_p = (ratio != 1
3155 && multiplier_allowed_in_address_p (ratio, mem_mode));
3157 if (ratio != 1 && !ratio_p)
3158 cost += multiply_by_cost (ratio, Pmode);
3160 if (s_offset && !offset_p && !symbol_present)
3161 cost += add_cost (Pmode);
3163 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3164 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3165 return new_cost (cost + acost, complexity);
3168 /* Estimates cost of forcing expression EXPR into a variable. */
3170 static comp_cost
3171 force_expr_to_var_cost (tree expr)
3173 static bool costs_initialized = false;
3174 static unsigned integer_cost;
3175 static unsigned symbol_cost;
3176 static unsigned address_cost;
3177 tree op0, op1;
3178 comp_cost cost0, cost1, cost;
3179 enum machine_mode mode;
3181 if (!costs_initialized)
3183 tree type = build_pointer_type (integer_type_node);
3184 tree var, addr;
3185 rtx x;
3187 var = create_tmp_var_raw (integer_type_node, "test_var");
3188 TREE_STATIC (var) = 1;
3189 x = produce_memory_decl_rtl (var, NULL);
3190 SET_DECL_RTL (var, x);
3192 integer_cost = computation_cost (build_int_cst (integer_type_node,
3193 2000));
3195 addr = build1 (ADDR_EXPR, type, var);
3196 symbol_cost = computation_cost (addr) + 1;
3198 address_cost
3199 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3200 addr,
3201 build_int_cst (sizetype, 2000))) + 1;
3202 if (dump_file && (dump_flags & TDF_DETAILS))
3204 fprintf (dump_file, "force_expr_to_var_cost:\n");
3205 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3206 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3207 fprintf (dump_file, " address %d\n", (int) address_cost);
3208 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3209 fprintf (dump_file, "\n");
3212 costs_initialized = true;
3215 STRIP_NOPS (expr);
3217 if (SSA_VAR_P (expr))
3218 return zero_cost;
3220 if (is_gimple_min_invariant (expr))
3222 if (TREE_CODE (expr) == INTEGER_CST)
3223 return new_cost (integer_cost, 0);
3225 if (TREE_CODE (expr) == ADDR_EXPR)
3227 tree obj = TREE_OPERAND (expr, 0);
3229 if (TREE_CODE (obj) == VAR_DECL
3230 || TREE_CODE (obj) == PARM_DECL
3231 || TREE_CODE (obj) == RESULT_DECL)
3232 return new_cost (symbol_cost, 0);
3235 return new_cost (address_cost, 0);
3238 switch (TREE_CODE (expr))
3240 case POINTER_PLUS_EXPR:
3241 case PLUS_EXPR:
3242 case MINUS_EXPR:
3243 case MULT_EXPR:
3244 op0 = TREE_OPERAND (expr, 0);
3245 op1 = TREE_OPERAND (expr, 1);
3246 STRIP_NOPS (op0);
3247 STRIP_NOPS (op1);
3249 if (is_gimple_val (op0))
3250 cost0 = zero_cost;
3251 else
3252 cost0 = force_expr_to_var_cost (op0);
3254 if (is_gimple_val (op1))
3255 cost1 = zero_cost;
3256 else
3257 cost1 = force_expr_to_var_cost (op1);
3259 break;
3261 default:
3262 /* Just an arbitrary value, FIXME. */
3263 return new_cost (target_spill_cost, 0);
3266 mode = TYPE_MODE (TREE_TYPE (expr));
3267 switch (TREE_CODE (expr))
3269 case POINTER_PLUS_EXPR:
3270 case PLUS_EXPR:
3271 case MINUS_EXPR:
3272 cost = new_cost (add_cost (mode), 0);
3273 break;
3275 case MULT_EXPR:
3276 if (cst_and_fits_in_hwi (op0))
3277 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3278 else if (cst_and_fits_in_hwi (op1))
3279 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3280 else
3281 return new_cost (target_spill_cost, 0);
3282 break;
3284 default:
3285 gcc_unreachable ();
3288 cost = add_costs (cost, cost0);
3289 cost = add_costs (cost, cost1);
3291 /* Bound the cost by target_spill_cost. The parts of complicated
3292 computations often are either loop invariant or at least can
3293 be shared between several iv uses, so letting this grow without
3294 limits would not give reasonable results. */
3295 if (cost.cost > target_spill_cost)
3296 cost.cost = target_spill_cost;
3298 return cost;
3301 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3302 invariants the computation depends on. */
3304 static comp_cost
3305 force_var_cost (struct ivopts_data *data,
3306 tree expr, bitmap *depends_on)
3308 if (depends_on)
3310 fd_ivopts_data = data;
3311 walk_tree (&expr, find_depends, depends_on, NULL);
3314 return force_expr_to_var_cost (expr);
3317 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3318 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3319 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3320 invariants the computation depends on. */
3322 static comp_cost
3323 split_address_cost (struct ivopts_data *data,
3324 tree addr, bool *symbol_present, bool *var_present,
3325 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3327 tree core;
3328 HOST_WIDE_INT bitsize;
3329 HOST_WIDE_INT bitpos;
3330 tree toffset;
3331 enum machine_mode mode;
3332 int unsignedp, volatilep;
3334 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3335 &unsignedp, &volatilep, false);
3337 if (toffset != 0
3338 || bitpos % BITS_PER_UNIT != 0
3339 || TREE_CODE (core) != VAR_DECL)
3341 *symbol_present = false;
3342 *var_present = true;
3343 fd_ivopts_data = data;
3344 walk_tree (&addr, find_depends, depends_on, NULL);
3345 return new_cost (target_spill_cost, 0);
3348 *offset += bitpos / BITS_PER_UNIT;
3349 if (TREE_STATIC (core)
3350 || DECL_EXTERNAL (core))
3352 *symbol_present = true;
3353 *var_present = false;
3354 return zero_cost;
3357 *symbol_present = false;
3358 *var_present = true;
3359 return zero_cost;
3362 /* Estimates cost of expressing difference of addresses E1 - E2 as
3363 var + symbol + offset. The value of offset is added to OFFSET,
3364 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3365 part is missing. DEPENDS_ON is a set of the invariants the computation
3366 depends on. */
3368 static comp_cost
3369 ptr_difference_cost (struct ivopts_data *data,
3370 tree e1, tree e2, bool *symbol_present, bool *var_present,
3371 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3373 HOST_WIDE_INT diff = 0;
3374 comp_cost cost;
3376 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3378 if (ptr_difference_const (e1, e2, &diff))
3380 *offset += diff;
3381 *symbol_present = false;
3382 *var_present = false;
3383 return zero_cost;
3386 if (integer_zerop (e2))
3387 return split_address_cost (data, TREE_OPERAND (e1, 0),
3388 symbol_present, var_present, offset, depends_on);
3390 *symbol_present = false;
3391 *var_present = true;
3393 cost = force_var_cost (data, e1, depends_on);
3394 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3395 cost.cost += add_cost (Pmode);
3397 return cost;
3400 /* Estimates cost of expressing difference E1 - E2 as
3401 var + symbol + offset. The value of offset is added to OFFSET,
3402 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3403 part is missing. DEPENDS_ON is a set of the invariants the computation
3404 depends on. */
3406 static comp_cost
3407 difference_cost (struct ivopts_data *data,
3408 tree e1, tree e2, bool *symbol_present, bool *var_present,
3409 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3411 comp_cost cost;
3412 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3413 unsigned HOST_WIDE_INT off1, off2;
3415 e1 = strip_offset (e1, &off1);
3416 e2 = strip_offset (e2, &off2);
3417 *offset += off1 - off2;
3419 STRIP_NOPS (e1);
3420 STRIP_NOPS (e2);
3422 if (TREE_CODE (e1) == ADDR_EXPR)
3423 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3424 depends_on);
3425 *symbol_present = false;
3427 if (operand_equal_p (e1, e2, 0))
3429 *var_present = false;
3430 return zero_cost;
3432 *var_present = true;
3433 if (integer_zerop (e2))
3434 return force_var_cost (data, e1, depends_on);
3436 if (integer_zerop (e1))
3438 cost = force_var_cost (data, e2, depends_on);
3439 cost.cost += multiply_by_cost (-1, mode);
3441 return cost;
3444 cost = force_var_cost (data, e1, depends_on);
3445 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3446 cost.cost += add_cost (mode);
3448 return cost;
3451 /* Determines the cost of the computation by that USE is expressed
3452 from induction variable CAND. If ADDRESS_P is true, we just need
3453 to create an address from it, otherwise we want to get it into
3454 register. A set of invariants we depend on is stored in
3455 DEPENDS_ON. AT is the statement at that the value is computed. */
3457 static comp_cost
3458 get_computation_cost_at (struct ivopts_data *data,
3459 struct iv_use *use, struct iv_cand *cand,
3460 bool address_p, bitmap *depends_on, tree at)
3462 tree ubase = use->iv->base, ustep = use->iv->step;
3463 tree cbase, cstep;
3464 tree utype = TREE_TYPE (ubase), ctype;
3465 unsigned HOST_WIDE_INT cstepi, offset = 0;
3466 HOST_WIDE_INT ratio, aratio;
3467 bool var_present, symbol_present;
3468 comp_cost cost;
3469 unsigned n_sums;
3470 double_int rat;
3472 *depends_on = NULL;
3474 /* Only consider real candidates. */
3475 if (!cand->iv)
3476 return infinite_cost;
3478 cbase = cand->iv->base;
3479 cstep = cand->iv->step;
3480 ctype = TREE_TYPE (cbase);
3482 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3484 /* We do not have a precision to express the values of use. */
3485 return infinite_cost;
3488 if (address_p)
3490 /* Do not try to express address of an object with computation based
3491 on address of a different object. This may cause problems in rtl
3492 level alias analysis (that does not expect this to be happening,
3493 as this is illegal in C), and would be unlikely to be useful
3494 anyway. */
3495 if (use->iv->base_object
3496 && cand->iv->base_object
3497 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3498 return infinite_cost;
3501 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3503 /* TODO -- add direct handling of this case. */
3504 goto fallback;
3507 /* CSTEPI is removed from the offset in case statement is after the
3508 increment. If the step is not constant, we use zero instead.
3509 This is a bit imprecise (there is the extra addition), but
3510 redundancy elimination is likely to transform the code so that
3511 it uses value of the variable before increment anyway,
3512 so it is not that much unrealistic. */
3513 if (cst_and_fits_in_hwi (cstep))
3514 cstepi = int_cst_value (cstep);
3515 else
3516 cstepi = 0;
3518 if (!constant_multiple_of (ustep, cstep, &rat))
3519 return infinite_cost;
3521 if (double_int_fits_in_shwi_p (rat))
3522 ratio = double_int_to_shwi (rat);
3523 else
3524 return infinite_cost;
3526 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3527 or ratio == 1, it is better to handle this like
3529 ubase - ratio * cbase + ratio * var
3531 (also holds in the case ratio == -1, TODO. */
3533 if (cst_and_fits_in_hwi (cbase))
3535 offset = - ratio * int_cst_value (cbase);
3536 cost = difference_cost (data,
3537 ubase, build_int_cst (utype, 0),
3538 &symbol_present, &var_present, &offset,
3539 depends_on);
3541 else if (ratio == 1)
3543 cost = difference_cost (data,
3544 ubase, cbase,
3545 &symbol_present, &var_present, &offset,
3546 depends_on);
3548 else
3550 cost = force_var_cost (data, cbase, depends_on);
3551 cost.cost += add_cost (TYPE_MODE (ctype));
3552 cost = add_costs (cost,
3553 difference_cost (data,
3554 ubase, build_int_cst (utype, 0),
3555 &symbol_present, &var_present,
3556 &offset, depends_on));
3559 /* If we are after the increment, the value of the candidate is higher by
3560 one iteration. */
3561 if (stmt_after_increment (data->current_loop, cand, at))
3562 offset -= ratio * cstepi;
3564 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3565 (symbol/var/const parts may be omitted). If we are looking for an address,
3566 find the cost of addressing this. */
3567 if (address_p)
3568 return add_costs (cost, get_address_cost (symbol_present, var_present,
3569 offset, ratio,
3570 TYPE_MODE (TREE_TYPE (*use->op_p))));
3572 /* Otherwise estimate the costs for computing the expression. */
3573 aratio = ratio > 0 ? ratio : -ratio;
3574 if (!symbol_present && !var_present && !offset)
3576 if (ratio != 1)
3577 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3579 return cost;
3582 if (aratio != 1)
3583 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3585 n_sums = 1;
3586 if (var_present
3587 /* Symbol + offset should be compile-time computable. */
3588 && (symbol_present || offset))
3589 n_sums++;
3591 /* Having offset does not affect runtime cost in case it is added to
3592 symbol, but it increases complexity. */
3593 if (offset)
3594 cost.complexity++;
3596 cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3597 return cost;
3599 fallback:
3601 /* Just get the expression, expand it and measure the cost. */
3602 tree comp = get_computation_at (data->current_loop, use, cand, at);
3604 if (!comp)
3605 return infinite_cost;
3607 if (address_p)
3608 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3610 return new_cost (computation_cost (comp), 0);
3614 /* Determines the cost of the computation by that USE is expressed
3615 from induction variable CAND. If ADDRESS_P is true, we just need
3616 to create an address from it, otherwise we want to get it into
3617 register. A set of invariants we depend on is stored in
3618 DEPENDS_ON. */
3620 static comp_cost
3621 get_computation_cost (struct ivopts_data *data,
3622 struct iv_use *use, struct iv_cand *cand,
3623 bool address_p, bitmap *depends_on)
3625 return get_computation_cost_at (data,
3626 use, cand, address_p, depends_on, use->stmt);
3629 /* Determines cost of basing replacement of USE on CAND in a generic
3630 expression. */
3632 static bool
3633 determine_use_iv_cost_generic (struct ivopts_data *data,
3634 struct iv_use *use, struct iv_cand *cand)
3636 bitmap depends_on;
3637 comp_cost cost;
3639 /* The simple case first -- if we need to express value of the preserved
3640 original biv, the cost is 0. This also prevents us from counting the
3641 cost of increment twice -- once at this use and once in the cost of
3642 the candidate. */
3643 if (cand->pos == IP_ORIGINAL
3644 && cand->incremented_at == use->stmt)
3646 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3647 return true;
3650 cost = get_computation_cost (data, use, cand, false, &depends_on);
3651 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3653 return !infinite_cost_p (cost);
3656 /* Determines cost of basing replacement of USE on CAND in an address. */
3658 static bool
3659 determine_use_iv_cost_address (struct ivopts_data *data,
3660 struct iv_use *use, struct iv_cand *cand)
3662 bitmap depends_on;
3663 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3665 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3667 return !infinite_cost_p (cost);
3670 /* Computes value of candidate CAND at position AT in iteration NITER, and
3671 stores it to VAL. */
3673 static void
3674 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3675 aff_tree *val)
3677 aff_tree step, delta, nit;
3678 struct iv *iv = cand->iv;
3679 tree type = TREE_TYPE (iv->base);
3680 tree steptype = type;
3681 if (POINTER_TYPE_P (type))
3682 steptype = sizetype;
3684 tree_to_aff_combination (iv->step, steptype, &step);
3685 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3686 aff_combination_convert (&nit, steptype);
3687 aff_combination_mult (&nit, &step, &delta);
3688 if (stmt_after_increment (loop, cand, at))
3689 aff_combination_add (&delta, &step);
3691 tree_to_aff_combination (iv->base, type, val);
3692 aff_combination_add (val, &delta);
3695 /* Returns period of induction variable iv. */
3697 static tree
3698 iv_period (struct iv *iv)
3700 tree step = iv->step, period, type;
3701 tree pow2div;
3703 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3705 /* Period of the iv is gcd (step, type range). Since type range is power
3706 of two, it suffices to determine the maximum power of two that divides
3707 step. */
3708 pow2div = num_ending_zeros (step);
3709 type = unsigned_type_for (TREE_TYPE (step));
3711 period = build_low_bits_mask (type,
3712 (TYPE_PRECISION (type)
3713 - tree_low_cst (pow2div, 1)));
3715 return period;
3718 /* Returns the comparison operator used when eliminating the iv USE. */
3720 static enum tree_code
3721 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3723 struct loop *loop = data->current_loop;
3724 basic_block ex_bb;
3725 edge exit;
3727 ex_bb = bb_for_stmt (use->stmt);
3728 exit = EDGE_SUCC (ex_bb, 0);
3729 if (flow_bb_inside_loop_p (loop, exit->dest))
3730 exit = EDGE_SUCC (ex_bb, 1);
3732 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3735 /* Check whether it is possible to express the condition in USE by comparison
3736 of candidate CAND. If so, store the value compared with to BOUND. */
3738 static bool
3739 may_eliminate_iv (struct ivopts_data *data,
3740 struct iv_use *use, struct iv_cand *cand, tree *bound)
3742 basic_block ex_bb;
3743 edge exit;
3744 tree nit, period;
3745 struct loop *loop = data->current_loop;
3746 aff_tree bnd;
3747 double_int period_value, max_niter;
3749 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3750 return false;
3752 /* For now works only for exits that dominate the loop latch. TODO -- extend
3753 for other conditions inside loop body. */
3754 ex_bb = bb_for_stmt (use->stmt);
3755 if (use->stmt != last_stmt (ex_bb)
3756 || TREE_CODE (use->stmt) != COND_EXPR)
3757 return false;
3758 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3759 return false;
3761 exit = EDGE_SUCC (ex_bb, 0);
3762 if (flow_bb_inside_loop_p (loop, exit->dest))
3763 exit = EDGE_SUCC (ex_bb, 1);
3764 if (flow_bb_inside_loop_p (loop, exit->dest))
3765 return false;
3767 nit = niter_for_exit (data, exit);
3768 if (!nit)
3769 return false;
3771 /* Determine whether we may use the variable to test whether niter iterations
3772 elapsed. This is the case iff the period of the induction variable is
3773 greater than the number of iterations. */
3774 period = iv_period (cand->iv);
3775 if (!period)
3776 return false;
3778 /* Compare the period with the estimate on the number of iterations of the
3779 loop. */
3780 if (!estimated_loop_iterations (loop, true, &max_niter))
3781 return false;
3782 period_value = tree_to_double_int (period);
3783 if (double_int_ucmp (period_value, max_niter) <= 0)
3784 return false;
3786 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3787 *bound = aff_combination_to_tree (&bnd);
3788 return true;
3791 /* Determines cost of basing replacement of USE on CAND in a condition. */
3793 static bool
3794 determine_use_iv_cost_condition (struct ivopts_data *data,
3795 struct iv_use *use, struct iv_cand *cand)
3797 tree bound = NULL_TREE;
3798 struct iv *cmp_iv;
3799 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3800 comp_cost elim_cost, express_cost, cost;
3801 bool ok;
3803 /* Only consider real candidates. */
3804 if (!cand->iv)
3806 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3807 return false;
3810 /* Try iv elimination. */
3811 if (may_eliminate_iv (data, use, cand, &bound))
3813 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3814 /* The bound is a loop invariant, so it will be only computed
3815 once. */
3816 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3818 else
3819 elim_cost = infinite_cost;
3821 /* Try expressing the original giv. If it is compared with an invariant,
3822 note that we cannot get rid of it. */
3823 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3824 gcc_assert (ok);
3826 express_cost = get_computation_cost (data, use, cand, false,
3827 &depends_on_express);
3828 fd_ivopts_data = data;
3829 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3831 /* Choose the better approach. */
3832 if (compare_costs (elim_cost, express_cost) < 0)
3834 cost = elim_cost;
3835 depends_on = depends_on_elim;
3836 depends_on_elim = NULL;
3838 else
3840 cost = express_cost;
3841 depends_on = depends_on_express;
3842 depends_on_express = NULL;
3843 bound = NULL_TREE;
3846 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3848 if (depends_on_elim)
3849 BITMAP_FREE (depends_on_elim);
3850 if (depends_on_express)
3851 BITMAP_FREE (depends_on_express);
3853 return !infinite_cost_p (cost);
3856 /* Determines cost of basing replacement of USE on CAND. Returns false
3857 if USE cannot be based on CAND. */
3859 static bool
3860 determine_use_iv_cost (struct ivopts_data *data,
3861 struct iv_use *use, struct iv_cand *cand)
3863 switch (use->type)
3865 case USE_NONLINEAR_EXPR:
3866 return determine_use_iv_cost_generic (data, use, cand);
3868 case USE_ADDRESS:
3869 return determine_use_iv_cost_address (data, use, cand);
3871 case USE_COMPARE:
3872 return determine_use_iv_cost_condition (data, use, cand);
3874 default:
3875 gcc_unreachable ();
3879 /* Determines costs of basing the use of the iv on an iv candidate. */
3881 static void
3882 determine_use_iv_costs (struct ivopts_data *data)
3884 unsigned i, j;
3885 struct iv_use *use;
3886 struct iv_cand *cand;
3887 bitmap to_clear = BITMAP_ALLOC (NULL);
3889 alloc_use_cost_map (data);
3891 for (i = 0; i < n_iv_uses (data); i++)
3893 use = iv_use (data, i);
3895 if (data->consider_all_candidates)
3897 for (j = 0; j < n_iv_cands (data); j++)
3899 cand = iv_cand (data, j);
3900 determine_use_iv_cost (data, use, cand);
3903 else
3905 bitmap_iterator bi;
3907 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3909 cand = iv_cand (data, j);
3910 if (!determine_use_iv_cost (data, use, cand))
3911 bitmap_set_bit (to_clear, j);
3914 /* Remove the candidates for that the cost is infinite from
3915 the list of related candidates. */
3916 bitmap_and_compl_into (use->related_cands, to_clear);
3917 bitmap_clear (to_clear);
3921 BITMAP_FREE (to_clear);
3923 if (dump_file && (dump_flags & TDF_DETAILS))
3925 fprintf (dump_file, "Use-candidate costs:\n");
3927 for (i = 0; i < n_iv_uses (data); i++)
3929 use = iv_use (data, i);
3931 fprintf (dump_file, "Use %d:\n", i);
3932 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
3933 for (j = 0; j < use->n_map_members; j++)
3935 if (!use->cost_map[j].cand
3936 || infinite_cost_p (use->cost_map[j].cost))
3937 continue;
3939 fprintf (dump_file, " %d\t%d\t%d\t",
3940 use->cost_map[j].cand->id,
3941 use->cost_map[j].cost.cost,
3942 use->cost_map[j].cost.complexity);
3943 if (use->cost_map[j].depends_on)
3944 bitmap_print (dump_file,
3945 use->cost_map[j].depends_on, "","");
3946 fprintf (dump_file, "\n");
3949 fprintf (dump_file, "\n");
3951 fprintf (dump_file, "\n");
3955 /* Determines cost of the candidate CAND. */
3957 static void
3958 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3960 comp_cost cost_base;
3961 unsigned cost, cost_step;
3962 tree base;
3964 if (!cand->iv)
3966 cand->cost = 0;
3967 return;
3970 /* There are two costs associated with the candidate -- its increment
3971 and its initialization. The second is almost negligible for any loop
3972 that rolls enough, so we take it just very little into account. */
3974 base = cand->iv->base;
3975 cost_base = force_var_cost (data, base, NULL);
3976 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3978 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
3980 /* Prefer the original ivs unless we may gain something by replacing it.
3981 The reason is to makee debugging simpler; so this is not relevant for
3982 artificial ivs created by other optimization passes. */
3983 if (cand->pos != IP_ORIGINAL
3984 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3985 cost++;
3987 /* Prefer not to insert statements into latch unless there are some
3988 already (so that we do not create unnecessary jumps). */
3989 if (cand->pos == IP_END
3990 && empty_block_p (ip_end_pos (data->current_loop)))
3991 cost++;
3993 cand->cost = cost;
3996 /* Determines costs of computation of the candidates. */
3998 static void
3999 determine_iv_costs (struct ivopts_data *data)
4001 unsigned i;
4003 if (dump_file && (dump_flags & TDF_DETAILS))
4005 fprintf (dump_file, "Candidate costs:\n");
4006 fprintf (dump_file, " cand\tcost\n");
4009 for (i = 0; i < n_iv_cands (data); i++)
4011 struct iv_cand *cand = iv_cand (data, i);
4013 determine_iv_cost (data, cand);
4015 if (dump_file && (dump_flags & TDF_DETAILS))
4016 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4019 if (dump_file && (dump_flags & TDF_DETAILS))
4020 fprintf (dump_file, "\n");
4023 /* Calculates cost for having SIZE induction variables. */
4025 static unsigned
4026 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4028 /* We add size to the cost, so that we prefer eliminating ivs
4029 if possible. */
4030 return size + estimate_reg_pressure_cost (size, data->regs_used);
4033 /* For each size of the induction variable set determine the penalty. */
4035 static void
4036 determine_set_costs (struct ivopts_data *data)
4038 unsigned j, n;
4039 tree phi, op;
4040 struct loop *loop = data->current_loop;
4041 bitmap_iterator bi;
4043 /* We use the following model (definitely improvable, especially the
4044 cost function -- TODO):
4046 We estimate the number of registers available (using MD data), name it A.
4048 We estimate the number of registers used by the loop, name it U. This
4049 number is obtained as the number of loop phi nodes (not counting virtual
4050 registers and bivs) + the number of variables from outside of the loop.
4052 We set a reserve R (free regs that are used for temporary computations,
4053 etc.). For now the reserve is a constant 3.
4055 Let I be the number of induction variables.
4057 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4058 make a lot of ivs without a reason).
4059 -- if A - R < U + I <= A, the cost is I * PRES_COST
4060 -- if U + I > A, the cost is I * PRES_COST and
4061 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4063 if (dump_file && (dump_flags & TDF_DETAILS))
4065 fprintf (dump_file, "Global costs:\n");
4066 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4067 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost);
4068 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4071 n = 0;
4072 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4074 op = PHI_RESULT (phi);
4076 if (!is_gimple_reg (op))
4077 continue;
4079 if (get_iv (data, op))
4080 continue;
4082 n++;
4085 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4087 struct version_info *info = ver_info (data, j);
4089 if (info->inv_id && info->has_nonlin_use)
4090 n++;
4093 data->regs_used = n;
4094 if (dump_file && (dump_flags & TDF_DETAILS))
4095 fprintf (dump_file, " regs_used %d\n", n);
4097 if (dump_file && (dump_flags & TDF_DETAILS))
4099 fprintf (dump_file, " cost for size:\n");
4100 fprintf (dump_file, " ivs\tcost\n");
4101 for (j = 0; j <= 2 * target_avail_regs; j++)
4102 fprintf (dump_file, " %d\t%d\n", j,
4103 ivopts_global_cost_for_size (data, j));
4104 fprintf (dump_file, "\n");
4108 /* Returns true if A is a cheaper cost pair than B. */
4110 static bool
4111 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4113 int cmp;
4115 if (!a)
4116 return false;
4118 if (!b)
4119 return true;
4121 cmp = compare_costs (a->cost, b->cost);
4122 if (cmp < 0)
4123 return true;
4125 if (cmp > 0)
4126 return false;
4128 /* In case the costs are the same, prefer the cheaper candidate. */
4129 if (a->cand->cost < b->cand->cost)
4130 return true;
4132 return false;
4135 /* Computes the cost field of IVS structure. */
4137 static void
4138 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4140 comp_cost cost = ivs->cand_use_cost;
4141 cost.cost += ivs->cand_cost;
4142 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4144 ivs->cost = cost;
4147 /* Remove invariants in set INVS to set IVS. */
4149 static void
4150 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4152 bitmap_iterator bi;
4153 unsigned iid;
4155 if (!invs)
4156 return;
4158 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4160 ivs->n_invariant_uses[iid]--;
4161 if (ivs->n_invariant_uses[iid] == 0)
4162 ivs->n_regs--;
4166 /* Set USE not to be expressed by any candidate in IVS. */
4168 static void
4169 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4170 struct iv_use *use)
4172 unsigned uid = use->id, cid;
4173 struct cost_pair *cp;
4175 cp = ivs->cand_for_use[uid];
4176 if (!cp)
4177 return;
4178 cid = cp->cand->id;
4180 ivs->bad_uses++;
4181 ivs->cand_for_use[uid] = NULL;
4182 ivs->n_cand_uses[cid]--;
4184 if (ivs->n_cand_uses[cid] == 0)
4186 bitmap_clear_bit (ivs->cands, cid);
4187 /* Do not count the pseudocandidates. */
4188 if (cp->cand->iv)
4189 ivs->n_regs--;
4190 ivs->n_cands--;
4191 ivs->cand_cost -= cp->cand->cost;
4193 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4196 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4198 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4199 iv_ca_recount_cost (data, ivs);
4202 /* Add invariants in set INVS to set IVS. */
4204 static void
4205 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4207 bitmap_iterator bi;
4208 unsigned iid;
4210 if (!invs)
4211 return;
4213 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4215 ivs->n_invariant_uses[iid]++;
4216 if (ivs->n_invariant_uses[iid] == 1)
4217 ivs->n_regs++;
4221 /* Set cost pair for USE in set IVS to CP. */
4223 static void
4224 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4225 struct iv_use *use, struct cost_pair *cp)
4227 unsigned uid = use->id, cid;
4229 if (ivs->cand_for_use[uid] == cp)
4230 return;
4232 if (ivs->cand_for_use[uid])
4233 iv_ca_set_no_cp (data, ivs, use);
4235 if (cp)
4237 cid = cp->cand->id;
4239 ivs->bad_uses--;
4240 ivs->cand_for_use[uid] = cp;
4241 ivs->n_cand_uses[cid]++;
4242 if (ivs->n_cand_uses[cid] == 1)
4244 bitmap_set_bit (ivs->cands, cid);
4245 /* Do not count the pseudocandidates. */
4246 if (cp->cand->iv)
4247 ivs->n_regs++;
4248 ivs->n_cands++;
4249 ivs->cand_cost += cp->cand->cost;
4251 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4254 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4255 iv_ca_set_add_invariants (ivs, cp->depends_on);
4256 iv_ca_recount_cost (data, ivs);
4260 /* Extend set IVS by expressing USE by some of the candidates in it
4261 if possible. */
4263 static void
4264 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4265 struct iv_use *use)
4267 struct cost_pair *best_cp = NULL, *cp;
4268 bitmap_iterator bi;
4269 unsigned i;
4271 gcc_assert (ivs->upto >= use->id);
4273 if (ivs->upto == use->id)
4275 ivs->upto++;
4276 ivs->bad_uses++;
4279 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4281 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4283 if (cheaper_cost_pair (cp, best_cp))
4284 best_cp = cp;
4287 iv_ca_set_cp (data, ivs, use, best_cp);
4290 /* Get cost for assignment IVS. */
4292 static comp_cost
4293 iv_ca_cost (struct iv_ca *ivs)
4295 return (ivs->bad_uses ? infinite_cost : ivs->cost);
4298 /* Returns true if all dependences of CP are among invariants in IVS. */
4300 static bool
4301 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4303 unsigned i;
4304 bitmap_iterator bi;
4306 if (!cp->depends_on)
4307 return true;
4309 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4311 if (ivs->n_invariant_uses[i] == 0)
4312 return false;
4315 return true;
4318 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4319 it before NEXT_CHANGE. */
4321 static struct iv_ca_delta *
4322 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4323 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4325 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4327 change->use = use;
4328 change->old_cp = old_cp;
4329 change->new_cp = new_cp;
4330 change->next_change = next_change;
4332 return change;
4335 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4336 are rewritten. */
4338 static struct iv_ca_delta *
4339 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4341 struct iv_ca_delta *last;
4343 if (!l2)
4344 return l1;
4346 if (!l1)
4347 return l2;
4349 for (last = l1; last->next_change; last = last->next_change)
4350 continue;
4351 last->next_change = l2;
4353 return l1;
4356 /* Returns candidate by that USE is expressed in IVS. */
4358 static struct cost_pair *
4359 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4361 return ivs->cand_for_use[use->id];
4364 /* Reverse the list of changes DELTA, forming the inverse to it. */
4366 static struct iv_ca_delta *
4367 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4369 struct iv_ca_delta *act, *next, *prev = NULL;
4370 struct cost_pair *tmp;
4372 for (act = delta; act; act = next)
4374 next = act->next_change;
4375 act->next_change = prev;
4376 prev = act;
4378 tmp = act->old_cp;
4379 act->old_cp = act->new_cp;
4380 act->new_cp = tmp;
4383 return prev;
4386 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4387 reverted instead. */
4389 static void
4390 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4391 struct iv_ca_delta *delta, bool forward)
4393 struct cost_pair *from, *to;
4394 struct iv_ca_delta *act;
4396 if (!forward)
4397 delta = iv_ca_delta_reverse (delta);
4399 for (act = delta; act; act = act->next_change)
4401 from = act->old_cp;
4402 to = act->new_cp;
4403 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4404 iv_ca_set_cp (data, ivs, act->use, to);
4407 if (!forward)
4408 iv_ca_delta_reverse (delta);
4411 /* Returns true if CAND is used in IVS. */
4413 static bool
4414 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4416 return ivs->n_cand_uses[cand->id] > 0;
4419 /* Returns number of induction variable candidates in the set IVS. */
4421 static unsigned
4422 iv_ca_n_cands (struct iv_ca *ivs)
4424 return ivs->n_cands;
4427 /* Free the list of changes DELTA. */
4429 static void
4430 iv_ca_delta_free (struct iv_ca_delta **delta)
4432 struct iv_ca_delta *act, *next;
4434 for (act = *delta; act; act = next)
4436 next = act->next_change;
4437 free (act);
4440 *delta = NULL;
4443 /* Allocates new iv candidates assignment. */
4445 static struct iv_ca *
4446 iv_ca_new (struct ivopts_data *data)
4448 struct iv_ca *nw = XNEW (struct iv_ca);
4450 nw->upto = 0;
4451 nw->bad_uses = 0;
4452 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4453 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4454 nw->cands = BITMAP_ALLOC (NULL);
4455 nw->n_cands = 0;
4456 nw->n_regs = 0;
4457 nw->cand_use_cost = zero_cost;
4458 nw->cand_cost = 0;
4459 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4460 nw->cost = zero_cost;
4462 return nw;
4465 /* Free memory occupied by the set IVS. */
4467 static void
4468 iv_ca_free (struct iv_ca **ivs)
4470 free ((*ivs)->cand_for_use);
4471 free ((*ivs)->n_cand_uses);
4472 BITMAP_FREE ((*ivs)->cands);
4473 free ((*ivs)->n_invariant_uses);
4474 free (*ivs);
4475 *ivs = NULL;
4478 /* Dumps IVS to FILE. */
4480 static void
4481 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4483 const char *pref = " invariants ";
4484 unsigned i;
4485 comp_cost cost = iv_ca_cost (ivs);
4487 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4488 bitmap_print (file, ivs->cands, " candidates ","\n");
4490 for (i = 1; i <= data->max_inv_id; i++)
4491 if (ivs->n_invariant_uses[i])
4493 fprintf (file, "%s%d", pref, i);
4494 pref = ", ";
4496 fprintf (file, "\n");
4499 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4500 new set, and store differences in DELTA. Number of induction variables
4501 in the new set is stored to N_IVS. */
4503 static comp_cost
4504 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4505 struct iv_cand *cand, struct iv_ca_delta **delta,
4506 unsigned *n_ivs)
4508 unsigned i;
4509 comp_cost cost;
4510 struct iv_use *use;
4511 struct cost_pair *old_cp, *new_cp;
4513 *delta = NULL;
4514 for (i = 0; i < ivs->upto; i++)
4516 use = iv_use (data, i);
4517 old_cp = iv_ca_cand_for_use (ivs, use);
4519 if (old_cp
4520 && old_cp->cand == cand)
4521 continue;
4523 new_cp = get_use_iv_cost (data, use, cand);
4524 if (!new_cp)
4525 continue;
4527 if (!iv_ca_has_deps (ivs, new_cp))
4528 continue;
4530 if (!cheaper_cost_pair (new_cp, old_cp))
4531 continue;
4533 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4536 iv_ca_delta_commit (data, ivs, *delta, true);
4537 cost = iv_ca_cost (ivs);
4538 if (n_ivs)
4539 *n_ivs = iv_ca_n_cands (ivs);
4540 iv_ca_delta_commit (data, ivs, *delta, false);
4542 return cost;
4545 /* Try narrowing set IVS by removing CAND. Return the cost of
4546 the new set and store the differences in DELTA. */
4548 static comp_cost
4549 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4550 struct iv_cand *cand, struct iv_ca_delta **delta)
4552 unsigned i, ci;
4553 struct iv_use *use;
4554 struct cost_pair *old_cp, *new_cp, *cp;
4555 bitmap_iterator bi;
4556 struct iv_cand *cnd;
4557 comp_cost cost;
4559 *delta = NULL;
4560 for (i = 0; i < n_iv_uses (data); i++)
4562 use = iv_use (data, i);
4564 old_cp = iv_ca_cand_for_use (ivs, use);
4565 if (old_cp->cand != cand)
4566 continue;
4568 new_cp = NULL;
4570 if (data->consider_all_candidates)
4572 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4574 if (ci == cand->id)
4575 continue;
4577 cnd = iv_cand (data, ci);
4579 cp = get_use_iv_cost (data, use, cnd);
4580 if (!cp)
4581 continue;
4582 if (!iv_ca_has_deps (ivs, cp))
4583 continue;
4585 if (!cheaper_cost_pair (cp, new_cp))
4586 continue;
4588 new_cp = cp;
4591 else
4593 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4595 if (ci == cand->id)
4596 continue;
4598 cnd = iv_cand (data, ci);
4600 cp = get_use_iv_cost (data, use, cnd);
4601 if (!cp)
4602 continue;
4603 if (!iv_ca_has_deps (ivs, cp))
4604 continue;
4606 if (!cheaper_cost_pair (cp, new_cp))
4607 continue;
4609 new_cp = cp;
4613 if (!new_cp)
4615 iv_ca_delta_free (delta);
4616 return infinite_cost;
4619 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4622 iv_ca_delta_commit (data, ivs, *delta, true);
4623 cost = iv_ca_cost (ivs);
4624 iv_ca_delta_commit (data, ivs, *delta, false);
4626 return cost;
4629 /* Try optimizing the set of candidates IVS by removing candidates different
4630 from to EXCEPT_CAND from it. Return cost of the new set, and store
4631 differences in DELTA. */
4633 static comp_cost
4634 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4635 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4637 bitmap_iterator bi;
4638 struct iv_ca_delta *act_delta, *best_delta;
4639 unsigned i;
4640 comp_cost best_cost, acost;
4641 struct iv_cand *cand;
4643 best_delta = NULL;
4644 best_cost = iv_ca_cost (ivs);
4646 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4648 cand = iv_cand (data, i);
4650 if (cand == except_cand)
4651 continue;
4653 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4655 if (compare_costs (acost, best_cost) < 0)
4657 best_cost = acost;
4658 iv_ca_delta_free (&best_delta);
4659 best_delta = act_delta;
4661 else
4662 iv_ca_delta_free (&act_delta);
4665 if (!best_delta)
4667 *delta = NULL;
4668 return best_cost;
4671 /* Recurse to possibly remove other unnecessary ivs. */
4672 iv_ca_delta_commit (data, ivs, best_delta, true);
4673 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4674 iv_ca_delta_commit (data, ivs, best_delta, false);
4675 *delta = iv_ca_delta_join (best_delta, *delta);
4676 return best_cost;
4679 /* Tries to extend the sets IVS in the best possible way in order
4680 to express the USE. */
4682 static bool
4683 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4684 struct iv_use *use)
4686 comp_cost best_cost, act_cost;
4687 unsigned i;
4688 bitmap_iterator bi;
4689 struct iv_cand *cand;
4690 struct iv_ca_delta *best_delta = NULL, *act_delta;
4691 struct cost_pair *cp;
4693 iv_ca_add_use (data, ivs, use);
4694 best_cost = iv_ca_cost (ivs);
4696 cp = iv_ca_cand_for_use (ivs, use);
4697 if (cp)
4699 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4700 iv_ca_set_no_cp (data, ivs, use);
4703 /* First try important candidates not based on any memory object. Only if
4704 this fails, try the specific ones. Rationale -- in loops with many
4705 variables the best choice often is to use just one generic biv. If we
4706 added here many ivs specific to the uses, the optimization algorithm later
4707 would be likely to get stuck in a local minimum, thus causing us to create
4708 too many ivs. The approach from few ivs to more seems more likely to be
4709 successful -- starting from few ivs, replacing an expensive use by a
4710 specific iv should always be a win. */
4711 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4713 cand = iv_cand (data, i);
4715 if (cand->iv->base_object != NULL_TREE)
4716 continue;
4718 if (iv_ca_cand_used_p (ivs, cand))
4719 continue;
4721 cp = get_use_iv_cost (data, use, cand);
4722 if (!cp)
4723 continue;
4725 iv_ca_set_cp (data, ivs, use, cp);
4726 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4727 iv_ca_set_no_cp (data, ivs, use);
4728 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4730 if (compare_costs (act_cost, best_cost) < 0)
4732 best_cost = act_cost;
4734 iv_ca_delta_free (&best_delta);
4735 best_delta = act_delta;
4737 else
4738 iv_ca_delta_free (&act_delta);
4741 if (infinite_cost_p (best_cost))
4743 for (i = 0; i < use->n_map_members; i++)
4745 cp = use->cost_map + i;
4746 cand = cp->cand;
4747 if (!cand)
4748 continue;
4750 /* Already tried this. */
4751 if (cand->important && cand->iv->base_object == NULL_TREE)
4752 continue;
4754 if (iv_ca_cand_used_p (ivs, cand))
4755 continue;
4757 act_delta = NULL;
4758 iv_ca_set_cp (data, ivs, use, cp);
4759 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4760 iv_ca_set_no_cp (data, ivs, use);
4761 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4762 cp, act_delta);
4764 if (compare_costs (act_cost, best_cost) < 0)
4766 best_cost = act_cost;
4768 if (best_delta)
4769 iv_ca_delta_free (&best_delta);
4770 best_delta = act_delta;
4772 else
4773 iv_ca_delta_free (&act_delta);
4777 iv_ca_delta_commit (data, ivs, best_delta, true);
4778 iv_ca_delta_free (&best_delta);
4780 return !infinite_cost_p (best_cost);
4783 /* Finds an initial assignment of candidates to uses. */
4785 static struct iv_ca *
4786 get_initial_solution (struct ivopts_data *data)
4788 struct iv_ca *ivs = iv_ca_new (data);
4789 unsigned i;
4791 for (i = 0; i < n_iv_uses (data); i++)
4792 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4794 iv_ca_free (&ivs);
4795 return NULL;
4798 return ivs;
4801 /* Tries to improve set of induction variables IVS. */
4803 static bool
4804 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4806 unsigned i, n_ivs;
4807 comp_cost acost, best_cost = iv_ca_cost (ivs);
4808 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4809 struct iv_cand *cand;
4811 /* Try extending the set of induction variables by one. */
4812 for (i = 0; i < n_iv_cands (data); i++)
4814 cand = iv_cand (data, i);
4816 if (iv_ca_cand_used_p (ivs, cand))
4817 continue;
4819 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4820 if (!act_delta)
4821 continue;
4823 /* If we successfully added the candidate and the set is small enough,
4824 try optimizing it by removing other candidates. */
4825 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4827 iv_ca_delta_commit (data, ivs, act_delta, true);
4828 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4829 iv_ca_delta_commit (data, ivs, act_delta, false);
4830 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4833 if (compare_costs (acost, best_cost) < 0)
4835 best_cost = acost;
4836 iv_ca_delta_free (&best_delta);
4837 best_delta = act_delta;
4839 else
4840 iv_ca_delta_free (&act_delta);
4843 if (!best_delta)
4845 /* Try removing the candidates from the set instead. */
4846 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4848 /* Nothing more we can do. */
4849 if (!best_delta)
4850 return false;
4853 iv_ca_delta_commit (data, ivs, best_delta, true);
4854 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4855 iv_ca_delta_free (&best_delta);
4856 return true;
4859 /* Attempts to find the optimal set of induction variables. We do simple
4860 greedy heuristic -- we try to replace at most one candidate in the selected
4861 solution and remove the unused ivs while this improves the cost. */
4863 static struct iv_ca *
4864 find_optimal_iv_set (struct ivopts_data *data)
4866 unsigned i;
4867 struct iv_ca *set;
4868 struct iv_use *use;
4870 /* Get the initial solution. */
4871 set = get_initial_solution (data);
4872 if (!set)
4874 if (dump_file && (dump_flags & TDF_DETAILS))
4875 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4876 return NULL;
4879 if (dump_file && (dump_flags & TDF_DETAILS))
4881 fprintf (dump_file, "Initial set of candidates:\n");
4882 iv_ca_dump (data, dump_file, set);
4885 while (try_improve_iv_set (data, set))
4887 if (dump_file && (dump_flags & TDF_DETAILS))
4889 fprintf (dump_file, "Improved to:\n");
4890 iv_ca_dump (data, dump_file, set);
4894 if (dump_file && (dump_flags & TDF_DETAILS))
4896 comp_cost cost = iv_ca_cost (set);
4897 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4900 for (i = 0; i < n_iv_uses (data); i++)
4902 use = iv_use (data, i);
4903 use->selected = iv_ca_cand_for_use (set, use)->cand;
4906 return set;
4909 /* Creates a new induction variable corresponding to CAND. */
4911 static void
4912 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4914 block_stmt_iterator incr_pos;
4915 tree base;
4916 bool after = false;
4918 if (!cand->iv)
4919 return;
4921 switch (cand->pos)
4923 case IP_NORMAL:
4924 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4925 break;
4927 case IP_END:
4928 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4929 after = true;
4930 break;
4932 case IP_ORIGINAL:
4933 /* Mark that the iv is preserved. */
4934 name_info (data, cand->var_before)->preserve_biv = true;
4935 name_info (data, cand->var_after)->preserve_biv = true;
4937 /* Rewrite the increment so that it uses var_before directly. */
4938 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4940 return;
4943 gimple_add_tmp_var (cand->var_before);
4944 add_referenced_var (cand->var_before);
4946 base = unshare_expr (cand->iv->base);
4948 create_iv (base, unshare_expr (cand->iv->step),
4949 cand->var_before, data->current_loop,
4950 &incr_pos, after, &cand->var_before, &cand->var_after);
4953 /* Creates new induction variables described in SET. */
4955 static void
4956 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4958 unsigned i;
4959 struct iv_cand *cand;
4960 bitmap_iterator bi;
4962 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4964 cand = iv_cand (data, i);
4965 create_new_iv (data, cand);
4969 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4970 is true, remove also the ssa name defined by the statement. */
4972 static void
4973 remove_statement (tree stmt, bool including_defined_name)
4975 if (TREE_CODE (stmt) == PHI_NODE)
4977 remove_phi_node (stmt, NULL_TREE, including_defined_name);
4979 else
4981 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4983 bsi_remove (&bsi, true);
4984 release_defs (stmt);
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  Either replaces the defining statement (or phi)
   with an assignment computing the candidate-based value, or leaves the
   statement untouched when it already expresses the candidate.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt, ass;
  block_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
      gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      /* A negated step is expressed as subtraction of the negand.  */
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      op = GIMPLE_STMT_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
	  || TREE_CODE (op) == MINUS_EXPR
	  || TREE_CODE (op) == POINTER_PLUS_EXPR)
	{
	  /* Extract the non-iv operand of the increment; MINUS is not
	     commutative, so var_before may only be its first operand.  */
	  if (TREE_OPERAND (op, 0) == cand->var_before)
	    op = TREE_OPERAND (op, 1);
	  else if (TREE_CODE (op) != MINUS_EXPR
		   && TREE_OPERAND (op, 1) == cand->var_before)
	    op = TREE_OPERAND (op, 0);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      /* Constants and the step expression itself are self-contained, so
	 the statement can stay as it is.  */
      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  /* Locate the defined value and a place to insert its replacement.  */
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = bsi_after_labels (bb_for_stmt (use->stmt));
      break;

    case GIMPLE_MODIFY_STMT:
      tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
				 true, BSI_SAME_STMT);

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      /* A phi node cannot hold the computation directly; replace it by an
	 assignment to the same ssa name and redirect the def link.  */
      ass = build_gimple_modify_stmt (tgt, op);
      bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
}
5095 /* Replaces ssa name in index IDX by its basic variable. Callback for
5096 for_each_index. */
5098 static bool
5099 idx_remove_ssa_names (tree base, tree *idx,
5100 void *data ATTRIBUTE_UNUSED)
5102 tree *op;
5104 if (TREE_CODE (*idx) == SSA_NAME)
5105 *idx = SSA_NAME_VAR (*idx);
5107 if (TREE_CODE (base) == ARRAY_REF)
5109 op = &TREE_OPERAND (base, 2);
5110 if (*op
5111 && TREE_CODE (*op) == SSA_NAME)
5112 *op = SSA_NAME_VAR (*op);
5113 op = &TREE_OPERAND (base, 3);
5114 if (*op
5115 && TREE_CODE (*op) == SSA_NAME)
5116 *op = SSA_NAME_VAR (*op);
5119 return true;
5122 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5124 static tree
5125 unshare_and_remove_ssa_names (tree ref)
5127 ref = unshare_expr (ref);
5128 for_each_index (&ref, idx_remove_ssa_names, NULL);
5130 return ref;
/* Extract the alias analysis info for the memory reference REF.  There are
   several ways how this information may be stored and what precisely is
   its semantics depending on the type of the reference, but there always is
   somewhere hidden one _DECL node that is used to determine the set of
   virtual operands for the reference.  The code below deciphers this jungle
   and extracts this single useful piece of information.  */

static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree aref = NULL_TREE, tag, sv;
  HOST_WIDE_INT offset, size, maxsize;

  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    {
      aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
      /* NOTE(review): this condition tests REF, the parameter, which does
	 not change inside the loop -- so when REF is non-NULL the body runs
	 at most once, and AREF/OFFSET/SIZE/MAXSIZE are otherwise unused
	 below.  If the intent was to stop once a base reference was found,
	 testing AREF would seem more natural; confirm against callers
	 before changing.  */
      if (ref)
	break;
    }

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* If the base is a dereference of a pointer, first check its name memory
	 tag.  If it does not have one, use its symbol memory tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
	return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
	{
	  tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
	  if (tag)
	    return tag;
	}

      var = SSA_NAME_VAR (var);
      tag = symbol_mem_tag (var);
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      /* Otherwise the base must be a declaration; prefer its symbol memory
	 tag, falling back to the declaration itself.  */
      if (!DECL_P (var))
	return NULL_TREE;

      tag = symbol_mem_tag (var);
      if (tag)
	return tag;

      return var;
    }
}
5190 /* Copies the reference information from OLD_REF to NEW_REF. */
5192 static void
5193 copy_ref_info (tree new_ref, tree old_ref)
5195 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5196 copy_mem_ref_info (new_ref, old_ref);
5197 else
5199 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5200 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5204 /* Rewrites USE (address that is an iv) using candidate CAND. */
5206 static void
5207 rewrite_use_address (struct ivopts_data *data,
5208 struct iv_use *use, struct iv_cand *cand)
5210 aff_tree aff;
5211 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5212 tree ref;
5213 bool ok;
5215 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5216 gcc_assert (ok);
5217 unshare_aff_combination (&aff);
5219 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5220 copy_ref_info (ref, *use->op_p);
5221 *use->op_p = ref;
5224 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5225 candidate CAND. */
5227 static void
5228 rewrite_use_compare (struct ivopts_data *data,
5229 struct iv_use *use, struct iv_cand *cand)
5231 tree comp, *var_p, op, bound;
5232 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5233 enum tree_code compare;
5234 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5235 bool ok;
5237 bound = cp->value;
5238 if (bound)
5240 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5241 tree var_type = TREE_TYPE (var);
5243 compare = iv_elimination_compare (data, use);
5244 bound = unshare_expr (fold_convert (var_type, bound));
5245 op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5246 true, BSI_SAME_STMT);
5248 *use->op_p = build2 (compare, boolean_type_node, var, op);
5249 return;
5252 /* The induction variable elimination failed; just express the original
5253 giv. */
5254 comp = get_computation (data->current_loop, use, cand);
5255 gcc_assert (comp != NULL_TREE);
5257 ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5258 gcc_assert (ok);
5260 *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5261 true, BSI_SAME_STMT);
5264 /* Rewrites USE using candidate CAND. */
5266 static void
5267 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5269 push_stmt_changes (&use->stmt);
5271 switch (use->type)
5273 case USE_NONLINEAR_EXPR:
5274 rewrite_use_nonlinear_expr (data, use, cand);
5275 break;
5277 case USE_ADDRESS:
5278 rewrite_use_address (data, use, cand);
5279 break;
5281 case USE_COMPARE:
5282 rewrite_use_compare (data, use, cand);
5283 break;
5285 default:
5286 gcc_unreachable ();
5289 pop_stmt_changes (&use->stmt);
5292 /* Rewrite the uses using the selected induction variables. */
5294 static void
5295 rewrite_uses (struct ivopts_data *data)
5297 unsigned i;
5298 struct iv_cand *cand;
5299 struct iv_use *use;
5301 for (i = 0; i < n_iv_uses (data); i++)
5303 use = iv_use (data, i);
5304 cand = use->selected;
5305 gcc_assert (cand);
5307 rewrite_use (data, use, cand);
5311 /* Removes the ivs that are not used after rewriting. */
5313 static void
5314 remove_unused_ivs (struct ivopts_data *data)
5316 unsigned j;
5317 bitmap_iterator bi;
5319 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5321 struct version_info *info;
5323 info = ver_info (data, j);
5324 if (info->iv
5325 && !integer_zerop (info->iv->step)
5326 && !info->inv_id
5327 && !info->iv->have_use_for
5328 && !info->preserve_biv)
5329 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5333 /* Frees data allocated by the optimization of a single loop. */
5335 static void
5336 free_loop_data (struct ivopts_data *data)
5338 unsigned i, j;
5339 bitmap_iterator bi;
5340 tree obj;
5342 if (data->niters)
5344 pointer_map_destroy (data->niters);
5345 data->niters = NULL;
5348 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5350 struct version_info *info;
5352 info = ver_info (data, i);
5353 if (info->iv)
5354 free (info->iv);
5355 info->iv = NULL;
5356 info->has_nonlin_use = false;
5357 info->preserve_biv = false;
5358 info->inv_id = 0;
5360 bitmap_clear (data->relevant);
5361 bitmap_clear (data->important_candidates);
5363 for (i = 0; i < n_iv_uses (data); i++)
5365 struct iv_use *use = iv_use (data, i);
5367 free (use->iv);
5368 BITMAP_FREE (use->related_cands);
5369 for (j = 0; j < use->n_map_members; j++)
5370 if (use->cost_map[j].depends_on)
5371 BITMAP_FREE (use->cost_map[j].depends_on);
5372 free (use->cost_map);
5373 free (use);
5375 VEC_truncate (iv_use_p, data->iv_uses, 0);
5377 for (i = 0; i < n_iv_cands (data); i++)
5379 struct iv_cand *cand = iv_cand (data, i);
5381 if (cand->iv)
5382 free (cand->iv);
5383 if (cand->depends_on)
5384 BITMAP_FREE (cand->depends_on);
5385 free (cand);
5387 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5389 if (data->version_info_size < num_ssa_names)
5391 data->version_info_size = 2 * num_ssa_names;
5392 free (data->version_info);
5393 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5396 data->max_inv_id = 0;
5398 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5399 SET_DECL_RTL (obj, NULL_RTX);
5401 VEC_truncate (tree, decl_rtl_to_reset, 0);
5404 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5405 loop tree. */
5407 static void
5408 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5410 free_loop_data (data);
5411 free (data->version_info);
5412 BITMAP_FREE (data->relevant);
5413 BITMAP_FREE (data->important_candidates);
5415 VEC_free (tree, heap, decl_rtl_to_reset);
5416 VEC_free (iv_use_p, heap, data->iv_uses);
5417 VEC_free (iv_cand_p, heap, data->iv_candidates);
5420 /* Optimizes the LOOP. Returns true if anything changed. */
5422 static bool
5423 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5425 bool changed = false;
5426 struct iv_ca *iv_ca;
5427 edge exit;
5429 gcc_assert (!data->niters);
5430 data->current_loop = loop;
5432 if (dump_file && (dump_flags & TDF_DETAILS))
5434 fprintf (dump_file, "Processing loop %d\n", loop->num);
5436 exit = single_dom_exit (loop);
5437 if (exit)
5439 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5440 exit->src->index, exit->dest->index);
5441 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5442 fprintf (dump_file, "\n");
5445 fprintf (dump_file, "\n");
5448 /* For each ssa name determines whether it behaves as an induction variable
5449 in some loop. */
5450 if (!find_induction_variables (data))
5451 goto finish;
5453 /* Finds interesting uses (item 1). */
5454 find_interesting_uses (data);
5455 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5456 goto finish;
5458 /* Finds candidates for the induction variables (item 2). */
5459 find_iv_candidates (data);
5461 /* Calculates the costs (item 3, part 1). */
5462 determine_use_iv_costs (data);
5463 determine_iv_costs (data);
5464 determine_set_costs (data);
5466 /* Find the optimal set of induction variables (item 3, part 2). */
5467 iv_ca = find_optimal_iv_set (data);
5468 if (!iv_ca)
5469 goto finish;
5470 changed = true;
5472 /* Create the new induction variables (item 4, part 1). */
5473 create_new_ivs (data, iv_ca);
5474 iv_ca_free (&iv_ca);
5476 /* Rewrite the uses (item 4, part 2). */
5477 rewrite_uses (data);
5479 /* Remove the ivs that are unused after rewriting. */
5480 remove_unused_ivs (data);
5482 /* We have changed the structure of induction variables; it might happen
5483 that definitions in the scev database refer to some of them that were
5484 eliminated. */
5485 scev_reset ();
5487 finish:
5488 free_loop_data (data);
5490 return changed;
5493 /* Main entry point. Optimizes induction variables in loops. */
5495 void
5496 tree_ssa_iv_optimize (void)
5498 struct loop *loop;
5499 struct ivopts_data data;
5500 loop_iterator li;
5502 tree_ssa_iv_optimize_init (&data);
5504 /* Optimize the loops starting with the innermost ones. */
5505 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5507 if (dump_file && (dump_flags & TDF_DETAILS))
5508 flow_loop_dump (loop, dump_file, NULL, 1);
5510 tree_ssa_iv_optimize_loop (&data, loop);
5513 tree_ssa_iv_optimize_finalize (&data);